// Converts saved Edinburgh Festival Fringe search-result pages
// ("Search _ Edinburgh Festival Fringe<N>.html") into one CSV file,
// "AllShows2026.csv", with one row per show.
//
// NOTE(review): this copy of the file had been through a step that removed
// "<...>" sequences and collapsed line breaks.  Everything that could be
// restored with confidence has been (header names that the code needs, the
// <char> template arguments, line structure).  Places where text was
// irrecoverably lost are marked NOTE(review) below and MUST be restored from
// the original source before the output can be trusted.

// NOTE(review): the original header names were stripped; these are the
// headers the visible code requires.
#include <cctype>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>

using namespace std;

// Availability codes written to the per-day CSV columns.
constexpr int UNAVAILABLE = 0;
constexpr int SOLD_OUT = 1;
constexpr int AVAILABLE = 2;
constexpr int TWO_FOR_ONE = 3;
constexpr int PREVIEW = 4;
constexpr int FREE_TICKETED = 5;

// True when index lies inside text and the character there is a decimal digit.
// The cast keeps isdigit() defined for bytes above 127.
static bool isDigitAt(const string &text, size_t index)
{
  return index < text.length()
         && isdigit(static_cast<unsigned char>(text[index])) != 0;
} // isDigitAt()


// Returns text[begin, end).  An end of string::npos means "to the end of
// text".  Yields an empty string instead of throwing when begin is out of
// range.
static string textBetween(const string &text, size_t begin, size_t end)
{
  if (begin > text.length() || end < begin)
    return string();

  return text.substr(begin, end - begin);
} // textBetween()


// stoi() that yields 0 instead of throwing when text has no leading number.
static int leadingIntOrZero(const string &text)
{
  try
  {
    return stoi(text);
  }
  catch (const exception &)
  {
    return 0;
  }
} // leadingIntOrZero()


// Replaces line feeds and carriage returns with spaces so that a field stays
// on a single CSV row.
static void flattenLineBreaks(string &text)
{
  for (char &c : text)
    if (c == '\n' || c == '\r')
      c = ' ';
} // flattenLineBreaks()


// Loads page number fileNumber into buffer and replaces every comma with a
// semicolon so page text cannot break the CSV field structure.
// buffer is left empty when the file cannot be opened.
void readFile (int fileNumber, string &buffer)
{
  const string fileName = "Search _ Edinburgh Festival Fringe"
    + to_string(fileNumber) + ".html";
  ifstream inf(fileName.c_str());

  buffer.clear(); // never leave the previous page behind on failure

  if (!inf)
  {
    cerr << "Cannot open " << fileName << endl;
    return;
  }

  inf.seekg(0, ios::end);
  const streamoff fileSize = inf.tellg();

  if (fileSize > 0) // tellg() reports -1 on failure; reserve(-1) would throw
    buffer.reserve(static_cast<size_t>(fileSize));

  inf.seekg(0, ios::beg);
  buffer.assign(istreambuf_iterator<char>(inf), istreambuf_iterator<char>());
  inf.close();

  for (char &c : buffer)
    if (c == ',')
      c = ';';
} // readFile()


// Writes the genre columns for the show starting at pos and returns the
// position reached.
//
// NOTE(review): only the first statement of the original body survived.  The
// search literal lost its contents (the "+ 6" suggests a six-character tag),
// and everything after it, up to the middle of setTitle(), is gone.  The code
// after the first statement is a PLACEHOLDER: it copies the text up to the
// next '<' into the first genre column and leaves the second one empty so the
// row still lines up with the "Genre1,Genre2" header written in main().
size_t setGenres(string &buffer, size_t pos, ofstream &outf)
{
  size_t pos2 = buffer.find("", pos) + 6; // begin of genres

  pos = buffer.find("<", pos2);
  outf << textBetween(buffer, pos2, pos) << ",,";
  return pos;
} // setGenres()


// Writes the show title followed by a comma and returns the position of the
// '<' that ends the title text.
//
// NOTE(review): the function header and whatever statement(s) located the
// title element were lost.  The signature is taken from the call in
// readShow(); the declaration of pos2 is rebuilt on the pattern used by
// setPresenter() and setDescription().
size_t setTitle(string &buffer, size_t pos, ofstream &outf)
{
  size_t pos2 = buffer.find(">", pos) + 1; // beginning of title
  pos = buffer.find("<", pos2);
  //cout << "Title: " << buffer.substr(pos2, pos - pos2) << endl;
  outf << textBetween(buffer, pos2, pos) << ',';
  return pos;
} // setTitle()


// Extracts the presenter text that follows the next "eventPresenter__" marker
// into presenter, echoes it to cout, then replaces line breaks in it with
// spaces.  Nothing is written to outf here; readShow() writes presenter at the
// end of the row.  Returns the position of the '<' that ends the text.
size_t setPresenter(string &buffer, size_t pos, ofstream &outf,
  string &presenter)
{
  pos = buffer.find("eventPresenter__", pos) + 1;
  size_t pos2 = buffer.find(">", pos) + 1; // beginning of presenter
  pos = buffer.find("<", pos2);
  presenter = textBetween(buffer, pos2, pos);
  cout << presenter << endl;
  flattenLineBreaks(presenter);
  return pos;
} // setPresenter()


// Writes the first and last day-of-month of the run as two CSV fields and
// returns the position reached.  When only one number is present it is written
// in both fields.  When the date text does not start with a digit, two empty
// fields are written so the later columns stay aligned with the header.
size_t setDates(string &buffer, size_t pos, ofstream &outf)
{
  int startDate = 0;
  pos = buffer.find("eventDate__", pos) + 11;

  // NOTE(review): search literal lost in extraction; the "+ 6" suggests a
  // six-character tag -- restore from the original source.
  pos = buffer.find("", pos) + 6; // beginning of actual first date

  if (!isDigitAt(buffer, pos))
  {
    outf << ",,"; // keep the First and Last columns in place
    return pos;
  }

  while (isDigitAt(buffer, pos))
  {
    startDate = startDate * 10 + (buffer[pos] - '0');
    outf << buffer[pos++]; // write out first date to file
  }

  outf << ',';

  const size_t pos2 = pos;

  while (!isDigitAt(buffer, pos) && pos - pos2 < 6) // skip spaces and hyphen
    pos++;

  if (pos - pos2 >= 6) // no second number close by: single-day run
    outf << startDate << ',';
  else // found last date
  {
    while (isDigitAt(buffer, pos))
      outf << buffer[pos++]; // write out last date to file

    outf << ',';
  } // found last date

  return pos;
} // setDates()


// Writes the start-time text as one CSV field and stores the start time in
// minutes after midnight in startTime.  startTime is set to 0 when the word
// "Various" is found at or before the end of the time text.  Returns the
// position of the '<' that ends the time text.
size_t setStartTime(string &buffer, size_t pos, ofstream &outf,
  size_t &startTime)
{
  int hours = 0, minutes = 0;
  pos = buffer.find("Time__", pos);
  size_t pos2 = buffer.find(">", pos) + 1; // beginning of time
  pos = buffer.find("<", pos2);
  outf << textBetween(buffer, pos2, pos) << ',';
  const size_t pos3 = buffer.find("Various", pos2);

  if (pos3 > pos || pos3 == string::npos) // not Various times
  {
    while (isDigitAt(buffer, pos2))
      hours = hours * 10 + (buffer[pos2++] - '0');

    pos2++; // get past colon

    while (isDigitAt(buffer, pos2))
      minutes = minutes * 10 + (buffer[pos2++] - '0');

    startTime = hours * 60 + minutes;
  } // if not Various times
  else // Various times
    startTime = 0;

  return pos;
} // setStartTime()


// Parses the duration text ("N hour(s) M ..." or "M ..."), then writes the
// duration in minutes and the end time as h:mm (wrapping at 24 hours), each
// followed by a comma.  Text with no leading number counts as 0 instead of
// aborting the run.  Returns the position of the '<' that ends the text.
size_t setDurationAndEndTime(string &buffer, size_t pos, ofstream &outf,
  size_t startTime)
{
  size_t pos2, hours = 0, minutes = 0, endTime;
  pos = buffer.find("Duration__", pos);
  pos2 = buffer.find(">", pos) + 1; // beginning of duration
  pos = buffer.find("<", pos2); // end of duration
  string durationStr = textBetween(buffer, pos2, pos);
  pos2 = durationStr.find("hour");

  if (pos2 != string::npos) // if hour
  {
    hours = leadingIntOrZero(durationStr);
    // Drops the text up to the final 'r' of "hour"; the search for a space
    // below then skips the rest of the word.
    durationStr = durationStr.substr(pos2 + 3);
    pos2 = durationStr.find(' ');

    if (pos2 != string::npos) // if a space after hour
    {
      durationStr = durationStr.substr(pos2);
      minutes = leadingIntOrZero(durationStr);
    } // if a space after hour
  } // if an hour
  else // no hour
    minutes = leadingIntOrZero(durationStr);

  const int duration = static_cast<int>(hours * 60 + minutes);
  endTime = startTime + duration;
  hours = (endTime / 60) % 24;
  minutes = endTime % 60;
  outf << duration << ',' << hours << ':';

  if (minutes < 10)
    outf << '0';

  outf << minutes << ',';
  return pos;
} // setDurationAndEndTime()


// Extracts the description text that follows the next "eventDescription__"
// marker into description and replaces line breaks in it with spaces.
// Nothing is written to outf here; readShow() writes description at the end
// of the row.  Returns the position of the '<' that ends the text.
size_t setDescription(string &buffer, size_t pos, ofstream &outf,
  string &description)
{
  pos = buffer.find("eventDescription__", pos) + 26;
  size_t pos2 = buffer.find(">", pos) + 1; // beginning of description
  pos = buffer.find("<", pos2);
  description = textBetween(buffer, pos2, pos);
  flattenLineBreaks(description);
  return pos;
} // setDescription()


// Writes the venue column and returns the position reached.
//
// NOTE(review): only the first two statements survived, and the "div>"
// literal may itself be truncated.  The rest is a PLACEHOLDER that copies the
// text up to the next '<', following the pattern of the other fields.
size_t setLocation(string &buffer, size_t pos, ofstream &outf)
{
  pos = buffer.find("eventLocation__", pos) + 30;
  pos = buffer.find("div>", pos) + 6; // start of location

  const size_t locationEnd = buffer.find("<", pos);
  outf << textBetween(buffer, pos, locationEnd) << ',';
  return locationEnd;
} // setLocation()


// Writes one availability code per day column and returns the position
// reached.
//
// NOTE(review): the function header, its declarations, the loop header and
// the code that located each day's cell were lost.  The signature is taken
// from the call in readShow().  The loop bounds (2..31) are taken from the
// day columns of the header row written in main().  The "pos2 = pos" line is
// a PLACEHOLDER, and the search literal on the next line lost its contents.
// Until both are restored, every day is reported as UNAVAILABLE.
size_t setAvailability(string &buffer, size_t pos, ofstream &outf)
{
  string numberStr;
  int availability;

  for (int day = 2; day <= 31; day++)
  {
    const size_t pos2 = pos;
    pos = buffer.find("", pos2); // after the number
    numberStr = textBetween(buffer, pos2, pos);
    availability = UNAVAILABLE;

    if (numberStr.find("Preview)") != string::npos)
      availability = PREVIEW;
    else if (numberStr.find("TwoForOne") != string::npos)
      availability = TWO_FOR_ONE;
    else if (numberStr.find("SoldOut") != string::npos)
      availability = SOLD_OUT;
    else if (numberStr.find("Available") != string::npos)
      availability = AVAILABLE;
    else if (numberStr.find("FreeTicketed") != string::npos)
      availability = FREE_TICKETED;

    outf << availability << ',';
  } // for each day

  return pos;
} // setAvailability()


// Parses one show starting at pos, writes its CSV row to outf and returns the
// position reached in buffer.  Presenter and description are collected early
// but written last, matching the column order of the header row.
size_t readShow(string &buffer, size_t pos, ofstream &outf)
{
  string description, presenter;
  size_t startTime = 0;
  pos = setGenres(buffer, pos, outf);
  pos = setTitle(buffer, pos, outf);
  pos = setPresenter(buffer, pos, outf, presenter);
  pos = setDates(buffer, pos, outf);
  pos = setStartTime(buffer, pos, outf, startTime);
  pos = setDurationAndEndTime(buffer, pos, outf, startTime);
  pos = setDescription(buffer, pos, outf, description);
  pos = setLocation(buffer, pos, outf);
  pos = setAvailability(buffer, pos, outf);
  outf << presenter << ',' << description << '\n';
  return pos;
} // readShow()


// Writes the two header rows, then converts pages 1 through 75.
int main()
{
  string buffer;
  int fileNumber;
  size_t pos;
  ofstream outf("AllShows2026.csv");

  if (!outf)
  {
    cerr << "Cannot create AllShows2026.csv" << endl;
    return 1;
  }

  outf << "Genre1,Genre2,Title,First,Last,Start,Length,End,Venue,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,Presenter,Description\n";
  outf <<" , , , , , , , , ,S,M,T,W,T,F,S,S,M,T,W,T,F,S,S,M,T,W,T,F,S,S,M,T,W,T,F,S,S,M\n";

  for (fileNumber = 1; fileNumber < 76; fileNumber++)
  {
    cout << "File#: " << fileNumber << endl;
    readFile (fileNumber, buffer);
    pos = buffer.find("Genre:", 0);

    while (pos != string::npos)
    {
      const size_t showStart = pos;
      pos = readShow(buffer, pos, outf);

      // A marker that is not found makes "find(...) + k" wrap to a small
      // offset.  Without this check the loop would then re-parse earlier
      // shows forever.
      if (pos != string::npos && pos <= showStart)
      {
        cerr << "File#: " << fileNumber
          << ": parser did not advance; skipping rest of file" << endl;
        break;
      }

      pos = buffer.find("Genre:", pos);
    } // while more shows, which start with "Genre:"
  } // for fileNumber

  outf.close();
  return 0;
} // main()