#include #include #include #include #include #include #include #include #include #include #include #include "Constants.h" #include "StringInterner.h" #include "StringPtrUtils.h" #include "VideoInfo.h" #include "TopKVideoHolder.h" #include "HashtagInfo.h" #include "SoundInfo.h" #include "Utils.h" bool processHashtags(const std::string& filename, std::ofstream& outputFile) { std::ifstream inputFile(filename); if (!inputFile.is_open()) { std::cerr << "Cannot open input file: " << filename << std::endl; return false; } StringInterner interner; std::unordered_map hashtagData; hashtagData.reserve(250000); std::string line; int inputOrderCounter = 0; VideoInfo currentVideo; std::string text_buffer; while (std::getline(inputFile, line)) { if (line.length() < 10) continue; inputOrderCounter++; if (parseLineForHashtags(line, inputOrderCounter, interner, currentVideo, text_buffer)) { if (!text_buffer.empty()) { extractHashtags(text_buffer, hashtagData, interner, currentVideo); } } } inputFile.close(); std::priority_queue, CompareHashtagPtrForHeap> top20Hashtags; typedef std::unordered_map HashtagMapType; for (HashtagMapType::iterator it = hashtagData.begin(); it != hashtagData.end(); ++it) { HashtagInfo* currentHashtagPtr = &(it->second); if (top20Hashtags.size() < TOP_N_OUTPUT) { top20Hashtags.push(currentHashtagPtr); } else { const HashtagInfo* topPtr = top20Hashtags.top(); bool is_better = CompareHashtagPtr()(currentHashtagPtr, topPtr); if (is_better) { top20Hashtags.pop(); top20Hashtags.push(currentHashtagPtr); } } } std::vector finalTop20; finalTop20.reserve(top20Hashtags.size()); while (!top20Hashtags.empty()) { finalTop20.push_back(top20Hashtags.top()); top20Hashtags.pop(); } std::sort(finalTop20.begin(), finalTop20.end(), CompareHashtagPtr()); outputFile << "trending hashtags:\n\n"; for (size_t i = 0; i < finalTop20.size(); ++i) { HashtagInfo* currentHashtag = finalTop20[i]; outputFile << "========================\n"; outputFile << "#" << (currentHashtag->name ? *currentHashtag->name : "null") << "\n"; outputFile << "used " << currentHashtag->usageCount << " times\n"; outputFile << currentHashtag->totalViews << " views\n\n"; extractSortAndPrintTop3Videos(outputFile, currentHashtag->topVideos); outputFile << "========================"; if (i < finalTop20.size() - 1) { outputFile << "\n"; } else { outputFile << "\n"; } } return true; } bool processSounds(const std::string& filename, std::ofstream& outputFile) { std::ifstream inputFile(filename); if (!inputFile.is_open()) { std::cerr << "Cannot open input file: " << filename << std::endl; return false; } StringInterner interner; std::unordered_map soundData; soundData.reserve(50000); std::string line; int inputOrderCounter = 0; VideoInfo currentVideo; const std::string* musicIdPtr = nullptr; const std::string* musicNamePtr = nullptr; const std::string* musicAuthorPtr = nullptr; while (std::getline(inputFile, line)) { if (line.length() < 10) continue; inputOrderCounter++; musicIdPtr = nullptr; musicNamePtr = nullptr; musicAuthorPtr = nullptr; if (parseLineForSounds(line, inputOrderCounter, interner, currentVideo, musicIdPtr, musicNamePtr, musicAuthorPtr)) { if (musicIdPtr == nullptr || musicIdPtr->empty()) { continue; } typedef std::unordered_map SoundMapType; SoundMapType::iterator it = soundData.find(musicIdPtr); if (it == soundData.end()) { std::pair emplace_result = soundData.emplace(musicIdPtr, SoundInfo(musicIdPtr, musicNamePtr, musicAuthorPtr)); it = emplace_result.first; } it->second.totalViews += currentVideo.playCount; if (it->second.musicName->empty() && !musicNamePtr->empty()) { it->second.musicName = musicNamePtr; } if (it->second.musicAuthor->empty() && !musicAuthorPtr->empty()) { it->second.musicAuthor = musicAuthorPtr; } it->second.topVideos.add(currentVideo); } } inputFile.close(); std::priority_queue, CompareSoundPtrForHeap> top20Sounds; typedef std::unordered_map SoundMapType; for (SoundMapType::iterator it = soundData.begin(); it != soundData.end(); ++it) { SoundInfo* currentSoundPtr = &(it->second); if (top20Sounds.size() < TOP_N_OUTPUT) { top20Sounds.push(currentSoundPtr); } else { const SoundInfo* topPtr = top20Sounds.top(); bool is_better = CompareSoundPtr()(currentSoundPtr, topPtr); if (is_better) { top20Sounds.pop(); top20Sounds.push(currentSoundPtr); } } } std::vector finalTop20; finalTop20.reserve(top20Sounds.size()); while (!top20Sounds.empty()) { finalTop20.push_back(top20Sounds.top()); top20Sounds.pop(); } std::sort(finalTop20.begin(), finalTop20.end(), CompareSoundPtr()); outputFile << "trending sounds:\n\n"; for (size_t i = 0; i < finalTop20.size(); ++i) { SoundInfo* currentSound = finalTop20[i]; outputFile << "========================\n"; if (currentSound->musicName == nullptr || currentSound->musicName->empty()) { outputFile << "\n"; } else { outputFile << *currentSound->musicName << "\n"; } outputFile << currentSound->totalViews << " views\n"; if (currentSound->musicAuthor == nullptr || currentSound->musicAuthor->empty()) { outputFile << "\n"; } else { outputFile << *currentSound->musicAuthor << "\n"; } outputFile << "music id: " << (currentSound->musicId && !currentSound->musicId->empty() ? *currentSound->musicId : "null") << "\n"; if (!currentSound->topVideos.empty()) { outputFile << "\n"; } extractSortAndPrintTop3Videos(outputFile, currentSound->topVideos); outputFile << "========================"; if (i < finalTop20.size() - 1) { outputFile << "\n"; } else { outputFile << "\n"; } } return true; } int main(int argc, char* argv[]) { if (argc != 4) { std::cerr << "Usage: nytrends.exe \n"; std::cerr << "Mode can be 'hashtag' or 'sound'\n"; return 1; } std::string inputFileName = argv[1]; std::string outputFileName = argv[2]; std::string mode = argv[3]; std::ofstream outputFile(outputFileName); if (!outputFile.is_open()) { std::cerr << "Error: Cannot open output file " << outputFileName << std::endl; return 1; } std::ios_base::sync_with_stdio(false); bool success = false; if (mode == "hashtag") { success = processHashtags(inputFileName, outputFile); } else if (mode == "sound") { success = processSounds(inputFileName, outputFile); } else { std::cerr << "Error: Invalid mode '" << mode << "'. Must be 'hashtag' or 'sound'." << std::endl; outputFile.close(); return 1; } outputFile.close(); return success ? 0 : 1; }