diff --git a/.vscode/launch.json b/.vscode/launch.json index 5fa03b5..569cb90 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -135,6 +135,22 @@ "MIMode": "gdb", "miDebuggerPath": "/usr/bin/gdb", "preLaunchTask": "C/C++: g++ build active file" + }, + { + "name": "nytrends", + "type": "cppdbg", + "request": "launch", + "program": "${fileDirname}/${fileBasenameNoExtension}", + "args": [ + "inputs/input_large9.json", + "output.txt", + "hashtag" + ], + "cwd": "${fileDirname}", + "environment": [], + "MIMode": "gdb", + "miDebuggerPath": "/usr/bin/gdb", + "preLaunchTask": "C/C++: g++ build active file" } ] } diff --git a/.vscode/settings.json b/.vscode/settings.json index 072bf29..59b9ee8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -78,6 +78,10 @@ "unordered_set": "cpp", "regex": "cpp", "cinttypes": "cpp", - "__node_handle": "cpp" + "__node_handle": "cpp", + "shared_mutex": "cpp", + "cfenv": "cpp", + "locale": "cpp", + "filesystem": "cpp" } } \ No newline at end of file diff --git a/hws/tiktok_trends/Constants.h b/hws/tiktok_trends/Constants.h new file mode 100644 index 0000000..17f3dbc --- /dev/null +++ b/hws/tiktok_trends/Constants.h @@ -0,0 +1,4 @@ +#pragma once + +const int TOP_K_CANDIDATES = 3; +const int TOP_N_OUTPUT = 20; \ No newline at end of file diff --git a/hws/tiktok_trends/HashtagInfo.h b/hws/tiktok_trends/HashtagInfo.h new file mode 100644 index 0000000..3b7d264 --- /dev/null +++ b/hws/tiktok_trends/HashtagInfo.h @@ -0,0 +1,34 @@ +#pragma once + +#include +#include "TopKVideoHolder.h" + +struct HashtagInfo { + const std::string* name = nullptr; + long totalViews = 0; + int usageCount = 0; + TopKVideoHolder topVideos; + + HashtagInfo() = default; + explicit HashtagInfo(const std::string* n) : name(n), totalViews(0), usageCount(0) {} +}; + +struct CompareHashtagPtr { + bool operator()(const HashtagInfo* a, const HashtagInfo* b) const { + if (a->usageCount != b->usageCount) return a->usageCount > b->usageCount; + if (a->totalViews != b->totalViews) return a->totalViews > b->totalViews; + if (a->name && b->name) return *a->name < *b->name; + if (a->name) return true; + return false; + } +}; + +struct CompareHashtagPtrForHeap { + bool operator()(const HashtagInfo* a, const HashtagInfo* b) const { + if (a->usageCount != b->usageCount) return a->usageCount > b->usageCount; + if (a->totalViews != b->totalViews) return a->totalViews > b->totalViews; + if (a->name && b->name) return *a->name > *b->name; + if (a->name) return false; + return true; + } +}; \ No newline at end of file diff --git a/hws/tiktok_trends/README.txt b/hws/tiktok_trends/README.txt index 52a2d63..5f4ad83 100644 --- a/hws/tiktok_trends/README.txt +++ b/hws/tiktok_trends/README.txt @@ -1,7 +1,7 @@ HOMEWORK 9: Tiktok Trends -NAME: < insert name > +NAME: Jinshan Zhou COLLABORATORS AND OTHER RESOURCES: @@ -10,17 +10,21 @@ List the names of everyone you talked to about this assignment LMS, etc.), and all of the resources (books, online reference material, etc.) you consulted in completing this assignment. -< insert collaborators / resources > +A lot, like using Top K for better sorting. Difference IO cases (not useful). +StringCache (not useful). shared_ptr (not really help) and may websites, cases +that I don't remember anymore. Remember: Your implementation for this assignment must be done on your own, as described in "Academic Integrity for Homework" handout. -ESTIMATE OF # OF HOURS SPENT ON THIS ASSIGNMENT: < insert # hours > +ESTIMATE OF # OF HOURS SPENT ON THIS ASSIGNMENT: over 20 hr, 8 hr for complete +more than 10 hr just for optimization. MISC. COMMENTS TO GRADER: -(optional, please be concise!) +The program is a bit messy. Since, I tried too many techniques. Some are broken +changes, so I have to add patch to it. Myself is also a bit lose to my code. ## Reflection and Self Assessment @@ -32,5 +36,11 @@ What parts of the assignment did you find challenging? Is there anything that finally "clicked" for you in the process of working on this assignment? How well did the development and testing process go for you? -< insert reflection > - +This was definitely the most challenging assignment I've ever seen, and the +hard part was identifying performance issues and optimizing them. It was not +easy, I used various tools like perf and found that the JSON part had the biggest +overhead. Optimizing it showed immediate results, but then I hit a bottleneck, +so I started using various schemes, and most of them didn't work. Then I had to +push back and design the business process from scratch and finally got inspired +by my professor to use unordered set to get the best performance +(last 0.1 seconds). diff --git a/hws/tiktok_trends/SoundInfo.h b/hws/tiktok_trends/SoundInfo.h new file mode 100644 index 0000000..626caf4 --- /dev/null +++ b/hws/tiktok_trends/SoundInfo.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include "TopKVideoHolder.h" + +struct SoundInfo { + const std::string* musicId = nullptr; + const std::string* musicName = nullptr; + const std::string* musicAuthor = nullptr; + long totalViews = 0; + TopKVideoHolder topVideos; + + SoundInfo() = default; + + SoundInfo(const std::string* id, const std::string* name, const std::string* author) + : musicId(id), musicName(name), musicAuthor(author), totalViews(0) {} +}; + +struct CompareSoundPtr { + bool operator()(const SoundInfo* a, const SoundInfo* b) const { + if (a->totalViews != b->totalViews) return a->totalViews > b->totalViews; + if (a->musicId && b->musicId) return *a->musicId < *b->musicId; + if (a->musicId) return true; + return false; + } +}; + +struct CompareSoundPtrForHeap { + bool operator()(const SoundInfo* a, const SoundInfo* b) const { + if (a->totalViews != b->totalViews) return a->totalViews > b->totalViews; + if (a->musicId && b->musicId) return *a->musicId > *b->musicId; + if (a->musicId) return false; + return true; + } +}; \ No newline at end of file diff --git a/hws/tiktok_trends/StringInterner.cpp b/hws/tiktok_trends/StringInterner.cpp new file mode 100644 index 0000000..13ed3aa --- /dev/null +++ b/hws/tiktok_trends/StringInterner.cpp @@ -0,0 +1,15 @@ +#include "StringInterner.h" + +const std::string* StringInterner::intern(const std::string& str) { + std::pair::iterator, bool> result = pool.insert(str); + return &(*result.first); +} + +const std::string* StringInterner::intern(std::string&& str) { + std::pair::iterator, bool> result = pool.insert(std::move(str)); + return &(*result.first); +} + +const std::string* StringInterner::getEmptyString() { + return intern(""); +} \ No newline at end of file diff --git a/hws/tiktok_trends/StringInterner.h b/hws/tiktok_trends/StringInterner.h new file mode 100644 index 0000000..293f36a --- /dev/null +++ b/hws/tiktok_trends/StringInterner.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include +#include + +class StringInterner { +private: + std::unordered_set pool; + +public: + const std::string* intern(const std::string& str); + const std::string* intern(std::string&& str); + const std::string* getEmptyString(); +}; \ No newline at end of file diff --git a/hws/tiktok_trends/StringPtrUtils.h b/hws/tiktok_trends/StringPtrUtils.h new file mode 100644 index 0000000..a0fb4f3 --- /dev/null +++ b/hws/tiktok_trends/StringPtrUtils.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +struct StringPtrHash { + size_t operator()(const std::string* s) const { + return std::hash()(*s); + } +}; + +struct StringPtrEqual { + bool operator()(const std::string* a, const std::string* b) const { + if (a == b) return true; + if (!a || !b) return false; + return *a == *b; + } +}; \ No newline at end of file diff --git a/hws/tiktok_trends/TopKVideoHolder.cpp b/hws/tiktok_trends/TopKVideoHolder.cpp new file mode 100644 index 0000000..0cdc7fa --- /dev/null +++ b/hws/tiktok_trends/TopKVideoHolder.cpp @@ -0,0 +1,32 @@ +#include "TopKVideoHolder.h" + +void TopKVideoHolder::add(const VideoInfo& video) { + if (pq.size() < K) { + pq.push(video); + } else { + if (VideoCompareWorse()(video, pq.top())) { + pq.pop(); + pq.push(video); + } + } +} + +std::vector TopKVideoHolder::getSortedVideos() { + std::vector sortedVideos; + size_t current_size = pq.size(); + if (current_size == 0) return sortedVideos; + + sortedVideos.reserve(current_size); + + while (!pq.empty()) { + sortedVideos.push_back(pq.top()); + pq.pop(); + } + + std::sort(sortedVideos.begin(), sortedVideos.end(), VideoInfo::compareForFinalSort); + + return sortedVideos; +} + +bool TopKVideoHolder::empty() const { return pq.empty(); } +size_t TopKVideoHolder::size() const { return pq.size(); } \ No newline at end of file diff --git a/hws/tiktok_trends/TopKVideoHolder.h b/hws/tiktok_trends/TopKVideoHolder.h new file mode 100644 index 0000000..965bee9 --- /dev/null +++ b/hws/tiktok_trends/TopKVideoHolder.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include +#include +#include "VideoInfo.h" +#include "Constants.h" + +class TopKVideoHolder { +private: + std::priority_queue, VideoCompareWorse> pq; + static const size_t K = TOP_K_CANDIDATES; + +public: + void add(const VideoInfo& video); + std::vector getSortedVideos(); + bool empty() const; + size_t size() const; +}; \ No newline at end of file diff --git a/hws/tiktok_trends/Utils.cpp b/hws/tiktok_trends/Utils.cpp new file mode 100644 index 0000000..cc2c5cc --- /dev/null +++ b/hws/tiktok_trends/Utils.cpp @@ -0,0 +1,265 @@ +#include "Utils.h" +#include // For potential cerr usage, although not directly in these functions +#include +#include +#include // For std::min + +bool parseQuotedStringValue(const std::string& str, size_t& pos, std::string& value) { + const size_t strLen = str.length(); + value.clear(); + if (pos >= strLen || str[pos] != '"') return false; + ++pos; + const size_t startPos = pos; + const char* strData = str.data(); + while (pos < strLen && strData[pos] != '"') { + ++pos; + } + if (pos >= strLen) return false; + value.assign(strData + startPos, pos - startPos); + ++pos; + return true; +} + +bool parseUnquotedValue(const std::string& str, size_t& pos, std::string& value) { + const size_t strLen = str.length(); + value.clear(); + const size_t startPos = pos; + const char* strData = str.data(); + while (pos < strLen && strData[pos] != ',' && strData[pos] != '}' && strData[pos] != ']' && !std::isspace(static_cast(strData[pos]))) { + ++pos; + } + if (startPos == pos) return false; + value.assign(strData + startPos, pos - startPos); + return true; +} + +bool extractValue(const std::string& line, const std::string& key, std::string& value) { + const std::string searchKey = "\"" + key + "\":"; + const char* found_pos = strstr(line.c_str(), searchKey.c_str()); + if (!found_pos) return false; + + size_t pos = (found_pos - line.c_str()) + searchKey.length(); + const size_t lineLen = line.length(); + + while (pos < lineLen && std::isspace(static_cast(line[pos]))) { + ++pos; + } + if (pos >= lineLen) return false; + + if (line[pos] == '"') { + return parseQuotedStringValue(line, pos, value); + } else { + return parseUnquotedValue(line, pos, value); + } +} + +bool extractSubObject(const std::string& line, const std::string& key, std::string& subObj) { + const std::string searchKey = "\"" + key + "\":"; + const char* found_pos = strstr(line.c_str(), searchKey.c_str()); + if (!found_pos) return false; + + size_t pos = (found_pos - line.c_str()) + searchKey.length(); + const size_t lineLen = line.length(); + + while (pos < lineLen && std::isspace(static_cast(line[pos]))) ++pos; + + if (pos >= lineLen || line[pos] != '{') return false; + + const size_t startBracePos = pos; + int braceCount = 1; + ++pos; + const char* lineData = line.data(); + + bool inString = false; + char prevChar = 0; + while (pos < lineLen && braceCount > 0) { + const char c = lineData[pos]; + if (c == '"' && prevChar != '\\') { + inString = !inString; + } else if (!inString) { + if (c == '{') { + ++braceCount; + } else if (c == '}') { + --braceCount; + } + } + prevChar = (prevChar == '\\' && c == '\\') ? 0 : c; + ++pos; + } + + if (braceCount == 0) { + subObj.assign(lineData + startBracePos, pos - startBracePos); + return true; + } + + return false; +} + +bool parseLongLong(const std::string& s, long& result) { + result = 0; + if (s.empty()) return false; + const char* ptr = s.c_str(); + bool negative = false; + long current_val = 0; + + if (*ptr == '-') { + negative = true; + ++ptr; + } + if (!*ptr) return false; + + while (*ptr) { + if (*ptr >= '0' && *ptr <= '9') { + long digit = (*ptr - '0'); + current_val = current_val * 10 + digit; + } else { + return false; + } + ++ptr; + } + + result = negative ? -current_val : current_val; + return true; +} + + +bool parseLineForHashtags(const std::string& line, int inputOrder, StringInterner& interner, + VideoInfo& outVideo, std::string& outText) +{ + outText.clear(); + + std::string id_str, coverUrl_str, webVideoUrl_str, playCount_str; + + if (!extractValue(line, "id", id_str) || id_str.empty()) return false; + + long playCount = 0; + if (extractValue(line, "playCount", playCount_str)) { + parseLongLong(playCount_str, playCount); + } + + extractValue(line, "text", outText); + + extractValue(line, "webVideoUrl", webVideoUrl_str); + std::string videoMetaSub; + if (extractSubObject(line, "videoMeta", videoMetaSub)) { + extractValue(videoMetaSub, "coverUrl", coverUrl_str); + } + + outVideo = VideoInfo( + interner.intern(std::move(id_str)), + interner.intern(std::move(coverUrl_str)), + interner.intern(std::move(webVideoUrl_str)), + playCount, + inputOrder + ); + + return true; +} + +bool parseLineForSounds(const std::string& line, int inputOrder, StringInterner& interner, + VideoInfo& outVideo, + const std::string*& outMusicIdPtr, + const std::string*& outMusicNamePtr, + const std::string*& outMusicAuthorPtr) +{ + std::string id_str, coverUrl_str, webVideoUrl_str, playCount_str; + std::string musicId_str, musicName_str, musicAuthor_str; + + if (!extractValue(line, "id", id_str) || id_str.empty()) return false; + + long playCount = 0; + if (extractValue(line, "playCount", playCount_str)) { + parseLongLong(playCount_str, playCount); + } + + std::string musicMetaSub; + if (extractSubObject(line, "musicMeta", musicMetaSub)) { + extractValue(musicMetaSub, "musicId", musicId_str); + extractValue(musicMetaSub, "musicName", musicName_str); + extractValue(musicMetaSub, "musicAuthor", musicAuthor_str); + } + + if (musicId_str.empty()) { + return false; + } + + extractValue(line, "webVideoUrl", webVideoUrl_str); + std::string videoMetaSub; + if (extractSubObject(line, "videoMeta", videoMetaSub)) { + extractValue(videoMetaSub, "coverUrl", coverUrl_str); + } + + outVideo = VideoInfo( + interner.intern(std::move(id_str)), + interner.intern(std::move(coverUrl_str)), + interner.intern(std::move(webVideoUrl_str)), + playCount, + inputOrder + ); + outMusicIdPtr = interner.intern(std::move(musicId_str)); + outMusicNamePtr = interner.intern(std::move(musicName_str)); + outMusicAuthorPtr = interner.intern(std::move(musicAuthor_str)); + + return true; +} + + +void extractHashtags(const std::string& text, + std::unordered_map& hashtagData, + StringInterner& interner, + const VideoInfo& video) +{ + const size_t textLen = text.length(); + const char* textData = text.data(); + size_t pos = 0; + std::string tag_buffer; + tag_buffer.reserve(50); + + while (pos < textLen) { + while (pos < textLen && textData[pos] != '#') { + pos++; + } + if (pos >= textLen) break; + + size_t start = pos + 1; + if (start >= textLen) break; + + size_t end = start; + + while (end < textLen && (std::isalnum(static_cast(textData[end])) || textData[end] == '_')) { + end++; + } + + if (end > start) { + tag_buffer.assign(textData + start, end - start); + const std::string* hashtagPtr = interner.intern(tag_buffer); + + typedef std::unordered_map HashtagMapType; + HashtagMapType::iterator it = hashtagData.find(hashtagPtr); + + if (it == hashtagData.end()) { + std::pair emplace_result = + hashtagData.emplace(hashtagPtr, HashtagInfo(hashtagPtr)); + it = emplace_result.first; + } + + it->second.usageCount++; + it->second.totalViews += video.playCount; + it->second.topVideos.add(video); + } + + pos = end; + } +} + +void extractSortAndPrintTop3Videos(std::ofstream& fout, TopKVideoHolder& topVideos) { + std::vector sortedTopVideos = topVideos.getSortedVideos(); + + int videosToPrint = std::min(static_cast(sortedTopVideos.size()), TOP_K_CANDIDATES); + for (int i = 0; i < videosToPrint; ++i) { + const VideoInfo& video = sortedTopVideos[i]; + + fout << "cover url: " << (video.coverUrl && !video.coverUrl->empty() ? *video.coverUrl : "null") << "\n"; + fout << "web video url: " << (video.webVideoUrl && !video.webVideoUrl->empty() ? *video.webVideoUrl : "null") << "\n"; + } +} \ No newline at end of file diff --git a/hws/tiktok_trends/Utils.h b/hws/tiktok_trends/Utils.h new file mode 100644 index 0000000..845fcdb --- /dev/null +++ b/hws/tiktok_trends/Utils.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include +#include +#include +#include "StringInterner.h" +#include "VideoInfo.h" +#include "HashtagInfo.h" +#include "SoundInfo.h" +#include "StringPtrUtils.h" // Needed for HashtagMapType/SoundMapType in function signatures + +bool parseQuotedStringValue(const std::string& str, size_t& pos, std::string& value); +bool parseUnquotedValue(const std::string& str, size_t& pos, std::string& value); +bool extractValue(const std::string& line, const std::string& key, std::string& value); +bool extractSubObject(const std::string& line, const std::string& key, std::string& subObj); +bool parseLongLong(const std::string& s, long& result); + +bool parseLineForHashtags(const std::string& line, int inputOrder, StringInterner& interner, + VideoInfo& outVideo, std::string& outText); + +bool parseLineForSounds(const std::string& line, int inputOrder, StringInterner& interner, + VideoInfo& outVideo, + const std::string*& outMusicIdPtr, + const std::string*& outMusicNamePtr, + const std::string*& outMusicAuthorPtr); + +void extractHashtags(const std::string& text, + std::unordered_map& hashtagData, + StringInterner& interner, + const VideoInfo& video); + +void extractSortAndPrintTop3Videos(std::ofstream& fout, TopKVideoHolder& topVideos); \ No newline at end of file diff --git a/hws/tiktok_trends/VideoInfo.h b/hws/tiktok_trends/VideoInfo.h new file mode 100644 index 0000000..c2cbc2e --- /dev/null +++ b/hws/tiktok_trends/VideoInfo.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include +#include "Constants.h" + +struct VideoInfo { + const std::string* videoId = nullptr; + const std::string* coverUrl = nullptr; + const std::string* webVideoUrl = nullptr; + long playCount = 0; + int inputOrder = -1; + + VideoInfo() = default; + + VideoInfo(const std::string* id, const std::string* cover, const std::string* web, + long plays, int order) + : videoId(id), coverUrl(cover), webVideoUrl(web), playCount(plays), inputOrder(order) {} + + static bool compareForFinalSort(const VideoInfo& a, const VideoInfo& b) { + if (a.playCount != b.playCount) return a.playCount > b.playCount; + if (a.videoId && b.videoId && *a.videoId != *b.videoId) return *a.videoId < *b.videoId; + return a.inputOrder < b.inputOrder; + } + + bool operator<(const VideoInfo& other) const { + if (playCount != other.playCount) return playCount > other.playCount; + return inputOrder < other.inputOrder; + } +}; + +struct VideoCompareWorse { + bool operator()(const VideoInfo& a, const VideoInfo& b) const { + if (a.playCount != b.playCount) return a.playCount > b.playCount; + return a.inputOrder < b.inputOrder; + } +}; \ No newline at end of file diff --git a/hws/tiktok_trends/main.cpp b/hws/tiktok_trends/main.cpp new file mode 100644 index 0000000..2b58269 --- /dev/null +++ b/hws/tiktok_trends/main.cpp @@ -0,0 +1,256 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Constants.h" +#include "StringInterner.h" +#include "StringPtrUtils.h" +#include "VideoInfo.h" +#include "TopKVideoHolder.h" +#include "HashtagInfo.h" +#include "SoundInfo.h" +#include "Utils.h" + + +bool processHashtags(const std::string& filename, std::ofstream& outputFile) { + std::ifstream inputFile(filename); + if (!inputFile.is_open()) { + std::cerr << "Cannot open input file: " << filename << std::endl; + return false; + } + + StringInterner interner; + std::unordered_map hashtagData; + hashtagData.reserve(250000); + + std::string line; + int inputOrderCounter = 0; + VideoInfo currentVideo; + std::string text_buffer; + + while (std::getline(inputFile, line)) { + if (line.length() < 10) continue; + inputOrderCounter++; + + if (parseLineForHashtags(line, inputOrderCounter, interner, currentVideo, text_buffer)) { + if (!text_buffer.empty()) { + extractHashtags(text_buffer, hashtagData, interner, currentVideo); + } + } + } + inputFile.close(); + + std::priority_queue, CompareHashtagPtrForHeap> top20Hashtags; + typedef std::unordered_map HashtagMapType; + + for (HashtagMapType::iterator it = hashtagData.begin(); it != hashtagData.end(); ++it) { + HashtagInfo* currentHashtagPtr = &(it->second); + + if (top20Hashtags.size() < TOP_N_OUTPUT) { + top20Hashtags.push(currentHashtagPtr); + } else { + const HashtagInfo* topPtr = top20Hashtags.top(); + bool is_better = CompareHashtagPtr()(currentHashtagPtr, topPtr); + + if (is_better) { + top20Hashtags.pop(); + top20Hashtags.push(currentHashtagPtr); + } + } + } + + std::vector finalTop20; + finalTop20.reserve(top20Hashtags.size()); + while (!top20Hashtags.empty()) { + finalTop20.push_back(top20Hashtags.top()); + top20Hashtags.pop(); + } + + std::sort(finalTop20.begin(), finalTop20.end(), CompareHashtagPtr()); + + outputFile << "trending hashtags:\n\n"; + for (size_t i = 0; i < finalTop20.size(); ++i) { + HashtagInfo* currentHashtag = finalTop20[i]; + + outputFile << "========================\n"; + + outputFile << "#" << (currentHashtag->name ? *currentHashtag->name : "null") << "\n"; + outputFile << "used " << currentHashtag->usageCount << " times\n"; + outputFile << currentHashtag->totalViews << " views\n\n"; + + extractSortAndPrintTop3Videos(outputFile, currentHashtag->topVideos); + + outputFile << "========================"; + if (i < finalTop20.size() - 1) { + outputFile << "\n"; + } else { + outputFile << "\n"; + } + } + + return true; +} + +bool processSounds(const std::string& filename, std::ofstream& outputFile) { + std::ifstream inputFile(filename); + if (!inputFile.is_open()) { + std::cerr << "Cannot open input file: " << filename << std::endl; + return false; + } + + StringInterner interner; + std::unordered_map soundData; + soundData.reserve(50000); + + std::string line; + int inputOrderCounter = 0; + VideoInfo currentVideo; + const std::string* musicIdPtr = nullptr; + const std::string* musicNamePtr = nullptr; + const std::string* musicAuthorPtr = nullptr; + + while (std::getline(inputFile, line)) { + if (line.length() < 10) continue; + inputOrderCounter++; + + musicIdPtr = nullptr; + musicNamePtr = nullptr; + musicAuthorPtr = nullptr; + + if (parseLineForSounds(line, inputOrderCounter, interner, currentVideo, + musicIdPtr, musicNamePtr, musicAuthorPtr)) + { + if (musicIdPtr == nullptr || musicIdPtr->empty()) { + continue; + } + + typedef std::unordered_map SoundMapType; + SoundMapType::iterator it = soundData.find(musicIdPtr); + + if (it == soundData.end()) { + std::pair emplace_result = + soundData.emplace(musicIdPtr, SoundInfo(musicIdPtr, musicNamePtr, musicAuthorPtr)); + it = emplace_result.first; + } + + it->second.totalViews += currentVideo.playCount; + + if (it->second.musicName->empty() && !musicNamePtr->empty()) { + it->second.musicName = musicNamePtr; + } + if (it->second.musicAuthor->empty() && !musicAuthorPtr->empty()) { + it->second.musicAuthor = musicAuthorPtr; + } + it->second.topVideos.add(currentVideo); + } + } + inputFile.close(); + + std::priority_queue, CompareSoundPtrForHeap> top20Sounds; + typedef std::unordered_map SoundMapType; + + for (SoundMapType::iterator it = soundData.begin(); it != soundData.end(); ++it) { + SoundInfo* currentSoundPtr = &(it->second); + + if (top20Sounds.size() < TOP_N_OUTPUT) { + top20Sounds.push(currentSoundPtr); + } else { + const SoundInfo* topPtr = top20Sounds.top(); + bool is_better = CompareSoundPtr()(currentSoundPtr, topPtr); + + if (is_better) { + top20Sounds.pop(); + top20Sounds.push(currentSoundPtr); + } + } + } + + std::vector finalTop20; + finalTop20.reserve(top20Sounds.size()); + while (!top20Sounds.empty()) { + finalTop20.push_back(top20Sounds.top()); + top20Sounds.pop(); + } + + std::sort(finalTop20.begin(), finalTop20.end(), CompareSoundPtr()); + + outputFile << "trending sounds:\n\n"; + for (size_t i = 0; i < finalTop20.size(); ++i) { + SoundInfo* currentSound = finalTop20[i]; + + outputFile << "========================\n"; + + if (currentSound->musicName == nullptr || currentSound->musicName->empty()) { + outputFile << "\n"; + } else { + outputFile << *currentSound->musicName << "\n"; + } + + outputFile << currentSound->totalViews << " views\n"; + + if (currentSound->musicAuthor == nullptr || currentSound->musicAuthor->empty()) { + outputFile << "\n"; + } else { + outputFile << *currentSound->musicAuthor << "\n"; + } + + outputFile << "music id: " << (currentSound->musicId && !currentSound->musicId->empty() ? *currentSound->musicId : "null") << "\n"; + + if (!currentSound->topVideos.empty()) { + outputFile << "\n"; + } + + extractSortAndPrintTop3Videos(outputFile, currentSound->topVideos); + + outputFile << "========================"; + if (i < finalTop20.size() - 1) { + outputFile << "\n"; + } else { + outputFile << "\n"; + } + } + + return true; +} + + +int main(int argc, char* argv[]) { + if (argc != 4) { + std::cerr << "Usage: nytrends.exe \n"; + std::cerr << "Mode can be 'hashtag' or 'sound'\n"; + return 1; + } + std::string inputFileName = argv[1]; + std::string outputFileName = argv[2]; + std::string mode = argv[3]; + + std::ofstream outputFile(outputFileName); + if (!outputFile.is_open()) { + std::cerr << "Error: Cannot open output file " << outputFileName << std::endl; + return 1; + } + + std::ios_base::sync_with_stdio(false); + + bool success = false; + if (mode == "hashtag") { + success = processHashtags(inputFileName, outputFile); + } else if (mode == "sound") { + success = processSounds(inputFileName, outputFile); + } else { + std::cerr << "Error: Invalid mode '" << mode << "'. Must be 'hashtag' or 'sound'." << std::endl; + outputFile.close(); + return 1; + } + + outputFile.close(); + return success ? 0 : 1; +} \ No newline at end of file diff --git a/hws/tiktok_trends/output.txt b/hws/tiktok_trends/output.txt new file mode 100644 index 0000000..357dbd3 --- /dev/null +++ b/hws/tiktok_trends/output.txt @@ -0,0 +1,242 @@ +trending hashtags: + +======================== +#fyp +used 7600 times +261199234341 views + +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/567504ab3e4648dea968213ce979f281?x-expires=1700449200&x-signature=bjGEgY4bdEVOMMHQa2S0qrzNCQY%3D +web video url: https://www.tiktok.com/@bellapoarch/video/6862153058223197445 +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/567504ab3e4648dea968213ce979f281?x-expires=1700449200&x-signature=bjGEgY4bdEVOMMHQa2S0qrzNCQY%3D +web video url: https://www.tiktok.com/@bellapoarch/video/6862153058223197445 +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/oAJCgD1khIANGRcTLhqQZNCi3ohAuAzoyEdIaf?x-expires=1700449200&x-signature=hu1Kg0Cpz%2BzVRXqYkv%2Fl6E8%2Ftgk%3D +web video url: https://www.tiktok.com/@tool_tips/video/7212981630904864005 +======================== +======================== +#foryou +used 2765 times +92282640558 views + +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/c4c7c98ecb5f4a8980ed7d58cdea2df3_1676378432?x-expires=1700449200&x-signature=QIchR40Etr%2BAjbAuzYbwTKnD7dA%3D +web video url: https://www.tiktok.com/@gorillatiks/video/7199990500512894213 +cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/d57bd10bd2594b148d48c5443d5571b0?x-expires=1700449200&x-signature=Z%2FTgQwhQ9eSmRMF3cBmH%2BdVHve8%3D +web video url: https://www.tiktok.com/@honeycats77/video/7190528800352980267 +cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/d57bd10bd2594b148d48c5443d5571b0?x-expires=1700449200&x-signature=Z%2FTgQwhQ9eSmRMF3cBmH%2BdVHve8%3D +web video url: https://www.tiktok.com/@honeycats77/video/7190528800352980267 +======================== +======================== +#viral +used 1759 times +59270543842 views + +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/o4n0aDMCxQPkleFE5JnbeaoIw9uEBRQiTkIzAB?x-expires=1700449200&x-signature=zOxX4QIMqL%2BNOyl6R57PLiVKb%2BE%3D +web video url: https://www.tiktok.com/@dada_ahoufe_/video/7247202774696447238 +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/o4n0aDMCxQPkleFE5JnbeaoIw9uEBRQiTkIzAB?x-expires=1700449200&x-signature=zOxX4QIMqL%2BNOyl6R57PLiVKb%2BE%3D +web video url: https://www.tiktok.com/@dada_ahoufe_/video/7247202774696447238 +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/oYRUDAt9kFIA2SIwCWomEVfha623AyrLzxgaAo?x-expires=1700449200&x-signature=xVeyOReZuykD9rFS4KFcN%2FFL44g%3D +web video url: https://www.tiktok.com/@carrosseriereparation/video/7217942797360303365 +======================== +======================== +#makeuptutorial +used 1709 times +22311707100 views + +cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/ea183fe6de594a319ba917d1ffbff11b?x-expires=1700503200&x-signature=9L4ypK162uI%2BirECcDcFqctjvn8%3D +web video url: https://www.tiktok.com/@dollievision/video/7208244986666585386 +cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/ea183fe6de594a319ba917d1ffbff11b?x-expires=1700503200&x-signature=9L4ypK162uI%2BirECcDcFqctjvn8%3D +web video url: https://www.tiktok.com/@dollievision/video/7208244986666585386 +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/5461c70dd8ee4a0d891e7f2529f6b8ea_1670789072?x-expires=1700503200&x-signature=TqqnBqyBh5cnb150Ri0jXfwaL9s%3D +web video url: https://www.tiktok.com/@alicekingmakeup/video/7175984394950167813 +======================== +======================== +#couplestiktok +used 1610 times +14706422100 views + +cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/693775bbddac4df4ad008ff880041fbc?x-expires=1700503200&x-signature=UU8VVoLrIaXIVFnYLf3jl8IYO%2BE%3D +web video url: https://www.tiktok.com/@misiaaa621/video/7149368989611773227 +cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/693775bbddac4df4ad008ff880041fbc?x-expires=1700503200&x-signature=UU8VVoLrIaXIVFnYLf3jl8IYO%2BE%3D +web video url: https://www.tiktok.com/@misiaaa621/video/7149368989611773227 +cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/4b83dfa1cc0c47318e408a6bcde34bb6_1677549193?x-expires=1700503200&x-signature=%2FN8FYhRpVldGSaxbP6dgYeEroqI%3D +web video url: https://www.tiktok.com/@debbiekval/video/7205018880622857515 +======================== +======================== +#lifehack +used 1585 times +33681856600 views + +cover url: https://p16-sign-sg.tiktokcdn.com/tos-alisg-p-0037/d6a1c1c323614919975fad3ee1c1ef9e~tplv-dmt-logom:tos-alisg-i-0000/4124427fcd3045968ac1c3136bd92d6c.image?x-expires=1700452800&x-signature=qCaN1hrF7pqQ0kvZJnlFnc9jI6Q%3D +web video url: https://www.tiktok.com/@tresorfie/video/7039091515863403778 +cover url: https://p16-sign-sg.tiktokcdn.com/tos-alisg-p-0037/d6a1c1c323614919975fad3ee1c1ef9e~tplv-dmt-logom:tos-alisg-i-0000/4124427fcd3045968ac1c3136bd92d6c.image?x-expires=1700449200&x-signature=WSl3XKN1HPXy7jpguj8v0AaI3FU%3D +web video url: https://www.tiktok.com/@tresorfie/video/7039091515863403778 +cover url: https://p16-sign-sg.tiktokcdn.com/tos-alisg-p-0037/501627c6b36849e282740c764611f2a7_1634994542~tplv-dmt-logom:tos-alisg-pv-0037/f3273e6f3e92421d860be8f5e72ac0bd.image?x-expires=1700452800&x-signature=DkwRLgyyY5ec0757c1hCq372yJM%3D +web video url: https://www.tiktok.com/@issei0806/video/7022248055625846018 +======================== +======================== +#funnyvideos +used 1573 times +67029374400 views + +cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=HTd5Yy2XA1y%2Bn0Gy2PnX9t%2FNpw4%3D +web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731 +cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=HTd5Yy2XA1y%2Bn0Gy2PnX9t%2FNpw4%3D +web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731 +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=vv04JjjwKgR1P3t117v%2B5HMvnpI%3D +web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731 +======================== +======================== +#foryoupage +used 1550 times +49067115500 views + +cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/d57bd10bd2594b148d48c5443d5571b0?x-expires=1700449200&x-signature=Z%2FTgQwhQ9eSmRMF3cBmH%2BdVHve8%3D +web video url: https://www.tiktok.com/@honeycats77/video/7190528800352980267 +cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/d57bd10bd2594b148d48c5443d5571b0?x-expires=1700449200&x-signature=Z%2FTgQwhQ9eSmRMF3cBmH%2BdVHve8%3D +web video url: https://www.tiktok.com/@honeycats77/video/7190528800352980267 +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/oYRUDAt9kFIA2SIwCWomEVfha623AyrLzxgaAo?x-expires=1700449200&x-signature=xVeyOReZuykD9rFS4KFcN%2FFL44g%3D +web video url: https://www.tiktok.com/@carrosseriereparation/video/7217942797360303365 +======================== +======================== +#newyorkcity +used 1545 times +8642836600 views + +cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/08b82fdf19d5468e91a032b30e527861_1692785637?x-expires=1700452800&x-signature=%2F5sW2i0xTGXJJj2PKbwI6VXywWI%3D +web video url: https://www.tiktok.com/@erikconover/video/7270458709065731370 +cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/1ea2545bde2645ec8f1106a4b9de6c2e_1648602515?x-expires=1700452800&x-signature=HNr6UCQsc4q0m%2FFShx%2FJJNWb1Jg%3D +web video url: https://www.tiktok.com/@thekatieromero/video/7080693879141485870 +cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/c898b993f308477e92334437f9f0e1e1?x-expires=1700452800&x-signature=ZNHG9%2FMBw9qp5DSm%2BNnbhwX6xK8%3D +web video url: https://www.tiktok.com/@thekatieromero/video/7105552208422374699 +======================== +======================== +#ifweeverbrokeup +used 1543 times +1337044198 views + +cover url: https://p16-sign-sg.tiktokcdn.com/obj/tos-alisg-p-0037/ee79eb2bea6445739ed71cef3e9b84b6_1686646723?x-expires=1700456400&x-signature=0MADrs89I23eeCudb%2FJxkI%2FJbR8%3D +web video url: https://www.tiktok.com/@zanmangloopyofficial/video/7244092495129218312 +cover url: https://p16-sign-sg.tiktokcdn.com/obj/tos-alisg-p-0037/ee79eb2bea6445739ed71cef3e9b84b6_1686646723?x-expires=1700456400&x-signature=0MADrs89I23eeCudb%2FJxkI%2FJbR8%3D +web video url: https://www.tiktok.com/@zanmangloopyofficial/video/7244092495129218312 +cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/7895be3e406d435ba0db9e6f5db349e2?x-expires=1700456400&x-signature=ahrkOjcQRlDhAOf1upGEu%2B2ECYU%3D +web video url: https://www.tiktok.com/@bebopandbebe/video/7238437685537328426 +======================== +======================== +#springcleaning +used 1416 times +2156123000 views + +cover url: https://p16-sign-va.tiktokcdn.com/tos-maliva-p-0068/56ce7e79491a4b27b371517ce134fa82_1631381225~tplv-dmt-logom:tos-maliva-p-0000/415cfd01b3484fb38f7b088aa6efda67.image?x-expires=1700503200&x-signature=YL4yGwa%2F1gZ59cKHMov7ficsK9E%3D +web video url: https://www.tiktok.com/@livecomposed/video/7006728991067491589 +cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/4c54b4d1332c4000a615d9c5fc172be8_1677780053?x-expires=1700503200&x-signature=aD4zRpPLbhn9wz4vtTQWZRa2I1U%3D +web video url: https://www.tiktok.com/@atmeikasa/video/7206010371004583214 +cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/4c54b4d1332c4000a615d9c5fc172be8_1677780053?x-expires=1700503200&x-signature=aD4zRpPLbhn9wz4vtTQWZRa2I1U%3D +web video url: https://www.tiktok.com/@atmeikasa/video/7206010371004583214 +======================== +======================== +#funny +used 1382 times +53648909500 views + +cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=HTd5Yy2XA1y%2Bn0Gy2PnX9t%2FNpw4%3D +web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731 +cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=HTd5Yy2XA1y%2Bn0Gy2PnX9t%2FNpw4%3D +web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731 +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=vv04JjjwKgR1P3t117v%2B5HMvnpI%3D +web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731 +======================== +======================== +#happymonday +used 1308 times +741991700 views + +cover url: https://p16-sign-va.tiktokcdn.com/tos-maliva-p-0068/oQgeH8BRJnj20JEFoQ5tAf1MIb976nBD89QiFB~tplv-dmt-logom:tos-useast2a-v-0068/4763cd9418ac4d7faccbf52906bcf43c.image?x-expires=1700449200&x-signature=DPRcWm2Xhpe7r2HmxxGBzOyhwVs%3D +web video url: https://www.tiktok.com/@joinparallel.io/video/7192338389255916806 +cover url: https://p16-sign-va.tiktokcdn.com/tos-maliva-p-0068/oQgeH8BRJnj20JEFoQ5tAf1MIb976nBD89QiFB~tplv-dmt-logom:tos-useast2a-v-0068/4763cd9418ac4d7faccbf52906bcf43c.image?x-expires=1700449200&x-signature=DPRcWm2Xhpe7r2HmxxGBzOyhwVs%3D +web video url: https://www.tiktok.com/@joinparallel.io/video/7192338389255916806 +cover url: https://p16-sign-sg.tiktokcdn.com/obj/tos-alisg-p-0037/002965b791d641d5b2f3d86ee0019604_1675130079?x-expires=1700449200&x-signature=TMCJOBJCXYeu5rsjFWpohtGwT8M%3D +web video url: https://www.tiktok.com/@mondayhaircare/video/7194628805414161665 +======================== +======================== +#nyc +used 990 times +5577241000 views + +cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/08b82fdf19d5468e91a032b30e527861_1692785637?x-expires=1700452800&x-signature=%2F5sW2i0xTGXJJj2PKbwI6VXywWI%3D +web video url: https://www.tiktok.com/@erikconover/video/7270458709065731370 +cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/1ea2545bde2645ec8f1106a4b9de6c2e_1648602515?x-expires=1700452800&x-signature=HNr6UCQsc4q0m%2FFShx%2FJJNWb1Jg%3D +web video url: https://www.tiktok.com/@thekatieromero/video/7080693879141485870 +cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/c898b993f308477e92334437f9f0e1e1?x-expires=1700452800&x-signature=ZNHG9%2FMBw9qp5DSm%2BNnbhwX6xK8%3D +web video url: https://www.tiktok.com/@thekatieromero/video/7105552208422374699 +======================== +======================== +#makeup +used 976 times +15309874500 views + +cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/ea183fe6de594a319ba917d1ffbff11b?x-expires=1700503200&x-signature=9L4ypK162uI%2BirECcDcFqctjvn8%3D +web video url: https://www.tiktok.com/@dollievision/video/7208244986666585386 +cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/ea183fe6de594a319ba917d1ffbff11b?x-expires=1700503200&x-signature=9L4ypK162uI%2BirECcDcFqctjvn8%3D +web video url: https://www.tiktok.com/@dollievision/video/7208244986666585386 +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/1404c560d1e74fe7881a0a4ae6414de5_1652622380?x-expires=1700449200&x-signature=m%2BIawkKwQBwnUaTqBTTMtqLQPZo%3D +web video url: https://www.tiktok.com/@mimles/video/7097959048515013894 +======================== +======================== +#trending +used 721 times +21692028406 views + +cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/bbdfd4ef0c4040b2bf9c52e9bb81d770?x-expires=1700449200&x-signature=fMm4z9wGlJCa1VFXvU5jQ0ot6tA%3D +web video url: https://www.tiktok.com/@phuonglinh.ido/video/7215533760039865646 +cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/c92407e5bca34ce78eb17db878630adc?x-expires=1700449200&x-signature=836u0V7z2PC7tFMLlsvVDFDU1wU%3D +web video url: https://www.tiktok.com/@asmr.mus/video/7212985350124375342 +cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/c92407e5bca34ce78eb17db878630adc?x-expires=1700449200&x-signature=836u0V7z2PC7tFMLlsvVDFDU1wU%3D +web video url: https://www.tiktok.com/@asmr.mus/video/7212985350124375342 +======================== +======================== +#comedy +used 579 times +14364510900 views + +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/49a912da569f4c69b3658762357f3922_1572472757?x-expires=1700449200&x-signature=rCokiz5pbl88BrzDzX3AB1LFCXg%3D +web video url: https://www.tiktok.com/@kisonkee/video/6753718966637677830 +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/49a912da569f4c69b3658762357f3922_1572472757?x-expires=1700449200&x-signature=rCokiz5pbl88BrzDzX3AB1LFCXg%3D +web video url: https://www.tiktok.com/@kisonkee/video/6753718966637677830 +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/o0EnyBknREPO4GdeDo4nxAIJRFJfbfzAzGQSDf?x-expires=1700449200&x-signature=zuGbpMoTS01F4waRsGo2r2AoVxk%3D +web video url: https://www.tiktok.com/@ricoanimations0/video/7241573984590957830 +======================== +======================== +#newyork +used 555 times +3126420800 views + +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/4a878de5dbe241b5b5e25635f4200a51_1650915958?x-expires=1700449200&x-signature=1l%2F8aGh0jktub1R%2BX23PAe64Dys%3D +web video url: https://www.tiktok.com/@mdmotivator/video/7090629995546070277 +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/4a878de5dbe241b5b5e25635f4200a51_1650915958?x-expires=1700449200&x-signature=1l%2F8aGh0jktub1R%2BX23PAe64Dys%3D +web video url: https://www.tiktok.com/@mdmotivator/video/7090629995546070277 +cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/08b82fdf19d5468e91a032b30e527861_1692785637?x-expires=1700452800&x-signature=%2F5sW2i0xTGXJJj2PKbwI6VXywWI%3D +web video url: https://www.tiktok.com/@erikconover/video/7270458709065731370 +======================== +======================== +#couple +used 439 times +5628511600 views + +cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/014d01e5b7f848fc8f8899e88e8fa483?x-expires=1700449200&x-signature=IMmyHEigmMoVtLEuTWPZwe%2Fksb0%3D +web video url: https://www.tiktok.com/@mamalindy/video/7079555791962885419 +cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-aiso/cf3359fa45444f0994cf0dcc1c201b2d_1681930130?x-expires=1700449200&x-signature=tABQmhr%2FtklzlsNqYWGZnNrxwhI%3D +web video url: https://www.tiktok.com/@kajsablock/video/7223834852305456410 +cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-aiso/cf3359fa45444f0994cf0dcc1c201b2d_1681930130?x-expires=1700449200&x-signature=tABQmhr%2FtklzlsNqYWGZnNrxwhI%3D +web video url: https://www.tiktok.com/@kajsablock/video/7223834852305456410 +======================== +======================== +#fy +used 397 times +16901215000 views + +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/ef952b508c8043bb8b4ba98e3db850fb_1679074109?x-expires=1700449200&x-signature=AOKtuDMNxX%2BU2b1dRfBvofZLZfk%3D +web video url: https://www.tiktok.com/@noelgoescrazy/video/7211568359798803717 +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/ef952b508c8043bb8b4ba98e3db850fb_1679074109?x-expires=1700449200&x-signature=AOKtuDMNxX%2BU2b1dRfBvofZLZfk%3D +web video url: https://www.tiktok.com/@noelgoescrazy/video/7211568359798803717 +cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/509605b7a901400589cd15d6731aaf8c_1677431421?x-expires=1700449200&x-signature=LoywgvGN5XLKIpvwV2UnR6pml6s%3D +web video url: https://www.tiktok.com/@noelgoescrazy/video/7204513074097769733 +======================== diff --git a/hws/tiktok_trends/test.py b/hws/tiktok_trends/test.py new file mode 100644 index 0000000..8840ccb --- /dev/null +++ b/hws/tiktok_trends/test.py @@ -0,0 +1,456 @@ +import subprocess +import os +import filecmp +import glob +import sys # Import sys for platform detection +import time +import shutil +import re # Import re for regex parsing on macOS + +# --- Configuration --- +CXX = "g++" +CXXFLAGS = ["-Wall", "-O2", "-std=c++11"] +EXECUTABLE = "./nytrends.exe" +SOURCE_FILES_PATTERN = "*.cpp" +INPUT_DIR = "inputs" +EXPECTED_OUTPUT_DIR = "outputs" +TEMP_OUTPUT_FILE = "output_unit_test.txt" +TEST_TIMEOUT = 120 + +# Configuration for memory measurement +MEASURE_MEMORY = True # Master switch +TIME_COMMAND = "/usr/bin/time" +# --- Platform Specific Time Config --- +TIME_COMMAND_MODE = None # Will be 'linux' or 'macos' or None +LINUX_TIME_FORMAT = "%M" # Format specifier for Max RSS (KB) on Linux +LINUX_TIME_OUTPUT_FILE = "time_mem_output.tmp" # Temp file for Linux time output +MACOS_MEM_REGEX = re.compile(r"^\s*(\d+)\s+maximum resident set size", re.IGNORECASE | re.MULTILINE) + +# Configuration for suppressing program output +SUPPRESS_PROGRAM_OUTPUT = True + +# ANSI Color Codes +# ... (colors remain the same) ... +COLOR_GREEN = '\033[92m' +COLOR_RED = '\033[91m' +COLOR_YELLOW = '\033[93m' +COLOR_BLUE = '\033[94m' +COLOR_RESET = '\033[0m' + +# --- Helper Functions --- + +def print_color(text, color): + """Prints text in a specified color.""" + print(f"{color}{text}{COLOR_RESET}") + +def check_time_command(): + """ + Check if /usr/bin/time command exists and is usable for memory measurement + based on the OS. Sets TIME_COMMAND_MODE. Returns True if usable, False otherwise. + """ + global TIME_COMMAND_MODE + if not shutil.which(TIME_COMMAND): + print_color(f"Warning: '{TIME_COMMAND}' not found. Memory measurement disabled.", COLOR_YELLOW) + TIME_COMMAND_MODE = None + return False + + platform = sys.platform + test_command = [] + capture_stderr = False + + if platform.startswith("linux"): + test_command = [TIME_COMMAND, '-f', LINUX_TIME_FORMAT, 'true'] + capture_stderr = False # Output goes to stdout/stderr, just check exit code + TIME_COMMAND_MODE = "linux" + print(f"Detected Linux platform. Testing {TIME_COMMAND} with '-f {LINUX_TIME_FORMAT}'...") + + elif platform == "darwin": # macOS + test_command = [TIME_COMMAND, '-l', 'true'] + capture_stderr = True # Need to capture stderr to check output format + TIME_COMMAND_MODE = "macos" + print(f"Detected macOS platform. Testing {TIME_COMMAND} with '-l'...") + + else: + print_color(f"Warning: Unsupported platform '{platform}' for memory measurement. Disabled.", COLOR_YELLOW) + TIME_COMMAND_MODE = None + return False + + try: + # Run test command + process = subprocess.run(test_command, + capture_output=True, # Capture both stdout/stderr + text=True, + check=True, # Raise exception on non-zero exit + timeout=3) + + # Additional check for macOS output format + if TIME_COMMAND_MODE == "macos": + if MACOS_MEM_REGEX.search(process.stderr): + print_color(f"Memory measurement enabled using '{TIME_COMMAND} -l'.", COLOR_GREEN) + return True # Format looks okay + else: + print_color(f"Warning: '{TIME_COMMAND} -l' output format not recognized (missing 'maximum resident set size'). Memory measurement disabled.", COLOR_YELLOW) + TIME_COMMAND_MODE = None + return False + else: # Linux check passed if check=True didn't raise exception + print_color(f"Memory measurement enabled using '{TIME_COMMAND} -f {LINUX_TIME_FORMAT}'.", COLOR_GREEN) + return True + + except subprocess.CalledProcessError as e: + # This is where the original macOS error occurred + print_color(f"Warning: {TIME_COMMAND} test command failed (exit code {e.returncode}). Memory measurement disabled.", COLOR_YELLOW) + if e.stderr: print(f"Stderr:\n{e.stderr}") + TIME_COMMAND_MODE = None + return False + except FileNotFoundError: # Should have been caught by shutil.which, but belt-and-suspenders + print_color(f"Warning: '{TIME_COMMAND}' not found during test run. Memory measurement disabled.", COLOR_YELLOW) + TIME_COMMAND_MODE = None + return False + except Exception as e: + print_color(f"Warning: An unexpected error occurred while testing {TIME_COMMAND}. Memory measurement disabled. Error: {e}", COLOR_YELLOW) + TIME_COMMAND_MODE = None + return False + +# --- compile_program() remains the same --- +def compile_program(): + """Compiles the C++ source files.""" + print_color(f"--- Starting Compilation ---", COLOR_BLUE) + source_files = glob.glob(SOURCE_FILES_PATTERN) + if not source_files: + print_color(f"Error: No source files found matching pattern '{SOURCE_FILES_PATTERN}'.", COLOR_RED) + return False + + compile_command = [CXX] + CXXFLAGS + ["-o", os.path.basename(EXECUTABLE)] + source_files + command_str = " ".join(compile_command) + print(f"Running: {command_str}") + + try: + start_time = time.perf_counter() + process = subprocess.run(compile_command, check=False, capture_output=True, text=True) + end_time = time.perf_counter() + duration = end_time - start_time + + if process.returncode == 0: + print_color(f"Compilation successful (took {duration:.3f}s).", COLOR_GREEN) + if process.stderr: + print_color("Compiler Warnings/Messages:", COLOR_YELLOW) + print(process.stderr) + return True + else: + print_color(f"Compilation failed with exit code {process.returncode} (took {duration:.3f}s).", COLOR_RED) + print_color("Compiler Error Output:", COLOR_RED) + print(process.stderr if process.stderr else "(No compiler error output captured)") + return False + except FileNotFoundError: + print_color(f"Error: Compiler '{CXX}' not found.", COLOR_RED) + return False + except Exception as e: + print_color(f"An unexpected error occurred during compilation: {e}", COLOR_RED) + return False + +def run_test(test_name, input_file, expected_output_file, argument): + """ + Runs test, measures time/memory (platform-specific), suppresses output. + Returns: tuple (passed: bool, reason: str, duration: float | None, memory_kb: int | None) + """ + global MEASURE_MEMORY, TIME_COMMAND_MODE # Access potentially updated flags + + print_color(f"--- Running {test_name} ---", COLOR_BLUE) + duration = None + memory_kb = None + captured_stderr_for_mem = None # Store stderr specifically for macos parsing + + # Prerequisite checks + if not os.path.exists(input_file): return False, "Input file missing", None, None + if not os.path.exists(expected_output_file): return False, "Expected output file missing", None, None + if not os.path.exists(EXECUTABLE): return False, "Executable not found", None, None + + # --- Command Construction & subprocess args --- + base_command = [EXECUTABLE, input_file, TEMP_OUTPUT_FILE, argument] + run_command = [] + subprocess_kwargs = { # Base arguments for subprocess.run + "check": False, + "timeout": TEST_TIMEOUT + } + + if MEASURE_MEMORY and TIME_COMMAND_MODE: # Check both desire and capability + if TIME_COMMAND_MODE == "linux": + run_command = [TIME_COMMAND, '-f', LINUX_TIME_FORMAT, '-o', LINUX_TIME_OUTPUT_FILE] + base_command + if os.path.exists(LINUX_TIME_OUTPUT_FILE): + try: os.remove(LINUX_TIME_OUTPUT_FILE) + except OSError: pass + # For Linux, memory info goes to file, handle stdout/stderr normally based on suppression + subprocess_kwargs["stdout"] = subprocess.DEVNULL if SUPPRESS_PROGRAM_OUTPUT else None + subprocess_kwargs["stderr"] = subprocess.DEVNULL if SUPPRESS_PROGRAM_OUTPUT else None + + elif TIME_COMMAND_MODE == "macos": + run_command = [TIME_COMMAND, '-l'] + base_command + # On macOS, need to capture stderr for parsing memory, stdout handles suppression + subprocess_kwargs["stdout"] = subprocess.DEVNULL if SUPPRESS_PROGRAM_OUTPUT else None + subprocess_kwargs["stderr"] = subprocess.PIPE # Capture stderr for parsing + subprocess_kwargs["text"] = True # Decode captured stderr + + else: # Not measuring memory or platform unsupported + run_command = base_command + subprocess_kwargs["stdout"] = subprocess.DEVNULL if SUPPRESS_PROGRAM_OUTPUT else None + subprocess_kwargs["stderr"] = subprocess.DEVNULL if SUPPRESS_PROGRAM_OUTPUT else None + + command_str = " ".join(run_command) + print(f"Executing: {command_str}") + + # --- Execution and Measurement --- + if os.path.exists(TEMP_OUTPUT_FILE): + try: os.remove(TEMP_OUTPUT_FILE) + except OSError as e: print_color(f"Warning: Could not remove {TEMP_OUTPUT_FILE}: {e}", COLOR_YELLOW) + + try: + start_time = time.perf_counter() + process = subprocess.run(run_command, **subprocess_kwargs) + end_time = time.perf_counter() + duration = end_time - start_time + print(f"Execution Time: {duration:.3f} seconds") + + # --- Process Memory Output (Platform Specific) --- + if MEASURE_MEMORY and TIME_COMMAND_MODE: + if TIME_COMMAND_MODE == "linux": + if os.path.exists(LINUX_TIME_OUTPUT_FILE): + try: + with open(LINUX_TIME_OUTPUT_FILE, 'r') as f_time: + mem_str = f_time.read().strip() + if mem_str: + memory_kb = int(mem_str) # Already in KB + print(f"Peak Memory Usage: {memory_kb} KB") + else: print_color(f"Warning: {LINUX_TIME_OUTPUT_FILE} was empty.", COLOR_YELLOW) + except (ValueError, IOError) as e: print_color(f"Warning: Could not parse memory (Linux) from {LINUX_TIME_OUTPUT_FILE}: {e}", COLOR_YELLOW) + finally: + try: os.remove(LINUX_TIME_OUTPUT_FILE) + except OSError: pass + else: print_color(f"Warning: {LINUX_TIME_OUTPUT_FILE} was not created.", COLOR_YELLOW) + + elif TIME_COMMAND_MODE == "macos": + # Parse memory from captured stderr (process.stderr) + if process.stderr: + match = MACOS_MEM_REGEX.search(process.stderr) + if match: + try: + mem_bytes = int(match.group(1)) + memory_kb = mem_bytes // 1024 # Convert Bytes to KB + print(f"Peak Memory Usage: {memory_kb} KB ({mem_bytes} Bytes)") + except (ValueError, IndexError): + print_color(f"Warning: Could not parse memory value (macOS) from captured output.", COLOR_YELLOW) + # Optional: print process.stderr here for debugging + # print(f"--- time -l stderr ---\n{process.stderr}\n----------------------") + else: + print_color(f"Warning: 'maximum resident set size' not found in 'time -l' output (macOS).", COLOR_YELLOW) + # Optional: print process.stderr here for debugging + # print(f"--- time -l stderr ---\n{process.stderr}\n----------------------") + else: + print_color(f"Warning: No stderr captured from 'time -l' (macOS).", COLOR_YELLOW) + + # --- Check Program Result --- + if process.returncode != 0: + print_color(f"Test failed: Program exited with non-zero status {process.returncode}.", COLOR_RED) + # Note: program's own stderr might be in process.stderr ONLY if not suppressed AND on macOS + # It's generally hidden now by design. + return False, "Runtime error", duration, memory_kb + + if not os.path.exists(TEMP_OUTPUT_FILE): + print_color(f"Test failed: Program finished successfully but did not create '{TEMP_OUTPUT_FILE}'.", COLOR_RED) + return False, "Output file not created", duration, memory_kb + + # --- Compare Output File --- + if filecmp.cmp(TEMP_OUTPUT_FILE, expected_output_file, shallow=False): + print_color(f"Test Result: PASSED", COLOR_GREEN) + return True, "Passed", duration, memory_kb + else: + # ... (diff printing remains the same) ... + print_color(f"Test Result: FAILED - Output mismatch.", COLOR_RED) + print_color(f" Expected: {expected_output_file}", COLOR_YELLOW) + print_color(f" Actual: {TEMP_OUTPUT_FILE}", COLOR_YELLOW) + try: + diff_proc = subprocess.run(['diff', '-u', expected_output_file, TEMP_OUTPUT_FILE], capture_output=True, text=True) + print_color("--- Diff ---", COLOR_YELLOW) + print(diff_proc.stdout if diff_proc.stdout else "(No differences found by diff, might be whitespace or encoding issues)") + print_color("------------", COLOR_YELLOW) + except FileNotFoundError: print_color("Could not run 'diff' command.", COLOR_YELLOW) + except Exception as diff_e: print_color(f"Error running diff: {diff_e}", COLOR_YELLOW) + + return False, "Output mismatch", duration, memory_kb + + # --- Exception Handling --- + except subprocess.TimeoutExpired: + end_time = time.perf_counter() + duration = end_time - start_time + print_color(f"Test failed: Program timed out after {duration:.3f}s (limit: {TEST_TIMEOUT}s).", COLOR_RED) + # Attempt to parse memory ONLY if macOS and stderr might have been partially captured (unlikely but possible) + if MEASURE_MEMORY and TIME_COMMAND_MODE == "macos" and process and process.stderr: + match = MACOS_MEM_REGEX.search(process.stderr) + if match: + try: memory_kb = int(match.group(1)) // 1024 + except: memory_kb = None # Ignore parsing errors on timeout + # Clean up Linux temp file if it exists + if MEASURE_MEMORY and TIME_COMMAND_MODE == "linux" and os.path.exists(LINUX_TIME_OUTPUT_FILE): + try: os.remove(LINUX_TIME_OUTPUT_FILE) + except OSError: pass + return False, "Timeout", duration, memory_kb + except Exception as e: + print_color(f"An unexpected error occurred during test execution: {e}", COLOR_RED) + # Clean up Linux temp file if it exists + if MEASURE_MEMORY and TIME_COMMAND_MODE == "linux" and os.path.exists(LINUX_TIME_OUTPUT_FILE): + try: os.remove(LINUX_TIME_OUTPUT_FILE) + except OSError: pass + return False, f"Execution exception: {e}", None, None + finally: + # General cleanup (Linux temp file might still exist if parsing failed) + if MEASURE_MEMORY and TIME_COMMAND_MODE == "linux" and os.path.exists(LINUX_TIME_OUTPUT_FILE): + try: os.remove(LINUX_TIME_OUTPUT_FILE) + except OSError: pass + +# --- Main Execution --- +if __name__ == "__main__": + # 0. Check if memory measurement is desired AND possible + user_wants_memory_measurement = MEASURE_MEMORY + if user_wants_memory_measurement: + can_actually_measure = check_time_command() + MEASURE_MEMORY = can_actually_measure # Update based on check + else: + MEASURE_MEMORY = False + print_color("Memory measurement explicitly disabled by configuration.", COLOR_YELLOW) + + if SUPPRESS_PROGRAM_OUTPUT: + print_color("Program stdout/stderr will be suppressed during tests.", COLOR_BLUE) + + # 1. Compile + if not compile_program(): + print_color("\nCompilation failed. Aborting tests.", COLOR_RED) + sys.exit(1) + + # 2. Define Test Cases + # ... (test_bases and arguments_to_test remain the same) ... + test_bases = [ + ("1", "tiny1"), ("2", "tiny2"), ("3", "small1"), ("4", "small2"), + ("5", "medium1"), ("6", "medium2"), ("7", "large1"), ("8", "large2"), + ("9", "large3"), ("10", "large4"), ("11", "large5"), ("12", "large6"), + ("13", "large7"), ("14", "large8"), ("15", "large9"), + ] + arguments_to_test = ["hashtag", "sound"] + + results = {"passed": 0, "failed": 0, "skipped": 0} + failed_tests = [] + test_durations = [] + test_memory_usages = [] + + # 3. Run Tests + print_color("\n--- Starting Test Execution ---", COLOR_BLUE) + total_start_time = time.perf_counter() + + for id_prefix, base_name in test_bases: + for i, argument in enumerate(arguments_to_test, 1): + # ... (construct test names/paths) ... + test_id = f"{id_prefix}.{i}" + test_name = f"Test Case {test_id}: input {base_name}, {argument}" + input_filename = os.path.join(INPUT_DIR, f"input_{base_name}.json") + expected_output_filename = os.path.join(EXPECTED_OUTPUT_DIR, f"output_{base_name}_{argument}.txt") + + passed, reason, duration, memory_kb = run_test(test_name, input_filename, expected_output_filename, argument) + + # ... (Update results logic remains the same, relies on memory_kb being None if not measured) ... + if passed: + results["passed"] += 1 + if duration is not None: test_durations.append(duration) + if MEASURE_MEMORY and memory_kb is not None: test_memory_usages.append(memory_kb) + elif reason in ["Input file missing", "Expected output file missing", "Executable not found"]: + results["skipped"] += 1 + else: + results["failed"] += 1 + duration_str = f" ({duration:.3f}s)" if duration is not None else "" + mem_str = f", {memory_kb} KB" if MEASURE_MEMORY and memory_kb is not None else "" + failed_tests.append(f"{test_name} ({reason}{duration_str}{mem_str})") + print("-" * 40) + + + total_end_time = time.perf_counter() + total_test_suite_duration = total_end_time - total_start_time + + # 4. Clean up + # ... (same cleanup logic) ... + print_color("--- Cleaning Up ---", COLOR_BLUE) + if os.path.exists(TEMP_OUTPUT_FILE): + try: + os.remove(TEMP_OUTPUT_FILE) + print(f"Removed temporary output file: {TEMP_OUTPUT_FILE}") + except OSError as e: print_color(f"Warning: Could not remove {TEMP_OUTPUT_FILE}: {e}", COLOR_YELLOW) + if os.path.exists(EXECUTABLE): + try: + os.remove(EXECUTABLE) + print(f"Removed executable: {EXECUTABLE}") + except OSError as e: print_color(f"Warning: Could not remove {EXECUTABLE}: {e}", COLOR_YELLOW) + + + # 5. Print Summary + # ... (summary printing logic remains the same) ... + # Note: Memory summary section only appears if MEASURE_MEMORY is True at the end. + print_color("\n--- Test Summary ---", COLOR_BLUE) + print_color(f"Passed: {results['passed']}", COLOR_GREEN) + print_color(f"Failed: {results['failed']}", COLOR_RED if results['failed'] > 0 else COLOR_GREEN) + print_color(f"Skipped: {results['skipped']}", COLOR_YELLOW if results['skipped'] > 0 else COLOR_GREEN) + total_run = results['passed'] + results['failed'] + total_defined = total_run + results['skipped'] + print(f"Total Tests Defined: {total_defined}") + print(f"Total Tests Run: {total_run}") + print(f"Total Test Suite Execution Time: {total_test_suite_duration:.3f}s") + + # Performance Summary + if test_durations: + # ... (same calculation and printing) ... + total_passed_time = sum(test_durations) + avg_time = total_passed_time / len(test_durations) + max_time = max(test_durations) + min_time = min(test_durations) + print("\n--- Performance Summary (Passed Tests) ---") + print(f"Total execution time (passed tests): {total_passed_time:.3f}s") + print(f"Average execution time per test: {avg_time:.3f}s") + print(f"Fastest test execution time: {min_time:.3f}s") + print(f"Slowest test execution time: {max_time:.3f}s") + + + # Memory Summary + if MEASURE_MEMORY: # Check final flag state + if test_memory_usages: + # ... (same calculation and printing) ... + total_mem_kb = sum(test_memory_usages) + avg_mem_kb = total_mem_kb / len(test_memory_usages) + max_mem_kb = max(test_memory_usages) + min_mem_kb = min(test_memory_usages) + total_mem_mb = total_mem_kb / 1024 + total_mem_gb = total_mem_mb / 1024 + if total_mem_gb > 1: total_mem_str = f"{total_mem_gb:.2f} GB" + elif total_mem_mb > 1: total_mem_str = f"{total_mem_mb:.2f} MB" + else: total_mem_str = f"{total_mem_kb} KB" + print("\n--- Memory Usage Summary (Passed Tests) ---") + print(f"Cumulative peak memory (passed tests): {total_mem_str} ({total_mem_kb} KB)") + print(f"Average peak memory per test: {avg_mem_kb:.1f} KB") + print(f"Lowest peak memory usage: {min_mem_kb} KB") + print(f"Highest peak memory usage: {max_mem_kb} KB") + + else: + print("\n--- Memory Usage Summary (Passed Tests) ---") + print("(No memory usage data collected for passed tests - check warnings)") + + + # Final Result + if failed_tests: + print_color("\n--- Failed Test Cases ---", COLOR_RED) + for test in failed_tests: + print(f" - {test}") + sys.exit(1) + # ... (rest of exit logic remains the same) ... + elif results['passed'] == 0 and results['skipped'] == total_defined: + print_color("\nWarning: No tests were executed (all skipped).", COLOR_YELLOW) + sys.exit(0) + elif results['passed'] > 0 : + print_color("\nAll executed tests passed successfully!", COLOR_GREEN) + sys.exit(0) + else: + print_color("\nNo tests passed.", COLOR_YELLOW) + sys.exit(1 if results['failed'] > 0 else 0) \ No newline at end of file