solve hw-9

This commit is contained in:
JamesFlare1212
2025-04-15 22:08:11 -04:00
parent 9ec5d3d32c
commit a109046498
17 changed files with 1498 additions and 7 deletions

16
.vscode/launch.json vendored
View File

@@ -135,6 +135,22 @@
"MIMode": "gdb", "MIMode": "gdb",
"miDebuggerPath": "/usr/bin/gdb", "miDebuggerPath": "/usr/bin/gdb",
"preLaunchTask": "C/C++: g++ build active file" "preLaunchTask": "C/C++: g++ build active file"
},
{
"name": "nytrends",
"type": "cppdbg",
"request": "launch",
"program": "${fileDirname}/${fileBasenameNoExtension}",
"args": [
"inputs/input_large9.json",
"output.txt",
"hashtag"
],
"cwd": "${fileDirname}",
"environment": [],
"MIMode": "gdb",
"miDebuggerPath": "/usr/bin/gdb",
"preLaunchTask": "C/C++: g++ build active file"
} }
] ]
} }

View File

@@ -78,6 +78,10 @@
"unordered_set": "cpp", "unordered_set": "cpp",
"regex": "cpp", "regex": "cpp",
"cinttypes": "cpp", "cinttypes": "cpp",
"__node_handle": "cpp" "__node_handle": "cpp",
"shared_mutex": "cpp",
"cfenv": "cpp",
"locale": "cpp",
"filesystem": "cpp"
} }
} }

View File

@@ -0,0 +1,4 @@
#pragma once
// Number of best videos retained (and printed) for each hashtag or sound.
constexpr int TOP_K_CANDIDATES = 3;
// Maximum number of hashtags or sounds kept for the final report.
constexpr int TOP_N_OUTPUT = 20;

View File

@@ -0,0 +1,34 @@
#pragma once
#include <string>
#include "TopKVideoHolder.h"
// Running statistics collected for a single hashtag.
struct HashtagInfo {
    // Hashtag text; points at a string stored elsewhere (not owned here).
    const std::string* name = nullptr;
    // Sum of the play counts of every video that used the hashtag.
    long totalViews = 0;
    // Number of times the hashtag was encountered.
    int usageCount = 0;
    // Best videos seen so far for this hashtag.
    TopKVideoHolder topVideos;

    HashtagInfo() = default;

    // Starts an empty record for the hashtag `n`; the counters keep their
    // in-class defaults of zero.
    explicit HashtagInfo(const std::string* n) : name(n) {}
};
// Ranking order for hashtags: returns true when `a` should be listed before
// `b`. Keys, in order: higher usage count, higher total views, then the
// lexicographically smaller name (a named entry precedes an unnamed one).
struct CompareHashtagPtr {
    bool operator()(const HashtagInfo* a, const HashtagInfo* b) const {
        if (a->usageCount != b->usageCount) {
            return b->usageCount < a->usageCount;
        }
        if (a->totalViews != b->totalViews) {
            return b->totalViews < a->totalViews;
        }
        const bool lhsNamed = (a->name != nullptr);
        const bool rhsNamed = (b->name != nullptr);
        if (lhsNamed && rhsNamed) {
            return *a->name < *b->name;
        }
        // At least one name is missing: only a named `a` can come first.
        return lhsNamed;
    }
};
// Comparator for the bounded "top N" priority queue of hashtags.
//
// std::priority_queue keeps at top() the element that the comparator orders
// after every other element. This comparator returns true when `a` ranks
// AHEAD of `b`, so top() is always the lowest-ranked hashtag currently kept,
// i.e. the one to evict when a better candidate arrives.
//
// The name tie-break must point in the same direction as the usage/view keys
// (and as CompareHashtagPtr). Otherwise, for entries tied on usage count and
// views, top() would be the best-named entry instead of the worst and the
// eviction test would compare against the wrong element. The comparator also
// has to be a strict weak ordering, so two unnamed entries compare equivalent
// (false in both directions).
struct CompareHashtagPtrForHeap {
    bool operator()(const HashtagInfo* a, const HashtagInfo* b) const {
        if (a->usageCount != b->usageCount) return a->usageCount > b->usageCount;
        if (a->totalViews != b->totalViews) return a->totalViews > b->totalViews;
        if (a->name && b->name) return *a->name < *b->name;
        // At least one name is missing: a named entry ranks ahead of an
        // unnamed one; two unnamed entries are equivalent.
        return a->name != nullptr;
    }
};

View File

@@ -1,7 +1,7 @@
HOMEWORK 9: Tiktok Trends HOMEWORK 9: Tiktok Trends
NAME: < insert name > NAME: Jinshan Zhou
COLLABORATORS AND OTHER RESOURCES: COLLABORATORS AND OTHER RESOURCES:
@@ -10,17 +10,21 @@ List the names of everyone you talked to about this assignment
LMS, etc.), and all of the resources (books, online reference LMS, etc.), and all of the resources (books, online reference
material, etc.) you consulted in completing this assignment. material, etc.) you consulted in completing this assignment.
< insert collaborators / resources > A lot, like using Top K for better sorting. Difference IO cases (not useful).
StringCache (not useful). shared_ptr (did not really help), and many websites and cases
that I don't remember anymore.
Remember: Your implementation for this assignment must be done on your Remember: Your implementation for this assignment must be done on your
own, as described in "Academic Integrity for Homework" handout. own, as described in "Academic Integrity for Homework" handout.
ESTIMATE OF # OF HOURS SPENT ON THIS ASSIGNMENT: < insert # hours > ESTIMATE OF # OF HOURS SPENT ON THIS ASSIGNMENT: over 20 hr: 8 hr to complete it,
and more than 10 hr just for optimization.
MISC. COMMENTS TO GRADER: MISC. COMMENTS TO GRADER:
(optional, please be concise!) The program is a bit messy, since I tried too many techniques. Some were breaking
changes, so I had to patch around them. I am also a bit lost in my own code.
## Reflection and Self Assessment ## Reflection and Self Assessment
@@ -32,5 +36,11 @@ What parts of the assignment did you find challenging? Is there anything that
finally "clicked" for you in the process of working on this assignment? How well finally "clicked" for you in the process of working on this assignment? How well
did the development and testing process go for you? did the development and testing process go for you?
< insert reflection > This was definitely the most challenging assignment I've ever seen, and the
hard part was identifying performance issues and optimizing them. It was not
easy, I used various tools like perf and found that the JSON part had the biggest
overhead. Optimizing it showed immediate results, but then I hit a bottleneck,
so I started using various schemes, and most of them didn't work. Then I had to
push back and design the business process from scratch and finally got inspired
by my professor to use unordered set to get the best performance
(last 0.1 seconds).

View File

@@ -0,0 +1,35 @@
#pragma once
#include <string>
#include "TopKVideoHolder.h"
// Running statistics collected for a single sound (music track).
struct SoundInfo {
    // Identifying strings; they point at strings stored elsewhere (not owned here).
    const std::string* musicId = nullptr;
    const std::string* musicName = nullptr;
    const std::string* musicAuthor = nullptr;
    // Sum of the play counts of every video that used the sound.
    long totalViews = 0;
    // Best videos seen so far for this sound.
    TopKVideoHolder topVideos;

    SoundInfo() = default;

    // Starts an empty record for the given track; totalViews keeps its
    // in-class default of zero.
    SoundInfo(const std::string* id, const std::string* name, const std::string* author)
        : musicId(id), musicName(name), musicAuthor(author) {}
};
// Ranking order for sounds: returns true when `a` should be listed before
// `b`. Keys, in order: higher total views, then the lexicographically smaller
// music id (an entry with an id precedes one without).
struct CompareSoundPtr {
    bool operator()(const SoundInfo* a, const SoundInfo* b) const {
        if (a->totalViews != b->totalViews) {
            return b->totalViews < a->totalViews;
        }
        const bool lhsHasId = (a->musicId != nullptr);
        const bool rhsHasId = (b->musicId != nullptr);
        if (lhsHasId && rhsHasId) {
            return *a->musicId < *b->musicId;
        }
        // At least one id is missing: only an `a` that has an id can come first.
        return lhsHasId;
    }
};
// Comparator for the bounded "top N" priority queue of sounds.
//
// std::priority_queue keeps at top() the element that the comparator orders
// after every other element. This comparator returns true when `a` ranks
// AHEAD of `b`, so top() is always the lowest-ranked sound currently kept,
// i.e. the one to evict when a better candidate arrives.
//
// The music-id tie-break must point in the same direction as the view-count
// key (and as CompareSoundPtr). Otherwise, for sounds tied on views, top()
// would be the best entry of the tie instead of the worst and the eviction
// test would compare against the wrong element. The comparator also has to be
// a strict weak ordering, so two entries without an id compare equivalent.
struct CompareSoundPtrForHeap {
    bool operator()(const SoundInfo* a, const SoundInfo* b) const {
        if (a->totalViews != b->totalViews) return a->totalViews > b->totalViews;
        if (a->musicId && b->musicId) return *a->musicId < *b->musicId;
        // At least one id is missing: an entry with an id ranks ahead of one
        // without; two entries without ids are equivalent.
        return a->musicId != nullptr;
    }
};

View File

@@ -0,0 +1,15 @@
#include "StringInterner.h"
// Returns a pointer to the pooled copy of `str`, adding it to the pool first
// when no equal string is stored yet.
const std::string* StringInterner::intern(const std::string& str) {
    std::unordered_set<std::string>::iterator slot = pool.insert(str).first;
    const std::string& pooled = *slot;
    return &pooled;
}
// Rvalue overload: hands `str` to the pool by move and returns a pointer to
// the pooled string (the already-stored one when an equal string exists).
const std::string* StringInterner::intern(std::string&& str) {
    std::unordered_set<std::string>::iterator slot = pool.insert(std::move(str)).first;
    const std::string& pooled = *slot;
    return &pooled;
}
// Returns the pooled empty string, interning it on first use.
const std::string* StringInterner::getEmptyString() {
    return intern(std::string());
}

View File

@@ -0,0 +1,15 @@
#pragma once
#include <string>
#include <unordered_set>
#include <utility>
// Pool of unique strings. Callers receive pointers to the pooled copies.
class StringInterner {
public:
    // Returns a pointer to the pooled string equal to `str`, inserting it if needed.
    const std::string* intern(const std::string& str);
    // Same as above, but may move from `str`.
    const std::string* intern(std::string&& str);
    // Returns a pointer to the pooled empty string.
    const std::string* getEmptyString();

private:
    std::unordered_set<std::string> pool;
};

View File

@@ -0,0 +1,18 @@
#pragma once
#include <string>
#include <functional>
// Hashes a string pointer by the content of the string it points to, so that
// two different pointers to equal strings land in the same bucket.
//
// A null pointer hashes to 0 instead of being dereferenced; this keeps the
// hasher consistent with StringPtrEqual, which treats null as a valid key
// that is equal only to another null.
struct StringPtrHash {
    size_t operator()(const std::string* s) const {
        if (s == nullptr) return 0;
        return std::hash<std::string>()(*s);
    }
};
// Equality for string pointers based on string content. Two null pointers are
// equal; a null pointer never equals a non-null one.
struct StringPtrEqual {
    bool operator()(const std::string* a, const std::string* b) const {
        if (a != nullptr && b != nullptr) {
            // Same address is a cheap shortcut before comparing contents.
            return a == b || *a == *b;
        }
        // At least one side is null: equal only when both are.
        return a == b;
    }
};

View File

@@ -0,0 +1,32 @@
#include "TopKVideoHolder.h"
// Offers `video` to the holder. While fewer than K videos are stored it is
// always kept; afterwards it replaces pq.top() only when VideoCompareWorse
// orders it ahead of that element.
void TopKVideoHolder::add(const VideoInfo& video) {
    if (pq.size() >= K) {
        if (!VideoCompareWorse()(video, pq.top())) {
            return;  // not better than the current top(): ignore it
        }
        pq.pop();
    }
    pq.push(video);
}
std::vector<VideoInfo> TopKVideoHolder::getSortedVideos() {
std::vector<VideoInfo> sortedVideos;
size_t current_size = pq.size();
if (current_size == 0) return sortedVideos;
sortedVideos.reserve(current_size);
while (!pq.empty()) {
sortedVideos.push_back(pq.top());
pq.pop();
}
std::sort(sortedVideos.begin(), sortedVideos.end(), VideoInfo::compareForFinalSort);
return sortedVideos;
}
// True when no video is currently stored.
bool TopKVideoHolder::empty() const {
    return pq.empty();
}
// Number of videos currently stored.
size_t TopKVideoHolder::size() const {
    return pq.size();
}

View File

@@ -0,0 +1,19 @@
#pragma once
#include <vector>
#include <queue>
#include <algorithm>
#include "VideoInfo.h"
#include "Constants.h"
// Keeps at most K (= TOP_K_CANDIDATES) videos, chosen with VideoCompareWorse.
class TopKVideoHolder {
public:
    // Offers a video; it is stored if there is room or if it beats the current top().
    void add(const VideoInfo& video);
    // Returns the stored videos in final report order and empties the holder.
    std::vector<VideoInfo> getSortedVideos();
    bool empty() const;
    size_t size() const;

private:
    // Capacity of the holder.
    static const size_t K = TOP_K_CANDIDATES;
    // Heap ordered by VideoCompareWorse; top() is the replacement candidate.
    std::priority_queue<VideoInfo, std::vector<VideoInfo>, VideoCompareWorse> pq;
};

265
hws/tiktok_trends/Utils.cpp Normal file
View File

@@ -0,0 +1,265 @@
#include "Utils.h"
#include <algorithm> // For std::min
#include <cctype>
#include <cstring>
#include <iostream> // For potential cerr usage, although not directly in these functions
#include <limits>   // For std::numeric_limits (overflow check in parseLongLong)
// Reads a double-quoted string starting at str[pos].
//
// On success `value` receives the raw text between the quotes (escape
// sequences are kept verbatim, not decoded), `pos` is left just past the
// closing quote and true is returned. Returns false when str[pos] is not a
// quote or when no closing quote exists; `value` is empty in that case.
//
// A backslash escapes the following character, so an escaped quote (\") is
// part of the value and does not terminate it.
bool parseQuotedStringValue(const std::string& str, size_t& pos, std::string& value) {
    const size_t strLen = str.length();
    value.clear();
    if (pos >= strLen || str[pos] != '"') return false;
    ++pos;
    const size_t startPos = pos;
    const char* strData = str.data();
    while (pos < strLen && strData[pos] != '"') {
        // Skip the escaped character together with its backslash; a trailing
        // lone backslash just advances by one and runs off the end below.
        pos += (strData[pos] == '\\' && pos + 1 < strLen) ? 2 : 1;
    }
    if (pos >= strLen) return false;  // unterminated string
    value.assign(strData + startPos, pos - startPos);
    ++pos;
    return true;
}
// Reads a bare (unquoted) token starting at str[pos]. The token ends at the
// first ',', '}', ']' or whitespace character, or at the end of the string.
// On success `value` holds the token and `pos` is left on the terminator.
// Returns false, with `value` empty, when the token would be empty.
bool parseUnquotedValue(const std::string& str, size_t& pos, std::string& value) {
    value.clear();
    const size_t begin = pos;
    const size_t total = str.length();
    for (; pos < total; ++pos) {
        const char ch = str[pos];
        const bool terminator = ch == ',' || ch == '}' || ch == ']' ||
                                std::isspace(static_cast<unsigned char>(ch));
        if (terminator) break;
    }
    if (pos == begin) return false;
    value.assign(str, begin, pos - begin);
    return true;
}
// Finds the first occurrence of "key": in `line` and parses the value that
// follows it (after optional whitespace). A value starting with a double
// quote is read with parseQuotedStringValue, anything else with
// parseUnquotedValue. Returns false when the key is absent, nothing follows
// it, or the value parser fails.
bool extractValue(const std::string& line, const std::string& key, std::string& value) {
    std::string needle;
    needle.reserve(key.size() + 3);
    needle += '"';
    needle += key;
    needle += "\":";

    const char* const base = line.c_str();
    const char* const hit = std::strstr(base, needle.c_str());
    if (hit == nullptr) return false;

    const size_t total = line.length();
    size_t cursor = static_cast<size_t>(hit - base) + needle.length();
    for (; cursor < total; ++cursor) {
        if (!std::isspace(static_cast<unsigned char>(line[cursor]))) break;
    }
    if (cursor >= total) return false;

    return line[cursor] == '"' ? parseQuotedStringValue(line, cursor, value)
                               : parseUnquotedValue(line, cursor, value);
}
// Finds the first occurrence of "key": in `line` and, when its value is a
// brace-delimited object, copies that object (outer braces included) into
// `subObj`. Braces inside double-quoted strings are ignored, and a quote
// preceded by an unescaped backslash does not open or close a string.
// Returns false, leaving `subObj` untouched, when the key is missing, the
// value does not start with '{', or the braces never balance.
bool extractSubObject(const std::string& line, const std::string& key, std::string& subObj) {
    const std::string needle = "\"" + key + "\":";
    const char* const base = line.c_str();
    const char* const hit = std::strstr(base, needle.c_str());
    if (hit == nullptr) return false;

    const size_t total = line.length();
    size_t cursor = static_cast<size_t>(hit - base) + needle.length();
    while (cursor < total && std::isspace(static_cast<unsigned char>(line[cursor]))) {
        ++cursor;
    }
    if (cursor >= total || line[cursor] != '{') return false;

    const size_t objectStart = cursor++;
    int depth = 1;
    bool insideString = false;
    // True when the previous character was a backslash that itself was not escaped.
    bool pendingEscape = false;
    for (; cursor < total && depth > 0; ++cursor) {
        const char ch = line[cursor];
        if (ch == '"' && !pendingEscape) {
            insideString = !insideString;
        } else if (!insideString) {
            if (ch == '{') {
                ++depth;
            } else if (ch == '}') {
                --depth;
            }
        }
        pendingEscape = (ch == '\\') && !pendingEscape;
    }

    if (depth != 0) return false;
    subObj.assign(line, objectStart, cursor - objectStart);
    return true;
}
// Parses `s` as a base-10 integer with an optional leading '-'.
//
// `result` is reset to 0 first and is only overwritten on success. Returns
// false when `s` is empty, has no digits, contains any non-digit character,
// or when the magnitude does not fit in a `long` (previously this case
// overflowed a signed integer, which is undefined behaviour).
bool parseLongLong(const std::string& s, long& result) {
    result = 0;
    if (s.empty()) return false;

    const char* ptr = s.c_str();
    const bool negative = (*ptr == '-');
    if (negative) ++ptr;
    if (!*ptr) return false;  // a lone "-" is not a number

    const long maxValue = std::numeric_limits<long>::max();
    long magnitude = 0;
    for (; *ptr; ++ptr) {
        if (*ptr < '0' || *ptr > '9') return false;
        const long digit = *ptr - '0';
        // magnitude * 10 + digit must stay <= maxValue; test before multiplying.
        if (magnitude > (maxValue - digit) / 10) return false;
        magnitude = magnitude * 10 + digit;
    }

    result = negative ? -magnitude : magnitude;
    return true;
}
bool parseLineForHashtags(const std::string& line, int inputOrder, StringInterner& interner,
VideoInfo& outVideo, std::string& outText)
{
outText.clear();
std::string id_str, coverUrl_str, webVideoUrl_str, playCount_str;
if (!extractValue(line, "id", id_str) || id_str.empty()) return false;
long playCount = 0;
if (extractValue(line, "playCount", playCount_str)) {
parseLongLong(playCount_str, playCount);
}
extractValue(line, "text", outText);
extractValue(line, "webVideoUrl", webVideoUrl_str);
std::string videoMetaSub;
if (extractSubObject(line, "videoMeta", videoMetaSub)) {
extractValue(videoMetaSub, "coverUrl", coverUrl_str);
}
outVideo = VideoInfo(
interner.intern(std::move(id_str)),
interner.intern(std::move(coverUrl_str)),
interner.intern(std::move(webVideoUrl_str)),
playCount,
inputOrder
);
return true;
}
bool parseLineForSounds(const std::string& line, int inputOrder, StringInterner& interner,
VideoInfo& outVideo,
const std::string*& outMusicIdPtr,
const std::string*& outMusicNamePtr,
const std::string*& outMusicAuthorPtr)
{
std::string id_str, coverUrl_str, webVideoUrl_str, playCount_str;
std::string musicId_str, musicName_str, musicAuthor_str;
if (!extractValue(line, "id", id_str) || id_str.empty()) return false;
long playCount = 0;
if (extractValue(line, "playCount", playCount_str)) {
parseLongLong(playCount_str, playCount);
}
std::string musicMetaSub;
if (extractSubObject(line, "musicMeta", musicMetaSub)) {
extractValue(musicMetaSub, "musicId", musicId_str);
extractValue(musicMetaSub, "musicName", musicName_str);
extractValue(musicMetaSub, "musicAuthor", musicAuthor_str);
}
if (musicId_str.empty()) {
return false;
}
extractValue(line, "webVideoUrl", webVideoUrl_str);
std::string videoMetaSub;
if (extractSubObject(line, "videoMeta", videoMetaSub)) {
extractValue(videoMetaSub, "coverUrl", coverUrl_str);
}
outVideo = VideoInfo(
interner.intern(std::move(id_str)),
interner.intern(std::move(coverUrl_str)),
interner.intern(std::move(webVideoUrl_str)),
playCount,
inputOrder
);
outMusicIdPtr = interner.intern(std::move(musicId_str));
outMusicNamePtr = interner.intern(std::move(musicName_str));
outMusicAuthorPtr = interner.intern(std::move(musicAuthor_str));
return true;
}
void extractHashtags(const std::string& text,
std::unordered_map<const std::string*, HashtagInfo, StringPtrHash, StringPtrEqual>& hashtagData,
StringInterner& interner,
const VideoInfo& video)
{
const size_t textLen = text.length();
const char* textData = text.data();
size_t pos = 0;
std::string tag_buffer;
tag_buffer.reserve(50);
while (pos < textLen) {
while (pos < textLen && textData[pos] != '#') {
pos++;
}
if (pos >= textLen) break;
size_t start = pos + 1;
if (start >= textLen) break;
size_t end = start;
while (end < textLen && (std::isalnum(static_cast<unsigned char>(textData[end])) || textData[end] == '_')) {
end++;
}
if (end > start) {
tag_buffer.assign(textData + start, end - start);
const std::string* hashtagPtr = interner.intern(tag_buffer);
typedef std::unordered_map<const std::string*, HashtagInfo, StringPtrHash, StringPtrEqual> HashtagMapType;
HashtagMapType::iterator it = hashtagData.find(hashtagPtr);
if (it == hashtagData.end()) {
std::pair<HashtagMapType::iterator, bool> emplace_result =
hashtagData.emplace(hashtagPtr, HashtagInfo(hashtagPtr));
it = emplace_result.first;
}
it->second.usageCount++;
it->second.totalViews += video.playCount;
it->second.topVideos.add(video);
}
pos = end;
}
}
void extractSortAndPrintTop3Videos(std::ofstream& fout, TopKVideoHolder& topVideos) {
std::vector<VideoInfo> sortedTopVideos = topVideos.getSortedVideos();
int videosToPrint = std::min(static_cast<int>(sortedTopVideos.size()), TOP_K_CANDIDATES);
for (int i = 0; i < videosToPrint; ++i) {
const VideoInfo& video = sortedTopVideos[i];
fout << "cover url: " << (video.coverUrl && !video.coverUrl->empty() ? *video.coverUrl : "null") << "\n";
fout << "web video url: " << (video.webVideoUrl && !video.webVideoUrl->empty() ? *video.webVideoUrl : "null") << "\n";
}
}

33
hws/tiktok_trends/Utils.h Normal file
View File

@@ -0,0 +1,33 @@
#pragma once
#include <string>
#include <vector>
#include <unordered_map>
#include <fstream>
#include "StringInterner.h"
#include "VideoInfo.h"
#include "HashtagInfo.h"
#include "SoundInfo.h"
#include "StringPtrUtils.h" // Needed for HashtagMapType/SoundMapType in function signatures
// Reads a double-quoted string starting at str[pos]; on success stores the
// text between the quotes in `value` and advances `pos` past the closing quote.
bool parseQuotedStringValue(const std::string& str, size_t& pos, std::string& value);
// Reads an unquoted token starting at str[pos], stopping at ',', '}', ']' or
// whitespace; returns false when the token is empty.
bool parseUnquotedValue(const std::string& str, size_t& pos, std::string& value);
// Finds the first "key": in `line` and parses the value after it (quoted or
// unquoted) into `value`; returns false when the key or value is missing.
bool extractValue(const std::string& line, const std::string& key, std::string& value);
// Finds the first "key": in `line` and copies its brace-delimited object
// value (braces included) into `subObj`; returns false otherwise.
bool extractSubObject(const std::string& line, const std::string& key, std::string& subObj);
// Parses an optionally '-'-prefixed decimal integer into `result` (reset to 0
// first); returns false for empty input or any non-digit character.
bool parseLongLong(const std::string& s, long& result);
// Parses one input line for hashtag mode: fills `outVideo` and the "text"
// value in `outText`; returns false when the line has no non-empty "id".
bool parseLineForHashtags(const std::string& line, int inputOrder, StringInterner& interner,
VideoInfo& outVideo, std::string& outText);
// Parses one input line for sound mode: fills `outVideo` and the interned
// music id / name / author pointers; returns false when the "id" or the
// "musicId" is missing or empty.
bool parseLineForSounds(const std::string& line, int inputOrder, StringInterner& interner,
VideoInfo& outVideo,
const std::string*& outMusicIdPtr,
const std::string*& outMusicNamePtr,
const std::string*& outMusicAuthorPtr);
// Finds every '#'-prefixed tag in `text` and updates its HashtagInfo entry in
// `hashtagData` (usage count, total views, top videos) with `video`.
void extractHashtags(const std::string& text,
std::unordered_map<const std::string*, HashtagInfo, StringPtrHash, StringPtrEqual>& hashtagData,
StringInterner& interner,
const VideoInfo& video);
// Writes the cover URL and web URL of the best videos held in `topVideos`
// (at most TOP_K_CANDIDATES) to `fout`.
void extractSortAndPrintTop3Videos(std::ofstream& fout, TopKVideoHolder& topVideos);

View File

@@ -0,0 +1,37 @@
#pragma once
#include <string>
#include <algorithm>
#include "Constants.h"
// Lightweight description of one video. The string members are non-owning
// pointers; the default state has all pointers null, zero plays and an input
// order of -1.
struct VideoInfo {
    const std::string* videoId = nullptr;
    const std::string* coverUrl = nullptr;
    const std::string* webVideoUrl = nullptr;
    long playCount = 0;
    // Position of the video in the input stream.
    int inputOrder = -1;

    VideoInfo() = default;

    VideoInfo(const std::string* id, const std::string* cover, const std::string* web,
              long plays, int order)
        : videoId(id), coverUrl(cover), webVideoUrl(web), playCount(plays), inputOrder(order) {}

    // Report order: more plays first, then smaller video id (only when both
    // ids are present and differ), then earlier input order.
    static bool compareForFinalSort(const VideoInfo& a, const VideoInfo& b) {
        if (a.playCount != b.playCount) {
            return b.playCount < a.playCount;
        }
        const bool bothHaveIds = (a.videoId != nullptr) && (b.videoId != nullptr);
        if (bothHaveIds) {
            const int idOrder = a.videoId->compare(*b.videoId);
            if (idOrder != 0) {
                return idOrder < 0;
            }
        }
        return a.inputOrder < b.inputOrder;
    }

    // Orders a video with MORE plays as "less"; ties go to the earlier input order.
    bool operator<(const VideoInfo& other) const {
        return (playCount != other.playCount) ? (playCount > other.playCount)
                                              : (inputOrder < other.inputOrder);
    }
};
// Returns true when `a` has more plays than `b`, or the same plays and an
// earlier input order. Used as the comparator of the top-K priority queue, so
// top() is the video with the fewest plays (latest input order on ties).
struct VideoCompareWorse {
    bool operator()(const VideoInfo& a, const VideoInfo& b) const {
        const bool samePlays = (a.playCount == b.playCount);
        return samePlays ? (a.inputOrder < b.inputOrder)
                         : (a.playCount > b.playCount);
    }
};

256
hws/tiktok_trends/main.cpp Normal file
View File

@@ -0,0 +1,256 @@
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <queue>
#include <unordered_map>
#include <unordered_set>
#include <algorithm>
#include <functional>
#include <utility>
#include <cstring>
#include "Constants.h"
#include "StringInterner.h"
#include "StringPtrUtils.h"
#include "VideoInfo.h"
#include "TopKVideoHolder.h"
#include "HashtagInfo.h"
#include "SoundInfo.h"
#include "Utils.h"
bool processHashtags(const std::string& filename, std::ofstream& outputFile) {
std::ifstream inputFile(filename);
if (!inputFile.is_open()) {
std::cerr << "Cannot open input file: " << filename << std::endl;
return false;
}
StringInterner interner;
std::unordered_map<const std::string*, HashtagInfo, StringPtrHash, StringPtrEqual> hashtagData;
hashtagData.reserve(250000);
std::string line;
int inputOrderCounter = 0;
VideoInfo currentVideo;
std::string text_buffer;
while (std::getline(inputFile, line)) {
if (line.length() < 10) continue;
inputOrderCounter++;
if (parseLineForHashtags(line, inputOrderCounter, interner, currentVideo, text_buffer)) {
if (!text_buffer.empty()) {
extractHashtags(text_buffer, hashtagData, interner, currentVideo);
}
}
}
inputFile.close();
std::priority_queue<HashtagInfo*, std::vector<HashtagInfo*>, CompareHashtagPtrForHeap> top20Hashtags;
typedef std::unordered_map<const std::string*, HashtagInfo, StringPtrHash, StringPtrEqual> HashtagMapType;
for (HashtagMapType::iterator it = hashtagData.begin(); it != hashtagData.end(); ++it) {
HashtagInfo* currentHashtagPtr = &(it->second);
if (top20Hashtags.size() < TOP_N_OUTPUT) {
top20Hashtags.push(currentHashtagPtr);
} else {
const HashtagInfo* topPtr = top20Hashtags.top();
bool is_better = CompareHashtagPtr()(currentHashtagPtr, topPtr);
if (is_better) {
top20Hashtags.pop();
top20Hashtags.push(currentHashtagPtr);
}
}
}
std::vector<HashtagInfo*> finalTop20;
finalTop20.reserve(top20Hashtags.size());
while (!top20Hashtags.empty()) {
finalTop20.push_back(top20Hashtags.top());
top20Hashtags.pop();
}
std::sort(finalTop20.begin(), finalTop20.end(), CompareHashtagPtr());
outputFile << "trending hashtags:\n\n";
for (size_t i = 0; i < finalTop20.size(); ++i) {
HashtagInfo* currentHashtag = finalTop20[i];
outputFile << "========================\n";
outputFile << "#" << (currentHashtag->name ? *currentHashtag->name : "null") << "\n";
outputFile << "used " << currentHashtag->usageCount << " times\n";
outputFile << currentHashtag->totalViews << " views\n\n";
extractSortAndPrintTop3Videos(outputFile, currentHashtag->topVideos);
outputFile << "========================";
if (i < finalTop20.size() - 1) {
outputFile << "\n";
} else {
outputFile << "\n";
}
}
return true;
}
bool processSounds(const std::string& filename, std::ofstream& outputFile) {
std::ifstream inputFile(filename);
if (!inputFile.is_open()) {
std::cerr << "Cannot open input file: " << filename << std::endl;
return false;
}
StringInterner interner;
std::unordered_map<const std::string*, SoundInfo, StringPtrHash, StringPtrEqual> soundData;
soundData.reserve(50000);
std::string line;
int inputOrderCounter = 0;
VideoInfo currentVideo;
const std::string* musicIdPtr = nullptr;
const std::string* musicNamePtr = nullptr;
const std::string* musicAuthorPtr = nullptr;
while (std::getline(inputFile, line)) {
if (line.length() < 10) continue;
inputOrderCounter++;
musicIdPtr = nullptr;
musicNamePtr = nullptr;
musicAuthorPtr = nullptr;
if (parseLineForSounds(line, inputOrderCounter, interner, currentVideo,
musicIdPtr, musicNamePtr, musicAuthorPtr))
{
if (musicIdPtr == nullptr || musicIdPtr->empty()) {
continue;
}
typedef std::unordered_map<const std::string*, SoundInfo, StringPtrHash, StringPtrEqual> SoundMapType;
SoundMapType::iterator it = soundData.find(musicIdPtr);
if (it == soundData.end()) {
std::pair<SoundMapType::iterator, bool> emplace_result =
soundData.emplace(musicIdPtr, SoundInfo(musicIdPtr, musicNamePtr, musicAuthorPtr));
it = emplace_result.first;
}
it->second.totalViews += currentVideo.playCount;
if (it->second.musicName->empty() && !musicNamePtr->empty()) {
it->second.musicName = musicNamePtr;
}
if (it->second.musicAuthor->empty() && !musicAuthorPtr->empty()) {
it->second.musicAuthor = musicAuthorPtr;
}
it->second.topVideos.add(currentVideo);
}
}
inputFile.close();
std::priority_queue<SoundInfo*, std::vector<SoundInfo*>, CompareSoundPtrForHeap> top20Sounds;
typedef std::unordered_map<const std::string*, SoundInfo, StringPtrHash, StringPtrEqual> SoundMapType;
for (SoundMapType::iterator it = soundData.begin(); it != soundData.end(); ++it) {
SoundInfo* currentSoundPtr = &(it->second);
if (top20Sounds.size() < TOP_N_OUTPUT) {
top20Sounds.push(currentSoundPtr);
} else {
const SoundInfo* topPtr = top20Sounds.top();
bool is_better = CompareSoundPtr()(currentSoundPtr, topPtr);
if (is_better) {
top20Sounds.pop();
top20Sounds.push(currentSoundPtr);
}
}
}
std::vector<SoundInfo*> finalTop20;
finalTop20.reserve(top20Sounds.size());
while (!top20Sounds.empty()) {
finalTop20.push_back(top20Sounds.top());
top20Sounds.pop();
}
std::sort(finalTop20.begin(), finalTop20.end(), CompareSoundPtr());
outputFile << "trending sounds:\n\n";
for (size_t i = 0; i < finalTop20.size(); ++i) {
SoundInfo* currentSound = finalTop20[i];
outputFile << "========================\n";
if (currentSound->musicName == nullptr || currentSound->musicName->empty()) {
outputFile << "\n";
} else {
outputFile << *currentSound->musicName << "\n";
}
outputFile << currentSound->totalViews << " views\n";
if (currentSound->musicAuthor == nullptr || currentSound->musicAuthor->empty()) {
outputFile << "\n";
} else {
outputFile << *currentSound->musicAuthor << "\n";
}
outputFile << "music id: " << (currentSound->musicId && !currentSound->musicId->empty() ? *currentSound->musicId : "null") << "\n";
if (!currentSound->topVideos.empty()) {
outputFile << "\n";
}
extractSortAndPrintTop3Videos(outputFile, currentSound->topVideos);
outputFile << "========================";
if (i < finalTop20.size() - 1) {
outputFile << "\n";
} else {
outputFile << "\n";
}
}
return true;
}
// Entry point. Expects three arguments: input file, output file and mode
// ("hashtag" or "sound"). Returns 0 on success and 1 on a usage error, an
// unopenable output file, an invalid mode, or a processing failure.
int main(int argc, char* argv[]) {
    if (argc != 4) {
        std::cerr << "Usage: nytrends.exe <input.json> <output.txt> <mode>\n";
        std::cerr << "Mode can be 'hashtag' or 'sound'\n";
        return 1;
    }

    const std::string inputFileName(argv[1]);
    const std::string outputFileName(argv[2]);
    const std::string mode(argv[3]);

    std::ofstream outputFile(outputFileName);
    if (!outputFile.is_open()) {
        std::cerr << "Error: Cannot open output file " << outputFileName << std::endl;
        return 1;
    }

    std::ios_base::sync_with_stdio(false);

    const bool hashtagMode = (mode == "hashtag");
    if (!hashtagMode && mode != "sound") {
        std::cerr << "Error: Invalid mode '" << mode << "'. Must be 'hashtag' or 'sound'." << std::endl;
        outputFile.close();
        return 1;
    }

    const bool success = hashtagMode ? processHashtags(inputFileName, outputFile)
                                     : processSounds(inputFileName, outputFile);
    outputFile.close();
    return success ? 0 : 1;
}

View File

@@ -0,0 +1,242 @@
trending hashtags:
========================
#fyp
used 7600 times
261199234341 views
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/567504ab3e4648dea968213ce979f281?x-expires=1700449200&x-signature=bjGEgY4bdEVOMMHQa2S0qrzNCQY%3D
web video url: https://www.tiktok.com/@bellapoarch/video/6862153058223197445
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/567504ab3e4648dea968213ce979f281?x-expires=1700449200&x-signature=bjGEgY4bdEVOMMHQa2S0qrzNCQY%3D
web video url: https://www.tiktok.com/@bellapoarch/video/6862153058223197445
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/oAJCgD1khIANGRcTLhqQZNCi3ohAuAzoyEdIaf?x-expires=1700449200&x-signature=hu1Kg0Cpz%2BzVRXqYkv%2Fl6E8%2Ftgk%3D
web video url: https://www.tiktok.com/@tool_tips/video/7212981630904864005
========================
========================
#foryou
used 2765 times
92282640558 views
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/c4c7c98ecb5f4a8980ed7d58cdea2df3_1676378432?x-expires=1700449200&x-signature=QIchR40Etr%2BAjbAuzYbwTKnD7dA%3D
web video url: https://www.tiktok.com/@gorillatiks/video/7199990500512894213
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/d57bd10bd2594b148d48c5443d5571b0?x-expires=1700449200&x-signature=Z%2FTgQwhQ9eSmRMF3cBmH%2BdVHve8%3D
web video url: https://www.tiktok.com/@honeycats77/video/7190528800352980267
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/d57bd10bd2594b148d48c5443d5571b0?x-expires=1700449200&x-signature=Z%2FTgQwhQ9eSmRMF3cBmH%2BdVHve8%3D
web video url: https://www.tiktok.com/@honeycats77/video/7190528800352980267
========================
========================
#viral
used 1759 times
59270543842 views
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/o4n0aDMCxQPkleFE5JnbeaoIw9uEBRQiTkIzAB?x-expires=1700449200&x-signature=zOxX4QIMqL%2BNOyl6R57PLiVKb%2BE%3D
web video url: https://www.tiktok.com/@dada_ahoufe_/video/7247202774696447238
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/o4n0aDMCxQPkleFE5JnbeaoIw9uEBRQiTkIzAB?x-expires=1700449200&x-signature=zOxX4QIMqL%2BNOyl6R57PLiVKb%2BE%3D
web video url: https://www.tiktok.com/@dada_ahoufe_/video/7247202774696447238
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/oYRUDAt9kFIA2SIwCWomEVfha623AyrLzxgaAo?x-expires=1700449200&x-signature=xVeyOReZuykD9rFS4KFcN%2FFL44g%3D
web video url: https://www.tiktok.com/@carrosseriereparation/video/7217942797360303365
========================
========================
#makeuptutorial
used 1709 times
22311707100 views
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/ea183fe6de594a319ba917d1ffbff11b?x-expires=1700503200&x-signature=9L4ypK162uI%2BirECcDcFqctjvn8%3D
web video url: https://www.tiktok.com/@dollievision/video/7208244986666585386
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/ea183fe6de594a319ba917d1ffbff11b?x-expires=1700503200&x-signature=9L4ypK162uI%2BirECcDcFqctjvn8%3D
web video url: https://www.tiktok.com/@dollievision/video/7208244986666585386
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/5461c70dd8ee4a0d891e7f2529f6b8ea_1670789072?x-expires=1700503200&x-signature=TqqnBqyBh5cnb150Ri0jXfwaL9s%3D
web video url: https://www.tiktok.com/@alicekingmakeup/video/7175984394950167813
========================
========================
#couplestiktok
used 1610 times
14706422100 views
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/693775bbddac4df4ad008ff880041fbc?x-expires=1700503200&x-signature=UU8VVoLrIaXIVFnYLf3jl8IYO%2BE%3D
web video url: https://www.tiktok.com/@misiaaa621/video/7149368989611773227
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/693775bbddac4df4ad008ff880041fbc?x-expires=1700503200&x-signature=UU8VVoLrIaXIVFnYLf3jl8IYO%2BE%3D
web video url: https://www.tiktok.com/@misiaaa621/video/7149368989611773227
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/4b83dfa1cc0c47318e408a6bcde34bb6_1677549193?x-expires=1700503200&x-signature=%2FN8FYhRpVldGSaxbP6dgYeEroqI%3D
web video url: https://www.tiktok.com/@debbiekval/video/7205018880622857515
========================
========================
#lifehack
used 1585 times
33681856600 views
cover url: https://p16-sign-sg.tiktokcdn.com/tos-alisg-p-0037/d6a1c1c323614919975fad3ee1c1ef9e~tplv-dmt-logom:tos-alisg-i-0000/4124427fcd3045968ac1c3136bd92d6c.image?x-expires=1700452800&x-signature=qCaN1hrF7pqQ0kvZJnlFnc9jI6Q%3D
web video url: https://www.tiktok.com/@tresorfie/video/7039091515863403778
cover url: https://p16-sign-sg.tiktokcdn.com/tos-alisg-p-0037/d6a1c1c323614919975fad3ee1c1ef9e~tplv-dmt-logom:tos-alisg-i-0000/4124427fcd3045968ac1c3136bd92d6c.image?x-expires=1700449200&x-signature=WSl3XKN1HPXy7jpguj8v0AaI3FU%3D
web video url: https://www.tiktok.com/@tresorfie/video/7039091515863403778
cover url: https://p16-sign-sg.tiktokcdn.com/tos-alisg-p-0037/501627c6b36849e282740c764611f2a7_1634994542~tplv-dmt-logom:tos-alisg-pv-0037/f3273e6f3e92421d860be8f5e72ac0bd.image?x-expires=1700452800&x-signature=DkwRLgyyY5ec0757c1hCq372yJM%3D
web video url: https://www.tiktok.com/@issei0806/video/7022248055625846018
========================
========================
#funnyvideos
used 1573 times
67029374400 views
cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=HTd5Yy2XA1y%2Bn0Gy2PnX9t%2FNpw4%3D
web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731
cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=HTd5Yy2XA1y%2Bn0Gy2PnX9t%2FNpw4%3D
web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=vv04JjjwKgR1P3t117v%2B5HMvnpI%3D
web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731
========================
========================
#foryoupage
used 1550 times
49067115500 views
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/d57bd10bd2594b148d48c5443d5571b0?x-expires=1700449200&x-signature=Z%2FTgQwhQ9eSmRMF3cBmH%2BdVHve8%3D
web video url: https://www.tiktok.com/@honeycats77/video/7190528800352980267
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/d57bd10bd2594b148d48c5443d5571b0?x-expires=1700449200&x-signature=Z%2FTgQwhQ9eSmRMF3cBmH%2BdVHve8%3D
web video url: https://www.tiktok.com/@honeycats77/video/7190528800352980267
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/oYRUDAt9kFIA2SIwCWomEVfha623AyrLzxgaAo?x-expires=1700449200&x-signature=xVeyOReZuykD9rFS4KFcN%2FFL44g%3D
web video url: https://www.tiktok.com/@carrosseriereparation/video/7217942797360303365
========================
========================
#newyorkcity
used 1545 times
8642836600 views
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/08b82fdf19d5468e91a032b30e527861_1692785637?x-expires=1700452800&x-signature=%2F5sW2i0xTGXJJj2PKbwI6VXywWI%3D
web video url: https://www.tiktok.com/@erikconover/video/7270458709065731370
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/1ea2545bde2645ec8f1106a4b9de6c2e_1648602515?x-expires=1700452800&x-signature=HNr6UCQsc4q0m%2FFShx%2FJJNWb1Jg%3D
web video url: https://www.tiktok.com/@thekatieromero/video/7080693879141485870
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/c898b993f308477e92334437f9f0e1e1?x-expires=1700452800&x-signature=ZNHG9%2FMBw9qp5DSm%2BNnbhwX6xK8%3D
web video url: https://www.tiktok.com/@thekatieromero/video/7105552208422374699
========================
========================
#ifweeverbrokeup
used 1543 times
1337044198 views
cover url: https://p16-sign-sg.tiktokcdn.com/obj/tos-alisg-p-0037/ee79eb2bea6445739ed71cef3e9b84b6_1686646723?x-expires=1700456400&x-signature=0MADrs89I23eeCudb%2FJxkI%2FJbR8%3D
web video url: https://www.tiktok.com/@zanmangloopyofficial/video/7244092495129218312
cover url: https://p16-sign-sg.tiktokcdn.com/obj/tos-alisg-p-0037/ee79eb2bea6445739ed71cef3e9b84b6_1686646723?x-expires=1700456400&x-signature=0MADrs89I23eeCudb%2FJxkI%2FJbR8%3D
web video url: https://www.tiktok.com/@zanmangloopyofficial/video/7244092495129218312
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/7895be3e406d435ba0db9e6f5db349e2?x-expires=1700456400&x-signature=ahrkOjcQRlDhAOf1upGEu%2B2ECYU%3D
web video url: https://www.tiktok.com/@bebopandbebe/video/7238437685537328426
========================
========================
#springcleaning
used 1416 times
2156123000 views
cover url: https://p16-sign-va.tiktokcdn.com/tos-maliva-p-0068/56ce7e79491a4b27b371517ce134fa82_1631381225~tplv-dmt-logom:tos-maliva-p-0000/415cfd01b3484fb38f7b088aa6efda67.image?x-expires=1700503200&x-signature=YL4yGwa%2F1gZ59cKHMov7ficsK9E%3D
web video url: https://www.tiktok.com/@livecomposed/video/7006728991067491589
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/4c54b4d1332c4000a615d9c5fc172be8_1677780053?x-expires=1700503200&x-signature=aD4zRpPLbhn9wz4vtTQWZRa2I1U%3D
web video url: https://www.tiktok.com/@atmeikasa/video/7206010371004583214
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/4c54b4d1332c4000a615d9c5fc172be8_1677780053?x-expires=1700503200&x-signature=aD4zRpPLbhn9wz4vtTQWZRa2I1U%3D
web video url: https://www.tiktok.com/@atmeikasa/video/7206010371004583214
========================
========================
#funny
used 1382 times
53648909500 views
cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=HTd5Yy2XA1y%2Bn0Gy2PnX9t%2FNpw4%3D
web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731
cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=HTd5Yy2XA1y%2Bn0Gy2PnX9t%2FNpw4%3D
web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=vv04JjjwKgR1P3t117v%2B5HMvnpI%3D
web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731
========================
========================
#happymonday
used 1308 times
741991700 views
cover url: https://p16-sign-va.tiktokcdn.com/tos-maliva-p-0068/oQgeH8BRJnj20JEFoQ5tAf1MIb976nBD89QiFB~tplv-dmt-logom:tos-useast2a-v-0068/4763cd9418ac4d7faccbf52906bcf43c.image?x-expires=1700449200&x-signature=DPRcWm2Xhpe7r2HmxxGBzOyhwVs%3D
web video url: https://www.tiktok.com/@joinparallel.io/video/7192338389255916806
cover url: https://p16-sign-va.tiktokcdn.com/tos-maliva-p-0068/oQgeH8BRJnj20JEFoQ5tAf1MIb976nBD89QiFB~tplv-dmt-logom:tos-useast2a-v-0068/4763cd9418ac4d7faccbf52906bcf43c.image?x-expires=1700449200&x-signature=DPRcWm2Xhpe7r2HmxxGBzOyhwVs%3D
web video url: https://www.tiktok.com/@joinparallel.io/video/7192338389255916806
cover url: https://p16-sign-sg.tiktokcdn.com/obj/tos-alisg-p-0037/002965b791d641d5b2f3d86ee0019604_1675130079?x-expires=1700449200&x-signature=TMCJOBJCXYeu5rsjFWpohtGwT8M%3D
web video url: https://www.tiktok.com/@mondayhaircare/video/7194628805414161665
========================
========================
#nyc
used 990 times
5577241000 views
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/08b82fdf19d5468e91a032b30e527861_1692785637?x-expires=1700452800&x-signature=%2F5sW2i0xTGXJJj2PKbwI6VXywWI%3D
web video url: https://www.tiktok.com/@erikconover/video/7270458709065731370
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/1ea2545bde2645ec8f1106a4b9de6c2e_1648602515?x-expires=1700452800&x-signature=HNr6UCQsc4q0m%2FFShx%2FJJNWb1Jg%3D
web video url: https://www.tiktok.com/@thekatieromero/video/7080693879141485870
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/c898b993f308477e92334437f9f0e1e1?x-expires=1700452800&x-signature=ZNHG9%2FMBw9qp5DSm%2BNnbhwX6xK8%3D
web video url: https://www.tiktok.com/@thekatieromero/video/7105552208422374699
========================
========================
#makeup
used 976 times
15309874500 views
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/ea183fe6de594a319ba917d1ffbff11b?x-expires=1700503200&x-signature=9L4ypK162uI%2BirECcDcFqctjvn8%3D
web video url: https://www.tiktok.com/@dollievision/video/7208244986666585386
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/ea183fe6de594a319ba917d1ffbff11b?x-expires=1700503200&x-signature=9L4ypK162uI%2BirECcDcFqctjvn8%3D
web video url: https://www.tiktok.com/@dollievision/video/7208244986666585386
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/1404c560d1e74fe7881a0a4ae6414de5_1652622380?x-expires=1700449200&x-signature=m%2BIawkKwQBwnUaTqBTTMtqLQPZo%3D
web video url: https://www.tiktok.com/@mimles/video/7097959048515013894
========================
========================
#trending
used 721 times
21692028406 views
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/bbdfd4ef0c4040b2bf9c52e9bb81d770?x-expires=1700449200&x-signature=fMm4z9wGlJCa1VFXvU5jQ0ot6tA%3D
web video url: https://www.tiktok.com/@phuonglinh.ido/video/7215533760039865646
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/c92407e5bca34ce78eb17db878630adc?x-expires=1700449200&x-signature=836u0V7z2PC7tFMLlsvVDFDU1wU%3D
web video url: https://www.tiktok.com/@asmr.mus/video/7212985350124375342
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/c92407e5bca34ce78eb17db878630adc?x-expires=1700449200&x-signature=836u0V7z2PC7tFMLlsvVDFDU1wU%3D
web video url: https://www.tiktok.com/@asmr.mus/video/7212985350124375342
========================
========================
#comedy
used 579 times
14364510900 views
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/49a912da569f4c69b3658762357f3922_1572472757?x-expires=1700449200&x-signature=rCokiz5pbl88BrzDzX3AB1LFCXg%3D
web video url: https://www.tiktok.com/@kisonkee/video/6753718966637677830
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/49a912da569f4c69b3658762357f3922_1572472757?x-expires=1700449200&x-signature=rCokiz5pbl88BrzDzX3AB1LFCXg%3D
web video url: https://www.tiktok.com/@kisonkee/video/6753718966637677830
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/o0EnyBknREPO4GdeDo4nxAIJRFJfbfzAzGQSDf?x-expires=1700449200&x-signature=zuGbpMoTS01F4waRsGo2r2AoVxk%3D
web video url: https://www.tiktok.com/@ricoanimations0/video/7241573984590957830
========================
========================
#newyork
used 555 times
3126420800 views
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/4a878de5dbe241b5b5e25635f4200a51_1650915958?x-expires=1700449200&x-signature=1l%2F8aGh0jktub1R%2BX23PAe64Dys%3D
web video url: https://www.tiktok.com/@mdmotivator/video/7090629995546070277
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/4a878de5dbe241b5b5e25635f4200a51_1650915958?x-expires=1700449200&x-signature=1l%2F8aGh0jktub1R%2BX23PAe64Dys%3D
web video url: https://www.tiktok.com/@mdmotivator/video/7090629995546070277
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/08b82fdf19d5468e91a032b30e527861_1692785637?x-expires=1700452800&x-signature=%2F5sW2i0xTGXJJj2PKbwI6VXywWI%3D
web video url: https://www.tiktok.com/@erikconover/video/7270458709065731370
========================
========================
#couple
used 439 times
5628511600 views
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/014d01e5b7f848fc8f8899e88e8fa483?x-expires=1700449200&x-signature=IMmyHEigmMoVtLEuTWPZwe%2Fksb0%3D
web video url: https://www.tiktok.com/@mamalindy/video/7079555791962885419
cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-aiso/cf3359fa45444f0994cf0dcc1c201b2d_1681930130?x-expires=1700449200&x-signature=tABQmhr%2FtklzlsNqYWGZnNrxwhI%3D
web video url: https://www.tiktok.com/@kajsablock/video/7223834852305456410
cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-aiso/cf3359fa45444f0994cf0dcc1c201b2d_1681930130?x-expires=1700449200&x-signature=tABQmhr%2FtklzlsNqYWGZnNrxwhI%3D
web video url: https://www.tiktok.com/@kajsablock/video/7223834852305456410
========================
========================
#fy
used 397 times
16901215000 views
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/ef952b508c8043bb8b4ba98e3db850fb_1679074109?x-expires=1700449200&x-signature=AOKtuDMNxX%2BU2b1dRfBvofZLZfk%3D
web video url: https://www.tiktok.com/@noelgoescrazy/video/7211568359798803717
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/ef952b508c8043bb8b4ba98e3db850fb_1679074109?x-expires=1700449200&x-signature=AOKtuDMNxX%2BU2b1dRfBvofZLZfk%3D
web video url: https://www.tiktok.com/@noelgoescrazy/video/7211568359798803717
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/509605b7a901400589cd15d6731aaf8c_1677431421?x-expires=1700449200&x-signature=LoywgvGN5XLKIpvwV2UnR6pml6s%3D
web video url: https://www.tiktok.com/@noelgoescrazy/video/7204513074097769733
========================

456
hws/tiktok_trends/test.py Normal file
View File

@@ -0,0 +1,456 @@
import subprocess
import os
import filecmp
import glob
import sys # Import sys for platform detection
import time
import shutil
import re # Import re for regex parsing on macOS
# --- Configuration ---
# Compiler, flags, and output binary used by compile_program().
CXX = "g++"
CXXFLAGS = ["-Wall", "-O2", "-std=c++11"]
EXECUTABLE = "./nytrends.exe"
# Glob pattern selecting the C++ sources handed to the compiler.
SOURCE_FILES_PATTERN = "*.cpp"
# Directories holding the test inputs and the reference outputs they are compared against.
INPUT_DIR = "inputs"
EXPECTED_OUTPUT_DIR = "outputs"
# File the program under test is told to write; removed before each run and during final cleanup.
TEMP_OUTPUT_FILE = "output_unit_test.txt"
# Per-test limit in seconds, passed to subprocess.run(timeout=...).
TEST_TIMEOUT = 120
# Configuration for memory measurement
MEASURE_MEMORY = True # Master switch
TIME_COMMAND = "/usr/bin/time"
# --- Platform Specific Time Config ---
TIME_COMMAND_MODE = None # Will be 'linux' or 'macos' or None
LINUX_TIME_FORMAT = "%M" # Format specifier for Max RSS (KB) on Linux
LINUX_TIME_OUTPUT_FILE = "time_mem_output.tmp" # Temp file for Linux time output
# Captures the integer that precedes "maximum resident set size" at the start of a line.
MACOS_MEM_REGEX = re.compile(r"^\s*(\d+)\s+maximum resident set size", re.IGNORECASE | re.MULTILINE)
# Configuration for suppressing program output
SUPPRESS_PROGRAM_OUTPUT = True
# ANSI Color Codes
# print_color() emits the chosen color code, the text, then COLOR_RESET.
COLOR_GREEN = '\033[92m'
COLOR_RED = '\033[91m'
COLOR_YELLOW = '\033[93m'
COLOR_BLUE = '\033[94m'
COLOR_RESET = '\033[0m'
# --- Helper Functions ---
def print_color(text, color):
    """
    Print ``text`` wrapped in an ANSI color sequence.

    Args:
        text: The message to print.
        color: One of the COLOR_* escape sequences; it is emitted before the
            text, and COLOR_RESET is emitted after it.
    """
    print(f"{color}{text}{COLOR_RESET}")
def check_time_command():
    """
    Probe whether TIME_COMMAND can be used to measure peak memory on this OS.

    TIME_COMMAND is run against the trivial ``true`` command with the
    platform-specific flags (``-f LINUX_TIME_FORMAT`` on Linux, ``-l`` on
    macOS). On macOS the captured stderr must additionally match
    MACOS_MEM_REGEX, otherwise the output format is considered unusable.

    Side effects:
        Sets the module-level TIME_COMMAND_MODE to "linux" or "macos" when the
        probe succeeds and to None on every failure path; prints a status
        message in all cases.

    Returns:
        True if memory measurement is usable, False otherwise.
    """
    global TIME_COMMAND_MODE

    if not shutil.which(TIME_COMMAND):
        print_color(f"Warning: '{TIME_COMMAND}' not found. Memory measurement disabled.", COLOR_YELLOW)
        TIME_COMMAND_MODE = None
        return False

    platform = sys.platform
    if platform.startswith("linux"):
        # On Linux only the exit status of the probe matters.
        test_command = [TIME_COMMAND, '-f', LINUX_TIME_FORMAT, 'true']
        TIME_COMMAND_MODE = "linux"
        print(f"Detected Linux platform. Testing {TIME_COMMAND} with '-f {LINUX_TIME_FORMAT}'...")
    elif platform == "darwin":  # macOS
        # On macOS the stderr report is inspected below to validate its format.
        test_command = [TIME_COMMAND, '-l', 'true']
        TIME_COMMAND_MODE = "macos"
        print(f"Detected macOS platform. Testing {TIME_COMMAND} with '-l'...")
    else:
        print_color(f"Warning: Unsupported platform '{platform}' for memory measurement. Disabled.", COLOR_YELLOW)
        TIME_COMMAND_MODE = None
        return False

    try:
        # check=True turns a non-zero exit into CalledProcessError.
        process = subprocess.run(test_command,
                                 capture_output=True,
                                 text=True,
                                 check=True,
                                 timeout=3)

        if TIME_COMMAND_MODE == "macos":
            if MACOS_MEM_REGEX.search(process.stderr):
                print_color(f"Memory measurement enabled using '{TIME_COMMAND} -l'.", COLOR_GREEN)
                return True
            print_color(f"Warning: '{TIME_COMMAND} -l' output format not recognized (missing 'maximum resident set size'). Memory measurement disabled.", COLOR_YELLOW)
            TIME_COMMAND_MODE = None
            return False

        # Linux: reaching this point means the probe exited with status 0.
        print_color(f"Memory measurement enabled using '{TIME_COMMAND} -f {LINUX_TIME_FORMAT}'.", COLOR_GREEN)
        return True
    except subprocess.CalledProcessError as e:
        print_color(f"Warning: {TIME_COMMAND} test command failed (exit code {e.returncode}). Memory measurement disabled.", COLOR_YELLOW)
        if e.stderr:
            print(f"Stderr:\n{e.stderr}")
        TIME_COMMAND_MODE = None
        return False
    except FileNotFoundError:
        # shutil.which() should already have caught this; kept as a second guard.
        print_color(f"Warning: '{TIME_COMMAND}' not found during test run. Memory measurement disabled.", COLOR_YELLOW)
        TIME_COMMAND_MODE = None
        return False
    except Exception as e:
        # Includes subprocess.TimeoutExpired from the 3-second probe limit.
        print_color(f"Warning: An unexpected error occurred while testing {TIME_COMMAND}. Memory measurement disabled. Error: {e}", COLOR_YELLOW)
        TIME_COMMAND_MODE = None
        return False
# --- Compilation ---
def compile_program():
    """
    Compile every file matching SOURCE_FILES_PATTERN into the test executable.

    The command is ``CXX CXXFLAGS -o basename(EXECUTABLE) <sources>``. Compiler
    output is captured; on success any stderr text is printed as warnings, on
    failure it is printed as the error output.

    Returns:
        True if the compiler exited with status 0, False if no sources were
        found, the compiler is missing, the compiler failed, or any other
        exception was raised while running it.
    """
    print_color("--- Starting Compilation ---", COLOR_BLUE)

    source_files = glob.glob(SOURCE_FILES_PATTERN)
    if not source_files:
        print_color(f"Error: No source files found matching pattern '{SOURCE_FILES_PATTERN}'.", COLOR_RED)
        return False

    compile_command = [CXX] + CXXFLAGS + ["-o", os.path.basename(EXECUTABLE)] + source_files
    print(f"Running: {' '.join(compile_command)}")

    try:
        start_time = time.perf_counter()
        # check=False: the exit status is inspected manually so the compiler
        # output can be reported either way.
        process = subprocess.run(compile_command, check=False, capture_output=True, text=True)
        duration = time.perf_counter() - start_time
    except FileNotFoundError:
        print_color(f"Error: Compiler '{CXX}' not found.", COLOR_RED)
        return False
    except Exception as e:
        print_color(f"An unexpected error occurred during compilation: {e}", COLOR_RED)
        return False

    if process.returncode != 0:
        print_color(f"Compilation failed with exit code {process.returncode} (took {duration:.3f}s).", COLOR_RED)
        print_color("Compiler Error Output:", COLOR_RED)
        print(process.stderr if process.stderr else "(No compiler error output captured)")
        return False

    print_color(f"Compilation successful (took {duration:.3f}s).", COLOR_GREEN)
    if process.stderr:
        print_color("Compiler Warnings/Messages:", COLOR_YELLOW)
        print(process.stderr)
    return True
def run_test(test_name, input_file, expected_output_file, argument):
    """
    Run one test case of the compiled program and compare its output file.

    The program is invoked as ``EXECUTABLE input_file TEMP_OUTPUT_FILE argument``.
    When memory measurement is enabled (MEASURE_MEMORY and TIME_COMMAND_MODE
    are both set) the invocation is wrapped in TIME_COMMAND so that the peak
    resident set size can be recorded. The file written by the program is then
    compared byte-for-byte with ``expected_output_file``; on mismatch a unified
    diff is printed.

    Args:
        test_name: Label printed in the banner for this test.
        input_file: Path of the input file passed to the program.
        expected_output_file: Path of the reference output to compare against.
        argument: Third command-line argument passed to the program.

    Returns:
        tuple (passed: bool, reason: str, duration: float | None, memory_kb: int | None).
        ``duration`` is the wall-clock time in seconds around the subprocess
        call; ``memory_kb`` is None whenever memory was not measured or could
        not be parsed. Exceptions raised while running the program are caught
        and reported through ``reason`` rather than propagated.
    """
    print_color(f"--- Running {test_name} ---", COLOR_BLUE)
    duration = None
    memory_kb = None

    # Prerequisite checks; the caller counts these three reasons as "skipped".
    if not os.path.exists(input_file):
        return False, "Input file missing", None, None
    if not os.path.exists(expected_output_file):
        return False, "Expected output file missing", None, None
    if not os.path.exists(EXECUTABLE):
        return False, "Executable not found", None, None

    # --- Command construction & subprocess arguments ---
    measuring = bool(MEASURE_MEMORY and TIME_COMMAND_MODE)
    quiet = subprocess.DEVNULL if SUPPRESS_PROGRAM_OUTPUT else None
    base_command = [EXECUTABLE, input_file, TEMP_OUTPUT_FILE, argument]
    subprocess_kwargs = {
        "check": False,
        "timeout": TEST_TIMEOUT,
        "stdout": quiet,
        "stderr": quiet,
    }

    if measuring and TIME_COMMAND_MODE == "linux":
        # The time report goes to a separate file (-o), so the program's own
        # stdout/stderr can be suppressed independently.
        run_command = [TIME_COMMAND, '-f', LINUX_TIME_FORMAT, '-o', LINUX_TIME_OUTPUT_FILE] + base_command
        if os.path.exists(LINUX_TIME_OUTPUT_FILE):
            try:
                os.remove(LINUX_TIME_OUTPUT_FILE)
            except OSError:
                pass
    elif measuring and TIME_COMMAND_MODE == "macos":
        # `time -l` writes its report to stderr, so stderr must be captured.
        run_command = [TIME_COMMAND, '-l'] + base_command
        subprocess_kwargs["stderr"] = subprocess.PIPE
        subprocess_kwargs["text"] = True
    else:
        run_command = base_command

    print(f"Executing: {' '.join(run_command)}")

    # Remove stale output so a leftover file cannot be mistaken for this run's.
    if os.path.exists(TEMP_OUTPUT_FILE):
        try:
            os.remove(TEMP_OUTPUT_FILE)
        except OSError as e:
            print_color(f"Warning: Could not remove {TEMP_OUTPUT_FILE}: {e}", COLOR_YELLOW)

    start_time = time.perf_counter()
    try:
        process = subprocess.run(run_command, **subprocess_kwargs)
        duration = time.perf_counter() - start_time
        print(f"Execution Time: {duration:.3f} seconds")

        # --- Process memory output (platform specific) ---
        if measuring and TIME_COMMAND_MODE == "linux":
            if os.path.exists(LINUX_TIME_OUTPUT_FILE):
                try:
                    with open(LINUX_TIME_OUTPUT_FILE, 'r') as f_time:
                        report_lines = [ln.strip() for ln in f_time.read().splitlines() if ln.strip()]
                    if report_lines:
                        # GNU time prepends a "Command exited with non-zero
                        # status" / "terminated by signal" line when the
                        # program fails; the formatted value is the last line.
                        memory_kb = int(report_lines[-1])  # Already in KB
                        print(f"Peak Memory Usage: {memory_kb} KB")
                    else:
                        print_color(f"Warning: {LINUX_TIME_OUTPUT_FILE} was empty.", COLOR_YELLOW)
                except (ValueError, IOError) as e:
                    print_color(f"Warning: Could not parse memory (Linux) from {LINUX_TIME_OUTPUT_FILE}: {e}", COLOR_YELLOW)
            else:
                print_color(f"Warning: {LINUX_TIME_OUTPUT_FILE} was not created.", COLOR_YELLOW)
        elif measuring and TIME_COMMAND_MODE == "macos":
            if process.stderr:
                match = MACOS_MEM_REGEX.search(process.stderr)
                if match:
                    try:
                        mem_bytes = int(match.group(1))
                        memory_kb = mem_bytes // 1024  # Convert Bytes to KB
                        print(f"Peak Memory Usage: {memory_kb} KB ({mem_bytes} Bytes)")
                    except (ValueError, IndexError):
                        print_color("Warning: Could not parse memory value (macOS) from captured output.", COLOR_YELLOW)
                else:
                    print_color("Warning: 'maximum resident set size' not found in 'time -l' output (macOS).", COLOR_YELLOW)
            else:
                print_color("Warning: No stderr captured from 'time -l' (macOS).", COLOR_YELLOW)

        # --- Check program result ---
        if process.returncode != 0:
            print_color(f"Test failed: Program exited with non-zero status {process.returncode}.", COLOR_RED)
            return False, "Runtime error", duration, memory_kb

        if not os.path.exists(TEMP_OUTPUT_FILE):
            print_color(f"Test failed: Program finished successfully but did not create '{TEMP_OUTPUT_FILE}'.", COLOR_RED)
            return False, "Output file not created", duration, memory_kb

        # --- Compare output file ---
        # shallow=False forces a content comparison instead of an os.stat() one.
        if filecmp.cmp(TEMP_OUTPUT_FILE, expected_output_file, shallow=False):
            print_color("Test Result: PASSED", COLOR_GREEN)
            return True, "Passed", duration, memory_kb

        print_color("Test Result: FAILED - Output mismatch.", COLOR_RED)
        print_color(f" Expected: {expected_output_file}", COLOR_YELLOW)
        print_color(f" Actual: {TEMP_OUTPUT_FILE}", COLOR_YELLOW)
        try:
            diff_proc = subprocess.run(['diff', '-u', expected_output_file, TEMP_OUTPUT_FILE], capture_output=True, text=True)
            print_color("--- Diff ---", COLOR_YELLOW)
            print(diff_proc.stdout if diff_proc.stdout else "(No differences found by diff, might be whitespace or encoding issues)")
            print_color("------------", COLOR_YELLOW)
        except FileNotFoundError:
            print_color("Could not run 'diff' command.", COLOR_YELLOW)
        except Exception as diff_e:
            print_color(f"Error running diff: {diff_e}", COLOR_YELLOW)
        return False, "Output mismatch", duration, memory_kb

    except subprocess.TimeoutExpired as timeout_exc:
        duration = time.perf_counter() - start_time
        print_color(f"Test failed: Program timed out after {duration:.3f}s (limit: {TEST_TIMEOUT}s).", COLOR_RED)
        # subprocess.run() never returned, so there is no CompletedProcess;
        # any partially captured stderr lives on the exception and may be
        # bytes even when text=True was requested.
        partial_stderr = timeout_exc.stderr
        if measuring and TIME_COMMAND_MODE == "macos" and partial_stderr:
            if isinstance(partial_stderr, bytes):
                partial_stderr = partial_stderr.decode(errors="replace")
            match = MACOS_MEM_REGEX.search(partial_stderr)
            if match:
                memory_kb = int(match.group(1)) // 1024
        return False, "Timeout", duration, memory_kb
    except Exception as e:
        print_color(f"An unexpected error occurred during test execution: {e}", COLOR_RED)
        return False, f"Execution exception: {e}", None, None
    finally:
        # Single cleanup point for the Linux time report, whatever path was taken.
        if measuring and TIME_COMMAND_MODE == "linux" and os.path.exists(LINUX_TIME_OUTPUT_FILE):
            try:
                os.remove(LINUX_TIME_OUTPUT_FILE)
            except OSError:
                pass
# --- Main Execution ---
if __name__ == "__main__":
    # Script entry point: probe memory measurement, compile, run every
    # (input, argument) combination, clean up, print a summary, and exit with
    # status 1 if compilation or any executed test failed, 0 otherwise.

    # 0. Check if memory measurement is desired AND possible.
    if MEASURE_MEMORY:
        # Downgrade the switch to what the platform can actually do.
        MEASURE_MEMORY = check_time_command()
    else:
        MEASURE_MEMORY = False
        print_color("Memory measurement explicitly disabled by configuration.", COLOR_YELLOW)

    if SUPPRESS_PROGRAM_OUTPUT:
        print_color("Program stdout/stderr will be suppressed during tests.", COLOR_BLUE)

    # 1. Compile
    if not compile_program():
        print_color("\nCompilation failed. Aborting tests.", COLOR_RED)
        sys.exit(1)

    # 2. Define test cases: (numeric id prefix, input base name).
    test_bases = [
        ("1", "tiny1"), ("2", "tiny2"), ("3", "small1"), ("4", "small2"),
        ("5", "medium1"), ("6", "medium2"), ("7", "large1"), ("8", "large2"),
        ("9", "large3"), ("10", "large4"), ("11", "large5"), ("12", "large6"),
        ("13", "large7"), ("14", "large8"), ("15", "large9"),
    ]
    arguments_to_test = ["hashtag", "sound"]

    results = {"passed": 0, "failed": 0, "skipped": 0}
    failed_tests = []
    test_durations = []
    test_memory_usages = []
    # Reasons returned by run_test() before the program is ever launched.
    skip_reasons = ("Input file missing", "Expected output file missing", "Executable not found")

    # 3. Run tests
    print_color("\n--- Starting Test Execution ---", COLOR_BLUE)
    total_start_time = time.perf_counter()
    for id_prefix, base_name in test_bases:
        for i, argument in enumerate(arguments_to_test, 1):
            test_id = f"{id_prefix}.{i}"
            test_name = f"Test Case {test_id}: input {base_name}, {argument}"
            input_filename = os.path.join(INPUT_DIR, f"input_{base_name}.json")
            expected_output_filename = os.path.join(EXPECTED_OUTPUT_DIR, f"output_{base_name}_{argument}.txt")

            passed, reason, duration, memory_kb = run_test(test_name, input_filename, expected_output_filename, argument)

            if passed:
                results["passed"] += 1
                if duration is not None:
                    test_durations.append(duration)
                if MEASURE_MEMORY and memory_kb is not None:
                    test_memory_usages.append(memory_kb)
            elif reason in skip_reasons:
                results["skipped"] += 1
            else:
                results["failed"] += 1
                duration_str = f" ({duration:.3f}s)" if duration is not None else ""
                mem_str = f", {memory_kb} KB" if MEASURE_MEMORY and memory_kb is not None else ""
                failed_tests.append(f"{test_name} ({reason}{duration_str}{mem_str})")
            print("-" * 40)
    total_test_suite_duration = time.perf_counter() - total_start_time

    # 4. Clean up the temporary output file and the compiled executable.
    print_color("--- Cleaning Up ---", COLOR_BLUE)
    if os.path.exists(TEMP_OUTPUT_FILE):
        try:
            os.remove(TEMP_OUTPUT_FILE)
            print(f"Removed temporary output file: {TEMP_OUTPUT_FILE}")
        except OSError as e:
            print_color(f"Warning: Could not remove {TEMP_OUTPUT_FILE}: {e}", COLOR_YELLOW)
    if os.path.exists(EXECUTABLE):
        try:
            os.remove(EXECUTABLE)
            print(f"Removed executable: {EXECUTABLE}")
        except OSError as e:
            print_color(f"Warning: Could not remove {EXECUTABLE}: {e}", COLOR_YELLOW)

    # 5. Print summary
    print_color("\n--- Test Summary ---", COLOR_BLUE)
    print_color(f"Passed: {results['passed']}", COLOR_GREEN)
    print_color(f"Failed: {results['failed']}", COLOR_RED if results['failed'] > 0 else COLOR_GREEN)
    print_color(f"Skipped: {results['skipped']}", COLOR_YELLOW if results['skipped'] > 0 else COLOR_GREEN)
    total_run = results['passed'] + results['failed']
    total_defined = total_run + results['skipped']
    print(f"Total Tests Defined: {total_defined}")
    print(f"Total Tests Run: {total_run}")
    print(f"Total Test Suite Execution Time: {total_test_suite_duration:.3f}s")

    # Performance summary (only passed tests contribute durations).
    if test_durations:
        total_passed_time = sum(test_durations)
        avg_time = total_passed_time / len(test_durations)
        max_time = max(test_durations)
        min_time = min(test_durations)
        print("\n--- Performance Summary (Passed Tests) ---")
        print(f"Total execution time (passed tests): {total_passed_time:.3f}s")
        print(f"Average execution time per test: {avg_time:.3f}s")
        print(f"Fastest test execution time: {min_time:.3f}s")
        print(f"Slowest test execution time: {max_time:.3f}s")

    # Memory summary: shown only if measurement was still enabled after the probe.
    if MEASURE_MEMORY:
        print("\n--- Memory Usage Summary (Passed Tests) ---")
        if test_memory_usages:
            total_mem_kb = sum(test_memory_usages)
            avg_mem_kb = total_mem_kb / len(test_memory_usages)
            max_mem_kb = max(test_memory_usages)
            min_mem_kb = min(test_memory_usages)
            total_mem_mb = total_mem_kb / 1024
            total_mem_gb = total_mem_mb / 1024
            # Pick the largest unit in which the cumulative total exceeds 1.
            if total_mem_gb > 1:
                total_mem_str = f"{total_mem_gb:.2f} GB"
            elif total_mem_mb > 1:
                total_mem_str = f"{total_mem_mb:.2f} MB"
            else:
                total_mem_str = f"{total_mem_kb} KB"
            print(f"Cumulative peak memory (passed tests): {total_mem_str} ({total_mem_kb} KB)")
            print(f"Average peak memory per test: {avg_mem_kb:.1f} KB")
            print(f"Lowest peak memory usage: {min_mem_kb} KB")
            print(f"Highest peak memory usage: {max_mem_kb} KB")
        else:
            print("(No memory usage data collected for passed tests - check warnings)")

    # Final result
    if failed_tests:
        print_color("\n--- Failed Test Cases ---", COLOR_RED)
        for test in failed_tests:
            print(f" - {test}")
        sys.exit(1)
    if results['passed'] == 0:
        # Nothing failed and nothing passed, so every defined test was skipped.
        print_color("\nWarning: No tests were executed (all skipped).", COLOR_YELLOW)
        sys.exit(0)
    print_color("\nAll executed tests passed successfully!", COLOR_GREEN)
    sys.exit(0)