solve hw-9
This commit is contained in:
16
.vscode/launch.json
vendored
16
.vscode/launch.json
vendored
@@ -135,6 +135,22 @@
|
||||
"MIMode": "gdb",
|
||||
"miDebuggerPath": "/usr/bin/gdb",
|
||||
"preLaunchTask": "C/C++: g++ build active file"
|
||||
},
|
||||
{
|
||||
"name": "nytrends",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${fileDirname}/${fileBasenameNoExtension}",
|
||||
"args": [
|
||||
"inputs/input_large9.json",
|
||||
"output.txt",
|
||||
"hashtag"
|
||||
],
|
||||
"cwd": "${fileDirname}",
|
||||
"environment": [],
|
||||
"MIMode": "gdb",
|
||||
"miDebuggerPath": "/usr/bin/gdb",
|
||||
"preLaunchTask": "C/C++: g++ build active file"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
6
.vscode/settings.json
vendored
6
.vscode/settings.json
vendored
@@ -78,6 +78,10 @@
|
||||
"unordered_set": "cpp",
|
||||
"regex": "cpp",
|
||||
"cinttypes": "cpp",
|
||||
"__node_handle": "cpp"
|
||||
"__node_handle": "cpp",
|
||||
"shared_mutex": "cpp",
|
||||
"cfenv": "cpp",
|
||||
"locale": "cpp",
|
||||
"filesystem": "cpp"
|
||||
}
|
||||
}
|
||||
4
hws/tiktok_trends/Constants.h
Normal file
4
hws/tiktok_trends/Constants.h
Normal file
@@ -0,0 +1,4 @@
|
||||
#pragma once
|
||||
|
||||
// Number of best videos retained (and printed) per hashtag / sound.
constexpr int TOP_K_CANDIDATES = 3;

// Number of hashtags / sounds listed in the final report.
constexpr int TOP_N_OUTPUT = 20;
|
||||
34
hws/tiktok_trends/HashtagInfo.h
Normal file
34
hws/tiktok_trends/HashtagInfo.h
Normal file
@@ -0,0 +1,34 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include "TopKVideoHolder.h"
|
||||
|
||||
struct HashtagInfo {
|
||||
const std::string* name = nullptr;
|
||||
long totalViews = 0;
|
||||
int usageCount = 0;
|
||||
TopKVideoHolder topVideos;
|
||||
|
||||
HashtagInfo() = default;
|
||||
explicit HashtagInfo(const std::string* n) : name(n), totalViews(0), usageCount(0) {}
|
||||
};
|
||||
|
||||
struct CompareHashtagPtr {
|
||||
bool operator()(const HashtagInfo* a, const HashtagInfo* b) const {
|
||||
if (a->usageCount != b->usageCount) return a->usageCount > b->usageCount;
|
||||
if (a->totalViews != b->totalViews) return a->totalViews > b->totalViews;
|
||||
if (a->name && b->name) return *a->name < *b->name;
|
||||
if (a->name) return true;
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
struct CompareHashtagPtrForHeap {
|
||||
bool operator()(const HashtagInfo* a, const HashtagInfo* b) const {
|
||||
if (a->usageCount != b->usageCount) return a->usageCount > b->usageCount;
|
||||
if (a->totalViews != b->totalViews) return a->totalViews > b->totalViews;
|
||||
if (a->name && b->name) return *a->name > *b->name;
|
||||
if (a->name) return false;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
@@ -1,7 +1,7 @@
|
||||
HOMEWORK 9: Tiktok Trends
|
||||
|
||||
|
||||
NAME: < insert name >
|
||||
NAME: Jinshan Zhou
|
||||
|
||||
|
||||
COLLABORATORS AND OTHER RESOURCES:
|
||||
@@ -10,17 +10,21 @@ List the names of everyone you talked to about this assignment
|
||||
LMS, etc.), and all of the resources (books, online reference
|
||||
material, etc.) you consulted in completing this assignment.
|
||||
|
||||
< insert collaborators / resources >
|
||||
Many resources, e.g. on using top-K selection for faster sorting and on different I/O approaches (not useful).
|
||||
StringCache (not useful), shared_ptr (did not really help), and many websites and examples
|
||||
that I don't remember anymore.
|
||||
|
||||
Remember: Your implementation for this assignment must be done on your
|
||||
own, as described in "Academic Integrity for Homework" handout.
|
||||
|
||||
|
||||
ESTIMATE OF # OF HOURS SPENT ON THIS ASSIGNMENT: < insert # hours >
|
||||
ESTIMATE OF # OF HOURS SPENT ON THIS ASSIGNMENT: over 20 hr: 8 hr to complete it and
|
||||
more than 10 hr just for optimization.
|
||||
|
||||
|
||||
MISC. COMMENTS TO GRADER:
|
||||
(optional, please be concise!)
|
||||
The program is a bit messy because I tried too many techniques. Some were breaking
|
||||
changes, so I had to patch around them. I am also a bit lost in my own code.
|
||||
|
||||
## Reflection and Self Assessment
|
||||
|
||||
@@ -32,5 +36,11 @@ What parts of the assignment did you find challenging? Is there anything that
|
||||
finally "clicked" for you in the process of working on this assignment? How well
|
||||
did the development and testing process go for you?
|
||||
|
||||
< insert reflection >
|
||||
|
||||
This was definitely the most challenging assignment I've ever seen, and the
|
||||
hard part was identifying performance issues and optimizing them. It was not
|
||||
easy, I used various tools like perf and found that the JSON part had the biggest
|
||||
overhead. Optimizing it showed immediate results, but then I hit a bottleneck,
|
||||
so I started using various schemes, and most of them didn't work. Then I had to
|
||||
push back and design the business process from scratch and finally got inspired
|
||||
by my professor to use unordered set to get the best performance
|
||||
(last 0.1 seconds).
|
||||
|
||||
35
hws/tiktok_trends/SoundInfo.h
Normal file
35
hws/tiktok_trends/SoundInfo.h
Normal file
@@ -0,0 +1,35 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include "TopKVideoHolder.h"
|
||||
|
||||
struct SoundInfo {
|
||||
const std::string* musicId = nullptr;
|
||||
const std::string* musicName = nullptr;
|
||||
const std::string* musicAuthor = nullptr;
|
||||
long totalViews = 0;
|
||||
TopKVideoHolder topVideos;
|
||||
|
||||
SoundInfo() = default;
|
||||
|
||||
SoundInfo(const std::string* id, const std::string* name, const std::string* author)
|
||||
: musicId(id), musicName(name), musicAuthor(author), totalViews(0) {}
|
||||
};
|
||||
|
||||
struct CompareSoundPtr {
|
||||
bool operator()(const SoundInfo* a, const SoundInfo* b) const {
|
||||
if (a->totalViews != b->totalViews) return a->totalViews > b->totalViews;
|
||||
if (a->musicId && b->musicId) return *a->musicId < *b->musicId;
|
||||
if (a->musicId) return true;
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
struct CompareSoundPtrForHeap {
|
||||
bool operator()(const SoundInfo* a, const SoundInfo* b) const {
|
||||
if (a->totalViews != b->totalViews) return a->totalViews > b->totalViews;
|
||||
if (a->musicId && b->musicId) return *a->musicId > *b->musicId;
|
||||
if (a->musicId) return false;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
15
hws/tiktok_trends/StringInterner.cpp
Normal file
15
hws/tiktok_trends/StringInterner.cpp
Normal file
@@ -0,0 +1,15 @@
|
||||
#include "StringInterner.h"
|
||||
|
||||
const std::string* StringInterner::intern(const std::string& str) {
|
||||
std::pair<std::unordered_set<std::string>::iterator, bool> result = pool.insert(str);
|
||||
return &(*result.first);
|
||||
}
|
||||
|
||||
const std::string* StringInterner::intern(std::string&& str) {
|
||||
std::pair<std::unordered_set<std::string>::iterator, bool> result = pool.insert(std::move(str));
|
||||
return &(*result.first);
|
||||
}
|
||||
|
||||
const std::string* StringInterner::getEmptyString() {
|
||||
return intern("");
|
||||
}
|
||||
15
hws/tiktok_trends/StringInterner.h
Normal file
15
hws/tiktok_trends/StringInterner.h
Normal file
@@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
// Pool of unique strings. Each distinct string value is stored once and
// callers receive a pointer to the stored copy.
// NOTE: the returned pointers refer to elements of `pool`; std::unordered_set
// keeps references to its elements valid across rehashing, so they should stay
// usable for as long as the interner itself is alive.
class StringInterner {
public:
    // Returns a pointer to the pooled copy of `str` (inserted if missing).
    const std::string* intern(const std::string& str);
    // Same as above, but may move `str` into the pool.
    const std::string* intern(std::string&& str);
    // Returns a pointer to the pooled empty string.
    const std::string* getEmptyString();

private:
    std::unordered_set<std::string> pool;
};
|
||||
18
hws/tiktok_trends/StringPtrUtils.h
Normal file
18
hws/tiktok_trends/StringPtrUtils.h
Normal file
@@ -0,0 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <functional>
|
||||
|
||||
// Hashes a string pointer by the VALUE of the pointed-to string, so it can be
// paired with StringPtrEqual as the hasher of an unordered container.
// A null pointer hashes to 0 instead of being dereferenced; StringPtrEqual
// already treats null as a legal key, so the hasher must accept it too.
struct StringPtrHash {
    std::size_t operator()(const std::string* s) const {
        if (s == nullptr) return 0;
        return std::hash<std::string>()(*s);
    }
};
|
||||
|
||||
// Equality for string pointers: identical pointers (including two nulls) are
// equal; otherwise both must be non-null and point to equal strings.
struct StringPtrEqual {
    bool operator()(const std::string* a, const std::string* b) const {
        return a == b || (a != nullptr && b != nullptr && *a == *b);
    }
};
|
||||
32
hws/tiktok_trends/TopKVideoHolder.cpp
Normal file
32
hws/tiktok_trends/TopKVideoHolder.cpp
Normal file
@@ -0,0 +1,32 @@
|
||||
#include "TopKVideoHolder.h"
|
||||
|
||||
void TopKVideoHolder::add(const VideoInfo& video) {
|
||||
if (pq.size() < K) {
|
||||
pq.push(video);
|
||||
} else {
|
||||
if (VideoCompareWorse()(video, pq.top())) {
|
||||
pq.pop();
|
||||
pq.push(video);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<VideoInfo> TopKVideoHolder::getSortedVideos() {
|
||||
std::vector<VideoInfo> sortedVideos;
|
||||
size_t current_size = pq.size();
|
||||
if (current_size == 0) return sortedVideos;
|
||||
|
||||
sortedVideos.reserve(current_size);
|
||||
|
||||
while (!pq.empty()) {
|
||||
sortedVideos.push_back(pq.top());
|
||||
pq.pop();
|
||||
}
|
||||
|
||||
std::sort(sortedVideos.begin(), sortedVideos.end(), VideoInfo::compareForFinalSort);
|
||||
|
||||
return sortedVideos;
|
||||
}
|
||||
|
||||
bool TopKVideoHolder::empty() const { return pq.empty(); }
|
||||
size_t TopKVideoHolder::size() const { return pq.size(); }
|
||||
19
hws/tiktok_trends/TopKVideoHolder.h
Normal file
19
hws/tiktok_trends/TopKVideoHolder.h
Normal file
@@ -0,0 +1,19 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include <algorithm>
|
||||
#include "VideoInfo.h"
|
||||
#include "Constants.h"
|
||||
|
||||
class TopKVideoHolder {
|
||||
private:
|
||||
std::priority_queue<VideoInfo, std::vector<VideoInfo>, VideoCompareWorse> pq;
|
||||
static const size_t K = TOP_K_CANDIDATES;
|
||||
|
||||
public:
|
||||
void add(const VideoInfo& video);
|
||||
std::vector<VideoInfo> getSortedVideos();
|
||||
bool empty() const;
|
||||
size_t size() const;
|
||||
};
|
||||
265
hws/tiktok_trends/Utils.cpp
Normal file
265
hws/tiktok_trends/Utils.cpp
Normal file
@@ -0,0 +1,265 @@
|
||||
#include "Utils.h"
#include <algorithm> // For std::min
#include <cctype>
#include <cstring>
#include <iostream> // For potential cerr usage, although not directly in these functions
#include <limits> // For std::numeric_limits (overflow check in parseLongLong)
|
||||
|
||||
// Reads a double-quoted value starting at str[pos].
//
// On success `value` receives the characters between the quotes, `pos` is
// moved just past the closing quote, and true is returned. `value` is always
// cleared first. Returns false when str[pos] is not '"' (pos unchanged) or
// when no closing quote exists (pos is left at the end of the string).
// NOTE(review): backslash escapes are not interpreted, so the value ends at
// the first '"' even if it is written as \" - confirm this is intended.
bool parseQuotedStringValue(const std::string& str, size_t& pos, std::string& value) {
    value.clear();

    if (pos >= str.length() || str[pos] != '"') {
        return false;
    }

    const size_t contentBegin = pos + 1;
    const size_t closingQuote = str.find('"', contentBegin);
    if (closingQuote == std::string::npos) {
        pos = str.length();
        return false;
    }

    value.assign(str, contentBegin, closingQuote - contentBegin);
    pos = closingQuote + 1;
    return true;
}
|
||||
|
||||
// Reads a bare (unquoted) token starting at str[pos]. The token ends at the
// first ',', '}', ']' or whitespace character, or at the end of the string.
//
// `value` is cleared first. Returns false, leaving `pos` untouched, when the
// token would be empty; otherwise stores the token in `value`, moves `pos` to
// the terminating character and returns true.
bool parseUnquotedValue(const std::string& str, size_t& pos, std::string& value) {
    value.clear();

    const size_t tokenBegin = pos;
    size_t cursor = pos;
    for (; cursor < str.length(); ++cursor) {
        const char ch = str[cursor];
        if (ch == ',' || ch == '}' || ch == ']') break;
        if (std::isspace(static_cast<unsigned char>(ch))) break;
    }

    if (cursor == tokenBegin) {
        return false;
    }

    pos = cursor;
    value.assign(str, tokenBegin, cursor - tokenBegin);
    return true;
}
|
||||
|
||||
bool extractValue(const std::string& line, const std::string& key, std::string& value) {
|
||||
const std::string searchKey = "\"" + key + "\":";
|
||||
const char* found_pos = strstr(line.c_str(), searchKey.c_str());
|
||||
if (!found_pos) return false;
|
||||
|
||||
size_t pos = (found_pos - line.c_str()) + searchKey.length();
|
||||
const size_t lineLen = line.length();
|
||||
|
||||
while (pos < lineLen && std::isspace(static_cast<unsigned char>(line[pos]))) {
|
||||
++pos;
|
||||
}
|
||||
if (pos >= lineLen) return false;
|
||||
|
||||
if (line[pos] == '"') {
|
||||
return parseQuotedStringValue(line, pos, value);
|
||||
} else {
|
||||
return parseUnquotedValue(line, pos, value);
|
||||
}
|
||||
}
|
||||
|
||||
// Finds the first occurrence of  "key":  in `line` and, if the value that
// follows is an object, copies the object text (from its opening '{' to the
// matching '}', inclusive) into `subObj`.
//
// Braces inside double-quoted strings are ignored, and a quote preceded by an
// unescaped backslash does not open or close a string. Returns false, leaving
// `subObj` untouched, when the key is missing, the value does not start with
// '{', or the braces never balance.
bool extractSubObject(const std::string& line, const std::string& key, std::string& subObj) {
    const std::string needle = "\"" + key + "\":";

    const char* const base = line.c_str();
    const char* const hit = strstr(base, needle.c_str());
    if (hit == nullptr) {
        return false;
    }

    const size_t total = line.length();
    size_t cursor = static_cast<size_t>(hit - base) + needle.length();
    while (cursor < total && std::isspace(static_cast<unsigned char>(line[cursor]))) {
        ++cursor;
    }
    if (cursor >= total || line[cursor] != '{') {
        return false;
    }

    const size_t objectBegin = cursor;
    int depth = 1;
    bool insideString = false;
    bool escaped = false;  // previous character was an unescaped backslash

    for (++cursor; cursor < total && depth > 0; ++cursor) {
        const char ch = line[cursor];
        if (ch == '"' && !escaped) {
            insideString = !insideString;
        } else if (!insideString) {
            if (ch == '{') {
                ++depth;
            } else if (ch == '}') {
                --depth;
            }
        }
        // A backslash escapes the next character unless it was itself escaped.
        escaped = (ch == '\\') && !escaped;
    }

    if (depth != 0) {
        return false;
    }

    subObj.assign(line, objectBegin, cursor - objectBegin);
    return true;
}
|
||||
|
||||
// Parses a base-10 integer consisting of an optional leading '-' followed by
// one or more digits, with nothing else before or after.
//
// On success the value is stored in `result` and true is returned. On any
// failure (empty input, lone '-', non-digit character, or a magnitude larger
// than the maximum value of long) `result` is 0 and false is returned.
// The range check runs before each multiply-add, so an over-long digit string
// is rejected instead of overflowing a signed long, which is undefined
// behaviour.
bool parseLongLong(const std::string& s, long& result) {
    result = 0;
    if (s.empty()) return false;

    const char* ptr = s.c_str();
    const bool negative = (*ptr == '-');
    if (negative) ++ptr;
    if (!*ptr) return false;  // nothing after the sign

    const long maxValue = std::numeric_limits<long>::max();
    long magnitude = 0;
    for (; *ptr; ++ptr) {
        if (*ptr < '0' || *ptr > '9') return false;
        const long digit = *ptr - '0';
        // magnitude * 10 + digit would exceed maxValue.
        if (magnitude > (maxValue - digit) / 10) return false;
        magnitude = magnitude * 10 + digit;
    }

    result = negative ? -magnitude : magnitude;
    return true;
}
|
||||
|
||||
|
||||
bool parseLineForHashtags(const std::string& line, int inputOrder, StringInterner& interner,
|
||||
VideoInfo& outVideo, std::string& outText)
|
||||
{
|
||||
outText.clear();
|
||||
|
||||
std::string id_str, coverUrl_str, webVideoUrl_str, playCount_str;
|
||||
|
||||
if (!extractValue(line, "id", id_str) || id_str.empty()) return false;
|
||||
|
||||
long playCount = 0;
|
||||
if (extractValue(line, "playCount", playCount_str)) {
|
||||
parseLongLong(playCount_str, playCount);
|
||||
}
|
||||
|
||||
extractValue(line, "text", outText);
|
||||
|
||||
extractValue(line, "webVideoUrl", webVideoUrl_str);
|
||||
std::string videoMetaSub;
|
||||
if (extractSubObject(line, "videoMeta", videoMetaSub)) {
|
||||
extractValue(videoMetaSub, "coverUrl", coverUrl_str);
|
||||
}
|
||||
|
||||
outVideo = VideoInfo(
|
||||
interner.intern(std::move(id_str)),
|
||||
interner.intern(std::move(coverUrl_str)),
|
||||
interner.intern(std::move(webVideoUrl_str)),
|
||||
playCount,
|
||||
inputOrder
|
||||
);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool parseLineForSounds(const std::string& line, int inputOrder, StringInterner& interner,
|
||||
VideoInfo& outVideo,
|
||||
const std::string*& outMusicIdPtr,
|
||||
const std::string*& outMusicNamePtr,
|
||||
const std::string*& outMusicAuthorPtr)
|
||||
{
|
||||
std::string id_str, coverUrl_str, webVideoUrl_str, playCount_str;
|
||||
std::string musicId_str, musicName_str, musicAuthor_str;
|
||||
|
||||
if (!extractValue(line, "id", id_str) || id_str.empty()) return false;
|
||||
|
||||
long playCount = 0;
|
||||
if (extractValue(line, "playCount", playCount_str)) {
|
||||
parseLongLong(playCount_str, playCount);
|
||||
}
|
||||
|
||||
std::string musicMetaSub;
|
||||
if (extractSubObject(line, "musicMeta", musicMetaSub)) {
|
||||
extractValue(musicMetaSub, "musicId", musicId_str);
|
||||
extractValue(musicMetaSub, "musicName", musicName_str);
|
||||
extractValue(musicMetaSub, "musicAuthor", musicAuthor_str);
|
||||
}
|
||||
|
||||
if (musicId_str.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
extractValue(line, "webVideoUrl", webVideoUrl_str);
|
||||
std::string videoMetaSub;
|
||||
if (extractSubObject(line, "videoMeta", videoMetaSub)) {
|
||||
extractValue(videoMetaSub, "coverUrl", coverUrl_str);
|
||||
}
|
||||
|
||||
outVideo = VideoInfo(
|
||||
interner.intern(std::move(id_str)),
|
||||
interner.intern(std::move(coverUrl_str)),
|
||||
interner.intern(std::move(webVideoUrl_str)),
|
||||
playCount,
|
||||
inputOrder
|
||||
);
|
||||
outMusicIdPtr = interner.intern(std::move(musicId_str));
|
||||
outMusicNamePtr = interner.intern(std::move(musicName_str));
|
||||
outMusicAuthorPtr = interner.intern(std::move(musicAuthor_str));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void extractHashtags(const std::string& text,
|
||||
std::unordered_map<const std::string*, HashtagInfo, StringPtrHash, StringPtrEqual>& hashtagData,
|
||||
StringInterner& interner,
|
||||
const VideoInfo& video)
|
||||
{
|
||||
const size_t textLen = text.length();
|
||||
const char* textData = text.data();
|
||||
size_t pos = 0;
|
||||
std::string tag_buffer;
|
||||
tag_buffer.reserve(50);
|
||||
|
||||
while (pos < textLen) {
|
||||
while (pos < textLen && textData[pos] != '#') {
|
||||
pos++;
|
||||
}
|
||||
if (pos >= textLen) break;
|
||||
|
||||
size_t start = pos + 1;
|
||||
if (start >= textLen) break;
|
||||
|
||||
size_t end = start;
|
||||
|
||||
while (end < textLen && (std::isalnum(static_cast<unsigned char>(textData[end])) || textData[end] == '_')) {
|
||||
end++;
|
||||
}
|
||||
|
||||
if (end > start) {
|
||||
tag_buffer.assign(textData + start, end - start);
|
||||
const std::string* hashtagPtr = interner.intern(tag_buffer);
|
||||
|
||||
typedef std::unordered_map<const std::string*, HashtagInfo, StringPtrHash, StringPtrEqual> HashtagMapType;
|
||||
HashtagMapType::iterator it = hashtagData.find(hashtagPtr);
|
||||
|
||||
if (it == hashtagData.end()) {
|
||||
std::pair<HashtagMapType::iterator, bool> emplace_result =
|
||||
hashtagData.emplace(hashtagPtr, HashtagInfo(hashtagPtr));
|
||||
it = emplace_result.first;
|
||||
}
|
||||
|
||||
it->second.usageCount++;
|
||||
it->second.totalViews += video.playCount;
|
||||
it->second.topVideos.add(video);
|
||||
}
|
||||
|
||||
pos = end;
|
||||
}
|
||||
}
|
||||
|
||||
void extractSortAndPrintTop3Videos(std::ofstream& fout, TopKVideoHolder& topVideos) {
|
||||
std::vector<VideoInfo> sortedTopVideos = topVideos.getSortedVideos();
|
||||
|
||||
int videosToPrint = std::min(static_cast<int>(sortedTopVideos.size()), TOP_K_CANDIDATES);
|
||||
for (int i = 0; i < videosToPrint; ++i) {
|
||||
const VideoInfo& video = sortedTopVideos[i];
|
||||
|
||||
fout << "cover url: " << (video.coverUrl && !video.coverUrl->empty() ? *video.coverUrl : "null") << "\n";
|
||||
fout << "web video url: " << (video.webVideoUrl && !video.webVideoUrl->empty() ? *video.webVideoUrl : "null") << "\n";
|
||||
}
|
||||
}
|
||||
33
hws/tiktok_trends/Utils.h
Normal file
33
hws/tiktok_trends/Utils.h
Normal file
@@ -0,0 +1,33 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <fstream>
|
||||
#include "StringInterner.h"
|
||||
#include "VideoInfo.h"
|
||||
#include "HashtagInfo.h"
|
||||
#include "SoundInfo.h"
|
||||
#include "StringPtrUtils.h" // Needed for HashtagMapType/SoundMapType in function signatures
|
||||
|
||||
bool parseQuotedStringValue(const std::string& str, size_t& pos, std::string& value);
|
||||
bool parseUnquotedValue(const std::string& str, size_t& pos, std::string& value);
|
||||
bool extractValue(const std::string& line, const std::string& key, std::string& value);
|
||||
bool extractSubObject(const std::string& line, const std::string& key, std::string& subObj);
|
||||
bool parseLongLong(const std::string& s, long& result);
|
||||
|
||||
bool parseLineForHashtags(const std::string& line, int inputOrder, StringInterner& interner,
|
||||
VideoInfo& outVideo, std::string& outText);
|
||||
|
||||
bool parseLineForSounds(const std::string& line, int inputOrder, StringInterner& interner,
|
||||
VideoInfo& outVideo,
|
||||
const std::string*& outMusicIdPtr,
|
||||
const std::string*& outMusicNamePtr,
|
||||
const std::string*& outMusicAuthorPtr);
|
||||
|
||||
void extractHashtags(const std::string& text,
|
||||
std::unordered_map<const std::string*, HashtagInfo, StringPtrHash, StringPtrEqual>& hashtagData,
|
||||
StringInterner& interner,
|
||||
const VideoInfo& video);
|
||||
|
||||
void extractSortAndPrintTop3Videos(std::ofstream& fout, TopKVideoHolder& topVideos);
|
||||
37
hws/tiktok_trends/VideoInfo.h
Normal file
37
hws/tiktok_trends/VideoInfo.h
Normal file
@@ -0,0 +1,37 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include "Constants.h"
|
||||
|
||||
// Lightweight record describing one video. The string fields are non-owning
// pointers; a null pointer means "not set".
struct VideoInfo {
    const std::string* videoId = nullptr;
    const std::string* coverUrl = nullptr;
    const std::string* webVideoUrl = nullptr;
    long playCount = 0;
    // Value supplied by the caller; used only as a tie-breaker.
    int inputOrder = -1;

    VideoInfo() = default;

    VideoInfo(const std::string* id, const std::string* cover, const std::string* web,
              long plays, int order)
        : videoId(id), coverUrl(cover), webVideoUrl(web), playCount(plays), inputOrder(order) {}

    // Ordering used for the final listing: higher playCount first, then
    // smaller videoId (only when both ids are set and differ), then smaller
    // inputOrder.
    static bool compareForFinalSort(const VideoInfo& a, const VideoInfo& b) {
        if (a.playCount > b.playCount) return true;
        if (a.playCount < b.playCount) return false;

        if (a.videoId != nullptr && b.videoId != nullptr) {
            const int idOrder = a.videoId->compare(*b.videoId);
            if (idOrder != 0) return idOrder < 0;
        }
        return a.inputOrder < b.inputOrder;
    }

    // "Less than" means "ranks earlier": higher playCount first, ties broken
    // by smaller inputOrder. The videoId is not consulted here.
    bool operator<(const VideoInfo& other) const {
        return playCount == other.playCount ? inputOrder < other.inputOrder
                                            : playCount > other.playCount;
    }
};
|
||||
|
||||
struct VideoCompareWorse {
|
||||
bool operator()(const VideoInfo& a, const VideoInfo& b) const {
|
||||
if (a.playCount != b.playCount) return a.playCount > b.playCount;
|
||||
return a.inputOrder < b.inputOrder;
|
||||
}
|
||||
};
|
||||
256
hws/tiktok_trends/main.cpp
Normal file
256
hws/tiktok_trends/main.cpp
Normal file
@@ -0,0 +1,256 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
#include <cstring>
|
||||
|
||||
#include "Constants.h"
|
||||
#include "StringInterner.h"
|
||||
#include "StringPtrUtils.h"
|
||||
#include "VideoInfo.h"
|
||||
#include "TopKVideoHolder.h"
|
||||
#include "HashtagInfo.h"
|
||||
#include "SoundInfo.h"
|
||||
#include "Utils.h"
|
||||
|
||||
|
||||
bool processHashtags(const std::string& filename, std::ofstream& outputFile) {
|
||||
std::ifstream inputFile(filename);
|
||||
if (!inputFile.is_open()) {
|
||||
std::cerr << "Cannot open input file: " << filename << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
StringInterner interner;
|
||||
std::unordered_map<const std::string*, HashtagInfo, StringPtrHash, StringPtrEqual> hashtagData;
|
||||
hashtagData.reserve(250000);
|
||||
|
||||
std::string line;
|
||||
int inputOrderCounter = 0;
|
||||
VideoInfo currentVideo;
|
||||
std::string text_buffer;
|
||||
|
||||
while (std::getline(inputFile, line)) {
|
||||
if (line.length() < 10) continue;
|
||||
inputOrderCounter++;
|
||||
|
||||
if (parseLineForHashtags(line, inputOrderCounter, interner, currentVideo, text_buffer)) {
|
||||
if (!text_buffer.empty()) {
|
||||
extractHashtags(text_buffer, hashtagData, interner, currentVideo);
|
||||
}
|
||||
}
|
||||
}
|
||||
inputFile.close();
|
||||
|
||||
std::priority_queue<HashtagInfo*, std::vector<HashtagInfo*>, CompareHashtagPtrForHeap> top20Hashtags;
|
||||
typedef std::unordered_map<const std::string*, HashtagInfo, StringPtrHash, StringPtrEqual> HashtagMapType;
|
||||
|
||||
for (HashtagMapType::iterator it = hashtagData.begin(); it != hashtagData.end(); ++it) {
|
||||
HashtagInfo* currentHashtagPtr = &(it->second);
|
||||
|
||||
if (top20Hashtags.size() < TOP_N_OUTPUT) {
|
||||
top20Hashtags.push(currentHashtagPtr);
|
||||
} else {
|
||||
const HashtagInfo* topPtr = top20Hashtags.top();
|
||||
bool is_better = CompareHashtagPtr()(currentHashtagPtr, topPtr);
|
||||
|
||||
if (is_better) {
|
||||
top20Hashtags.pop();
|
||||
top20Hashtags.push(currentHashtagPtr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<HashtagInfo*> finalTop20;
|
||||
finalTop20.reserve(top20Hashtags.size());
|
||||
while (!top20Hashtags.empty()) {
|
||||
finalTop20.push_back(top20Hashtags.top());
|
||||
top20Hashtags.pop();
|
||||
}
|
||||
|
||||
std::sort(finalTop20.begin(), finalTop20.end(), CompareHashtagPtr());
|
||||
|
||||
outputFile << "trending hashtags:\n\n";
|
||||
for (size_t i = 0; i < finalTop20.size(); ++i) {
|
||||
HashtagInfo* currentHashtag = finalTop20[i];
|
||||
|
||||
outputFile << "========================\n";
|
||||
|
||||
outputFile << "#" << (currentHashtag->name ? *currentHashtag->name : "null") << "\n";
|
||||
outputFile << "used " << currentHashtag->usageCount << " times\n";
|
||||
outputFile << currentHashtag->totalViews << " views\n\n";
|
||||
|
||||
extractSortAndPrintTop3Videos(outputFile, currentHashtag->topVideos);
|
||||
|
||||
outputFile << "========================";
|
||||
if (i < finalTop20.size() - 1) {
|
||||
outputFile << "\n";
|
||||
} else {
|
||||
outputFile << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool processSounds(const std::string& filename, std::ofstream& outputFile) {
|
||||
std::ifstream inputFile(filename);
|
||||
if (!inputFile.is_open()) {
|
||||
std::cerr << "Cannot open input file: " << filename << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
StringInterner interner;
|
||||
std::unordered_map<const std::string*, SoundInfo, StringPtrHash, StringPtrEqual> soundData;
|
||||
soundData.reserve(50000);
|
||||
|
||||
std::string line;
|
||||
int inputOrderCounter = 0;
|
||||
VideoInfo currentVideo;
|
||||
const std::string* musicIdPtr = nullptr;
|
||||
const std::string* musicNamePtr = nullptr;
|
||||
const std::string* musicAuthorPtr = nullptr;
|
||||
|
||||
while (std::getline(inputFile, line)) {
|
||||
if (line.length() < 10) continue;
|
||||
inputOrderCounter++;
|
||||
|
||||
musicIdPtr = nullptr;
|
||||
musicNamePtr = nullptr;
|
||||
musicAuthorPtr = nullptr;
|
||||
|
||||
if (parseLineForSounds(line, inputOrderCounter, interner, currentVideo,
|
||||
musicIdPtr, musicNamePtr, musicAuthorPtr))
|
||||
{
|
||||
if (musicIdPtr == nullptr || musicIdPtr->empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
typedef std::unordered_map<const std::string*, SoundInfo, StringPtrHash, StringPtrEqual> SoundMapType;
|
||||
SoundMapType::iterator it = soundData.find(musicIdPtr);
|
||||
|
||||
if (it == soundData.end()) {
|
||||
std::pair<SoundMapType::iterator, bool> emplace_result =
|
||||
soundData.emplace(musicIdPtr, SoundInfo(musicIdPtr, musicNamePtr, musicAuthorPtr));
|
||||
it = emplace_result.first;
|
||||
}
|
||||
|
||||
it->second.totalViews += currentVideo.playCount;
|
||||
|
||||
if (it->second.musicName->empty() && !musicNamePtr->empty()) {
|
||||
it->second.musicName = musicNamePtr;
|
||||
}
|
||||
if (it->second.musicAuthor->empty() && !musicAuthorPtr->empty()) {
|
||||
it->second.musicAuthor = musicAuthorPtr;
|
||||
}
|
||||
it->second.topVideos.add(currentVideo);
|
||||
}
|
||||
}
|
||||
inputFile.close();
|
||||
|
||||
std::priority_queue<SoundInfo*, std::vector<SoundInfo*>, CompareSoundPtrForHeap> top20Sounds;
|
||||
typedef std::unordered_map<const std::string*, SoundInfo, StringPtrHash, StringPtrEqual> SoundMapType;
|
||||
|
||||
for (SoundMapType::iterator it = soundData.begin(); it != soundData.end(); ++it) {
|
||||
SoundInfo* currentSoundPtr = &(it->second);
|
||||
|
||||
if (top20Sounds.size() < TOP_N_OUTPUT) {
|
||||
top20Sounds.push(currentSoundPtr);
|
||||
} else {
|
||||
const SoundInfo* topPtr = top20Sounds.top();
|
||||
bool is_better = CompareSoundPtr()(currentSoundPtr, topPtr);
|
||||
|
||||
if (is_better) {
|
||||
top20Sounds.pop();
|
||||
top20Sounds.push(currentSoundPtr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<SoundInfo*> finalTop20;
|
||||
finalTop20.reserve(top20Sounds.size());
|
||||
while (!top20Sounds.empty()) {
|
||||
finalTop20.push_back(top20Sounds.top());
|
||||
top20Sounds.pop();
|
||||
}
|
||||
|
||||
std::sort(finalTop20.begin(), finalTop20.end(), CompareSoundPtr());
|
||||
|
||||
outputFile << "trending sounds:\n\n";
|
||||
for (size_t i = 0; i < finalTop20.size(); ++i) {
|
||||
SoundInfo* currentSound = finalTop20[i];
|
||||
|
||||
outputFile << "========================\n";
|
||||
|
||||
if (currentSound->musicName == nullptr || currentSound->musicName->empty()) {
|
||||
outputFile << "\n";
|
||||
} else {
|
||||
outputFile << *currentSound->musicName << "\n";
|
||||
}
|
||||
|
||||
outputFile << currentSound->totalViews << " views\n";
|
||||
|
||||
if (currentSound->musicAuthor == nullptr || currentSound->musicAuthor->empty()) {
|
||||
outputFile << "\n";
|
||||
} else {
|
||||
outputFile << *currentSound->musicAuthor << "\n";
|
||||
}
|
||||
|
||||
outputFile << "music id: " << (currentSound->musicId && !currentSound->musicId->empty() ? *currentSound->musicId : "null") << "\n";
|
||||
|
||||
if (!currentSound->topVideos.empty()) {
|
||||
outputFile << "\n";
|
||||
}
|
||||
|
||||
extractSortAndPrintTop3Videos(outputFile, currentSound->topVideos);
|
||||
|
||||
outputFile << "========================";
|
||||
if (i < finalTop20.size() - 1) {
|
||||
outputFile << "\n";
|
||||
} else {
|
||||
outputFile << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
if (argc != 4) {
|
||||
std::cerr << "Usage: nytrends.exe <input.json> <output.txt> <mode>\n";
|
||||
std::cerr << "Mode can be 'hashtag' or 'sound'\n";
|
||||
return 1;
|
||||
}
|
||||
std::string inputFileName = argv[1];
|
||||
std::string outputFileName = argv[2];
|
||||
std::string mode = argv[3];
|
||||
|
||||
std::ofstream outputFile(outputFileName);
|
||||
if (!outputFile.is_open()) {
|
||||
std::cerr << "Error: Cannot open output file " << outputFileName << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::ios_base::sync_with_stdio(false);
|
||||
|
||||
bool success = false;
|
||||
if (mode == "hashtag") {
|
||||
success = processHashtags(inputFileName, outputFile);
|
||||
} else if (mode == "sound") {
|
||||
success = processSounds(inputFileName, outputFile);
|
||||
} else {
|
||||
std::cerr << "Error: Invalid mode '" << mode << "'. Must be 'hashtag' or 'sound'." << std::endl;
|
||||
outputFile.close();
|
||||
return 1;
|
||||
}
|
||||
|
||||
outputFile.close();
|
||||
return success ? 0 : 1;
|
||||
}
|
||||
242
hws/tiktok_trends/output.txt
Normal file
242
hws/tiktok_trends/output.txt
Normal file
@@ -0,0 +1,242 @@
|
||||
trending hashtags:
|
||||
|
||||
========================
|
||||
#fyp
|
||||
used 7600 times
|
||||
261199234341 views
|
||||
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/567504ab3e4648dea968213ce979f281?x-expires=1700449200&x-signature=bjGEgY4bdEVOMMHQa2S0qrzNCQY%3D
|
||||
web video url: https://www.tiktok.com/@bellapoarch/video/6862153058223197445
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/567504ab3e4648dea968213ce979f281?x-expires=1700449200&x-signature=bjGEgY4bdEVOMMHQa2S0qrzNCQY%3D
|
||||
web video url: https://www.tiktok.com/@bellapoarch/video/6862153058223197445
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/oAJCgD1khIANGRcTLhqQZNCi3ohAuAzoyEdIaf?x-expires=1700449200&x-signature=hu1Kg0Cpz%2BzVRXqYkv%2Fl6E8%2Ftgk%3D
|
||||
web video url: https://www.tiktok.com/@tool_tips/video/7212981630904864005
|
||||
========================
|
||||
========================
|
||||
#foryou
|
||||
used 2765 times
|
||||
92282640558 views
|
||||
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/c4c7c98ecb5f4a8980ed7d58cdea2df3_1676378432?x-expires=1700449200&x-signature=QIchR40Etr%2BAjbAuzYbwTKnD7dA%3D
|
||||
web video url: https://www.tiktok.com/@gorillatiks/video/7199990500512894213
|
||||
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/d57bd10bd2594b148d48c5443d5571b0?x-expires=1700449200&x-signature=Z%2FTgQwhQ9eSmRMF3cBmH%2BdVHve8%3D
|
||||
web video url: https://www.tiktok.com/@honeycats77/video/7190528800352980267
|
||||
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/d57bd10bd2594b148d48c5443d5571b0?x-expires=1700449200&x-signature=Z%2FTgQwhQ9eSmRMF3cBmH%2BdVHve8%3D
|
||||
web video url: https://www.tiktok.com/@honeycats77/video/7190528800352980267
|
||||
========================
|
||||
========================
|
||||
#viral
|
||||
used 1759 times
|
||||
59270543842 views
|
||||
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/o4n0aDMCxQPkleFE5JnbeaoIw9uEBRQiTkIzAB?x-expires=1700449200&x-signature=zOxX4QIMqL%2BNOyl6R57PLiVKb%2BE%3D
|
||||
web video url: https://www.tiktok.com/@dada_ahoufe_/video/7247202774696447238
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/o4n0aDMCxQPkleFE5JnbeaoIw9uEBRQiTkIzAB?x-expires=1700449200&x-signature=zOxX4QIMqL%2BNOyl6R57PLiVKb%2BE%3D
|
||||
web video url: https://www.tiktok.com/@dada_ahoufe_/video/7247202774696447238
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/oYRUDAt9kFIA2SIwCWomEVfha623AyrLzxgaAo?x-expires=1700449200&x-signature=xVeyOReZuykD9rFS4KFcN%2FFL44g%3D
|
||||
web video url: https://www.tiktok.com/@carrosseriereparation/video/7217942797360303365
|
||||
========================
|
||||
========================
|
||||
#makeuptutorial
|
||||
used 1709 times
|
||||
22311707100 views
|
||||
|
||||
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/ea183fe6de594a319ba917d1ffbff11b?x-expires=1700503200&x-signature=9L4ypK162uI%2BirECcDcFqctjvn8%3D
|
||||
web video url: https://www.tiktok.com/@dollievision/video/7208244986666585386
|
||||
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/ea183fe6de594a319ba917d1ffbff11b?x-expires=1700503200&x-signature=9L4ypK162uI%2BirECcDcFqctjvn8%3D
|
||||
web video url: https://www.tiktok.com/@dollievision/video/7208244986666585386
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/5461c70dd8ee4a0d891e7f2529f6b8ea_1670789072?x-expires=1700503200&x-signature=TqqnBqyBh5cnb150Ri0jXfwaL9s%3D
|
||||
web video url: https://www.tiktok.com/@alicekingmakeup/video/7175984394950167813
|
||||
========================
|
||||
========================
|
||||
#couplestiktok
|
||||
used 1610 times
|
||||
14706422100 views
|
||||
|
||||
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/693775bbddac4df4ad008ff880041fbc?x-expires=1700503200&x-signature=UU8VVoLrIaXIVFnYLf3jl8IYO%2BE%3D
|
||||
web video url: https://www.tiktok.com/@misiaaa621/video/7149368989611773227
|
||||
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/693775bbddac4df4ad008ff880041fbc?x-expires=1700503200&x-signature=UU8VVoLrIaXIVFnYLf3jl8IYO%2BE%3D
|
||||
web video url: https://www.tiktok.com/@misiaaa621/video/7149368989611773227
|
||||
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/4b83dfa1cc0c47318e408a6bcde34bb6_1677549193?x-expires=1700503200&x-signature=%2FN8FYhRpVldGSaxbP6dgYeEroqI%3D
|
||||
web video url: https://www.tiktok.com/@debbiekval/video/7205018880622857515
|
||||
========================
|
||||
========================
|
||||
#lifehack
|
||||
used 1585 times
|
||||
33681856600 views
|
||||
|
||||
cover url: https://p16-sign-sg.tiktokcdn.com/tos-alisg-p-0037/d6a1c1c323614919975fad3ee1c1ef9e~tplv-dmt-logom:tos-alisg-i-0000/4124427fcd3045968ac1c3136bd92d6c.image?x-expires=1700452800&x-signature=qCaN1hrF7pqQ0kvZJnlFnc9jI6Q%3D
|
||||
web video url: https://www.tiktok.com/@tresorfie/video/7039091515863403778
|
||||
cover url: https://p16-sign-sg.tiktokcdn.com/tos-alisg-p-0037/d6a1c1c323614919975fad3ee1c1ef9e~tplv-dmt-logom:tos-alisg-i-0000/4124427fcd3045968ac1c3136bd92d6c.image?x-expires=1700449200&x-signature=WSl3XKN1HPXy7jpguj8v0AaI3FU%3D
|
||||
web video url: https://www.tiktok.com/@tresorfie/video/7039091515863403778
|
||||
cover url: https://p16-sign-sg.tiktokcdn.com/tos-alisg-p-0037/501627c6b36849e282740c764611f2a7_1634994542~tplv-dmt-logom:tos-alisg-pv-0037/f3273e6f3e92421d860be8f5e72ac0bd.image?x-expires=1700452800&x-signature=DkwRLgyyY5ec0757c1hCq372yJM%3D
|
||||
web video url: https://www.tiktok.com/@issei0806/video/7022248055625846018
|
||||
========================
|
||||
========================
|
||||
#funnyvideos
|
||||
used 1573 times
|
||||
67029374400 views
|
||||
|
||||
cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=HTd5Yy2XA1y%2Bn0Gy2PnX9t%2FNpw4%3D
|
||||
web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731
|
||||
cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=HTd5Yy2XA1y%2Bn0Gy2PnX9t%2FNpw4%3D
|
||||
web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=vv04JjjwKgR1P3t117v%2B5HMvnpI%3D
|
||||
web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731
|
||||
========================
|
||||
========================
|
||||
#foryoupage
|
||||
used 1550 times
|
||||
49067115500 views
|
||||
|
||||
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/d57bd10bd2594b148d48c5443d5571b0?x-expires=1700449200&x-signature=Z%2FTgQwhQ9eSmRMF3cBmH%2BdVHve8%3D
|
||||
web video url: https://www.tiktok.com/@honeycats77/video/7190528800352980267
|
||||
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/d57bd10bd2594b148d48c5443d5571b0?x-expires=1700449200&x-signature=Z%2FTgQwhQ9eSmRMF3cBmH%2BdVHve8%3D
|
||||
web video url: https://www.tiktok.com/@honeycats77/video/7190528800352980267
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/oYRUDAt9kFIA2SIwCWomEVfha623AyrLzxgaAo?x-expires=1700449200&x-signature=xVeyOReZuykD9rFS4KFcN%2FFL44g%3D
|
||||
web video url: https://www.tiktok.com/@carrosseriereparation/video/7217942797360303365
|
||||
========================
|
||||
========================
|
||||
#newyorkcity
|
||||
used 1545 times
|
||||
8642836600 views
|
||||
|
||||
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/08b82fdf19d5468e91a032b30e527861_1692785637?x-expires=1700452800&x-signature=%2F5sW2i0xTGXJJj2PKbwI6VXywWI%3D
|
||||
web video url: https://www.tiktok.com/@erikconover/video/7270458709065731370
|
||||
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/1ea2545bde2645ec8f1106a4b9de6c2e_1648602515?x-expires=1700452800&x-signature=HNr6UCQsc4q0m%2FFShx%2FJJNWb1Jg%3D
|
||||
web video url: https://www.tiktok.com/@thekatieromero/video/7080693879141485870
|
||||
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/c898b993f308477e92334437f9f0e1e1?x-expires=1700452800&x-signature=ZNHG9%2FMBw9qp5DSm%2BNnbhwX6xK8%3D
|
||||
web video url: https://www.tiktok.com/@thekatieromero/video/7105552208422374699
|
||||
========================
|
||||
========================
|
||||
#ifweeverbrokeup
|
||||
used 1543 times
|
||||
1337044198 views
|
||||
|
||||
cover url: https://p16-sign-sg.tiktokcdn.com/obj/tos-alisg-p-0037/ee79eb2bea6445739ed71cef3e9b84b6_1686646723?x-expires=1700456400&x-signature=0MADrs89I23eeCudb%2FJxkI%2FJbR8%3D
|
||||
web video url: https://www.tiktok.com/@zanmangloopyofficial/video/7244092495129218312
|
||||
cover url: https://p16-sign-sg.tiktokcdn.com/obj/tos-alisg-p-0037/ee79eb2bea6445739ed71cef3e9b84b6_1686646723?x-expires=1700456400&x-signature=0MADrs89I23eeCudb%2FJxkI%2FJbR8%3D
|
||||
web video url: https://www.tiktok.com/@zanmangloopyofficial/video/7244092495129218312
|
||||
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/7895be3e406d435ba0db9e6f5db349e2?x-expires=1700456400&x-signature=ahrkOjcQRlDhAOf1upGEu%2B2ECYU%3D
|
||||
web video url: https://www.tiktok.com/@bebopandbebe/video/7238437685537328426
|
||||
========================
|
||||
========================
|
||||
#springcleaning
|
||||
used 1416 times
|
||||
2156123000 views
|
||||
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/tos-maliva-p-0068/56ce7e79491a4b27b371517ce134fa82_1631381225~tplv-dmt-logom:tos-maliva-p-0000/415cfd01b3484fb38f7b088aa6efda67.image?x-expires=1700503200&x-signature=YL4yGwa%2F1gZ59cKHMov7ficsK9E%3D
|
||||
web video url: https://www.tiktok.com/@livecomposed/video/7006728991067491589
|
||||
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/4c54b4d1332c4000a615d9c5fc172be8_1677780053?x-expires=1700503200&x-signature=aD4zRpPLbhn9wz4vtTQWZRa2I1U%3D
|
||||
web video url: https://www.tiktok.com/@atmeikasa/video/7206010371004583214
|
||||
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/4c54b4d1332c4000a615d9c5fc172be8_1677780053?x-expires=1700503200&x-signature=aD4zRpPLbhn9wz4vtTQWZRa2I1U%3D
|
||||
web video url: https://www.tiktok.com/@atmeikasa/video/7206010371004583214
|
||||
========================
|
||||
========================
|
||||
#funny
|
||||
used 1382 times
|
||||
53648909500 views
|
||||
|
||||
cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=HTd5Yy2XA1y%2Bn0Gy2PnX9t%2FNpw4%3D
|
||||
web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731
|
||||
cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=HTd5Yy2XA1y%2Bn0Gy2PnX9t%2FNpw4%3D
|
||||
web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-useast2a-p-0037-euttp/b0fed04ac06b45f58a9c3add061342dd_1686566824?x-expires=1700449200&x-signature=vv04JjjwKgR1P3t117v%2B5HMvnpI%3D
|
||||
web video url: https://www.tiktok.com/@funnnyh/video/7243749070475496731
|
||||
========================
|
||||
========================
|
||||
#happymonday
|
||||
used 1308 times
|
||||
741991700 views
|
||||
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/tos-maliva-p-0068/oQgeH8BRJnj20JEFoQ5tAf1MIb976nBD89QiFB~tplv-dmt-logom:tos-useast2a-v-0068/4763cd9418ac4d7faccbf52906bcf43c.image?x-expires=1700449200&x-signature=DPRcWm2Xhpe7r2HmxxGBzOyhwVs%3D
|
||||
web video url: https://www.tiktok.com/@joinparallel.io/video/7192338389255916806
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/tos-maliva-p-0068/oQgeH8BRJnj20JEFoQ5tAf1MIb976nBD89QiFB~tplv-dmt-logom:tos-useast2a-v-0068/4763cd9418ac4d7faccbf52906bcf43c.image?x-expires=1700449200&x-signature=DPRcWm2Xhpe7r2HmxxGBzOyhwVs%3D
|
||||
web video url: https://www.tiktok.com/@joinparallel.io/video/7192338389255916806
|
||||
cover url: https://p16-sign-sg.tiktokcdn.com/obj/tos-alisg-p-0037/002965b791d641d5b2f3d86ee0019604_1675130079?x-expires=1700449200&x-signature=TMCJOBJCXYeu5rsjFWpohtGwT8M%3D
|
||||
web video url: https://www.tiktok.com/@mondayhaircare/video/7194628805414161665
|
||||
========================
|
||||
========================
|
||||
#nyc
|
||||
used 990 times
|
||||
5577241000 views
|
||||
|
||||
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/08b82fdf19d5468e91a032b30e527861_1692785637?x-expires=1700452800&x-signature=%2F5sW2i0xTGXJJj2PKbwI6VXywWI%3D
|
||||
web video url: https://www.tiktok.com/@erikconover/video/7270458709065731370
|
||||
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/1ea2545bde2645ec8f1106a4b9de6c2e_1648602515?x-expires=1700452800&x-signature=HNr6UCQsc4q0m%2FFShx%2FJJNWb1Jg%3D
|
||||
web video url: https://www.tiktok.com/@thekatieromero/video/7080693879141485870
|
||||
cover url: https://p19-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/c898b993f308477e92334437f9f0e1e1?x-expires=1700452800&x-signature=ZNHG9%2FMBw9qp5DSm%2BNnbhwX6xK8%3D
|
||||
web video url: https://www.tiktok.com/@thekatieromero/video/7105552208422374699
|
||||
========================
|
||||
========================
|
||||
#makeup
|
||||
used 976 times
|
||||
15309874500 views
|
||||
|
||||
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/ea183fe6de594a319ba917d1ffbff11b?x-expires=1700503200&x-signature=9L4ypK162uI%2BirECcDcFqctjvn8%3D
|
||||
web video url: https://www.tiktok.com/@dollievision/video/7208244986666585386
|
||||
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/ea183fe6de594a319ba917d1ffbff11b?x-expires=1700503200&x-signature=9L4ypK162uI%2BirECcDcFqctjvn8%3D
|
||||
web video url: https://www.tiktok.com/@dollievision/video/7208244986666585386
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/1404c560d1e74fe7881a0a4ae6414de5_1652622380?x-expires=1700449200&x-signature=m%2BIawkKwQBwnUaTqBTTMtqLQPZo%3D
|
||||
web video url: https://www.tiktok.com/@mimles/video/7097959048515013894
|
||||
========================
|
||||
========================
|
||||
#trending
|
||||
used 721 times
|
||||
21692028406 views
|
||||
|
||||
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/bbdfd4ef0c4040b2bf9c52e9bb81d770?x-expires=1700449200&x-signature=fMm4z9wGlJCa1VFXvU5jQ0ot6tA%3D
|
||||
web video url: https://www.tiktok.com/@phuonglinh.ido/video/7215533760039865646
|
||||
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/c92407e5bca34ce78eb17db878630adc?x-expires=1700449200&x-signature=836u0V7z2PC7tFMLlsvVDFDU1wU%3D
|
||||
web video url: https://www.tiktok.com/@asmr.mus/video/7212985350124375342
|
||||
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/c92407e5bca34ce78eb17db878630adc?x-expires=1700449200&x-signature=836u0V7z2PC7tFMLlsvVDFDU1wU%3D
|
||||
web video url: https://www.tiktok.com/@asmr.mus/video/7212985350124375342
|
||||
========================
|
||||
========================
|
||||
#comedy
|
||||
used 579 times
|
||||
14364510900 views
|
||||
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/49a912da569f4c69b3658762357f3922_1572472757?x-expires=1700449200&x-signature=rCokiz5pbl88BrzDzX3AB1LFCXg%3D
|
||||
web video url: https://www.tiktok.com/@kisonkee/video/6753718966637677830
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/49a912da569f4c69b3658762357f3922_1572472757?x-expires=1700449200&x-signature=rCokiz5pbl88BrzDzX3AB1LFCXg%3D
|
||||
web video url: https://www.tiktok.com/@kisonkee/video/6753718966637677830
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/o0EnyBknREPO4GdeDo4nxAIJRFJfbfzAzGQSDf?x-expires=1700449200&x-signature=zuGbpMoTS01F4waRsGo2r2AoVxk%3D
|
||||
web video url: https://www.tiktok.com/@ricoanimations0/video/7241573984590957830
|
||||
========================
|
||||
========================
|
||||
#newyork
|
||||
used 555 times
|
||||
3126420800 views
|
||||
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/4a878de5dbe241b5b5e25635f4200a51_1650915958?x-expires=1700449200&x-signature=1l%2F8aGh0jktub1R%2BX23PAe64Dys%3D
|
||||
web video url: https://www.tiktok.com/@mdmotivator/video/7090629995546070277
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/4a878de5dbe241b5b5e25635f4200a51_1650915958?x-expires=1700449200&x-signature=1l%2F8aGh0jktub1R%2BX23PAe64Dys%3D
|
||||
web video url: https://www.tiktok.com/@mdmotivator/video/7090629995546070277
|
||||
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/08b82fdf19d5468e91a032b30e527861_1692785637?x-expires=1700452800&x-signature=%2F5sW2i0xTGXJJj2PKbwI6VXywWI%3D
|
||||
web video url: https://www.tiktok.com/@erikconover/video/7270458709065731370
|
||||
========================
|
||||
========================
|
||||
#couple
|
||||
used 439 times
|
||||
5628511600 views
|
||||
|
||||
cover url: https://p16-sign.tiktokcdn-us.com/obj/tos-useast5-p-0068-tx/014d01e5b7f848fc8f8899e88e8fa483?x-expires=1700449200&x-signature=IMmyHEigmMoVtLEuTWPZwe%2Fksb0%3D
|
||||
web video url: https://www.tiktok.com/@mamalindy/video/7079555791962885419
|
||||
cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-aiso/cf3359fa45444f0994cf0dcc1c201b2d_1681930130?x-expires=1700449200&x-signature=tABQmhr%2FtklzlsNqYWGZnNrxwhI%3D
|
||||
web video url: https://www.tiktok.com/@kajsablock/video/7223834852305456410
|
||||
cover url: https://p16-sign-useast2a.tiktokcdn.com/obj/tos-useast2a-p-0037-aiso/cf3359fa45444f0994cf0dcc1c201b2d_1681930130?x-expires=1700449200&x-signature=tABQmhr%2FtklzlsNqYWGZnNrxwhI%3D
|
||||
web video url: https://www.tiktok.com/@kajsablock/video/7223834852305456410
|
||||
========================
|
||||
========================
|
||||
#fy
|
||||
used 397 times
|
||||
16901215000 views
|
||||
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/ef952b508c8043bb8b4ba98e3db850fb_1679074109?x-expires=1700449200&x-signature=AOKtuDMNxX%2BU2b1dRfBvofZLZfk%3D
|
||||
web video url: https://www.tiktok.com/@noelgoescrazy/video/7211568359798803717
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/ef952b508c8043bb8b4ba98e3db850fb_1679074109?x-expires=1700449200&x-signature=AOKtuDMNxX%2BU2b1dRfBvofZLZfk%3D
|
||||
web video url: https://www.tiktok.com/@noelgoescrazy/video/7211568359798803717
|
||||
cover url: https://p16-sign-va.tiktokcdn.com/obj/tos-maliva-p-0068/509605b7a901400589cd15d6731aaf8c_1677431421?x-expires=1700449200&x-signature=LoywgvGN5XLKIpvwV2UnR6pml6s%3D
|
||||
web video url: https://www.tiktok.com/@noelgoescrazy/video/7204513074097769733
|
||||
========================
|
||||
456
hws/tiktok_trends/test.py
Normal file
456
hws/tiktok_trends/test.py
Normal file
@@ -0,0 +1,456 @@
|
||||
import subprocess
|
||||
import os
|
||||
import filecmp
|
||||
import glob
|
||||
import sys # Import sys for platform detection
|
||||
import time
|
||||
import shutil
|
||||
import re # Import re for regex parsing on macOS
|
||||
|
||||
# --- Configuration ---
# Compiler invocation and locations of the test fixtures.
CXX = "g++"
CXXFLAGS = [
    "-Wall",
    "-O2",
    "-std=c++11",
]
EXECUTABLE = "./nytrends.exe"
SOURCE_FILES_PATTERN = "*.cpp"
INPUT_DIR = "inputs"
EXPECTED_OUTPUT_DIR = "outputs"
TEMP_OUTPUT_FILE = "output_unit_test.txt"
TEST_TIMEOUT = 120  # handed to subprocess.run as the per-test timeout

# Memory measurement through the external time utility.
MEASURE_MEMORY = True  # Master switch
TIME_COMMAND = "/usr/bin/time"

# --- Platform Specific Time Config ---
# Filled in by check_time_command(): 'linux', 'macos', or None when unusable.
TIME_COMMAND_MODE = None
# Format specifier for Max RSS (KB) on Linux.
LINUX_TIME_FORMAT = "%M"
# Temp file that receives the Linux time output (passed via '-o').
LINUX_TIME_OUTPUT_FILE = "time_mem_output.tmp"
# Extracts the number preceding 'maximum resident set size' in macOS 'time -l' stderr.
MACOS_MEM_REGEX = re.compile(r"^\s*(\d+)\s+maximum resident set size", re.MULTILINE | re.IGNORECASE)

# When True, the tested program's own output is discarded.
SUPPRESS_PROGRAM_OUTPUT = True

# ANSI color codes passed to print_color().
COLOR_GREEN = '\033[92m'
COLOR_RED = '\033[91m'
COLOR_YELLOW = '\033[93m'
COLOR_BLUE = '\033[94m'
COLOR_RESET = '\033[0m'
|
||||
|
||||
# --- Helper Functions ---
|
||||
|
||||
def print_color(text, color):
    """Write ``text`` to stdout, prefixed by ``color`` and followed by COLOR_RESET."""
    message = "{}{}{}".format(color, text, COLOR_RESET)
    print(message)
|
||||
|
||||
def check_time_command():
    """
    Probe whether TIME_COMMAND can be used for memory measurement on this OS.

    The probe runs TIME_COMMAND against the no-op ``true`` program with the
    platform's flags ('-f LINUX_TIME_FORMAT' on Linux, '-l' on macOS). On
    macOS the captured stderr must additionally match MACOS_MEM_REGEX.

    Side effects:
        Sets the module-level TIME_COMMAND_MODE to 'linux' or 'macos' when the
        probe succeeds and to None on every failure path, and prints a status
        or warning message describing the outcome.

    Returns:
        bool: True if memory measurement is usable, False otherwise.
    """
    global TIME_COMMAND_MODE

    def disable(warning):
        # Shared failure path: warn, clear the mode, report "not usable".
        global TIME_COMMAND_MODE
        print_color(warning, COLOR_YELLOW)
        TIME_COMMAND_MODE = None
        return False

    if not shutil.which(TIME_COMMAND):
        return disable(f"Warning: '{TIME_COMMAND}' not found. Memory measurement disabled.")

    platform = sys.platform
    if platform.startswith("linux"):
        TIME_COMMAND_MODE = "linux"
        probe_flags = ['-f', LINUX_TIME_FORMAT]
        print(f"Detected Linux platform. Testing {TIME_COMMAND} with '-f {LINUX_TIME_FORMAT}'...")
    elif platform == "darwin":  # macOS
        TIME_COMMAND_MODE = "macos"
        probe_flags = ['-l']
        print(f"Detected macOS platform. Testing {TIME_COMMAND} with '-l'...")
    else:
        return disable(f"Warning: Unsupported platform '{platform}' for memory measurement. Disabled.")

    try:
        # check=True turns a non-zero exit into CalledProcessError; output is
        # captured so the macOS branch can inspect stderr.
        process = subprocess.run([TIME_COMMAND, *probe_flags, 'true'],
                                 capture_output=True,
                                 text=True,
                                 check=True,
                                 timeout=3)

        if TIME_COMMAND_MODE == "macos":
            # macOS additionally needs the expected line in the captured stderr.
            if not MACOS_MEM_REGEX.search(process.stderr):
                return disable(f"Warning: '{TIME_COMMAND} -l' output format not recognized (missing 'maximum resident set size'). Memory measurement disabled.")
            print_color(f"Memory measurement enabled using '{TIME_COMMAND} -l'.", COLOR_GREEN)
            return True

        # Linux: reaching this point means the probe exited with status 0.
        print_color(f"Memory measurement enabled using '{TIME_COMMAND} -f {LINUX_TIME_FORMAT}'.", COLOR_GREEN)
        return True

    except subprocess.CalledProcessError as e:
        disable(f"Warning: {TIME_COMMAND} test command failed (exit code {e.returncode}). Memory measurement disabled.")
        if e.stderr:
            print(f"Stderr:\n{e.stderr}")
        return False
    except FileNotFoundError:
        # shutil.which() already checked this; kept as a second line of defence.
        return disable(f"Warning: '{TIME_COMMAND}' not found during test run. Memory measurement disabled.")
    except Exception as e:
        # Includes subprocess.TimeoutExpired from the 3-second probe timeout.
        return disable(f"Warning: An unexpected error occurred while testing {TIME_COMMAND}. Memory measurement disabled. Error: {e}")
|
||||
|
||||
# --- Compilation ---
|
||||
def compile_program():
    """
    Build every file matching SOURCE_FILES_PATTERN into the test executable.

    Compiler diagnostics written to stderr are echoed on both success
    (as warnings) and failure (as errors).

    Returns:
        bool: True when the compiler exits with status 0; False when no
        sources match, the compiler is missing, it exits non-zero, or an
        unexpected error occurs.
    """
    print_color("--- Starting Compilation ---", COLOR_BLUE)

    sources = glob.glob(SOURCE_FILES_PATTERN)
    if not sources:
        print_color(f"Error: No source files found matching pattern '{SOURCE_FILES_PATTERN}'.", COLOR_RED)
        return False

    # Output name is the basename of EXECUTABLE, so the binary lands in the cwd.
    cmd = [CXX, *CXXFLAGS, "-o", os.path.basename(EXECUTABLE), *sources]
    print("Running: " + " ".join(cmd))

    try:
        started = time.perf_counter()
        result = subprocess.run(cmd, check=False, capture_output=True, text=True)
        elapsed = time.perf_counter() - started

        if result.returncode != 0:
            print_color(f"Compilation failed with exit code {result.returncode} (took {elapsed:.3f}s).", COLOR_RED)
            print_color("Compiler Error Output:", COLOR_RED)
            print(result.stderr or "(No compiler error output captured)")
            return False

        print_color(f"Compilation successful (took {elapsed:.3f}s).", COLOR_GREEN)
        if result.stderr:
            # A successful build can still emit warnings on stderr.
            print_color("Compiler Warnings/Messages:", COLOR_YELLOW)
            print(result.stderr)
        return True
    except FileNotFoundError:
        print_color(f"Error: Compiler '{CXX}' not found.", COLOR_RED)
        return False
    except Exception as e:
        print_color(f"An unexpected error occurred during compilation: {e}", COLOR_RED)
        return False
|
||||
|
||||
def run_test(test_name, input_file, expected_output_file, argument):
    """Run one test case and compare the program's output file to the expected one.

    The executable is invoked as ``EXECUTABLE input_file TEMP_OUTPUT_FILE argument``.
    When memory measurement is enabled (MEASURE_MEMORY and TIME_COMMAND_MODE set),
    the command is wrapped in TIME_COMMAND: in "linux" mode the peak memory is
    read from LINUX_TIME_OUTPUT_FILE, in "macos" mode it is parsed out of the
    captured stderr with MACOS_MEM_REGEX.

    Args:
        test_name: Human-readable label printed in the log.
        input_file: Path of the input file handed to the program.
        expected_output_file: Path of the reference output to compare against.
        argument: Third command-line argument passed to the program.

    Returns:
        tuple: ``(passed, reason, duration, memory_kb)`` where ``passed`` is a
        bool, ``reason`` a short status string, ``duration`` the wall-clock
        seconds (None if the run never produced a timing) and ``memory_kb`` the
        peak memory in KB (None if not measured or not parseable).
    """
    print_color(f"--- Running {test_name} ---", COLOR_BLUE)
    duration = None
    memory_kb = None

    # Prerequisite checks
    if not os.path.exists(input_file):
        return False, "Input file missing", None, None
    if not os.path.exists(expected_output_file):
        return False, "Expected output file missing", None, None
    if not os.path.exists(EXECUTABLE):
        return False, "Executable not found", None, None

    # --- Command Construction & subprocess args ---
    base_command = [EXECUTABLE, input_file, TEMP_OUTPUT_FILE, argument]
    quiet = subprocess.DEVNULL if SUPPRESS_PROGRAM_OUTPUT else None
    measuring = bool(MEASURE_MEMORY and TIME_COMMAND_MODE)

    # Default: run the program directly. This also covers an unrecognised
    # TIME_COMMAND_MODE, which previously left the command list empty.
    run_command = base_command
    subprocess_kwargs = {
        "check": False,
        "timeout": TEST_TIMEOUT,
        "stdout": quiet,
        "stderr": quiet,
    }

    if measuring and TIME_COMMAND_MODE == "linux":
        # Memory figure goes to a side file, so stdout/stderr follow the suppression flag.
        run_command = [TIME_COMMAND, '-f', LINUX_TIME_FORMAT, '-o', LINUX_TIME_OUTPUT_FILE] + base_command
        if os.path.exists(LINUX_TIME_OUTPUT_FILE):
            try:
                os.remove(LINUX_TIME_OUTPUT_FILE)
            except OSError:
                pass  # best effort: a stale file is overwritten by the time command anyway
    elif measuring and TIME_COMMAND_MODE == "macos":
        # `time -l` reports on stderr, so stderr must be captured for parsing.
        run_command = [TIME_COMMAND, '-l'] + base_command
        subprocess_kwargs["stderr"] = subprocess.PIPE
        subprocess_kwargs["text"] = True

    print(f"Executing: {' '.join(run_command)}")

    # --- Execution and Measurement ---
    # Remove any output left by a previous test so a stale file cannot pass the comparison.
    if os.path.exists(TEMP_OUTPUT_FILE):
        try:
            os.remove(TEMP_OUTPUT_FILE)
        except OSError as e:
            print_color(f"Warning: Could not remove {TEMP_OUTPUT_FILE}: {e}", COLOR_YELLOW)

    # Bound before the try block so the timeout handler can always compute a duration.
    start_time = time.perf_counter()
    try:
        process = subprocess.run(run_command, **subprocess_kwargs)
        duration = time.perf_counter() - start_time
        print(f"Execution Time: {duration:.3f} seconds")

        # --- Process Memory Output (Platform Specific) ---
        if measuring and TIME_COMMAND_MODE == "linux":
            if os.path.exists(LINUX_TIME_OUTPUT_FILE):
                try:
                    with open(LINUX_TIME_OUTPUT_FILE, 'r') as f_time:
                        mem_str = f_time.read().strip()
                    if mem_str:
                        # GNU time prepends a "Command exited with non-zero status N"
                        # line when the program fails; the formatted value is the
                        # last line of the file.
                        memory_kb = int(mem_str.splitlines()[-1].strip())  # Already in KB
                        print(f"Peak Memory Usage: {memory_kb} KB")
                    else:
                        print_color(f"Warning: {LINUX_TIME_OUTPUT_FILE} was empty.", COLOR_YELLOW)
                except (ValueError, OSError) as e:
                    print_color(f"Warning: Could not parse memory (Linux) from {LINUX_TIME_OUTPUT_FILE}: {e}", COLOR_YELLOW)
            else:
                print_color(f"Warning: {LINUX_TIME_OUTPUT_FILE} was not created.", COLOR_YELLOW)

        elif measuring and TIME_COMMAND_MODE == "macos":
            if process.stderr:
                match = MACOS_MEM_REGEX.search(process.stderr)
                if match:
                    try:
                        mem_bytes = int(match.group(1))
                        memory_kb = mem_bytes // 1024  # Convert Bytes to KB
                        print(f"Peak Memory Usage: {memory_kb} KB ({mem_bytes} Bytes)")
                    except (ValueError, IndexError):
                        print_color("Warning: Could not parse memory value (macOS) from captured output.", COLOR_YELLOW)
                else:
                    print_color("Warning: 'maximum resident set size' not found in 'time -l' output (macOS).", COLOR_YELLOW)
            else:
                print_color("Warning: No stderr captured from 'time -l' (macOS).", COLOR_YELLOW)

        # --- Check Program Result ---
        if process.returncode != 0:
            print_color(f"Test failed: Program exited with non-zero status {process.returncode}.", COLOR_RED)
            return False, "Runtime error", duration, memory_kb

        if not os.path.exists(TEMP_OUTPUT_FILE):
            print_color(f"Test failed: Program finished successfully but did not create '{TEMP_OUTPUT_FILE}'.", COLOR_RED)
            return False, "Output file not created", duration, memory_kb

        # --- Compare Output File ---
        # shallow=False forces a byte-by-byte content comparison.
        if filecmp.cmp(TEMP_OUTPUT_FILE, expected_output_file, shallow=False):
            print_color("Test Result: PASSED", COLOR_GREEN)
            return True, "Passed", duration, memory_kb

        print_color("Test Result: FAILED - Output mismatch.", COLOR_RED)
        print_color(f" Expected: {expected_output_file}", COLOR_YELLOW)
        print_color(f" Actual: {TEMP_OUTPUT_FILE}", COLOR_YELLOW)
        try:
            diff_proc = subprocess.run(['diff', '-u', expected_output_file, TEMP_OUTPUT_FILE], capture_output=True, text=True)
            print_color("--- Diff ---", COLOR_YELLOW)
            print(diff_proc.stdout if diff_proc.stdout else "(No differences found by diff, might be whitespace or encoding issues)")
            print_color("------------", COLOR_YELLOW)
        except FileNotFoundError:
            print_color("Could not run 'diff' command.", COLOR_YELLOW)
        except Exception as diff_e:
            print_color(f"Error running diff: {diff_e}", COLOR_YELLOW)
        return False, "Output mismatch", duration, memory_kb

    # --- Exception Handling ---
    except subprocess.TimeoutExpired as timeout_err:
        duration = time.perf_counter() - start_time
        print_color(f"Test failed: Program timed out after {duration:.3f}s (limit: {TEST_TIMEOUT}s).", COLOR_RED)
        # subprocess.run never returned, so there is no `process` object here;
        # any partially captured stderr lives on the exception itself and may
        # be bytes even though text=True was requested.
        partial_stderr = timeout_err.stderr
        if measuring and TIME_COMMAND_MODE == "macos" and partial_stderr:
            if isinstance(partial_stderr, bytes):
                partial_stderr = partial_stderr.decode(errors="replace")
            match = MACOS_MEM_REGEX.search(partial_stderr)
            if match:
                try:
                    memory_kb = int(match.group(1)) // 1024
                except (ValueError, IndexError):
                    memory_kb = None  # Ignore parsing errors on timeout
        return False, "Timeout", duration, memory_kb

    except Exception as e:
        print_color(f"An unexpected error occurred during test execution: {e}", COLOR_RED)
        return False, f"Execution exception: {e}", None, None

    finally:
        # Single cleanup point for the Linux time file: runs on every exit path
        # (success, failure, timeout, unexpected exception).
        if MEASURE_MEMORY and TIME_COMMAND_MODE == "linux" and os.path.exists(LINUX_TIME_OUTPUT_FILE):
            try:
                os.remove(LINUX_TIME_OUTPUT_FILE)
            except OSError:
                pass  # best effort: nothing useful to do if the temp file cannot be removed
|
||||
|
||||
# --- Main Execution ---
|
||||
if __name__ == "__main__":
    # Step 0: keep memory measurement on only if it is configured and
    # check_time_command() reports that it can actually be done.
    if MEASURE_MEMORY:
        MEASURE_MEMORY = check_time_command()
    else:
        MEASURE_MEMORY = False
        print_color("Memory measurement explicitly disabled by configuration.", COLOR_YELLOW)

    if SUPPRESS_PROGRAM_OUTPUT:
        print_color("Program stdout/stderr will be suppressed during tests.", COLOR_BLUE)

    # Step 1: build the program; without a binary there is nothing to test.
    compiled_ok = compile_program()
    if not compiled_ok:
        print_color("\nCompilation failed. Aborting tests.", COLOR_RED)
        sys.exit(1)

    # Step 2: test table. Each input base name gets a 1-based string id and is
    # run once per argument in arguments_to_test.
    input_base_names = [
        "tiny1", "tiny2", "small1", "small2",
        "medium1", "medium2", "large1", "large2",
        "large3", "large4", "large5", "large6",
        "large7", "large8", "large9",
    ]
    test_bases = [(str(number), name) for number, name in enumerate(input_base_names, 1)]
    arguments_to_test = ["hashtag", "sound"]

    # Reasons returned by run_test() that mean the test could not be attempted.
    skip_reasons = ("Input file missing", "Expected output file missing", "Executable not found")

    passed_count = 0
    failed_count = 0
    skipped_count = 0
    failed_tests = []
    test_durations = []
    test_memory_usages = []

    # Step 3: run every (input, argument) combination.
    print_color("\n--- Starting Test Execution ---", COLOR_BLUE)
    total_start_time = time.perf_counter()

    for id_prefix, base_name in test_bases:
        input_path = os.path.join(INPUT_DIR, f"input_{base_name}.json")
        for sub_index, mode in enumerate(arguments_to_test, 1):
            label = f"Test Case {id_prefix}.{sub_index}: input {base_name}, {mode}"
            expected_path = os.path.join(EXPECTED_OUTPUT_DIR, f"output_{base_name}_{mode}.txt")

            ok, why, secs, peak_kb = run_test(label, input_path, expected_path, mode)
            have_memory = bool(MEASURE_MEMORY and peak_kb is not None)

            if ok:
                passed_count += 1
                # Only passed tests feed the performance / memory summaries.
                if secs is not None:
                    test_durations.append(secs)
                if have_memory:
                    test_memory_usages.append(peak_kb)
            elif why in skip_reasons:
                skipped_count += 1
            else:
                failed_count += 1
                time_note = "" if secs is None else f" ({secs:.3f}s)"
                memory_note = f", {peak_kb} KB" if have_memory else ""
                failed_tests.append(f"{label} ({why}{time_note}{memory_note})")
            print("-" * 40)

    total_test_suite_duration = time.perf_counter() - total_start_time

    # Step 4: remove the artifacts produced by this run.
    print_color("--- Cleaning Up ---", COLOR_BLUE)
    artifacts = (
        (TEMP_OUTPUT_FILE, f"Removed temporary output file: {TEMP_OUTPUT_FILE}"),
        (EXECUTABLE, f"Removed executable: {EXECUTABLE}"),
    )
    for artifact_path, removed_message in artifacts:
        if not os.path.exists(artifact_path):
            continue
        try:
            os.remove(artifact_path)
            print(removed_message)
        except OSError as e:
            print_color(f"Warning: Could not remove {artifact_path}: {e}", COLOR_YELLOW)

    # Step 5: summary.
    total_run = passed_count + failed_count
    total_defined = total_run + skipped_count

    print_color("\n--- Test Summary ---", COLOR_BLUE)
    print_color(f"Passed: {passed_count}", COLOR_GREEN)
    print_color(f"Failed: {failed_count}", COLOR_RED if failed_count > 0 else COLOR_GREEN)
    print_color(f"Skipped: {skipped_count}", COLOR_YELLOW if skipped_count > 0 else COLOR_GREEN)
    print(f"Total Tests Defined: {total_defined}")
    print(f"Total Tests Run: {total_run}")
    print(f"Total Test Suite Execution Time: {total_test_suite_duration:.3f}s")

    # Performance summary (passed tests only).
    if test_durations:
        total_passed_time = sum(test_durations)
        print("\n--- Performance Summary (Passed Tests) ---")
        print(f"Total execution time (passed tests): {total_passed_time:.3f}s")
        print(f"Average execution time per test: {total_passed_time / len(test_durations):.3f}s")
        print(f"Fastest test execution time: {min(test_durations):.3f}s")
        print(f"Slowest test execution time: {max(test_durations):.3f}s")

    # Memory summary: shown only when measurement is still enabled at this point.
    if MEASURE_MEMORY:
        print("\n--- Memory Usage Summary (Passed Tests) ---")
        if test_memory_usages:
            total_mem_kb = sum(test_memory_usages)
            total_mem_mb = total_mem_kb / 1024
            total_mem_gb = total_mem_mb / 1024
            # Pick the largest unit in which the cumulative figure exceeds 1.
            if total_mem_gb > 1:
                total_mem_str = f"{total_mem_gb:.2f} GB"
            elif total_mem_mb > 1:
                total_mem_str = f"{total_mem_mb:.2f} MB"
            else:
                total_mem_str = f"{total_mem_kb} KB"
            print(f"Cumulative peak memory (passed tests): {total_mem_str} ({total_mem_kb} KB)")
            print(f"Average peak memory per test: {total_mem_kb / len(test_memory_usages):.1f} KB")
            print(f"Lowest peak memory usage: {min(test_memory_usages)} KB")
            print(f"Highest peak memory usage: {max(test_memory_usages)} KB")
        else:
            print("(No memory usage data collected for passed tests - check warnings)")

    # Final result and exit status. sys.exit() raises, so each guard below is
    # only reached when the previous ones did not apply.
    if failed_tests:
        print_color("\n--- Failed Test Cases ---", COLOR_RED)
        for failure in failed_tests:
            print(f" - {failure}")
        sys.exit(1)

    if passed_count == 0 and skipped_count == total_defined:
        print_color("\nWarning: No tests were executed (all skipped).", COLOR_YELLOW)
        sys.exit(0)

    if passed_count > 0:
        print_color("\nAll executed tests passed successfully!", COLOR_GREEN)
        sys.exit(0)

    print_color("\nNo tests passed.", COLOR_YELLOW)
    sys.exit(1 if failed_count > 0 else 0)
|
||||
Reference in New Issue
Block a user