Files
CSCI-1200/hws/tiktok_trends/Utils.cpp
JamesFlare1212 a109046498 solve hw-9
2025-04-15 22:10:48 -04:00

265 lines
8.3 KiB
C++

#include "Utils.h"
#include <algorithm> // For std::min
#include <cctype>
#include <cstring>
#include <iostream> // For potential cerr usage, although not directly in these functions
#include <limits> // For std::numeric_limits
// Reads a double-quoted value from `str`, starting at `pos`.
//
// `pos` must index the opening quote. On success `value` receives the raw
// characters between the quotes (escape sequences are copied verbatim, not
// decoded), `pos` is moved just past the closing quote, and true is returned.
// Returns false with `value` empty when `pos` is not on a quote or when no
// closing quote exists.
bool parseQuotedStringValue(const std::string& str, size_t& pos, std::string& value) {
    const size_t strLen = str.length();
    value.clear();
    if (pos >= strLen || str[pos] != '"') return false;
    ++pos;
    const size_t startPos = pos;
    while (pos < strLen && str[pos] != '"') {
        // A backslash escapes the following character, so an escaped quote
        // (\") must not be mistaken for the end of the value.
        if (str[pos] == '\\' && pos + 1 < strLen) {
            ++pos;
        }
        ++pos;
    }
    if (pos >= strLen) return false;  // ran off the end: unterminated string
    value.assign(str, startPos, pos - startPos);
    ++pos;  // step over the closing quote
    return true;
}
// Reads a bare (unquoted) token from `str`, starting at `pos`.
//
// The token ends at the first ',', '}', ']' or whitespace character, or at
// the end of the string. On success `value` holds the token and `pos` indexes
// the terminating character (or the end of the string). Returns false, with
// `value` empty and `pos` unchanged, when the token would be empty.
bool parseUnquotedValue(const std::string& str, size_t& pos, std::string& value) {
    value.clear();

    const std::size_t first = pos;
    const std::size_t limit = str.size();

    std::size_t scan = first;
    for (; scan < limit; ++scan) {
        const char ch = str[scan];
        if (ch == ',' || ch == '}' || ch == ']') break;
        if (std::isspace(static_cast<unsigned char>(ch))) break;
    }
    pos = scan;

    if (scan == first) return false;
    value.assign(str, first, scan - first);
    return true;
}
// Finds the first occurrence of `"key":` in `line` and parses the value that
// follows it (after optional whitespace).
//
// A value starting with '"' is handed to parseQuotedStringValue; anything
// else goes to parseUnquotedValue. Returns false when the key is absent,
// when nothing follows it, or when the chosen parser fails. `value` is left
// untouched if the key is not found or has nothing after it.
bool extractValue(const std::string& line, const std::string& key, std::string& value) {
    std::string needle(1, '"');
    needle.append(key).append("\":");

    const char* const base = line.c_str();
    const char* const hit = std::strstr(base, needle.c_str());
    if (hit == nullptr) return false;

    const std::size_t total = line.size();
    std::size_t cursor = static_cast<std::size_t>(hit - base) + needle.size();
    for (; cursor < total; ++cursor) {
        if (!std::isspace(static_cast<unsigned char>(line[cursor]))) break;
    }
    if (cursor >= total) return false;

    return line[cursor] == '"'
        ? parseQuotedStringValue(line, cursor, value)
        : parseUnquotedValue(line, cursor, value);
}
// Finds the first occurrence of `"key":` in `line` and, if the value that
// follows (after optional whitespace) begins with '{', copies the whole
// brace-balanced object, outer braces included, into `subObj`.
//
// Braces inside double-quoted strings are ignored, and a quote preceded by an
// unescaped backslash does not open or close a string. Returns false, leaving
// `subObj` unmodified, when the key is missing, the value is not an object,
// or the braces never balance.
bool extractSubObject(const std::string& line, const std::string& key, std::string& subObj) {
    std::string needle(1, '"');
    needle.append(key).append("\":");

    const char* const base = line.c_str();
    const char* const hit = std::strstr(base, needle.c_str());
    if (hit == nullptr) return false;

    const std::size_t total = line.size();
    std::size_t cursor = static_cast<std::size_t>(hit - base) + needle.size();
    for (; cursor < total; ++cursor) {
        if (!std::isspace(static_cast<unsigned char>(line[cursor]))) break;
    }
    if (cursor >= total) return false;
    if (line[cursor] != '{') return false;

    const std::size_t openAt = cursor++;
    int depth = 1;          // the opening brace has already been consumed
    bool quoted = false;    // currently inside a double-quoted string
    bool escaped = false;   // previous character was an unescaped backslash

    for (; cursor < total && depth > 0; ++cursor) {
        const char ch = line[cursor];
        if (ch == '"' && !escaped) {
            quoted = !quoted;
        } else if (!quoted) {
            switch (ch) {
                case '{': ++depth; break;
                case '}': --depth; break;
                default: break;
            }
        }
        // Two backslashes in a row cancel out, so the second one must not
        // escape whatever comes next.
        escaped = (ch == '\\') && !escaped;
    }

    if (depth != 0) return false;
    subObj.assign(line, openAt, cursor - openAt);
    return true;
}
// Parses a base-10 integer made of an optional leading '-' followed by one or
// more digits, storing it in `result`.
//
// Returns false and leaves `result` at 0 when `s` is empty, has no digits,
// contains any other character (including a leading '+' or whitespace), or
// names a value that does not fit in a `long`. Parsing reads up to the first
// NUL character of `s`.
bool parseLongLong(const std::string& s, long& result) {
    result = 0;
    if (s.empty()) return false;

    const char* ptr = s.c_str();
    const bool negative = (*ptr == '-');
    if (negative) ++ptr;
    if (!*ptr) return false;  // a lone "-" is not a number

    const long kMin = std::numeric_limits<long>::min();
    const long kMax = std::numeric_limits<long>::max();

    // Accumulate as a negative number: the negative range is one larger than
    // the positive range, so this is the only way to represent LONG_MIN
    // without overflowing on the way there.
    long acc = 0;
    for (; *ptr; ++ptr) {
        if (*ptr < '0' || *ptr > '9') return false;
        const long digit = *ptr - '0';
        // acc * 10 - digit would drop below kMin: reject instead of invoking
        // signed-overflow undefined behaviour.
        if (acc < (kMin + digit) / 10) return false;
        acc = acc * 10 - digit;
    }

    if (!negative) {
        if (acc < -kMax) return false;  // magnitude is too large for a positive long
        acc = -acc;
    }
    result = acc;
    return true;
}
bool parseLineForHashtags(const std::string& line, int inputOrder, StringInterner& interner,
VideoInfo& outVideo, std::string& outText)
{
outText.clear();
std::string id_str, coverUrl_str, webVideoUrl_str, playCount_str;
if (!extractValue(line, "id", id_str) || id_str.empty()) return false;
long playCount = 0;
if (extractValue(line, "playCount", playCount_str)) {
parseLongLong(playCount_str, playCount);
}
extractValue(line, "text", outText);
extractValue(line, "webVideoUrl", webVideoUrl_str);
std::string videoMetaSub;
if (extractSubObject(line, "videoMeta", videoMetaSub)) {
extractValue(videoMetaSub, "coverUrl", coverUrl_str);
}
outVideo = VideoInfo(
interner.intern(std::move(id_str)),
interner.intern(std::move(coverUrl_str)),
interner.intern(std::move(webVideoUrl_str)),
playCount,
inputOrder
);
return true;
}
bool parseLineForSounds(const std::string& line, int inputOrder, StringInterner& interner,
VideoInfo& outVideo,
const std::string*& outMusicIdPtr,
const std::string*& outMusicNamePtr,
const std::string*& outMusicAuthorPtr)
{
std::string id_str, coverUrl_str, webVideoUrl_str, playCount_str;
std::string musicId_str, musicName_str, musicAuthor_str;
if (!extractValue(line, "id", id_str) || id_str.empty()) return false;
long playCount = 0;
if (extractValue(line, "playCount", playCount_str)) {
parseLongLong(playCount_str, playCount);
}
std::string musicMetaSub;
if (extractSubObject(line, "musicMeta", musicMetaSub)) {
extractValue(musicMetaSub, "musicId", musicId_str);
extractValue(musicMetaSub, "musicName", musicName_str);
extractValue(musicMetaSub, "musicAuthor", musicAuthor_str);
}
if (musicId_str.empty()) {
return false;
}
extractValue(line, "webVideoUrl", webVideoUrl_str);
std::string videoMetaSub;
if (extractSubObject(line, "videoMeta", videoMetaSub)) {
extractValue(videoMetaSub, "coverUrl", coverUrl_str);
}
outVideo = VideoInfo(
interner.intern(std::move(id_str)),
interner.intern(std::move(coverUrl_str)),
interner.intern(std::move(webVideoUrl_str)),
playCount,
inputOrder
);
outMusicIdPtr = interner.intern(std::move(musicId_str));
outMusicNamePtr = interner.intern(std::move(musicName_str));
outMusicAuthorPtr = interner.intern(std::move(musicAuthor_str));
return true;
}
void extractHashtags(const std::string& text,
std::unordered_map<const std::string*, HashtagInfo, StringPtrHash, StringPtrEqual>& hashtagData,
StringInterner& interner,
const VideoInfo& video)
{
const size_t textLen = text.length();
const char* textData = text.data();
size_t pos = 0;
std::string tag_buffer;
tag_buffer.reserve(50);
while (pos < textLen) {
while (pos < textLen && textData[pos] != '#') {
pos++;
}
if (pos >= textLen) break;
size_t start = pos + 1;
if (start >= textLen) break;
size_t end = start;
while (end < textLen && (std::isalnum(static_cast<unsigned char>(textData[end])) || textData[end] == '_')) {
end++;
}
if (end > start) {
tag_buffer.assign(textData + start, end - start);
const std::string* hashtagPtr = interner.intern(tag_buffer);
typedef std::unordered_map<const std::string*, HashtagInfo, StringPtrHash, StringPtrEqual> HashtagMapType;
HashtagMapType::iterator it = hashtagData.find(hashtagPtr);
if (it == hashtagData.end()) {
std::pair<HashtagMapType::iterator, bool> emplace_result =
hashtagData.emplace(hashtagPtr, HashtagInfo(hashtagPtr));
it = emplace_result.first;
}
it->second.usageCount++;
it->second.totalViews += video.playCount;
it->second.topVideos.add(video);
}
pos = end;
}
}
void extractSortAndPrintTop3Videos(std::ofstream& fout, TopKVideoHolder& topVideos) {
std::vector<VideoInfo> sortedTopVideos = topVideos.getSortedVideos();
int videosToPrint = std::min(static_cast<int>(sortedTopVideos.size()), TOP_K_CANDIDATES);
for (int i = 0; i < videosToPrint; ++i) {
const VideoInfo& video = sortedTopVideos[i];
fout << "cover url: " << (video.coverUrl && !video.coverUrl->empty() ? *video.coverUrl : "null") << "\n";
fout << "web video url: " << (video.webVideoUrl && !video.webVideoUrl->empty() ? *video.webVideoUrl : "null") << "\n";
}
}