Files
CSCI-1200/hws/tiktok_trends/main.cpp
JamesFlare1212 a109046498 solve hw-9
2025-04-15 22:10:48 -04:00

256 lines
8.2 KiB
C++

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <queue>
#include <unordered_map>
#include <unordered_set>
#include <algorithm>
#include <functional>
#include <utility>
#include <cstring>
#include "Constants.h"
#include "StringInterner.h"
#include "StringPtrUtils.h"
#include "VideoInfo.h"
#include "TopKVideoHolder.h"
#include "HashtagInfo.h"
#include "SoundInfo.h"
#include "Utils.h"
bool processHashtags(const std::string& filename, std::ofstream& outputFile) {
std::ifstream inputFile(filename);
if (!inputFile.is_open()) {
std::cerr << "Cannot open input file: " << filename << std::endl;
return false;
}
StringInterner interner;
std::unordered_map<const std::string*, HashtagInfo, StringPtrHash, StringPtrEqual> hashtagData;
hashtagData.reserve(250000);
std::string line;
int inputOrderCounter = 0;
VideoInfo currentVideo;
std::string text_buffer;
while (std::getline(inputFile, line)) {
if (line.length() < 10) continue;
inputOrderCounter++;
if (parseLineForHashtags(line, inputOrderCounter, interner, currentVideo, text_buffer)) {
if (!text_buffer.empty()) {
extractHashtags(text_buffer, hashtagData, interner, currentVideo);
}
}
}
inputFile.close();
std::priority_queue<HashtagInfo*, std::vector<HashtagInfo*>, CompareHashtagPtrForHeap> top20Hashtags;
typedef std::unordered_map<const std::string*, HashtagInfo, StringPtrHash, StringPtrEqual> HashtagMapType;
for (HashtagMapType::iterator it = hashtagData.begin(); it != hashtagData.end(); ++it) {
HashtagInfo* currentHashtagPtr = &(it->second);
if (top20Hashtags.size() < TOP_N_OUTPUT) {
top20Hashtags.push(currentHashtagPtr);
} else {
const HashtagInfo* topPtr = top20Hashtags.top();
bool is_better = CompareHashtagPtr()(currentHashtagPtr, topPtr);
if (is_better) {
top20Hashtags.pop();
top20Hashtags.push(currentHashtagPtr);
}
}
}
std::vector<HashtagInfo*> finalTop20;
finalTop20.reserve(top20Hashtags.size());
while (!top20Hashtags.empty()) {
finalTop20.push_back(top20Hashtags.top());
top20Hashtags.pop();
}
std::sort(finalTop20.begin(), finalTop20.end(), CompareHashtagPtr());
outputFile << "trending hashtags:\n\n";
for (size_t i = 0; i < finalTop20.size(); ++i) {
HashtagInfo* currentHashtag = finalTop20[i];
outputFile << "========================\n";
outputFile << "#" << (currentHashtag->name ? *currentHashtag->name : "null") << "\n";
outputFile << "used " << currentHashtag->usageCount << " times\n";
outputFile << currentHashtag->totalViews << " views\n\n";
extractSortAndPrintTop3Videos(outputFile, currentHashtag->topVideos);
outputFile << "========================";
if (i < finalTop20.size() - 1) {
outputFile << "\n";
} else {
outputFile << "\n";
}
}
return true;
}
bool processSounds(const std::string& filename, std::ofstream& outputFile) {
std::ifstream inputFile(filename);
if (!inputFile.is_open()) {
std::cerr << "Cannot open input file: " << filename << std::endl;
return false;
}
StringInterner interner;
std::unordered_map<const std::string*, SoundInfo, StringPtrHash, StringPtrEqual> soundData;
soundData.reserve(50000);
std::string line;
int inputOrderCounter = 0;
VideoInfo currentVideo;
const std::string* musicIdPtr = nullptr;
const std::string* musicNamePtr = nullptr;
const std::string* musicAuthorPtr = nullptr;
while (std::getline(inputFile, line)) {
if (line.length() < 10) continue;
inputOrderCounter++;
musicIdPtr = nullptr;
musicNamePtr = nullptr;
musicAuthorPtr = nullptr;
if (parseLineForSounds(line, inputOrderCounter, interner, currentVideo,
musicIdPtr, musicNamePtr, musicAuthorPtr))
{
if (musicIdPtr == nullptr || musicIdPtr->empty()) {
continue;
}
typedef std::unordered_map<const std::string*, SoundInfo, StringPtrHash, StringPtrEqual> SoundMapType;
SoundMapType::iterator it = soundData.find(musicIdPtr);
if (it == soundData.end()) {
std::pair<SoundMapType::iterator, bool> emplace_result =
soundData.emplace(musicIdPtr, SoundInfo(musicIdPtr, musicNamePtr, musicAuthorPtr));
it = emplace_result.first;
}
it->second.totalViews += currentVideo.playCount;
if (it->second.musicName->empty() && !musicNamePtr->empty()) {
it->second.musicName = musicNamePtr;
}
if (it->second.musicAuthor->empty() && !musicAuthorPtr->empty()) {
it->second.musicAuthor = musicAuthorPtr;
}
it->second.topVideos.add(currentVideo);
}
}
inputFile.close();
std::priority_queue<SoundInfo*, std::vector<SoundInfo*>, CompareSoundPtrForHeap> top20Sounds;
typedef std::unordered_map<const std::string*, SoundInfo, StringPtrHash, StringPtrEqual> SoundMapType;
for (SoundMapType::iterator it = soundData.begin(); it != soundData.end(); ++it) {
SoundInfo* currentSoundPtr = &(it->second);
if (top20Sounds.size() < TOP_N_OUTPUT) {
top20Sounds.push(currentSoundPtr);
} else {
const SoundInfo* topPtr = top20Sounds.top();
bool is_better = CompareSoundPtr()(currentSoundPtr, topPtr);
if (is_better) {
top20Sounds.pop();
top20Sounds.push(currentSoundPtr);
}
}
}
std::vector<SoundInfo*> finalTop20;
finalTop20.reserve(top20Sounds.size());
while (!top20Sounds.empty()) {
finalTop20.push_back(top20Sounds.top());
top20Sounds.pop();
}
std::sort(finalTop20.begin(), finalTop20.end(), CompareSoundPtr());
outputFile << "trending sounds:\n\n";
for (size_t i = 0; i < finalTop20.size(); ++i) {
SoundInfo* currentSound = finalTop20[i];
outputFile << "========================\n";
if (currentSound->musicName == nullptr || currentSound->musicName->empty()) {
outputFile << "\n";
} else {
outputFile << *currentSound->musicName << "\n";
}
outputFile << currentSound->totalViews << " views\n";
if (currentSound->musicAuthor == nullptr || currentSound->musicAuthor->empty()) {
outputFile << "\n";
} else {
outputFile << *currentSound->musicAuthor << "\n";
}
outputFile << "music id: " << (currentSound->musicId && !currentSound->musicId->empty() ? *currentSound->musicId : "null") << "\n";
if (!currentSound->topVideos.empty()) {
outputFile << "\n";
}
extractSortAndPrintTop3Videos(outputFile, currentSound->topVideos);
outputFile << "========================";
if (i < finalTop20.size() - 1) {
outputFile << "\n";
} else {
outputFile << "\n";
}
}
return true;
}
/**
 * Program entry point.
 *
 * Expects exactly three arguments: the input file, the output file, and a
 * mode that is either "hashtag" or "sound". The mode is validated before the
 * output file is opened so that a mistyped mode does not create or truncate
 * the output file.
 *
 * @return 0 when the selected report was produced and written successfully,
 *         1 on a usage error, an invalid mode, an unopenable file, or a
 *         failed write.
 */
int main(int argc, char* argv[]) {
    // Set before any stream I/O takes place.
    std::ios_base::sync_with_stdio(false);

    if (argc != 4) {
        std::cerr << "Usage: nytrends.exe <input.json> <output.txt> <mode>\n";
        std::cerr << "Mode can be 'hashtag' or 'sound'\n";
        return 1;
    }
    const std::string inputFileName = argv[1];
    const std::string outputFileName = argv[2];
    const std::string mode = argv[3];

    // Reject a bad mode up front, before touching the output path.
    const bool hashtagMode = (mode == "hashtag");
    if (!hashtagMode && mode != "sound") {
        std::cerr << "Error: Invalid mode '" << mode << "'. Must be 'hashtag' or 'sound'." << std::endl;
        return 1;
    }

    std::ofstream outputFile(outputFileName);
    if (!outputFile.is_open()) {
        std::cerr << "Error: Cannot open output file " << outputFileName << std::endl;
        return 1;
    }

    const bool success = hashtagMode ? processHashtags(inputFileName, outputFile)
                                     : processSounds(inputFileName, outputFile);

    // close() flushes; a failed write or flush leaves the stream in a failed
    // state, which must not be reported as success.
    outputFile.close();
    if (!outputFile) {
        std::cerr << "Error: Failed to write output file " << outputFileName << std::endl;
        return 1;
    }
    return success ? 0 : 1;
}