256 lines
8.2 KiB
C++
256 lines
8.2 KiB
C++
#include <iostream>
|
|
#include <fstream>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <queue>
|
|
#include <unordered_map>
|
|
#include <unordered_set>
|
|
#include <algorithm>
|
|
#include <functional>
|
|
#include <utility>
|
|
#include <cstring>
|
|
|
|
#include "Constants.h"
|
|
#include "StringInterner.h"
|
|
#include "StringPtrUtils.h"
|
|
#include "VideoInfo.h"
|
|
#include "TopKVideoHolder.h"
|
|
#include "HashtagInfo.h"
|
|
#include "SoundInfo.h"
|
|
#include "Utils.h"
|
|
|
|
|
|
bool processHashtags(const std::string& filename, std::ofstream& outputFile) {
|
|
std::ifstream inputFile(filename);
|
|
if (!inputFile.is_open()) {
|
|
std::cerr << "Cannot open input file: " << filename << std::endl;
|
|
return false;
|
|
}
|
|
|
|
StringInterner interner;
|
|
std::unordered_map<const std::string*, HashtagInfo, StringPtrHash, StringPtrEqual> hashtagData;
|
|
hashtagData.reserve(250000);
|
|
|
|
std::string line;
|
|
int inputOrderCounter = 0;
|
|
VideoInfo currentVideo;
|
|
std::string text_buffer;
|
|
|
|
while (std::getline(inputFile, line)) {
|
|
if (line.length() < 10) continue;
|
|
inputOrderCounter++;
|
|
|
|
if (parseLineForHashtags(line, inputOrderCounter, interner, currentVideo, text_buffer)) {
|
|
if (!text_buffer.empty()) {
|
|
extractHashtags(text_buffer, hashtagData, interner, currentVideo);
|
|
}
|
|
}
|
|
}
|
|
inputFile.close();
|
|
|
|
std::priority_queue<HashtagInfo*, std::vector<HashtagInfo*>, CompareHashtagPtrForHeap> top20Hashtags;
|
|
typedef std::unordered_map<const std::string*, HashtagInfo, StringPtrHash, StringPtrEqual> HashtagMapType;
|
|
|
|
for (HashtagMapType::iterator it = hashtagData.begin(); it != hashtagData.end(); ++it) {
|
|
HashtagInfo* currentHashtagPtr = &(it->second);
|
|
|
|
if (top20Hashtags.size() < TOP_N_OUTPUT) {
|
|
top20Hashtags.push(currentHashtagPtr);
|
|
} else {
|
|
const HashtagInfo* topPtr = top20Hashtags.top();
|
|
bool is_better = CompareHashtagPtr()(currentHashtagPtr, topPtr);
|
|
|
|
if (is_better) {
|
|
top20Hashtags.pop();
|
|
top20Hashtags.push(currentHashtagPtr);
|
|
}
|
|
}
|
|
}
|
|
|
|
std::vector<HashtagInfo*> finalTop20;
|
|
finalTop20.reserve(top20Hashtags.size());
|
|
while (!top20Hashtags.empty()) {
|
|
finalTop20.push_back(top20Hashtags.top());
|
|
top20Hashtags.pop();
|
|
}
|
|
|
|
std::sort(finalTop20.begin(), finalTop20.end(), CompareHashtagPtr());
|
|
|
|
outputFile << "trending hashtags:\n\n";
|
|
for (size_t i = 0; i < finalTop20.size(); ++i) {
|
|
HashtagInfo* currentHashtag = finalTop20[i];
|
|
|
|
outputFile << "========================\n";
|
|
|
|
outputFile << "#" << (currentHashtag->name ? *currentHashtag->name : "null") << "\n";
|
|
outputFile << "used " << currentHashtag->usageCount << " times\n";
|
|
outputFile << currentHashtag->totalViews << " views\n\n";
|
|
|
|
extractSortAndPrintTop3Videos(outputFile, currentHashtag->topVideos);
|
|
|
|
outputFile << "========================";
|
|
if (i < finalTop20.size() - 1) {
|
|
outputFile << "\n";
|
|
} else {
|
|
outputFile << "\n";
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool processSounds(const std::string& filename, std::ofstream& outputFile) {
|
|
std::ifstream inputFile(filename);
|
|
if (!inputFile.is_open()) {
|
|
std::cerr << "Cannot open input file: " << filename << std::endl;
|
|
return false;
|
|
}
|
|
|
|
StringInterner interner;
|
|
std::unordered_map<const std::string*, SoundInfo, StringPtrHash, StringPtrEqual> soundData;
|
|
soundData.reserve(50000);
|
|
|
|
std::string line;
|
|
int inputOrderCounter = 0;
|
|
VideoInfo currentVideo;
|
|
const std::string* musicIdPtr = nullptr;
|
|
const std::string* musicNamePtr = nullptr;
|
|
const std::string* musicAuthorPtr = nullptr;
|
|
|
|
while (std::getline(inputFile, line)) {
|
|
if (line.length() < 10) continue;
|
|
inputOrderCounter++;
|
|
|
|
musicIdPtr = nullptr;
|
|
musicNamePtr = nullptr;
|
|
musicAuthorPtr = nullptr;
|
|
|
|
if (parseLineForSounds(line, inputOrderCounter, interner, currentVideo,
|
|
musicIdPtr, musicNamePtr, musicAuthorPtr))
|
|
{
|
|
if (musicIdPtr == nullptr || musicIdPtr->empty()) {
|
|
continue;
|
|
}
|
|
|
|
typedef std::unordered_map<const std::string*, SoundInfo, StringPtrHash, StringPtrEqual> SoundMapType;
|
|
SoundMapType::iterator it = soundData.find(musicIdPtr);
|
|
|
|
if (it == soundData.end()) {
|
|
std::pair<SoundMapType::iterator, bool> emplace_result =
|
|
soundData.emplace(musicIdPtr, SoundInfo(musicIdPtr, musicNamePtr, musicAuthorPtr));
|
|
it = emplace_result.first;
|
|
}
|
|
|
|
it->second.totalViews += currentVideo.playCount;
|
|
|
|
if (it->second.musicName->empty() && !musicNamePtr->empty()) {
|
|
it->second.musicName = musicNamePtr;
|
|
}
|
|
if (it->second.musicAuthor->empty() && !musicAuthorPtr->empty()) {
|
|
it->second.musicAuthor = musicAuthorPtr;
|
|
}
|
|
it->second.topVideos.add(currentVideo);
|
|
}
|
|
}
|
|
inputFile.close();
|
|
|
|
std::priority_queue<SoundInfo*, std::vector<SoundInfo*>, CompareSoundPtrForHeap> top20Sounds;
|
|
typedef std::unordered_map<const std::string*, SoundInfo, StringPtrHash, StringPtrEqual> SoundMapType;
|
|
|
|
for (SoundMapType::iterator it = soundData.begin(); it != soundData.end(); ++it) {
|
|
SoundInfo* currentSoundPtr = &(it->second);
|
|
|
|
if (top20Sounds.size() < TOP_N_OUTPUT) {
|
|
top20Sounds.push(currentSoundPtr);
|
|
} else {
|
|
const SoundInfo* topPtr = top20Sounds.top();
|
|
bool is_better = CompareSoundPtr()(currentSoundPtr, topPtr);
|
|
|
|
if (is_better) {
|
|
top20Sounds.pop();
|
|
top20Sounds.push(currentSoundPtr);
|
|
}
|
|
}
|
|
}
|
|
|
|
std::vector<SoundInfo*> finalTop20;
|
|
finalTop20.reserve(top20Sounds.size());
|
|
while (!top20Sounds.empty()) {
|
|
finalTop20.push_back(top20Sounds.top());
|
|
top20Sounds.pop();
|
|
}
|
|
|
|
std::sort(finalTop20.begin(), finalTop20.end(), CompareSoundPtr());
|
|
|
|
outputFile << "trending sounds:\n\n";
|
|
for (size_t i = 0; i < finalTop20.size(); ++i) {
|
|
SoundInfo* currentSound = finalTop20[i];
|
|
|
|
outputFile << "========================\n";
|
|
|
|
if (currentSound->musicName == nullptr || currentSound->musicName->empty()) {
|
|
outputFile << "\n";
|
|
} else {
|
|
outputFile << *currentSound->musicName << "\n";
|
|
}
|
|
|
|
outputFile << currentSound->totalViews << " views\n";
|
|
|
|
if (currentSound->musicAuthor == nullptr || currentSound->musicAuthor->empty()) {
|
|
outputFile << "\n";
|
|
} else {
|
|
outputFile << *currentSound->musicAuthor << "\n";
|
|
}
|
|
|
|
outputFile << "music id: " << (currentSound->musicId && !currentSound->musicId->empty() ? *currentSound->musicId : "null") << "\n";
|
|
|
|
if (!currentSound->topVideos.empty()) {
|
|
outputFile << "\n";
|
|
}
|
|
|
|
extractSortAndPrintTop3Videos(outputFile, currentSound->topVideos);
|
|
|
|
outputFile << "========================";
|
|
if (i < finalTop20.size() - 1) {
|
|
outputFile << "\n";
|
|
} else {
|
|
outputFile << "\n";
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
int main(int argc, char* argv[]) {
|
|
if (argc != 4) {
|
|
std::cerr << "Usage: nytrends.exe <input.json> <output.txt> <mode>\n";
|
|
std::cerr << "Mode can be 'hashtag' or 'sound'\n";
|
|
return 1;
|
|
}
|
|
std::string inputFileName = argv[1];
|
|
std::string outputFileName = argv[2];
|
|
std::string mode = argv[3];
|
|
|
|
std::ofstream outputFile(outputFileName);
|
|
if (!outputFile.is_open()) {
|
|
std::cerr << "Error: Cannot open output file " << outputFileName << std::endl;
|
|
return 1;
|
|
}
|
|
|
|
std::ios_base::sync_with_stdio(false);
|
|
|
|
bool success = false;
|
|
if (mode == "hashtag") {
|
|
success = processHashtags(inputFileName, outputFile);
|
|
} else if (mode == "sound") {
|
|
success = processSounds(inputFileName, outputFile);
|
|
} else {
|
|
std::cerr << "Error: Invalid mode '" << mode << "'. Must be 'hashtag' or 'sound'." << std::endl;
|
|
outputFile.close();
|
|
return 1;
|
|
}
|
|
|
|
outputFile.close();
|
|
return success ? 0 : 1;
|
|
} |