Compare commits
15 Commits
63708b8858
...
54016a2693
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
54016a2693 | ||
|
|
45ee58bda2 | ||
|
|
62611ff8c8 | ||
|
|
2b7a3f5b7f | ||
|
|
2d827df9ab | ||
|
|
d375c06124 | ||
|
|
80cb798eba | ||
|
|
139fe1ee9c | ||
|
|
ef18fb8634 | ||
|
|
ec9a32372c | ||
|
|
be9341c55a | ||
|
|
a4b59701f3 | ||
|
|
4e5ccd5fcb | ||
|
|
762eac0b8e | ||
|
|
edfbfe53e0 |
16
.vscode/launch.json
vendored
@@ -88,6 +88,22 @@
|
|||||||
"MIMode": "gdb",
|
"MIMode": "gdb",
|
||||||
"miDebuggerPath": "/usr/bin/gdb",
|
"miDebuggerPath": "/usr/bin/gdb",
|
||||||
"preLaunchTask": "C/C++: g++ build active file"
|
"preLaunchTask": "C/C++: g++ build active file"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "inverse_word_search",
|
||||||
|
"type": "cppdbg",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${fileDirname}/${fileBasenameNoExtension}",
|
||||||
|
"args": [
|
||||||
|
"puzzle3.txt",
|
||||||
|
"output.txt",
|
||||||
|
"all_solutions"
|
||||||
|
],
|
||||||
|
"cwd": "${fileDirname}",
|
||||||
|
"environment": [],
|
||||||
|
"MIMode": "gdb",
|
||||||
|
"miDebuggerPath": "/usr/bin/gdb",
|
||||||
|
"preLaunchTask": "C/C++: g++ build active file"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
4
.vscode/settings.json
vendored
@@ -71,6 +71,8 @@
|
|||||||
"__hash_table": "cpp",
|
"__hash_table": "cpp",
|
||||||
"__string": "cpp",
|
"__string": "cpp",
|
||||||
"queue": "cpp",
|
"queue": "cpp",
|
||||||
"stack": "cpp"
|
"stack": "cpp",
|
||||||
|
"set": "cpp",
|
||||||
|
"climits": "cpp"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
HOMEWORK 6: INVERSE WORD SEARCH
|
HOMEWORK 6: INVERSE WORD SEARCH
|
||||||
|
|
||||||
|
|
||||||
NAME: < insert name >
|
NAME: Jinshan Zhou
|
||||||
|
|
||||||
|
|
||||||
COLLABORATORS AND OTHER RESOURCES:
|
COLLABORATORS AND OTHER RESOURCES:
|
||||||
@@ -10,18 +10,18 @@ List the names of everyone you talked to about this assignment
|
|||||||
LMS, etc.), and all of the resources (books, online reference
|
LMS, etc.), and all of the resources (books, online reference
|
||||||
material, etc.) you consulted in completing this assignment.
|
material, etc.) you consulted in completing this assignment.
|
||||||
|
|
||||||
< insert collaborators / resources >
|
Lab document -- recursion on path finding
|
||||||
|
|
||||||
Remember: Your implementation for this assignment must be done on your
|
Remember: Your implementation for this assignment must be done on your
|
||||||
own, as described in "Academic Integrity for Homework" handout.
|
own, as described in "Academic Integrity for Homework" handout.
|
||||||
|
|
||||||
ESTIMATE OF # OF HOURS SPENT ON THIS ASSIGNMENT: < insert # hours >
|
ESTIMATE OF # OF HOURS SPENT ON THIS ASSIGNMENT: 30 hr
|
||||||
|
|
||||||
|
|
||||||
MISC. COMMENTS TO GRADER:
|
MISC. COMMENTS TO GRADER:
|
||||||
Optional, please be concise!
|
Optional, please be concise!
|
||||||
|
|
||||||
|
I tried.
|
||||||
|
|
||||||
## Reflection and Self Assessment
|
## Reflection and Self Assessment
|
||||||
|
|
||||||
@@ -33,4 +33,6 @@ What parts of the assignment did you find challenging? Is there anything that
|
|||||||
finally "clicked" for you in the process of working on this assignment? How well
|
finally "clicked" for you in the process of working on this assignment? How well
|
||||||
did the development and testing process go for you?
|
did the development and testing process go for you?
|
||||||
|
|
||||||
< insert reflection >
|
This homework was very hard for me. It takes almost forever to solve some puzzles.
|
||||||
|
I tried a lot of techniques to minimize the cost, but they still do not seem to be enough. But
|
||||||
|
anyway, it's good practice.
|
||||||
|
|||||||
BIN
hws/inverse_word_search/main
Executable file
337
hws/inverse_word_search/main.cpp
Normal file
@@ -0,0 +1,337 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <set>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cctype>
|
||||||
|
#include <climits>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// One candidate way of laying a required word onto the board: where its first
// letter goes and which way the following letters run.
struct Placement {
    int r;        // row of the first letter
    int c;        // column of the first letter
    int dr;       // row increment between consecutive letters
    int dc;       // column increment between consecutive letters
    string word;  // the required word being placed
};
|
||||||
|
|
||||||
|
// Returns true when `s` contains any word of `forbWords`, read either forwards
// or backwards, as a contiguous substring.
//
// Empty entries in `forbWords` are ignored: an empty pattern would otherwise
// match every string (including the empty one) and mark every board invalid.
bool containsForbidden(const string &s, const vector<string> &forbWords) {
    for (const string &word : forbWords) {
        if (word.empty())
            continue;
        if (s.find(word) != string::npos)
            return true;
        // Look for the reversed spelling through reverse iterators so that no
        // temporary reversed copy is allocated on every call.
        if (search(s.begin(), s.end(), word.rbegin(), word.rend()) != s.end())
            return true;
    }
    return false;
}
|
||||||
|
|
||||||
|
// Checks the full board for forbidden words in every contiguous line.
|
||||||
|
bool fullForbiddenCheck(const vector< vector<char> > &board, const vector<string> &forbWords) {
|
||||||
|
int H = board.size();
|
||||||
|
if (H == 0) return false;
|
||||||
|
int W = board[0].size();
|
||||||
|
int dirs[8][2] = { {0,1}, {0,-1}, {1,0}, {-1,0}, {1,1}, {1,-1}, {-1,1}, {-1,-1} };
|
||||||
|
for (int r = 0; r < H; r++) {
|
||||||
|
for (int c = 0; c < W; c++) {
|
||||||
|
for (int d = 0; d < 8; d++) {
|
||||||
|
int dr = dirs[d][0], dc = dirs[d][1];
|
||||||
|
string line = "";
|
||||||
|
int rr = r, cc = c;
|
||||||
|
while (rr >= 0 && rr < H && cc >= 0 && cc < W) {
|
||||||
|
line.push_back(board[rr][cc]);
|
||||||
|
if (containsForbidden(line, forbWords))
|
||||||
|
return true;
|
||||||
|
rr += dr;
|
||||||
|
cc += dc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flattens the board into one string: the characters of each row in order,
// with a '\n' between consecutive rows and none after the last row.
string boardToString(const vector< vector<char> > &board) {
    string text;
    bool firstRow = true;
    for (const vector<char> &row : board) {
        if (!firstRow)
            text += '\n';
        firstRow = false;
        text.append(row.begin(), row.end());
    }
    return text;
}
|
||||||
|
|
||||||
|
// Given a newly assigned cell (r,c), check in all eight directions the contiguous block
|
||||||
|
bool checkCellDirections(int r, int c, const vector< vector<char> > &board, const vector<string> &forbWords, int minForbidLen) {
|
||||||
|
int H = board.size(), W = board[0].size();
|
||||||
|
int dirs[8][2] = { {0,1}, {0,-1}, {1,0}, {-1,0}, {1,1}, {1,-1}, {-1,1}, {-1,-1} };
|
||||||
|
for (int d = 0; d < 8; d++) {
|
||||||
|
int dr = dirs[d][0], dc = dirs[d][1];
|
||||||
|
int sr = r, sc = c;
|
||||||
|
// Go backwards from (r,c)
|
||||||
|
while (true) {
|
||||||
|
int pr = sr - dr, pc = sc - dc;
|
||||||
|
if (pr < 0 || pr >= H || pc < 0 || pc >= W) break;
|
||||||
|
if (board[pr][pc] == '?') break;
|
||||||
|
sr = pr; sc = pc;
|
||||||
|
}
|
||||||
|
// Build the contiguous block from (sr,sc)
|
||||||
|
string block = "";
|
||||||
|
int cr = sr, cc = sc;
|
||||||
|
while (cr >= 0 && cr < H && cc >= 0 && cc < W) {
|
||||||
|
if (board[cr][cc] == '?') break;
|
||||||
|
block.push_back(board[cr][cc]);
|
||||||
|
cr += dr; cc += dc;
|
||||||
|
}
|
||||||
|
if ((int)block.size() >= minForbidLen) {
|
||||||
|
if (containsForbidden(block, forbWords))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recursive function to fill free positions with letters
|
||||||
|
bool fillFreePositions(vector<vector<char>> &board,
|
||||||
|
vector<pair<int,int>> &freePositions,
|
||||||
|
int index,
|
||||||
|
const vector<string> &forbWords,
|
||||||
|
int minForbidLen,
|
||||||
|
set<string> &solutionSet,
|
||||||
|
vector<string> &solutions,
|
||||||
|
bool findOne,
|
||||||
|
const string &outputFile) {
|
||||||
|
// Base case: all free positions filled
|
||||||
|
if (index == freePositions.size()) {
|
||||||
|
// Check if the solution is valid
|
||||||
|
if (!fullForbiddenCheck(board, forbWords)) {
|
||||||
|
string solStr = boardToString(board);
|
||||||
|
if (solutionSet.find(solStr) == solutionSet.end()) {
|
||||||
|
solutionSet.insert(solStr);
|
||||||
|
solutions.push_back(solStr);
|
||||||
|
|
||||||
|
// If we need just one solution, write it to file and exit
|
||||||
|
if (findOne) {
|
||||||
|
ofstream fout(outputFile.c_str());
|
||||||
|
fout << "Board:" << endl;
|
||||||
|
istringstream iss(solStr);
|
||||||
|
string line;
|
||||||
|
while(getline(iss, line))
|
||||||
|
fout << " " << line << endl;
|
||||||
|
fout.close();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false; // Continue searching for more solutions
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try each letter for the current free position
|
||||||
|
int r = freePositions[index].first;
|
||||||
|
int c = freePositions[index].second;
|
||||||
|
|
||||||
|
for (char ch = 'a'; ch <= 'z'; ch++) {
|
||||||
|
board[r][c] = ch;
|
||||||
|
|
||||||
|
// Check if the new letter creates any forbidden words
|
||||||
|
if (checkCellDirections(r, c, board, forbWords, minForbidLen)) {
|
||||||
|
// Recursively fill the next position
|
||||||
|
if (fillFreePositions(board, freePositions, index + 1, forbWords, minForbidLen,
|
||||||
|
solutionSet, solutions, findOne, outputFile)) {
|
||||||
|
return true; // Solution found and we only need one
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Backtrack: Reset the cell
|
||||||
|
board[r][c] = '?';
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recursive function to place required words
|
||||||
|
bool placeRequiredWords(vector<vector<char>> &board,
|
||||||
|
const vector<vector<Placement>> &placements,
|
||||||
|
int wordIndex,
|
||||||
|
const vector<string> &forbWords,
|
||||||
|
int minForbidLen,
|
||||||
|
set<string> &solutionSet,
|
||||||
|
vector<string> &solutions,
|
||||||
|
bool findOne,
|
||||||
|
const string &outputFile) {
|
||||||
|
// Base case: all required words placed
|
||||||
|
if (wordIndex == placements.size()) {
|
||||||
|
// Find free positions (cells marked with '?')
|
||||||
|
vector<pair<int,int>> freePositions;
|
||||||
|
for (int i = 0; i < board.size(); i++) {
|
||||||
|
for (int j = 0; j < board[0].size(); j++) {
|
||||||
|
if (board[i][j] == '?') {
|
||||||
|
freePositions.push_back(make_pair(i, j));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recursively fill free positions
|
||||||
|
return fillFreePositions(board, freePositions, 0, forbWords, minForbidLen,
|
||||||
|
solutionSet, solutions, findOne, outputFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try each placement for the current word
|
||||||
|
for (const Placement &p : placements[wordIndex]) {
|
||||||
|
// Create a temporary copy of the board
|
||||||
|
vector<vector<char>> boardCopy = board;
|
||||||
|
bool conflict = false;
|
||||||
|
|
||||||
|
// Try to place the word
|
||||||
|
for (size_t k = 0; k < p.word.size(); k++) {
|
||||||
|
int r = p.r + p.dr * k;
|
||||||
|
int c = p.c + p.dc * k;
|
||||||
|
|
||||||
|
if (boardCopy[r][c] == '?' || boardCopy[r][c] == p.word[k]) {
|
||||||
|
boardCopy[r][c] = p.word[k];
|
||||||
|
} else {
|
||||||
|
conflict = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no conflict and no forbidden words formed, continue to the next word
|
||||||
|
if (!conflict && !fullForbiddenCheck(boardCopy, forbWords)) {
|
||||||
|
if (placeRequiredWords(boardCopy, placements, wordIndex + 1, forbWords, minForbidLen,
|
||||||
|
solutionSet, solutions, findOne, outputFile)) {
|
||||||
|
return true; // Solution found and we only need one
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false; // No solution found with any placement for this word
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char* argv[]) {
|
||||||
|
if (argc != 4) {
|
||||||
|
cout << "Usage: " << argv[0] << " input.txt output.txt one_solution|all_solutions" << endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
string inputFile = argv[1];
|
||||||
|
string outputFile = argv[2];
|
||||||
|
string mode = argv[3];
|
||||||
|
bool findOne = false;
|
||||||
|
if (mode == "one_solution") {
|
||||||
|
findOne = true;
|
||||||
|
} else if (mode == "all_solutions") {
|
||||||
|
findOne = false;
|
||||||
|
} else {
|
||||||
|
cout << "Invalid mode. Use one_solution or all_solutions." << endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ifstream fin(inputFile.c_str());
|
||||||
|
if (!fin) {
|
||||||
|
cout << "Cannot open input file." << endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int width, height;
|
||||||
|
fin >> width >> height;
|
||||||
|
string dummy;
|
||||||
|
getline(fin, dummy); // consume rest of first line
|
||||||
|
|
||||||
|
// Separate required and forbidden words.
|
||||||
|
vector<string> reqWords;
|
||||||
|
vector<string> forbWords;
|
||||||
|
|
||||||
|
while(getline(fin, dummy)) {
|
||||||
|
if(dummy.size() == 0)
|
||||||
|
continue;
|
||||||
|
char sign = dummy[0];
|
||||||
|
string word = "";
|
||||||
|
int pos = 1;
|
||||||
|
while (pos < dummy.size() && isspace(dummy[pos])) pos++;
|
||||||
|
while (pos < dummy.size() && !isspace(dummy[pos])) {
|
||||||
|
word.push_back(dummy[pos]);
|
||||||
|
pos++;
|
||||||
|
}
|
||||||
|
if (sign == '+')
|
||||||
|
reqWords.push_back(word);
|
||||||
|
else if (sign == '-')
|
||||||
|
forbWords.push_back(word);
|
||||||
|
}
|
||||||
|
fin.close();
|
||||||
|
|
||||||
|
// Precompute minimum forbidden word length.
|
||||||
|
int minForbidLen = INT_MAX;
|
||||||
|
for (size_t i = 0; i < forbWords.size(); i++) {
|
||||||
|
if ((int)forbWords[i].size() < minForbidLen)
|
||||||
|
minForbidLen = forbWords[i].size();
|
||||||
|
}
|
||||||
|
if (forbWords.empty())
|
||||||
|
minForbidLen = 27; // no forbidden word; no check needed
|
||||||
|
|
||||||
|
// Precompute all placements for each required word.
|
||||||
|
vector<vector<Placement>> placements;
|
||||||
|
placements.resize(reqWords.size());
|
||||||
|
int directions[8][2] = { {0,1}, {0,-1}, {1,0}, {-1,0}, {1,1}, {1,-1}, {-1,1}, {-1,-1} };
|
||||||
|
|
||||||
|
for (size_t w = 0; w < reqWords.size(); w++) {
|
||||||
|
string word = reqWords[w];
|
||||||
|
for (int r = 0; r < height; r++) {
|
||||||
|
for (int c = 0; c < width; c++) {
|
||||||
|
for (int d = 0; d < 8; d++) {
|
||||||
|
int dr = directions[d][0], dc = directions[d][1];
|
||||||
|
int end_r = r + dr * (word.size() - 1);
|
||||||
|
int end_c = c + dc * (word.size() - 1);
|
||||||
|
if (end_r < 0 || end_r >= height || end_c < 0 || end_c >= width)
|
||||||
|
continue;
|
||||||
|
Placement p;
|
||||||
|
p.r = r; p.c = c; p.dr = dr; p.dc = dc; p.word = word;
|
||||||
|
placements[w].push_back(p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (placements[w].empty()) {
|
||||||
|
ofstream fout(outputFile.c_str());
|
||||||
|
fout << "No solutions found" << endl;
|
||||||
|
fout.close();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use a set to avoid duplicate solutions.
|
||||||
|
set<string> solutionSet;
|
||||||
|
vector<string> solutions;
|
||||||
|
|
||||||
|
// Prepare the base board filled with '?'.
|
||||||
|
vector<vector<char>> board(height, vector<char>(width, '?'));
|
||||||
|
|
||||||
|
// Call the recursive function to place required words
|
||||||
|
placeRequiredWords(board, placements, 0, forbWords, minForbidLen,
|
||||||
|
solutionSet, solutions, findOne, outputFile);
|
||||||
|
|
||||||
|
// Write out all found solutions (if findOne is true, we've already written the solution).
|
||||||
|
if (!findOne || solutions.empty()) {
|
||||||
|
ofstream fout(outputFile.c_str());
|
||||||
|
if (solutions.empty()) {
|
||||||
|
fout << "No solutions found" << endl;
|
||||||
|
} else {
|
||||||
|
fout << solutions.size() << " solution(s)" << endl;
|
||||||
|
for (size_t s = 0; s < solutions.size(); s++) {
|
||||||
|
fout << "Board:" << endl;
|
||||||
|
istringstream iss(solutions[s]);
|
||||||
|
string line;
|
||||||
|
while(getline(iss, line))
|
||||||
|
fout << " " << line << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fout.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
41
hws/inverse_word_search/output.txt
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
8 solution(s)
|
||||||
|
Board:
|
||||||
|
aare
|
||||||
|
rrea
|
||||||
|
itss
|
||||||
|
dstt
|
||||||
|
Board:
|
||||||
|
eraa
|
||||||
|
aerr
|
||||||
|
ssti
|
||||||
|
ttsd
|
||||||
|
Board:
|
||||||
|
arid
|
||||||
|
arts
|
||||||
|
rest
|
||||||
|
east
|
||||||
|
Board:
|
||||||
|
dira
|
||||||
|
stra
|
||||||
|
tser
|
||||||
|
tsae
|
||||||
|
Board:
|
||||||
|
east
|
||||||
|
rest
|
||||||
|
arts
|
||||||
|
arid
|
||||||
|
Board:
|
||||||
|
tsae
|
||||||
|
tser
|
||||||
|
stra
|
||||||
|
dira
|
||||||
|
Board:
|
||||||
|
dstt
|
||||||
|
itss
|
||||||
|
rrea
|
||||||
|
aare
|
||||||
|
Board:
|
||||||
|
ttsd
|
||||||
|
ssti
|
||||||
|
aerr
|
||||||
|
eraa
|
||||||
@@ -1,25 +1,3 @@
|
|||||||
<!-- Clarification
|
|
||||||
|
|
||||||
We made a clarification on the discussion forum. In case you didn't pay attention there, we are adding the clarification here.
|
|
||||||
|
|
||||||
2. However, these are two situations where the above rule does not apply:
|
|
||||||
|
|
||||||
2.1. when constructing the snippet, this above rule does not apply. When constructing the snippet, you just find the first occurrence of that word (or that query), and that really is saying that you can just call the **std::string::find**() function to find the first occurrence of that word (or that query) within the body section of the HTML file. And therefore your snippet may be like this:
|
|
||||||
|
|
||||||
"I am Lady Gaga."
|
|
||||||
|
|
||||||
when the search is a phrase search of "Lady Gaga". So this means that "." after Gaga is okay, we do not care.
|
|
||||||
|
|
||||||
This is also why for test case 4.2, the following is shown in the snippet:
|
|
||||||
|
|
||||||
"Since 1982, The Statue of Liberty-Ellis Island Foundation has partnered with the"
|
|
||||||
|
|
||||||
when the search query is a phrase search of "Statue of Liberty". And this means that "-" after Liberty is okay, we do not care.
|
|
||||||
|
|
||||||
2.2. when counting the number of occurrences of each keyword (in the keyword density score calculation process), the above rule does not apply. When counting the occurrences of each keyword, you can just call the **std::string::find**() function to find the occurrence of that keyword. And therefore, when the keyword is *Gaga*, and the **std::string::find**() function finds *Gaga* in the sentence of "I am Lady Gaga.", that is okay, we will count this one as a valid occurrence even though there is period "." after *Gaga*.
|
|
||||||
|
|
||||||
So you may see that 1 and 2 are not consistent; but the only reason we allow this inconsistency to exist in this assignment is to simplify your task. A fully functioning search engine will need to handle a lot of complicated cases, and that's way beyond the scope of this course.-->
|
|
||||||
|
|
||||||
# Homework 7 — Design and Implementation of a Simple Google
|
# Homework 7 — Design and Implementation of a Simple Google
|
||||||
|
|
||||||
In this assignment you will develop a simple search engine called New York Search. Your program will mimic some of the features provided by Google. Please read the entire handout before starting to code the assignment.
|
In this assignment you will develop a simple search engine called New York Search. Your program will mimic some of the features provided by Google. Please read the entire handout before starting to code the assignment.
|
||||||
@@ -43,12 +21,6 @@ When developing a search engine, the first question we want to ask is, where to
|
|||||||
|
|
||||||
**Note**: in this README, the term web page, page, document, and HTML file, all have the same meaning.
|
**Note**: in this README, the term web page, page, document, and HTML file, all have the same meaning.
|
||||||
|
|
||||||
<!--Term Frequency
|
|
||||||
|
|
||||||
Metadata and Links:
|
|
||||||
|
|
||||||
The index may also store metadata associated with each web page, such as the page's URL, title, and description. Additionally, the index can include information about links from one page to another, which is used for link analysis and page ranking.-->
|
|
||||||
|
|
||||||
When a user enters a search query, the search engine consults its inverted index map to identify the documents that match the query term. These matching documents will then be ranked based on various factors, and the ranked documents will then be presented to the user. And this ranking process is the so-called Page Ranking.
|
When a user enters a search query, the search engine consults its inverted index map to identify the documents that match the query term. These matching documents will then be ranked based on various factors, and the ranked documents will then be presented to the user. And this ranking process is the so-called Page Ranking.
|
||||||
|
|
||||||
## Implementation
|
## Implementation
|
||||||
@@ -391,11 +363,11 @@ Make sure you still include the fstream library.
|
|||||||
In this assignment, you are required to use either std::map or std::set. You can use both if you want to. You are NOT allowed to use any data structures we have not learned so far, but feel free to use any data structures we have already learned, such as std::string, std::vector, std::list. In addition, **the web crawler component of your program must be recursive**.
|
In this assignment, you are required to use either std::map or std::set. You can use both if you want to. You are NOT allowed to use any data structures we have not learned so far, but feel free to use any data structures we have already learned, such as std::string, std::vector, std::list. In addition, **the web crawler component of your program must be recursive**.
|
||||||
|
|
||||||
Use good coding style when you design and implement your program. Organize your program into functions:
|
Use good coding style when you design and implement your program. Organize your program into functions:
|
||||||
don’t put all the code in main! Be sure to read the [Homework Policies](https://www.cs.rpi.edu/academics/courses/spring24/csci1200/homework_policies.php) as you put the finishing touches on your solution. Be sure to make up new test cases to fully debug your program and don’t forget
|
don’t put all the code in main! Be sure to read the [Homework Policies](https://www.cs.rpi.edu/academics/courses/spring25/csci1200/homework_policies.php) as you put the finishing touches on your solution. Be sure to make up new test cases to fully debug your program and don’t forget
|
||||||
to comment your code! Use the provided template [README.txt](./README.txt) file for notes you want the grader to read.
|
to comment your code! Use the provided template [README.txt](./README.txt) file for notes you want the grader to read.
|
||||||
You must do this assignment on your own, as described in the [Collaboration Policy & Academic Integrity](https://www.cs.rpi.edu/academics/courses/spring24/csci1200/academic_integrity.php) page. If you did discuss the problem or error messages, etc. with anyone, please list their names in your README.txt file.
|
You must do this assignment on your own, as described in the [Collaboration Policy & Academic Integrity](https://www.cs.rpi.edu/academics/courses/spring25/csci1200/academic_integrity.php) page. If you did discuss the problem or error messages, etc. with anyone, please list their names in your README.txt file.
|
||||||
|
|
||||||
**Due Date**: 03/21/2024, Thursday, 10pm.
|
**Due Date**: 03/20/2025, Thursday, 10pm.
|
||||||
|
|
||||||
## Instructor's Code
|
## Instructor's Code
|
||||||
|
|
||||||
@@ -423,18 +395,15 @@ A2: All 33 documents.
|
|||||||
- Putting almost everything in the main function. It's better to create separate functions for different tasks. (-2)
|
- Putting almost everything in the main function. It's better to create separate functions for different tasks. (-2)
|
||||||
- Function bodies containing more than one statement are placed in the .h file. (okay for templated classes) (-2)
|
- Function bodies containing more than one statement are placed in the .h file. (okay for templated classes) (-2)
|
||||||
- Functions are not well documented or are poorly commented, in either the .h or the .cpp file. (-1)
|
- Functions are not well documented or are poorly commented, in either the .h or the .cpp file. (-1)
|
||||||
- Improper uses or omissions of const and reference. (-1)
|
|
||||||
- At least one function is excessively long (i.e., more than 200 lines). (-1)
|
- At least one function is excessively long (i.e., more than 200 lines). (-1)
|
||||||
- Overly cramped, excessive whitespace, or poor indentation. (-1)
|
- Overly cramped, excessive whitespace, or poor indentation. (-1)
|
||||||
- Poor file organization: Puts more than one class in a file (okay for very small helper classes) (-1)
|
- Poor file organization: Puts more than one class in a file (okay for very small helper classes) (-1)
|
||||||
- Poor choice of variable names: non-descriptive names (e.g. 'vec', 'str', 'var'), single-letter variable names (except single loop counter), etc. (-2)
|
- Poor choice of variable names: non-descriptive names (e.g. 'vec', 'str', 'var'), single-letter variable names (except single loop counter), etc. (-2)
|
||||||
- Contains useless comments like commented-out code, terminal commands, or silly notes. (-1)
|
|
||||||
- DATA REPRESENTATION (7 pts)
|
- DATA REPRESENTATION (7 pts)
|
||||||
- Uses data structures which have not been covered in this class. (-7)
|
- Uses data structures which have not been covered in this class. (-7)
|
||||||
- Uses filesystem library (i.e., #include <filesystem> ). (-7)
|
- Uses filesystem library (i.e., #include <filesystem> ). (-7)
|
||||||
- Neither std::map nor std::set is used. (-7)
|
- Neither std::map nor std::set is used. (-7)
|
||||||
- Paths to all 32 HTML files are manually specified within the program's code. (The paths should be found by the program during runtime) (-5)
|
- Paths to all 32 HTML files are manually specified within the program's code. (The paths should be found by the program during runtime) (-5)
|
||||||
- Member variables are public. (-2)
|
|
||||||
- RECURSION (3 pts)
|
- RECURSION (3 pts)
|
||||||
- Does not use recursion in the web crawler component. (-3)
|
- Does not use recursion in the web crawler component. (-3)
|
||||||
|
|
||||||
|
Before Width: | Height: | Size: 48 KiB After Width: | Height: | Size: 48 KiB |
|
Before Width: | Height: | Size: 26 KiB After Width: | Height: | Size: 26 KiB |
|
Before Width: | Height: | Size: 90 KiB After Width: | Height: | Size: 90 KiB |
|
Before Width: | Height: | Size: 4.5 KiB After Width: | Height: | Size: 4.5 KiB |
|
Before Width: | Height: | Size: 2.4 KiB After Width: | Height: | Size: 2.4 KiB |