//---------------------------------------------------------
// This program reads a text file and generates a table
// listing the words found in the file and the corresponding
// frequencies. The table lists the tokens in alphabetical
// order. The output table is written to a text file.
//
// D. Searls
// REPLACE THIS LINE WITH YOUR NAME!!
// Asbury University
//---------------------------------------------------------

#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
using namespace std;

// YOUR list CLASS GOES HERE

//*********************************************************
// A tokenType object has two data members: a text field,
// and an integer field. The object is intended to be used
// in a program that prepares a word-frequency distribution
// for a text file. The text field of a tokenType object
// contains a word and the frequency field contains the
// number of times the word appears in the text.
//
// The text field is the key: hashing, key_equal and both
// comparison operators look at the text only and ignore
// the frequency.
//*********************************************************
class tokenType
{
private:
    string text;   // the word itself (the key field)
    int freq;      // number of occurrences counted so far

public:
    //-----------------------------------------------------
    // Construct a default token whose text field is the
    // empty string and whose frequency is 0.
    //-----------------------------------------------------
    tokenType() : text(), freq(0)
    {
    }

    //-----------------------------------------------------
    // Set this token's text and frequency to the specified
    // values.
    //
    // In Parameter: newText, newFreq
    //-----------------------------------------------------
    void set(const string& newText, int newFreq)
    {
        text = newText;
        freq = newFreq;
    }

    //-----------------------------------------------------
    // Return an unsigned integer based on the value of the
    // text in this token. (This value is intended to be
    // used by a hashing function to determine the hash
    // table index corresponding to this token.)
    //
    // The value is a base-31 polynomial over the bytes of
    // the text. Unsigned arithmetic wraps on overflow, so
    // the result is well defined for any length of text,
    // and equal texts always yield equal hash codes.
    //-----------------------------------------------------
    unsigned int hashcode() const
    {
        unsigned int hash = 0;
        for (string::size_type i = 0; i < text.size(); i++)
        {
            // Go through unsigned char so that bytes >= 0x80
            // contribute the same value on every platform.
            hash = hash * 31u + static_cast<unsigned char>(text[i]);
        }
        return hash;
    }

    //-----------------------------------------------------
    // Return true if this token has the same key field
    // value as the specified token and false otherwise.
    //
    // In Parameter: tok
    //-----------------------------------------------------
    bool key_equal(const tokenType& tok) const
    {
        return text == tok.text;
    }

    //-----------------------------------------------------
    // Add one to the frequency of this token.
    //-----------------------------------------------------
    void incrementFreq()
    {
        ++freq;
    }

    //-----------------------------------------------------
    // Get this token's text.
    //-----------------------------------------------------
    string getText() const
    {
        return text;
    }

    //-----------------------------------------------------
    // Get this token's frequency.
    //-----------------------------------------------------
    int getFreq() const
    {
        return freq;
    }

    //-----------------------------------------------------
    // Return true if this token is less than the
    // specified token and false otherwise. The comparison
    // is made on the basis of the key field values
    // (ordinary string ordering of the text).
    //
    // In Parameter: rValue
    //-----------------------------------------------------
    bool operator < (const tokenType& rValue) const
    {
        return text < rValue.text;
    }

    //-----------------------------------------------------
    // Return true if this token equals the specified
    // token and false otherwise. Two tokens are equal
    // if they have the same key value.
    //
    // In Parameter: rValue
    //-----------------------------------------------------
    bool operator == (const tokenType& rValue) const
    {
        return text == rValue.text;
    }
};

//---------------------------------------------------------
// The hashTable class implements a hash table as an array
// of linked lists. The hash table can be used to store
// objects that implement a hashcode function and a
// key_equal function (as defined below).
//
// unsigned int hashcode()
//
// The hashcode function must return an unsigned integer
// value.
This value will be used to determine the location // of the object that invokes the function. The hashcode // function converts the key field value of the object into // an unsigned integer. Given equal key values, the hashcode // function must return the same unsigned integer result. // // bool key_equal(const itemType& item) // // The key_equal function for an object returns true if the // object that invokes the function has the same key field // value as the function parameter and false otherwise. // // When searching for an item, the hashcode function will // be invoked for that item to determine the bucket in // which the item would be located if it was in the table. // The key_equal function will be used to determine if // the item is included in that bucket. // // The size of the hash table (see the constructors) is the // number of buckets in the hash table. A hash table can // hold any number of items (even more than its size). // However, as the number of items increases beyond the // size, search operations will become slightly less // efficient. //--------------------------------------------------------- template class hashTable { private: list* table; int size; public: //----------------------------------------------------- // Construct a hash table with a size of 53 //----------------------------------------------------- hashTable() { } //----------------------------------------------------- // Construct a hash table of the specified size. THE // SIZE SHOULD BE A PRIME NUMBER. // // Precondition: theSize > 0; // // In Parameter: theSize //----------------------------------------------------- hashTable(int theSize) { } //----------------------------------------------------- // Destroy the hash table and release all allocated // memory back to the operating system. //----------------------------------------------------- ~hashTable() { } //----------------------------------------------------- // Insert the specified item into the hash table. 
// // Precondition: There is no item in the table with the // same key field value as the specified item. Before // inserting an item into the table, the application // should search for the item to make sure it is not // already in the table. // // In Parameter: item //----------------------------------------------------- void insert(itemType item) { } //----------------------------------------------------- // Search for an item in the table whose key value // matches the key value of the specified item (as // determined by the key_equal function in the item // class). If found, this function returns a pointer // to the item. Otherwise, this function returns a // NULL pointer. // // In Parameter: targetItem //----------------------------------------------------- itemType* search(itemType targetItem) { } //----------------------------------------------------- // Convert this hash table to a list and return the // list as an out parameter. // // Postcondition: the list will contain the same items // as the table. The table will not be modified in // any way. // // Out Parameter: list //----------------------------------------------------- void toList(list& theList) { } }; //----------------------------------------------- // getToken // // Returns true if it was possible to extract a // token from infile and false otherwise. A token is // a sequence of characters that begins with a // letter of the alphabet and contains only letters // and (possibly) apostrophes. // // Postcondition: token will contain the first // token found in infile (with all uppercase // characters converted to lower case). The // frequency of the token will be set to 1. 
// // In/Out Parameter: infile // // Out Parameter: token //----------------------------------------------- bool getToken(istream& infile, tokenType& token) { bool success = false; unsigned int pos; string tokenStr; // Skip non-alpha characters while (infile.peek() != EOF && !isalpha(char(infile.peek()))) { infile.ignore(1); } // Extract token text while (infile.peek() != EOF && (isalpha(char(infile.peek())) || char(infile.peek()) == '\'')) { tokenStr = tokenStr + char(tolower(char(infile.get()))); } // Initialize token if (tokenStr.size() != 0) { token.set(tokenStr, 1); success = true; } return success; } //********************************************************* // M A I N D R I V E R //********************************************************* int main() { ifstream infile; ofstream outfile; tokenType token; hashTable table(199); tokenType* ptr; list tokenList; string infileName; string outfileName; // Get the name of the input file. cout << "Enter the name of the input file: "; cin >> infileName; infile.open(infileName.c_str()); if (infile.fail()) { cout << "Fatal Error: Could not open '" << infileName << "' for input.\n"; exit(0); } cout << endl; // Get the name of the output file. cout << "Enter the name of the output file: "; cin >> outfileName; outfile.open(outfileName.c_str()); if (outfile.fail()) { cout << "Fatal Error: Could not open '" << outfileName << "' for output.\n"; exit(0); } cout << endl; // Generate the frequency distribution as a hash table while (getToken(infile, token)) { ptr = table.search(token); if (ptr == NULL) { table.insert(token); } else { (*ptr).incrementFreq(); } } infile.close(); // Write the frequency distribution to the output file. 
table.toList(tokenList); tokenList.sort(); outfile << "Token Freq\n"; outfile << "-------------------- -----\n"; for (ptr = tokenList.first(); ptr != tokenList.end(); ptr = tokenList.next()) { outfile << left << setw(20) << (*ptr).getText() << right << setw(7) << (*ptr).getFreq() << endl; } outfile.close(); // Display number of tokens and final message. cout << "The file \"" << infileName << "\" had " << tokenList.size() << " unique tokens.\n\n"; cout << "The frequency distribution table is in the file \"" << outfileName << "\".\n"; return 0; }