C++ Notes: Examples: Word Length Frequency

  1 
  2 
  3 
  4 
  5 
  6 
  7 
  8 
  9 
 10 
 11 
 12 
 13 
 14 
 15 
 16 
 17 
 18 
 19 
 20 
 21 
 22 
 23 
 24 
 25 
 26 
 27 
 28 
 29 
 30 
 31 
 32 
 33 
 34 
 35 
 36 
 37 
 38 
 39 
 40 
 41 
 42 
 43 
 44 
 45 
 46 
 47 
 48 
 49 
 50 
 51 
 52 
 53 
 54 
 55 
 56 
 57 
 58 
 59 
 60 
 61 
 62 
 63 
 64 
 65 
 66 
 67 
 68 
 69 
 70 
 71 
 72 
 73 
 74 
 75 
 76 
 77 
 78 
 79 
 80 
 81 
 82 
 83 
 84 
 85 
 86 
 87 
 88 
 89 
 90 
// word_len_histo.cpp : reads words and lists distribution
//                      of word lengths.
// Fred Swartz, 2002-09-01

// This would be nice to turn into an OO program, where
// a class represented a distribution of values.
// Some elements which are globals here would turn into
// private member elements in the class (eg, valueCount).


//--- includes
#include <iostream>
#include <iomanip>
#include <cctype>
using namespace std;

//--- constants
// Size of the counting table; valid bin indexes are 0 .. BINS-1.
const int BINS = 21;

//--- globals
// Running character total accumulated by countValue().
int totalChars = 0;
// One counter per bin, indexed by the number being counted.
int valueCount[BINS];

//--- prototypes
float getAverage();
void  countValue(int cnt);

//=========================================================== main
// Reads standard input one character at a time until EOF, treating each
// maximal run of alphabetic characters as a word.  Every completed word
// length is handed to countValue().  Afterwards prints a table of how
// many words were seen for each length 1 .. BINS-1, followed by the
// average word length reported by getAverage().  Always returns 0.
int main() {

    char c;              // input character
    int  wordLen = 0;    // 0 if not in word, else word length

    //--- Initialize counts to zero
    for (int i=0; i<BINS; i++) {
        valueCount[i] = 0;
    }

    //--- Read chars in loop and decide if in a word or not.
    while (cin.get(c)) {
        // isalpha() requires a value representable as unsigned char (or
        // EOF).  A plain char may be negative for bytes >= 0x80, which
        // would be undefined behaviour, so convert before classifying.
        if (isalpha(static_cast<unsigned char>(c))) { // letters are in words, so
            wordLen++;    // add one to the word length
        } else {
            countValue(wordLen); // end of word (no-op count if wordLen is 0)
            wordLen = 0;  // not in a word, set to zero
        }
    }
    countValue(wordLen);  // necessary if word ended in EOF

    //--- print the number of words of each length
    cout << "Why does this line disappear?" << endl;
    cout << "Word length    Frequency" << endl;
    // Bin 0 is never used for words, so the table starts at length 1.
    for (int j=1; j<BINS; j++) {
        cout << setw(6) << right << j << "       " 
             << setw(8) << right << valueCount[j] << endl;
    }

    //--- print average length
    cout << "\nAverage word length: " << getAverage() << endl;

    return 0;
}//end main


//==================================================== countValue
void countValue(int cnt) {
    if (cnt > 0) {
        // this must be the end of a word
        if (cnt > 20) {
            cnt = 20;  // longer than 20 counts as 20
        }
        valueCount[cnt]++; // count in correct bin
    }
    totalChars += cnt;
}//end countWord


//==================================================== getAverage
// Returns totalChars divided by the number of entries recorded across
// all bins of valueCount, or 0 when no entries have been recorded.
float getAverage() {
    int entries = 0;   // sum of the counts held in every bin

    int bin = BINS;
    while (bin-- > 0) {
        entries += valueCount[bin];
    }

    // Guard against dividing by zero when nothing was counted.
    if (entries <= 0) {
        return 0.0;
    }
    return static_cast<float>(totalChars) / entries;
}//end getAverage

Extensions

  1. Bar chart. In addition to showing a count of how many times a length occurred, show a horizontal bar made from asterisks. This can be done by adding a simple asterisk-printing loop after the count is printed. After you have that working, you will notice that for large counts, the bar of asterisks is too long. Scale the printing so that each asterisk represents several occurrences. You can do this by finding the maximum count and dividing by the number of columns that are being used for the bar chart. This will give you the number that each asterisk should represent.