def getTok(txt):
    """Split a piece of text into lower-cased word tokens.

    Takes a piece of text (a single string) as the argument.  The text is
    lower-cased, a space is inserted in front of every comma and period so
    that they become tokens of their own, and the result is split on
    whitespace.  Any other punctuation mark stays attached to its word.

    Returns a list of token strings (an empty list for empty or
    all-whitespace input).
    """
    newtxt = txt.lower().replace(',', ' ,').replace('.', ' .')
    return newtxt.split()


def getFreq(toks):
    """Count how many times each item occurs in 'toks'.

    Takes a list of hashable items (such as string tokens) as the argument.
    Returns a frequency count dictionary that maps every distinct item to
    the number of times it appears.  An empty list gives an empty dictionary.
    """
    # [1] Frequency count:
    freq = {}
    for tok in toks:
        # dict.get() supplies 0 the first time a token is seen.
        freq[tok] = freq.get(tok, 0) + 1
    return freq


# Open gettysburg_address.txt for reading, and then assign
# the entire text content to a string variable named 'getty_text'.
# Then close your file.
# [2] YOUR CODE BELOW:
# NOTE: until step [2] is filled in, getty_text is empty, so the token list
# and the frequency dictionary below are empty as well.
getty_text = ''

# We now obtain a tokenized word list by feeding getty_text through getTok():
getty_toks = getTok(getty_text)

# Let's take a quick peek into the list by printing out the first 20 tokens.
# (The parenthesized single-argument form prints the same thing under
# Python 2 and also parses under Python 3.)
print(getty_toks[:20])

# We now obtain a word frequency dictionary by feeding getty_toks
# through getFreq():
getty_freq = getFreq(getty_toks)

# Let's try printing out the first 10 word frequency counts, in
# alphabetical order.
# [3] Uncomment the two lines below.
#for w in sorted(getty_freq)[:10] :
#    print('%s %s' % (w, getty_freq[w]))

#-------------------------------------------------------------------------
# It's time to write the results out to a file.

# Open a file named 'Ex6.gettysburg.OUT.txt' for writing.
# Name the file object 'OUTF'
# [4] YOUR CODE BELOW:

# First, write out the basic text stats.
# [5] Uncomment the three lines.
#OUTF.write('The text file has '+str(len(getty_toks))+' word tokens\n')
#OUTF.write('and '+str(len(getty_freq.keys()))+' unique word types.\n')
#OUTF.write('\n')

# Second, print to the file the top 10 most frequent words and their frequency
# counts. One per each line, in this order: word, a tab, and the integer.
# [6] Uncomment the following line, and write YOUR CODE BELOW.
#OUTF.write('The top ten most frequent words are:\n')

# Third, print to the file the top 10 longest words and their frequency counts.
# [7] Uncomment the following line, and write YOUR CODE BELOW.
#OUTF.write('\nThe top ten longest words are:\n')

# Finally, close your OUTF file object.
# [8] YOUR CODE BELOW: