import re

tale = """It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us, we were all going direct to heaven, we were all going direct the other way - in short, the period was so far like the present period, that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree of comparison only."""


def getTok(txt):
    """Tokenize a piece of text into lower-cased words and punctuation.

    Takes a piece of text (a single string) as the argument.
    Returns a list of tokens: every run of word characters is one token,
    every punctuation character is its own token, and whitespace is
    dropped. Upper case is lowered before tokenizing, so an empty or
    whitespace-only string yields an empty list.
    """
    # "\w+" grabs whole words; "[^\w\s]" grabs any single character that is
    # neither a word character nor whitespace, i.e. one punctuation mark.
    return re.findall(r"\w+|[^\w\s]", txt.lower())


def getType(li):
    """Return the unique word types found in a list of tokens.

    Takes a list of already tokenized words as the argument.
    Returns a new list with duplicates removed. The result is sorted so
    that the output is identical from run to run (set order is not).
    """
    return sorted(set(li))


# NOTE: every print below is a single-argument print(...) call, which
# produces the same output under both Python 2 and Python 3.

# taletoks is a list of tokenized words from the "tale" string
taletoks = getTok(tale)
print(taletoks)
print('There are %d word tokens in this text.' % len(taletoks))

# taletypes is a list of unique word types, obtained by further processing
# taletoks through the getType() function
taletypes = getType(taletoks)
print(taletypes)
print('There are %d unique word types in this text.' % len(taletypes))

print('----------------')

# medlong holds word types that are 6 to 9 chars long;
# verylong holds word types that are 10 chars or longer.
medlong = [wd for wd in taletypes if 6 <= len(wd) <= 9]
verylong = [wd for wd in taletypes if len(wd) >= 10]

print('There are %d medium-length words:' % len(medlong))
print(' '.join(medlong))
print('There are %d very long words:' % len(verylong))
print(' '.join(verylong))

print('----------------')

# Dictionary of word count. KEY: word, VALUE: count.
# Counted over taletoks (tokens, not types) so repeats are tallied.
wdcount = {}
for tok in taletoks:
    wdcount[tok] = wdcount.get(tok, 0) + 1

for key in ('times', 'it', 'of', ','):
    print('"%s" occurs %d times in the text.' % (key, wdcount[key]))

print('----------------')

# Dictionary of word length.
# KEY: word length, VALUE: a list of words whose length is KEY.
# Example: {2:['it', 'of', 'we'], 7:['despair', 'nothing']}
lenlist = {}
for wd in taletypes:
    # setdefault() creates the empty list the first time a length is seen,
    # then append() adds the word in place. ("list + wd" would neither
    # modify the stored list nor accept a string operand.)
    lenlist.setdefault(len(wd), []).append(wd)

print('5-character words are:')
print(' '.join(lenlist[5]))
print('1-character words are:')
print(' '.join(lenlist[1]))