# Dictionary homework

# Problem #1: min and max occurrences of number in a list

def MinMaxNumber(filename):
    """Return the least and most frequently occurring integers in a file.

    The file is expected to contain one integer per line.  The first line
    must be an integer; any later line that is blank or does not parse as
    an integer is skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        ``[[min_num, min_count], [max_num, max_count]]`` where ``min_num``
        is the integer that occurs least often and ``max_num`` the one that
        occurs most often.  Ties for least frequent go to the number that
        appears first in the file; ties for most frequent go to the number
        whose running count reached the highest value first.

    Raises:
        ValueError: If the first line is not an integer (this includes an
            empty file).
        OSError: If the file cannot be opened.
    """
    counts = {}

    with open(filename, 'r') as f:
        # The first line is required to be an integer; int() raises
        # ValueError otherwise, which is the contract callers already see.
        first = int(f.readline().strip())
        counts[first] = 1
        max_num, max_count = first, 1

        # Iterate to the real end of the file: a blank line in the middle
        # must not be mistaken for end-of-file.
        for raw_line in f:
            try:
                # int() accepts negative numbers, which str.isdigit()
                # rejects, and refuses "digit" characters such as
                # superscripts that str.isdigit() accepts.
                num = int(raw_line.strip())
            except ValueError:
                continue  # blank or non-integer line

            counts[num] = counts.get(num, 0) + 1
            if counts[num] > max_count:
                max_num, max_count = num, counts[num]

    # The minimum cannot be tracked incrementally (every count only grows),
    # so it is determined once all lines have been tallied.  Dicts preserve
    # insertion order, so ties resolve to the first number seen.
    min_num = min(counts, key=counts.get)
    return [[min_num, counts[min_num]], [max_num, max_count]]

# Run Problem #1 at import/run time.  'randomNums.txt' is a relative path, so
# it is looked up in the current working directory.
print('Problem #1 results: ', MinMaxNumber('randomNums.txt'))

# Problem #2:  Characters in Hamlet

def CharactersInHamlet(filename):
    """Print character-frequency statistics for a text file.

    Only ASCII characters (code point <= 127) are counted, and upper-case
    letters are folded onto their lower-case form.  Three lines are printed:
    the three least frequent characters, the three most frequent characters
    (both as ``[count, character]`` pairs in ascending order), and all
    counted letters ordered from most to least frequent.

    Args:
        filename: Path of the text file to analyse.

    Returns:
        None.  All results are written to standard output.
    """
    # 'with' guarantees the file is closed even if read() raises.
    with open(filename, 'r') as f:
        text = f.read()

    # Tally the characters.
    freq = {}
    for ch in text:
        if ord(ch) > 127:
            continue  # ignore anything outside ASCII
        ch = ch.lower()  # only upper-case letters change; the rest pass through
        freq[ch] = freq.get(ch, 0) + 1

    # [count, character] pairs sort by count first, then by character, e.g.
    #   [23, '$'], [45, 'e'], [179, 't']
    ranked = sorted([count, ch] for ch, count in freq.items())
    print('Least frequent chars: ', ranked[:3])
    print('Most frequent chars: ', ranked[-3:])

    # Not part of the homework: the letters in most-to-least frequency order.
    etaoin = ''.join(ch for _, ch in reversed(ranked) if ch.isalpha())
    print('Hamlet letter frequency sequence: ', etaoin)
    
    
# Run Problem #2.  CharactersInHamlet prints its own findings and returns
# nothing, so only a heading is printed here before calling it.
print('Problem #2 results:')
CharactersInHamlet('hamlet.txt')

# Problem #3 (Challenge): Most frequent words...
def FrequentWordsInHamlet(filename):
    """Print the five most and five least frequent words in a text file.

    A word is a maximal run of alphabetic characters, compared
    case-insensitively; every non-letter acts as a separator.  Results are
    printed as ``[count, word]`` pairs in ascending order (by count, then
    alphabetically).

    Args:
        filename: Path of the text file to analyse.

    Returns:
        None.  All results are written to standard output.
    """
    # 'with' guarantees the file is closed even if read() raises.
    with open(filename, 'r') as f:
        text = f.read()

    word_counts = {}
    letters = []  # letters of the word currently being built up

    # A word is only recorded when the separator after it is seen, so a
    # sentinel separator is appended; without it the final word of the text
    # would never be counted.
    for ch in text + ' ':
        ch = ch.lower()
        if ch.isalpha():
            letters.append(ch)
        elif letters:  # separator reached with a word pending
            word = ''.join(letters)
            word_counts[word] = word_counts.get(word, 0) + 1
            letters = []

    # Sort by frequency (ties broken alphabetically).
    ranked = sorted([count, word] for word, count in word_counts.items())
    print('5 most frequent words in Hamlet:\n', ranked[-5:])
    print('5 least frequence words in Hamlet:\n', ranked[:5])
    
# Run Problem #3.  FrequentWordsInHamlet prints its own report and returns
# nothing, so its result is not printed here.
FrequentWordsInHamlet('hamlet.txt')