# Real Data Exercises

# Ex: 1
# ---------- Convert a line of the SAT data into a list of strings and integers,
#   taking care of school names wrapped in double-quotes, and converting
#   "s" into 0 for values that should have been integers

def ConvertSATLine(line):
    """Split one comma-separated line of the SAT data into six fields.

    The expected layout is an identifier, a school name, and four numeric
    columns.  A school name that contains commas is wrapped in double-quotes
    in the file, so a plain split on ',' yields more than six pieces; those
    pieces are glued back together into a single name.

    Returns a list [id, name, n1, n2, n3, n4] in which the last four items
    are ints.  A numeric column that is not made only of digits (for example
    the "s" marker) becomes 0.
    """
    fields=line.split(',')
    # take care of a school name with double-quotes
    if len(fields)!=6:
        # everything between the first field and the last four belongs to
        # the name: put the commas back
        school=','.join(fields[1:-4])
        # drop the surrounding double-quotes, then any padding inside them.
        # str.strip() returns a new string, so its result has to be kept.
        school=school[1:-1].strip()
        # Re-compose the fields list
        fields=fields[0:1]+[school]+fields[-4:]
    else:
        fields[1]=fields[1].strip()
    # convert the last 4 into integers if possible
    for i in range(2,6):
        if fields[i].isdigit():
            fields[i]=int(fields[i])
        else:
            fields[i]=0
    return fields
    
def ReadSAT(filename):
    """Read the SAT data file and return its rows as lists of fields.

    The first line (the column headers) is skipped and every remaining line
    is parsed with ConvertSATLine.

    Returns an empty list when the file cannot be read or when it holds no
    data lines.
    """
    try:
        # 'with' guarantees the file is closed, even if read() fails
        with open(filename,'r') as f:
            text=f.read()
    except (IOError,OSError):
        return []
    # normalise every newline convention to '\n' by hand instead of relying
    # on the deprecated 'U' open mode
    lines=text.replace('\r\n','\n').replace('\r','\n').split('\n')
    # remove the column headers:
    lines=lines[1:]
    # remove the last line if it's empty; when nothing follows the header
    # there is no last line to look at
    if lines and lines[-1].strip()=='':
        lines=lines[:-1]
    # now, split the lines into fields, converting the last few into integers
    return [ConvertSATLine(line) for line in lines]

# ------------------------------------------------------
# Ex. 2
def HighLow(filename):
    """Print the schools with the lowest and the highest math score.

    filename is the SAT data file, read through ReadSAT.  Field 4 of each
    row is used as the math score and field 1 as the school name.  A score
    of 0 is never reported as the lowest, because ReadSAT's rows use 0 for
    values that were not numbers in the file.

    Nothing is returned; the two results are printed.
    """
    # read the file the caller asked for, not a hard-coded one
    schools=ReadSAT(filename)
    low_math=1000
    high_math=0
    low_school=high_school=''

    for aschool in schools:
        if low_math>aschool[4]>0:
            low_math=aschool[4]
            low_school=aschool[1]
        if aschool[4]>high_math:
            high_math=aschool[4]
            high_school=aschool[1]
    print('Lowest in math: %s with %d' % (low_school,low_math))
    print('Highest in math: %s with %d' % (high_school,high_math))
    
# Answers:
# Lowest in math: Brooklyn High Sch for Leadership Community Svc with 281
# Highest in math: STUYVESANT HIGH SCHOOL with 735

# ---------------------------------------------------------
# Ex 3
def BigStats(filename):
    """Return overall totals for the SAT data file.

    filename is the SAT data file, read through ReadSAT.  Only rows whose
    field 4 is greater than 0 are counted.  Field 2 of a row is used as its
    weight, and fields 3, 4 and 5 are averaged with that weight.

    Returns [total_weight, avg_field3, avg_field4, avg_field5], with the
    averages rounded down to whole numbers.  When no row qualifies (for
    example because the file could not be read) the result is [0, 0, 0, 0].
    """
    # read the file the caller asked for, not a hard-coded one
    schools=ReadSAT(filename)
    totals=[0]*4
    for aschool in schools:
        if aschool[4]>0:
            totals[0]+=aschool[2]
            totals[1]+=aschool[2]*aschool[3]
            totals[2]+=aschool[2]*aschool[4]
            totals[3]+=aschool[2]*aschool[5]
    # nothing to average: avoid dividing by zero
    if totals[0]==0:
        return [0,0,0,0]
    # '//' keeps the averages integers, whatever the division rules in force
    return [totals[0]]+[weighted//totals[0] for weighted in totals[1:]]
# Answers:
# [40012, 439, 462, 434]

# -----------------------------------------------------------
# Ex. 4
def School2Dict(line):
    """Turn one line of the SAT data into a dictionary.

    The line is parsed by ConvertSATLine (which copes with double-quoted
    names) and its fields are stored, in order, under the keys 'DBN',
    'Name', 'Number', 'Reading', 'Math' and 'Writing'.  Any field equal to
    0 is stored as the string 's' instead.
    """
    column_names=('DBN','Name','Number','Reading','Math','Writing')
    record={}
    # pair each key with the field found at the same position
    for key,value in zip(column_names,ConvertSATLine(line)):
        record[key]='s' if value==0 else value
    return record

# ---------------------------------------------------------------
# Ex. 5
def MakeMaster(filename):
    """Build a dictionary of all schools in the SAT data file.

    The first line (the column headers) is skipped; every other line is
    turned into a dictionary by School2Dict and stored under its 'DBN'
    value.  If two lines share a 'DBN', the later one replaces the earlier.

    Returns an empty dictionary when the file cannot be read or holds no
    data lines.
    """
    try:
        # 'with' guarantees the file is closed, even if read() fails
        with open(filename,'r') as f:
            text=f.read()
    except (IOError,OSError):
        return {}
    # normalise every newline convention to '\n' by hand instead of relying
    # on the deprecated 'U' open mode
    lines=text.replace('\r\n','\n').replace('\r','\n').split('\n')
    lines=lines[1:]
    # drop the empty piece left by a trailing newline; when nothing follows
    # the header there is no last line to look at
    if lines and len(lines[-1])==0:
        lines=lines[:-1]

    master={}
    for line in lines:
        d=School2Dict(line)
        master[d['DBN']]=d
    return master

# ------------------------------------------------------------------
# Ex 6
def NameQueryHelper(filename):
    """Read a comma-separated file and return its rows as lists of strings.

    The first line is skipped, and so is the empty piece left by a trailing
    newline.  Every other line is split on ','.

    Returns a list of lists of strings (empty when nothing follows the first
    line).  When the file cannot be read, a message is printed and None is
    returned.
    """
    try:
        # 'with' guarantees the file is closed, even if read() fails
        with open(filename,'r') as f:
            text=f.read()
    except (IOError,OSError):
        print('Cannot read '+filename)
        return
    # normalise every newline convention to '\n' by hand instead of relying
    # on the deprecated 'U' open mode
    lines=text.replace('\r\n','\n').replace('\r','\n').split('\n')
    lines=lines[1:]
    # when nothing follows the first line there is no last line to look at
    if lines and lines[-1]=='':
        lines=lines[:-1]

    return [line.split(',') for line in lines]

def NameQuery(filename):
    """Interactively report the rating of every matching name.

    The rows come from NameQueryHelper(filename).  The user is asked again
    and again for the beginning of a name; every row whose first or second
    field starts with that text (ignoring case) is printed together with its
    third field.  An empty answer ends the session.

    Returns None.  If the file could not be read the function returns at
    once, without prompting.
    """
    lst=NameQueryHelper(filename)
    if lst is None:
        # the helper has already reported the problem; there is nothing to
        # search in
        return

    while True:
        search=raw_input('\nReport on what name(s)? ').strip().upper()
        if search=='':
            print("OK, that's it.")
            return
        found_any=False
        for fields in lst:
            # a blank or incomplete line has no names/rating to report
            if len(fields)<3:
                continue
            if fields[0].upper().startswith(search) or fields[1].upper().startswith(search):
                print('%s %s has a rating of %s' % (fields[0],fields[1],fields[2]))
                found_any=True
        if not found_any:
            print('No results')
    
# Ex 6a
def compare(a,b):
    """cmp-style comparison of two rows, for sorting.

    a and b are sequences whose item 2 converts to an int and whose item 0
    is a string.  Rows with a larger item 2 come first; rows with the same
    item 2 are ordered by item 0, ignoring case.

    Returns a negative number when a goes before b, a positive number when
    a goes after b, and 0 when the two rows tie on both criteria.
    """
    ia=int(a[2])
    ib=int(b[2])
    if ia!=ib:
        # reversed difference: the larger value sorts first
        return ib-ia
    name_a=a[0].upper()
    name_b=b[0].upper()
    if name_a>name_b:
        return 1
    if name_a<name_b:
        return -1
    # a tie must be reported as 0, otherwise compare(a,b) and compare(b,a)
    # would both claim that their first argument goes first
    return 0

def NameQuerySorted(filename):
    """Interactively report the rating of every matching name, sorted.

    The rows come from NameQueryHelper(filename).  The user is asked again
    and again for the beginning of a name; the rows whose first or second
    field starts with that text (ignoring case) are sorted with compare and
    printed together with their third field.  An empty answer ends the
    session.

    Returns None.  If the file could not be read the function returns at
    once, without prompting.
    """
    lst=NameQueryHelper(filename)
    if lst is None:
        # the helper has already reported the problem; there is nothing to
        # search in
        return

    while True:
        search=raw_input('\nReport on what name(s)? ').strip().upper()
        if search=='':
            print("Ok, that's it.")
            return
        # a blank or incomplete line has no names/rating to report, so rows
        # with fewer than three fields are left out
        found_lst=[fields for fields in lst
                   if len(fields)>=3
                   and (fields[0].upper().startswith(search)
                        or fields[1].upper().startswith(search))]
        if len(found_lst)==0:
            print('No results')
        else:
            found_lst.sort(cmp=compare)
            for fields in found_lst:
                print('%s %s has a rating of %s' % (fields[0],fields[1],fields[2]))