# Real Data Exercises
# Ex: 1
# ----------


def ConvertSATLine(line):
    """Convert one line of the SAT data into a list of strings and integers.

    The line is split on commas.  A school name that contains commas (and
    is therefore wrapped in double-quotes in the file) is glued back into a
    single field, its surrounding double-quotes are removed and it is
    stripped of leading/trailing spaces.  The last four fields are converted
    to integers; any value that is not a plain run of digits (such as the
    "s" marker) becomes 0.

    Args:
        line: one comma-separated data line (without the trailing newline).

    Returns:
        A 6-item list: two strings followed by four integers.

    Raises:
        ValueError: if the line has fewer than 6 comma-separated fields.
    """
    fields = line.split(',')
    if len(fields) < 6:
        # Too short to hold an id, a name and four numbers; refusing here
        # avoids silently shifting columns into the wrong positions.
        raise ValueError('expected at least 6 fields, got %d: %r'
                         % (len(fields), line))

    if len(fields) != 6:
        # The name contained commas: everything between the first field and
        # the last four belongs to it.
        school = ','.join(fields[1:-4]).strip()
        # Remove the double-quotes from the beginning and end, if present.
        if len(school) >= 2 and school[0] == '"' and school[-1] == '"':
            school = school[1:-1]
        # Keep the stripped result (str.strip returns a new string).
        school = school.strip()
        fields = fields[0:1] + [school] + fields[-4:]
    else:
        fields[1] = fields[1].strip()

    # Convert the last 4 fields into integers if possible.
    for i in range(2, 6):
        value = fields[i].strip()
        fields[i] = int(value) if value.isdigit() else 0
    return fields


def ReadSAT(filename):
    """Read a SAT data file and return its rows as converted field lists.

    The first line (column headers) is dropped and blank lines are skipped.
    Every remaining line is passed through ConvertSATLine.  Works under both
    Python 2 and Python 3 (the removed 'rU' open mode is not used).

    Args:
        filename: path of the comma-separated data file.

    Returns:
        A list of 6-item lists, or [] if the file cannot be read.
    """
    try:
        with open(filename, 'r') as f:
            text = f.read()
    except (IOError, OSError, UnicodeError):
        return []

    # Normalise every newline convention by hand so the result does not
    # depend on the interpreter version or the platform.
    lines = text.replace('\r\n', '\n').replace('\r', '\n').split('\n')

    # lines[1:] removes the column headers; blank lines carry no data.
    return [ConvertSATLine(line) for line in lines[1:] if line.strip()]


# ------------------------------------------------------
# Ex. 2


def HighLow(filename):
    """Print the schools with the lowest and the highest math score.

    Rows come from ReadSAT(filename); index 1 is the school name and index 4
    the math score.  A score of 0 means "no score" and is ignored.  On ties
    the first school encountered is reported.

    Args:
        filename: path of the SAT data file to read.
    """
    # Use the file the caller asked for rather than a hard-coded name.
    schools = ReadSAT(filename)
    low_math = 1000  # starting bound: only scores below it can become the minimum
    high_math = 0
    low_school = high_school = ''
    for aschool in schools:
        math_score = aschool[4]
        if low_math > math_score > 0:
            low_math = math_score
            low_school = aschool[1]
        if math_score > high_math:
            high_math = math_score
            high_school = aschool[1]
    # Single parenthesised argument: valid as both a Python 2 print
    # statement and a Python 3 print() call.
    print('Lowest in math: %s with %d' % (low_school, low_math))
    print('Highest in math: %s with %d' % (high_school, high_math))

# Answers (for 'SAT-2010.csv'):
# Lowest in math: Brooklyn High Sch for Leadership Community Svc with 281
# Highest in math: STUYVESANT HIGH SCHOOL with 735

# ---------------------------------------------------------
# Ex 3


def BigStats(filename):
    """Return the total head-count and the weighted average scores.

    Only rows with a positive math score (index 4) are counted.  Each score
    column (indices 3, 4, 5) is weighted by the row's number column
    (index 2).

    Args:
        filename: path of the SAT data file to read.

    Returns:
        [total_number, avg_reading, avg_math, avg_writing] as integers
        (averages are floor-divided).  [0, 0, 0, 0] when no row qualifies,
        e.g. when the file is missing or empty.
    """
    # Use the file the caller asked for rather than a hard-coded name.
    schools = ReadSAT(filename)
    totals = [0] * 4
    for aschool in schools:
        if aschool[4] > 0:
            number = aschool[2]
            totals[0] += number
            totals[1] += number * aschool[3]
            totals[2] += number * aschool[4]
            totals[3] += number * aschool[5]
    if totals[0] == 0:
        # Nothing to average; avoid dividing by zero.
        return [0, 0, 0, 0]
    # '//' keeps the integer results on Python 3 as well as Python 2.
    return [totals[0]] + [total // totals[0] for total in totals[1:]]

# Answers (for 'SAT-2010.csv'):
# [40012, 439, 462, 434]

# -----------------------------------------------------------
# Ex. 4


def School2Dict(line):
    """Convert one line of the SAT data into a dictionary.

    The line is split with ConvertSATLine (which handles double-quoted
    names).  Any field that came back as the integer 0 is stored as the
    string 's' instead; note that a genuine 0 in the file is therefore also
    reported as 's'.

    Args:
        line: one comma-separated data line.

    Returns:
        A dict with the keys 'DBN', 'Name', 'Number', 'Reading', 'Math' and
        'Writing'.
    """
    # Split it into fields, with the correct handling of double-quotes.
    fields = ConvertSATLine(line)
    the_keys = ['DBN', 'Name', 'Number', 'Reading', 'Math', 'Writing']
    dct = {}
    for i, key in enumerate(the_keys):
        dct[key] = 's' if fields[i] == 0 else fields[i]
    return dct


# ---------------------------------------------------------------
# Ex. 5


def _read_data_lines(filename):
    """Return the non-blank lines after the header, or None if unreadable."""
    try:
        with open(filename, 'r') as f:
            text = f.read()
    except (IOError, OSError, UnicodeError):
        return None
    # Normalise every newline convention by hand so the result does not
    # depend on the interpreter version or the platform.
    lines = text.replace('\r\n', '\n').replace('\r', '\n').split('\n')
    # lines[1:] drops the column headers; blank lines carry no data.
    return [line for line in lines[1:] if line.strip()]


def _prompt(message):
    """Read one line from the user under both Python 2 and Python 3."""
    try:
        reader = raw_input  # Python 2
    except NameError:
        reader = input  # Python 3
    return reader(message)


def _matches(fields, search):
    """Tell whether either of the first two columns starts with `search`.

    Rows with fewer than three columns never match, because the report line
    needs all three.
    """
    if len(fields) < 3:
        return False
    return (fields[0].upper().startswith(search) or
            fields[1].upper().startswith(search))


def MakeMaster(filename):
    """Build a dictionary of school dictionaries keyed by 'DBN'.

    Every non-blank data line of the file is converted with School2Dict and
    stored under its 'DBN' value (a later line with the same 'DBN' replaces
    an earlier one).

    Args:
        filename: path of the SAT data file to read.

    Returns:
        A dict mapping each 'DBN' to its school dict, or {} if the file
        cannot be read.
    """
    lines = _read_data_lines(filename)
    if lines is None:
        return {}
    master = {}
    for line in lines:
        d = School2Dict(line)
        master[d['DBN']] = d
    return master


# ------------------------------------------------------------------
# Ex 6


def NameQueryHelper(filename):
    """Read a comma-separated file into a list of field lists.

    The first line (column headers) is dropped and blank lines are skipped.
    Each remaining line is split on commas without any further conversion.

    Args:
        filename: path of the file to read.

    Returns:
        A list of lists of strings, or None (after printing a message) if
        the file cannot be read.
    """
    lines = _read_data_lines(filename)
    if lines is None:
        print('Cannot read ' + filename)
        return None
    return [line.split(',') for line in lines]


def NameQuery(filename):
    """Interactively report every row whose first or second column starts
    with the text typed by the user (case-insensitive).

    Keeps prompting until an empty answer is entered.  Returns immediately
    if the file cannot be read.

    Args:
        filename: path of the file to query.
    """
    lst = NameQueryHelper(filename)
    if lst is None:
        # The helper has already reported the problem.
        return
    while True:
        search = _prompt('\nReport on what name(s)? ').strip().upper()
        if search == '':
            print("OK, that's it.")
            return
        found_any = False
        for fields in lst:
            if _matches(fields, search):
                print('%s %s has a rating of %s'
                      % (fields[0], fields[1], fields[2]))
                found_any = True
        if not found_any:
            print('No results')


# Ex 6a


def compare(a, b):
    """Old-style comparison function for two rows.

    Rows are ordered by the integer value of column 2, highest first; rows
    with equal values are ordered by column 0, case-insensitively,
    ascending.

    Args:
        a, b: rows (sequences of strings) with at least three columns.

    Returns:
        A negative number if `a` sorts first, a positive number if `b`
        sorts first, and 0 if they compare equal.

    Raises:
        ValueError: if column 2 of either row is not an integer.
    """
    ia = int(a[2])
    ib = int(b[2])
    if ia != ib:
        return ib - ia
    name_a = a[0].upper()
    name_b = b[0].upper()
    if name_a > name_b:
        return 1
    if name_a < name_b:
        return -1
    # Equal keys must compare equal, otherwise the ordering is inconsistent.
    return 0


def NameQuerySorted(filename):
    """Same as NameQuery, but the matches are printed in `compare` order.

    Keeps prompting until an empty answer is entered.  Returns immediately
    if the file cannot be read.

    Args:
        filename: path of the file to query.
    """
    # Local import: the file has no top-level import section.
    import functools

    lst = NameQueryHelper(filename)
    if lst is None:
        # The helper has already reported the problem.
        return
    while True:
        search = _prompt('\nReport on what name(s)? ').strip().upper()
        if search == '':
            print("Ok, that's it.")
            return
        found_lst = [fields for fields in lst if _matches(fields, search)]
        if not found_lst:
            print('No results')
        else:
            # cmp_to_key works on Python 2.7 and 3; sort(cmp=...) is
            # Python 2 only.
            found_lst.sort(key=functools.cmp_to_key(compare))
            for fields in found_lst:
                print('%s %s has a rating of %s'
                      % (fields[0], fields[1], fields[2]))