import os
# Find out which directory I'm currently connected to:
print (os.getcwd())
# Get a list of the files in the current directory:
print (os.listdir())
# Get a list of files in another directory called "Downloads"
print (os.listdir('Downloads'))
# Change to a different directory:
os.chdir('Downloads')
The output of the function should be a list of lists. Each element of the primary list will a single school's complete information, as a little list.
So suppose that the SAT file had just the following two lines in it:
01M450,EAST SIDE COMMUNITY SCHOOL,70,377,402,370
02M303,"FACING HISTORY SCHOOL, THE",76,353,358,340
... then the output of SAT_Processor("2012_SAT_Results.csv") would be
[ ['01M450', 'EAST SIDE COMMUNITY SCHOOL', 70, 377, 402, 370],
['02M303', 'FACING HISTORY SCHOOL, THE', 76, 353, 358, 340] ]
Notice that the last 4 strings had been turned into numbers that we can do arithmetic with, and the school with the comma in its name was preserved as a single string.
def SAT_Processing(fname):
# check if we can read the file
try:
f = open(fname,'r')
except:
return 'Cannot open '+fname
# read all data into a string and split that string into lines
s = f.read()
lines = s.split('\n')
f.close()
# process all lines except first and last
lines2 = lines[1:-1]
answer = []
# now process each school that has numeric scores
for aschool in lines2:
# only bother to process a school if its last character is not an "s"
if aschool[-1] != 's':
# split the school's line into elements
elements = aschool.split(',')
# save the school ID, and make a little list out of it
schoolID = [elements[0]]
# get the last 4 elements
last4 = elements[-4:]
# convert those strings into integers
for i in range(4):
last4[i] = int(last4[i])
# now try to work on the school name...
# if there are no double quotes in the school line, then it's easy
if '"' not in aschool:
school_name = [elements[1]]
else:
# find the position of first double-quote and the last double-quote
first = aschool.find('"')
last = aschool.rfind('"')
school_name = [aschool[first+1:last]]
# OK, we now have 3 little lists, let's merge them together
aschool_list = schoolID + school_name + last4
# Done. Add it into the answer list
answer.append(aschool_list)
return answer
# let's try it out and see a few of the results...
a = SAT_Processing('2012_SAT_Results.csv')
a[1:6]
def PrintSomeResults(filename):
schools_list = SAT_Processing(filename)
# check for a file-reading problem
if 'Cannot open' in schools_list:
print (schools_list)
return
# average by school
math_sum = 0
for aschool_list in schools_list:
math_sum += aschool_list[4]
print(math_sum/len(schools_list))
print('Math average by school is %0.1f' % (math_sum/len(schools_list)))
# average by student
sum_students = 0
sum_math_score_mult = 0
for aschool_list in schools_list:
sum_students += aschool_list[2]
sum_math_score_mult += aschool_list[2] * aschool_list[4]
print ('Math average by student: %0.1f' % (sum_math_score_mult/sum_students))
# school with the maximum math score
max_school = schools_list[0]
max_math = max_school[4]
for aschool in schools_list:
if aschool[4] > max_math:
max_math = aschool[4]
max_school = aschool
print ('School with max math: ', max_school)
PrintSomeResults('2012_SAT_Results.csv')