#!python
"""Flatten a racer-survey CSV export and print per-category averages.

Usage: python <this script> SURVEY_CSV

The first row of SURVEY_CSV is treated as a header and skipped.  For every
other non-blank row a condensed record is written to ``process1.txt`` and,
once all rows are read, a table of per-category averages is printed to
standard output.

Rows are split on bare commas; quoted fields containing commas or line
breaks are not supported.
"""
from sys import argv

OUTPUT_PATH = "process1.txt"

# Header and column order of the condensed per-respondent file.
ROW_HEADER = ("id,xc,st,dh,ul,numRaces,cat,catnum,raffles,seriesPts,"
              "distOfRace,distFromHome,family,racesPerMonth,age")
ROW_FIELDS = ("id", "xc", "st", "dh", "ul", "numraces", "cat", "catnum",
              "raffles", "seriespts", "distofrace", "distfromhome", "family",
              "rpm", "age")

# Header and column order of the per-category averages printed to stdout.
SUMMARY_HEADER = ("catNum,sizeCat,numRaces,raffles,seriesPts,distOfRace,"
                  "distFromHome,family,racesPerMonth")
AVERAGED_FIELDS = ("numraces", "raffles", "seriespts", "distofrace",
                   "distfromhome", "family", "rpm")

# Category label (spaces and dashes removed) -> category number.
catconv = {
    'BeginnerMale': 0,
    'BeginnerFemale': 1,
    'SportMale': 2,
    'SportFemale': 3,
    'ExpertMale': 4,
    'ExpertFemale': 5,
    'Semipro(maleonly)': 6,
    'ProMale': 7,
    'ProFemale': 8,
}


def aL(l):
    """Return the arithmetic mean of ``l`` as a float.

    An empty sequence yields ``nan`` rather than raising ZeroDivisionError,
    so a category without respondents no longer aborts the summary.
    """
    if not l:
        return float("nan")
    return 1. * sum(l) / len(l)


def read_records(path):
    """Return the non-blank data rows of the file at ``path``.

    CR, LF and CRLF row terminators are all accepted, independent of whether
    the runtime translates newlines while reading.  The first row (header)
    is dropped, as are blank rows such as the one produced by a trailing
    terminator.
    """
    with open(path) as src:
        text = src.read()
    rows = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")
    return [row for row in rows[1:] if row.strip()]


def _flag(cell):
    """Return 1 when ``cell`` holds any text, else 0."""
    return 1 if cell != "" else 0


def _count(cells, start, stop):
    """Parse the concatenation of ``cells[start:stop]`` as an int.

    The "0" prefix makes an all-blank answer parse as 0.
    """
    return int("0" + "".join(cells[start:stop]))


def parse_row(line):
    """Split one CSV row and return its fields as a dict.

    Raises:
        IndexError: the row has too few columns.
        ValueError: a numeric answer is not an integer, or the number of
            races (columns 8-13) is blank.
        KeyError: the category label is not a key of ``catconv``.
    """
    cells = line.split(",")
    # The category label is spread over columns 14-22; spaces and dashes are
    # removed so it matches the keys of catconv.
    cat = "".join(cells[14:23]).replace(" ", "").replace("-", "")
    return {
        "id": cells[0],
        "xc": _flag(cells[4]),   # Cross Country
        "st": _flag(cells[5]),   # Short Track
        "dh": _flag(cells[6]),   # Downhill
        "ul": _flag(cells[7]),   # Ultra
        # Deliberately no "0" prefix: a blank answer is an error here.
        "numraces": int("".join(cells[8:14])),
        "cat": cat,
        "catnum": catconv[cat],
        "raffles": _count(cells, 32, 37),
        "seriespts": _count(cells, 37, 42),
        "distofrace": _count(cells, 42, 47),
        "distfromhome": _count(cells, 47, 52),
        "family": _count(cells, 52, 57),
        # 1 = likes to race in this month, 0 = does not (12 columns).
        "months": [_flag(cells[idx]) for idx in range(57, 69)],
        "rpm": _count(cells, 69, 74),   # races per month
        # tor = TypesOfRacing (7 columns).
        "tor": [_flag(cells[idx]) for idx in range(74, 81)],
        "age": "".join(cells[81:91]),
    }


def format_row(rec):
    """Return the condensed comma-separated row for one parsed record."""
    # TODO: add TOR and MONTHS here
    return ",".join([str(rec[name]) for name in ROW_FIELDS])


def summary_line(catnum, records):
    """Return the averages row for category ``catnum``.

    Every value and every comma is separated by a single space and the row
    ends with a comma.  This reproduces the layout the script has always
    printed, so existing consumers of the output keep working.
    """
    parts = [str(catnum), ",", str(len(records)), ","]
    for name in AVERAGED_FIELDS:
        parts.append(str(aL([rec[name] for rec in records])))
        parts.append(",")
    return " ".join(parts)


def main(args):
    """Run the conversion; ``args`` has the layout of ``sys.argv``."""
    if len(args) < 2:
        raise SystemExit("usage: python <script> SURVEY_CSV")

    rows = read_records(args[1])

    # One bucket of parsed records per category number (catconv values are
    # 0 .. len(catconv) - 1).
    by_category = [[] for _ in range(len(catconv))]

    with open(OUTPUT_PATH, "w") as out:
        out.write(ROW_HEADER + "\r\n")
        for line in rows:
            rec = parse_row(line)
            row = format_row(rec)
            out.write(row + "\r\n")
            # debug: echo rows of category 8 (ProFemale).
            # NOTE(review): this goes to stdout ahead of the averages table;
            # confirm it is still wanted.
            if rec["catnum"] == 8:
                print(row)
            by_category[rec["catnum"]].append(rec)

    print(SUMMARY_HEADER)
    for catnum, members in enumerate(by_category):
        print(summary_line(catnum, members))


if __name__ == "__main__":
    main(argv)