#!/usr/bin/python
#
# This script takes a single input file describing educational data.
# It writes, as output to stdout, aggregate counts of degrees earned.

# input file format has 5 columns
#  Column  0: field
#  Column  1: degree
#  Column  2: N
#  Column  3: SUM
#  Column  4: mean

import math
import sys
import os


# Canonical field names, keyed by the first three letters of the raw value.
_FIELD_BY_PREFIX = {
    'Bio': "Biological Sciences",
    'Com': "Computer Science",
    'Eng': "Engineering",
    'Phy': "Physical Sciences",
}

# Canonical degree names, keyed by the first three letters of the raw value.
_DEGREE_BY_PREFIX = {
    'Bac': "Bachelor's",
    'Mas': "Master's",
    'Doc': "Doctorate",
}

# Report layout: (row label, canonical field name), in output order.
_REPORT_ROWS = (
    ("Phy", "Physical Sciences"),
    ("Bio", "Biological Sciences"),
    ("Eng", "Engineering"),
    ("CS", "Computer Science"),
)

# Report columns, in output order.
_REPORT_DEGREES = ("Bachelor's", "Master's", "Doctorate")


def _read_rows(lines):
    """Split an iterable of CSV lines into 5-tuples of raw (still quoted) tokens.

    Blank lines and the header line (first token is '"unitid"') are skipped.
    Any other line that does not split into exactly 5 comma-separated tokens
    prints an error and exits the process with status -1.
    """
    rows = []
    for line in lines:
        # str.rstrip returns a new string; the result has to be kept,
        # otherwise the newline stays glued to the last column.
        line = line.rstrip("\r\n")
        if not line.strip():
            # split(",") never returns an empty list, so blank lines must
            # be detected before splitting.
            continue
        words = line.split(",")
        if words[0] == '"unitid"':
            continue
        if len(words) != 5:
            print("input error: " + str(words))
            print("  - expecting 5 tokens")
            sys.exit(-1)
        rows.append(tuple(words))
    return rows


def _normalize(row):
    """Turn one raw row into a (field, degree, count) entry.

    Quotes are removed from each token.  Field (column 0) and degree
    (column 1) are mapped to canonical names by their first three letters
    and left as-is when the prefix is not recognised.  The count is
    column 3 converted with int(), which raises ValueError if the token
    is not an integer.
    """
    field = row[0].replace('"', '')
    field = _FIELD_BY_PREFIX.get(field[0:3], field)

    degree = row[1].replace('"', '')
    degree = _DEGREE_BY_PREFIX.get(degree[0:3], degree)

    count = int(row[3].replace('"', ''))
    return (field, degree, count)


def _aggregate(entries):
    """Sum counts per (field, degree) cell of the report.

    Entries whose field or degree is not part of the report are ignored.
    Returns a dict keyed by (field, degree) covering every report cell.
    """
    totals = {}
    for _label, field in _REPORT_ROWS:
        for degree in _REPORT_DEGREES:
            totals[(field, degree)] = 0

    for field, degree, count in entries:
        key = (field, degree)
        if key in totals:
            totals[key] += count
    return totals


def _format_report(totals):
    """Render the totals as tab-separated lines, one per field."""
    report = []
    for label, field in _REPORT_ROWS:
        cells = [str(totals[(field, degree)]) for degree in _REPORT_DEGREES]
        report.append("\t".join([label] + cells))
    return report


def main(argv=None):
    """Validate the command line, read the input file and print the report.

    argv defaults to sys.argv and must hold the program name plus exactly
    one argument: the path of an existing file ending in ".csv".
    Exits with -1 on a usage/extension/format error and -2 when the file
    does not exist.  All messages go to stdout.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 2:
        print("usage: " + argv[0] + " input")
        sys.exit(-1)

    inputName = argv[1]
    if not inputName.endswith(".csv"):
        print("Input " + inputName + " does not end with .csv, aborting.")
        sys.exit(-1)

    if not os.path.exists(inputName):
        print("File " + inputName + " does not exist, aborting.")
        sys.exit(-2)

    # The context manager closes the file even if parsing exits early.
    with open(inputName, 'r') as inputFile:
        rows = _read_rows(inputFile)

    processed = [_normalize(row) for row in rows]
    for line in _format_report(_aggregate(processed)):
        print(line)


if __name__ == "__main__":
    main(sys.argv)



 
