#!/usr/bin/python
#
# This script takes a single input file describing degree and job data.
# It writes, as output, a PDF plot of the data.  The output file name
# is the input file name, with .txt replaced with .pdf.

# input file format has 5 columns
#  Column  0: field
#  Column  1: bachelor's degrees
#  Column  2: master's degrees
#  Column  3: doctorate degrees
#  Column  4: projected openings per year, 2008-2018

import math
from pylab import *
import sys
import os
import locale
import matplotlib.font_manager

# The numbers drawn above the bars use en_US digit grouping (thousands
# separators).  The bare name 'en_US' is not installed on every system --
# some only provide the UTF-8 variants -- so try the common spellings in
# turn and fail only if none of them can be selected.
for localeName in ('en_US', 'en_US.UTF-8', 'en_US.utf8'):
    try:
        locale.setlocale(locale.LC_ALL, localeName)
    except locale.Error:
        continue
    break
else:
    raise locale.Error("no en_US locale is available on this system")


# Exactly one command-line argument (the input file name) is required;
# otherwise print a usage line and stop.
if len(sys.argv) != 2:
    print("usage: %s input" % sys.argv[0])
    sys.exit(-1)


# Not read anywhere else in the visible script; the name is kept defined.
xcoords = []

# Clear the current figure, select figure 1 and size it to 7 x 5 inches.
clf()
fig = figure(1)
fig.set_size_inches(7, 5)

# Subplot margins as fractions of the figure: the plot area starts half-way
# up the figure (bottom=0.5) and ends at 95% of its height.
subplots_adjust(top=0.95, bottom=0.5, left=0.15, right=0.99)

# The single argument must name an existing file whose name ends in ".txt".
inputName = sys.argv[1]
if not inputName.endswith(".txt"):
    print("Input %s does not end with .txt, aborting." % inputName)
    sys.exit(-1)

if not os.path.exists(inputName):
    print("File %s does not exist, aborting." % inputName)
    sys.exit(-2)

# Swap the trailing ".txt" for ".pdf" to get the output file name.
outputName = inputName[:-len(".txt")] + ".pdf"

# Prefixes recognised in column 0 and the label used for each.  The first
# matching prefix wins; any other field name is used unchanged.  The spaces
# around "Engineering" are part of the label text and are kept as-is.
fieldLabels = (
    ('Bio', "Biological Sciences"),
    ('CS', "Computer Science"),
    ('Eng', "  Engineering  "),
    ('Phy', "Physical Sciences"),
)

# Read the input into `data`, a list of
#   (field label, bachelor's, master's, doctorates, job openings)
# tuples, one per non-blank line.  The `with` block closes the file on every
# exit path, including the sys.exit() calls for malformed lines.
data = []
with open(inputName, 'r') as inputFile:
    for line in inputFile:
        # split() with no argument already discards the trailing newline and
        # any other surrounding whitespace.
        words = line.split()
        if not words:
            continue  # blank line
        if len(words) != 5:
            print("input error: %s" % (words,))
            print("  - expecting 5 tokens")
            sys.exit(-1)

        field = words[0]
        for prefix, fullName in fieldLabels:
            if field.startswith(prefix):
                field = fullName
                break

        try:
            counts = [int(word) for word in words[1:]]
        except ValueError:
            # Report non-numeric columns the same way as a wrong token
            # count instead of dying with a traceback.
            print("input error: %s" % (words,))
            print("  - expecting integers in columns 1-4")
            sys.exit(-1)

        datum = (field, counts[0], counts[1], counts[2], counts[3])
        data.append(datum)


# Split the parsed rows into one list per plotted series.
names = [row[0] for row in data]
bachelors = [row[1] for row in data]
masters = [row[2] for row in data]
doctorate = [row[3] for row in data]
jobs = [row[4] for row in data]

# A stacked bar is drawn as separate boxes, each starting where the one
# beneath it ends: the master's box starts at the bachelor's count and the
# doctorate box starts at bachelor's plus master's.
mbottom = [row[1] for row in data]
dbottom = [row[1] + row[2] for row in data]
  

# One empty x tick label per field.
xtiks = [''] * len(names)

# Each field owns a slot 2 units wide on the x axis.  Within slot k the
# degree bar is placed at 2k + 0.125 and the job-openings bar at 2k + 0.875.
xcoordsAll = arange(0.125, len(names) * 2.0, 2.0)
xcoordsSubset = arange(0.875, len(names) * 2.0, 2.0)

# x tick positions: 0.1 to the right of each degree bar's x coordinate.
xlabelcoords = [x + .1 for x in xcoordsAll]

# y tick positions every 50,000 from 0 to 250,000, and their labels (the
# tick at 0 gets an empty label).
ycoords = list(range(0, 300000, 50000))
ytiks = ['', '50,000', '100,000', '150,000', '200,000', '250,000']



# Every bar series as (x positions, heights, keyword arguments), listed in
# the order it is drawn: the three stacked degree segments (doctorate,
# master's, bachelor's), then the job-openings bar beside each stack.
barSeries = (
    (xcoordsAll, doctorate,
     dict(color='#000000', bottom=dbottom, label="Doctorate")),
    (xcoordsAll, masters,
     dict(color='#808080', bottom=mbottom, label="Master's")),
    (xcoordsAll, bachelors,
     dict(color='#FFFFFF', label="Bachelor's")),
    (xcoordsSubset, jobs,
     dict(color='#F03030', label="Job openings")),
)

# All bars share the same width of 0.75.
for barX, barHeights, barStyle in barSeries:
    bar(barX, barHeights, 0.75, **barStyle)

def _drawBarLabel(barX, value):
    """Write `value`, with locale digit grouping, just above a bar.

    barX is the x coordinate the bar was drawn at; value is the height at
    the top of the bar (for a stacked bar, the total of all segments).
    The text is centered horizontally on barX + 0.35 and its bottom edge
    sits 2000 data units above the bar.
    """
    # locale.format_string() replaces locale.format(), which is deprecated
    # and no longer exists in Python 3.12+.
    label = locale.format_string("%d", value, grouping=True)
    text(barX + 0.35, value + 2000, label, horizontalalignment='center',
         verticalalignment='bottom', fontsize=7)


# Draw the numbers on top of the bars: the total number of degrees over the
# stacked bar, and the job openings over the bar beside it.
for degreeX, jobX, nDoc, nMas, nBach, nJobs in zip(
        xcoordsAll, xcoordsSubset, doctorate, masters, bachelors, jobs):
    _drawBarLabel(degreeX, nDoc + nMas + nBach)
    _drawBarLabel(jobX, nJobs)
  
# Configure the axes: tick placement, axis limits and tick labels.
# NOTE(review): subplot(111) is called after the bars were drawn; presumably
# it returns the axes they were drawn on -- confirm for the matplotlib
# version in use.
ax1 = subplot(111)
# Right-hand x limit: two units per field plus a 0.2 margin.
xlen = (2 * len(names)) + 0.2
# Ticks on the bottom and left sides only.
ax1.xaxis.tick_bottom()
ax1.yaxis.tick_left()
ax1.set_axisbelow(True)
# Axis limits as [xmin, xmax, ymin, ymax].
# NOTE(review): ymax here is 200000 while ycoords also contains a tick at
# 250000 -- confirm which upper bound is intended.
ax1.axis([-0.7, xlen, 0, 200000])
yticks(ycoords, ytiks, fontsize=7)
# xtiks holds only empty strings, so the x ticks carry no text here; the
# field names are written separately with text() further down.
xticks(xlabelcoords, xtiks, fontsize=7, horizontalalignment='left')

# Legend in 7-point type; its upper-left corner is anchored at (0.05, 0.95).
legendFont = matplotlib.font_manager.FontProperties(size=7)
ax1.legend(loc='upper left', bbox_to_anchor=(0.05, 0.95), prop=legendFont)

# Write each field name at y = -10000, starting 0.2 units into the field's
# 2-unit slot, then save the finished figure to the output file.
tableX = arange(0.2, len(names) * 2.0, 2.0)
for labelX, fieldName in zip(tableX, names):
    text(labelX, -10000, fieldName, fontsize=7,
         verticalalignment='center', horizontalalignment='left')


savefig(outputName)


 
