###########################################################################
# 04-09-02
# Melanie Martin
# run_matrix.py
#
# Program to create linear correlation matrices by team, by mission 
# from the corpus: /home/uav/lsa/uavcorpus-fold.doc
#
# Command line:
# python run_matrix.py index
#
# This program reads the index file, which contains the beginning and ending
# document numbers for each mission of each team, and calls syn on the range 
# of documents.
#
# syn -S -d [range] -S -d [range] > cormatrixXXYY
#
# where [range] indicates the beginning_doc_#-ending_doc_#
#      e.g. range = 3230-3557 
# 
#
#
##########################################################################

import sys
import string
import re
import os


# Regular expression to find the desired line: four tab-separated numeric
# fields at the start of the line.  The first, second and fourth fields are
# captured as start, end and team; the third field is matched but not used.
# NOTE(review): the third field is presumably the mission number, and the
# file header describes outputs named cormatrixXXYY, yet only the team
# number is used in the output name below -- confirm whether successive
# lines for the same team are meant to overwrite one another.
regex = re.compile(r"^(?P<start>[0-9]+)\t(?P<end>[0-9]+)\t[0-9]+\t(?P<team>[0-9]+)")

# Directory that holds the syn executable.
lsapath = "/home/pfoltz/infoscale-lx/lsa/bin"


def build_command(line):
   """Return the syn shell command for one index line.

   line -- one line of text read from the index file.

   Returns the command string
      <lsapath>/syn -S -d START-END -S -d START-END >cormatrixTEAM
   when the line matches regex, or None when it does not, so that the
   caller can skip lines that are not index entries.
   """
   found = regex.search(line)
   if found is None:
      return None
   docrange = '%s-%s' % (found.group('start'), found.group('end'))
   return ('%s/syn -S -d %s -S -d %s >cormatrix%s'
           % (lsapath, docrange, docrange, found.group('team')))


def main(argv):
   """Run syn once for every matching line of the index file argv[1].

   argv -- command-line argument list; argv[1] is the index file name.

   Returns 0 after the whole file has been processed, or 1 when the
   argument is missing or the file cannot be opened.
   """
   # Open input transcript file.  Stop here on failure: there is nothing
   # to read, and continuing would only fail later on an undefined name.
   try:
      transcript = open(argv[1], 'r')
   except IndexError:
      sys.stderr.write("usage is wrong\n")
      return 1
   except IOError:
      sys.stderr.write("error opening " + argv[1] + "\n")
      return 1

   try:
      for line in transcript:
         cmdline = build_command(line)
         # Only lines that match the index format produce a command;
         # anything else (blank lines, headers) is skipped instead of
         # re-running the command built for an earlier line.
         if cmdline is not None:
            os.system(cmdline)
   finally:
      # Close the file even if something in the loop raises.
      transcript.close()
   return 0


if __name__ == "__main__":
   sys.exit(main(sys.argv))
