/*
   Functions to read molecular information from standard file formats.
   NOTE that these functions were extracted from a larger program and there
   may be unexpected dependencies between this code and things that were not
   brought forward from the program.  Caveat emptor -- but perhaps this is
   better than nothing!
   
   One of these is a highly simplified MDL molecular specification
   file.  Reference:
      CTFile formats, MDL Information Systems, Inc.
      San Leandro, CA 94577, 1999, http://www.mdli.com/

   The other is the Protien Data Bank (PDB) file format.  Reference:
      Protein Data Bank Contents Guide: Atomic Coordinate Entry Format 
      Description, Protein Data Bank, Brookhaven National Laboratory,
      Upton, NY 11973-5000 USA, http://www.pdb.bnl.gov
      
   Proof of concept for chemistry project for intro graphics course
   
   (c) 1999, Steve Cunningham
*/

// file declaration for molecular description input file
FILE	*fp;
int		argc;
char	**argv;

// data for storing molecular description

int		natoms, nbonds;
typedef struct atomdata {
					float x, y, z;
					char  name[5];
					int   colindex;
					} atomdata;
atomdata atoms[AMAX];
typedef struct bonddata{
					int first, second, bondtype;
					} bonddata;
bonddata bonds[BMAX];

char *fname, *mainName, *ext;

// function prototypes
char *getFileName(void);
void separateName(char *);
void readMolFile(char *);
void readPdbFile(char *);
int  lookUpName(int);

char *getFileName(void)
{
	char *myFileName = "";
	
	printf("enter name of molecule file to be displayed > ");
	scanf("%s",myFileName);
	return(myFileName);
}

void separateName(char *fname)
{
	int i=0, j;
	
	mainName = (char *)malloc(20);
	ext = (char *)malloc(4);
	//	loop to find the last period in the file name; save at value j
	//	and separate the file name to extract the extension, which is
	//	used to determine which file reading function is used
	for (i=0; fname[i]!='\0'; i++)
		if (fname[i]=='.')
			j = i;
	for (i=0; i<j; i++)
		mainName[i]=fname[i];
	mainName[j]='\0';
	for (i=0; i<3; i++)
		ext[i]=fname[i+j+1];
	ext[3]='\0';
	return;
}

void readMolFile(char *fname)
{
	int i,j;
	char ignore=' ', myLine[80];

//	read data from file for the model
	if ( (fp = fopen(fname, "r")) == NULL )
		{
			fprintf(stderr,"Unable to open file!!!\n" );
			exit(1);
		}
//	begin by skipping the three-line header block
	for (i=0; i<3; i++) {
		fgets(myLine, 80, fp);
	}
//	now read the counts followed by the atom and bond data
	fscanf(fp,"%d %d%[^\n]\n", &natoms, &nbonds, myLine);
	for (i=0; i<natoms; i++)	//	read atom location data and get table index
		{
			fscanf(fp, "%f %f %f", &atoms[i].x, &atoms[i].y, &atoms[i].z );
			fscanf(fp, "%c", &ignore);
			for (j=0; j<4; j++)
				fscanf(fp,"%c", &atoms[i].name[j]);
			atoms[i].name[4]='\0';
			atoms[i].colindex = lookUpName(i);
			fgets(myLine, 80, fp);
		}
	for (i=0; i<nbonds; i++)	//	get bond data
		{
			fscanf(fp,"%d %d %d", &bonds[i].first, &bonds[i].second,
				&bonds[i].bondtype);
			fgets(myLine, 80, fp);
		}
	fclose(fp);
}

void readPdbFile(char *fname)
{
	int i=0,j=0, k, m, n=0, atomflag = 0, conectflag = 0;
	int source, from, destination, to;
	float x, y, z;
	char ignore=' ', *myLine, *token, *atomName;

	for (k=0; k<BMAX; k++)
		bonds[k].bondtype = 0;
	token = (char *)malloc(8);
	myLine = (char *)malloc(80);
	atomName = (char *)malloc(3);
//	read data from file for the model
	if ( (fp = fopen(fname, "r")) == NULL )
		{
			fprintf(stderr,"Unable to open file!!!\n" );
			exit(1);
		}
	while (fgets(token,7,fp) != NULL) {
		if (strcmp(token,"ATOM  ")==0) {
			fgets(myLine,7,fp);
			fgets(atomName,3,fp);
			fgets(myLine,15,fp);
			fscanf(fp,"%f %f %f",&x,&y,&z);
			// now change atomName into standard form
			// extract first letter of atomName
			k = 0;
			while (atomName[k]==' ') k++;
			atoms[i].name[0] = atomName[k];
			k++;
			if (atomName[k] != ' ') {
				if ((atomName[k] >='a') && (atomName[k] <= 'z'))
					atoms[i].name[1] = atomName[k];
				else 
					if ((atomName[k] >='A') && (atomName[k] <= 'Z'))
						atoms[i].name[1] = tolower(atomName[k]);
					else atoms[i].name[1] = ' ';
				}
			atoms[i].name[2] = ' ';
			atoms[i].name[3] = ' ';
			atoms[i].name[4] = '\0';
			// now look up atom in the table and set data in atom list
			atoms[i].colindex = lookUpName(i);
			atoms[i].x = x;
			atoms[i].y = y;
			atoms[i].z = z;
			i++;
			}
/*
	NOTE: double bonds are not indicated in PDB files...
*/
		if (strcmp(token,"CONECT")==0){
			fgets(token,6,fp);
			source = atoi(token);
			k = 0;
			while (!feof(fp) && (k<4)) {
				fgets(token,6,fp);
				destination = atoi(token);
				if (destination != 0) {
					if (destination < source) {
						to = source;
						from = destination;
					}
					else {
						to = destination;
						from = source;
					}			
					for (m=0;m<n;m++) { // is this bond is already read?
						if ((bonds[m].first == from)&&(bonds[m].second == to)) {
							m = n+1;	// set flag to not enter the bond
							break;
						}
					}
					if (m == n) {
						bonds[n].first = from;
						bonds[n].second = to;
						bonds[n].bondtype++;
						n++;
					}
				}
				k++;
			}
			j++;
		}
		fgets(myLine,80,fp);
	}
	natoms = i;
	nbonds = n;
}

int lookUpName(int i)
{
//	return the index in the atomic tables for which the name in the
//	table matches the name in atoms[i]

	int j;
	
	for (j=0; j<ATSIZE; j++) {
		if (atomNames[j][0]==atoms[i].name[0] &&
		    atomNames[j][1]==atoms[i].name[1] )
				return(j);
		}
	return(-1);
}