/****************************************************************
depthscope.c: Calculating depth and scope of patent references for
the patents of one company over a given period of years.  See
http://www.stanford.edu/~rkatila/software.html for details.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2
(http://www.gnu.org/licenses/gpl.txt) as published by the Free
Software Foundation. This program is distributed in the hope that it
will be useful, but without any warranty, without even the implied
warranty of merchantability or fitness for a particular purpose.  See
the GNU General Public License for more details.

Copyright (c) 1999-2005 Riitta Katila and Risto Miikkulainen
v1.0  9/29/1999 (research version) risto
v2.0 11/19/2005 (public release)   risto

****************************************************************/

#include <stdio.h>

/************ global constants and variables *******************/

/* data definitions */
#define FIRSTYEAR 1974	/* first year included in the sample */
#define LASTYEAR 1999	/* last year included in the sample */
#define MAXYEARS LASTYEAR-FIRSTYEAR+1	/* number of years in the table */
#define NPREYEARS 5     /* number of previous years included in d/s calcs */

/* string sizes; simply make these large enough */
#define MAXFILENAMEL 100/* max length of file name strings */
#define MAXPATREFL 15	/* max length of patent ref (#+auth) strings */

/* convenience constants */
#define UNIQUE 1	/* a list of unique items is formed */
#define NONUNIQUE 0	/* items may be repeated in the list */
#define ADD 1		/* add new refs to the tree */
#define NOADD 0		/* do not add new refs to the tree */
#define TRUE 1
#define FALSE 0
#define EXIT_NORMAL 0	/* exit codes */
#define EXIT_SIZE_ERROR 1
#define EXIT_DATA_ERROR 2
#define EXIT_FILE_ERROR 3

/* output selections */
#define DEBUG FALSE     /* TRUE=verbose output for debugging, FALSE=not */
#define DSCOL "Year  npatents    depth    scope"
#define UDF "."		/* symbol for undefined depth and scope values */

/* __P needs to be defined for cygwin; the undef avoids getting a warning
   about redefining it on other systems */
#undef __P
#define __P(x)x

/* input and output file names */
char inpfile[MAXFILENAMEL + 1], outfile[MAXFILENAMEL + 1];


/*********************** data structures ***********************/

/* patent label strings, stored in a tree. Each string is stored only
   once, and patent data structures just point to these strings. They
   can be more efficiently accessed in a tree than e.g. a list.. */
typedef struct REFSTRINGSTRUCT
  {
    char patref[MAXPATREFL];
    struct REFSTRINGSTRUCT * leftref, * rightref;
  }
REFSTRINGSTRUCT;

/* elements in the reference vectors (stored as ordered trees for
   efficiency */
typedef struct REFSTRUCT
  {
    char * patref;	/* points to the patent label string tree */
    struct REFSTRUCT * leftref, * rightref;
  }
REFSTRUCT;

/* data for one patent */
typedef struct PATENTSTRUCT
  {
    char * patid;
    int year;
    REFSTRUCT * references;
  }
PATENTSTRUCT;

/* patent reference vectors, counts, and depth and scope for one year */
typedef struct PATREFVECTORSTRUCT
  {
    REFSTRUCT
      *patids,		/* patent IDs this year, unique */
      *refs,		/* refs to patents this year, unique */
      *patidpre,	/* patent IDs last NPREYEARS, unique */
      *refspre;	        /* refs to patents last NPREYEARS, nonunique */
    
    int
      npatids,		/* number of patents this year */
      nrefs,		/* number of refs */
      npatidpre,	/* number of patents last NPREYEARS */
      nrefspre;	        /* number of refspre */

    double
      depth,		/* depth */
      scope;		/* scope */
}
PATREFVECTORSTRUCT;



/***********************  function prototypes  ****************************/

static void read_patentdata __P((int *npatents,
				 REFSTRINGSTRUCT * * refstringtree,
				 int * nrefstringtree, int * nrefsptr,
				 PATREFVECTORSTRUCT pv[MAXYEARS]));
static void read_refs __P ((FILE *fp, REFSTRUCT * *references,
			    REFSTRINGSTRUCT * * refstringtree,
			    int * nrefstringtree));
static void setup_patdata __P ((PATENTSTRUCT * patent,int * nrefsptr,
				PATREFVECTORSTRUCT pv[MAXYEARS]));
static void collect_ids __P ((REFSTRUCT * * idtree, int * nidtree,
			      int * nrefsptr, char curpatid[]));
static void collect_refs __P ((REFSTRUCT * * reftree, int * nreftree,
			       int * nrefsptr, REFSTRUCT * curpatrefs,
			       int unique));
static void newpat __P ((char patid[], PATREFVECTORSTRUCT pv[MAXYEARS],
			 int * occid));
static int check_idtree __P ((REFSTRUCT * * treeptr, int * ntree,
			      int *nrefsptr, char curpat[], int unique,
			      int add));
static void print_parameters __P((char programname[], int npatents,
				  int nrefstringtree, int nrefs));
static REFSTRUCT * read_reference __P ((FILE *fp,
					REFSTRINGSTRUCT * * refstringtree,
					int * nrefstringtree));
static void depth_scope __P ((PATREFVECTORSTRUCT pv[MAXYEARS],
			      char outfile[]));
static void depth __P ((REFSTRUCT * tree1, REFSTRUCT * tree2,
			double * countdepth, int ntree1));
static void scope2 __P ((REFSTRUCT * tree1ref,	REFSTRUCT * tree2,
			 REFSTRUCT * tree3, double * countscope, int ntree1));
static void init_pv __P ((PATREFVECTORSTRUCT pv[MAXYEARS]));
static void freereferences __P ((REFSTRUCT * references));
static char * add_if_not_in_stringtree __P ((REFSTRINGSTRUCT * * treeptr,
					     int * ntree, char curpat[]));
static void fgl __P ((FILE *fp));
static void printtree __P ((REFSTRUCT * tree));
static FILE * open_outputfile __P ((char filename[]));
static FILE * open_file __P ((char filename[], char mode[]));



/*********************** main program  ***********************/

main (argc, argv)
     int argc;				/* number of command line args */
     char * * argv;			/* command line args */
{
  FILE *fp, *fp2;

  PATREFVECTORSTRUCT pv[MAXYEARS];      /* refvectors and counts */
  REFSTRINGSTRUCT * refstringtree=NULL;	/* patent label strings */
  int npatents = 0,                     /* number of patents */
    nrefstringtree = 0,		        /* number of distinct labels */
    nrefs = 0;				/* number of patrefs stored in pv */

  /* get the command line arguments */
  sprintf(inpfile, "%s", argv[1]);
  if (argc > 2)
    sprintf(outfile, "%s", argv[2]);
  else
    sprintf(outfile, "results");

  /* set up zero counts and NULL pointers */
  init_pv (pv);

  /* read patent data one at a time and form pv vectors and counts */
  read_patentdata (&npatents, &refstringtree, &nrefstringtree, &nrefs, pv);

  /* print out the parameters */
  print_parameters(argv[0], npatents, nrefstringtree, nrefs);

  /* determine depth and scope and print out */
  depth_scope (pv, outfile);

  exit (EXIT_NORMAL);
}



/*********************** read in patent data  ***********************/

static void
read_patentdata (npatents, refstringtree, nrefstringtree, nrefsptr, pv)
     /* read the patent data and load into pv */
     int * npatents;	                /* total number of patent entries */
     REFSTRINGSTRUCT * * refstringtree;	/* ptr to start of pat labelstr tree */
     int * nrefstringtree,		/* number of label strings */
       *nrefsptr;			/* number of refs in pv */
     PATREFVECTORSTRUCT pv[MAXYEARS];   /* counts and depth/scope variables */
{
  FILE * fp;			        /* input data file */
  PATENTSTRUCT patent;			/* data for one patent entry */
  int pati;				/* patent entry counter */
  char patid[MAXPATREFL];		/* patent label */
  char c;				/* newline */
  int occid;			        /* already in patids */

  /* open the patent data file and read off the comments in it */
  fp = open_file(inpfile, "r");
  while ((c = fgetc(fp)) == '#')	/* comments start with '#' */
    fgl(fp);			        /* read the rest of the line */
  ungetc(c, fp);		        /* put back the last char */


  /* read patent records one at a time until end of file */
  for(pati = 0; fscanf(fp, "%s %d", patid, &patent.year) != EOF; pati++)
    {
      fgl(fp);			        /* read the rest of the line */

      if (patent.year >= FIRSTYEAR && patent.year <= LASTYEAR)
	/* this patent should be included in the sample */
	{
	  /* add current patent label into the tree of labels, save in patent*/
	  patent.patid =
	    add_if_not_in_stringtree (refstringtree, nrefstringtree, patid);
	  
	  /*  read all the references */
	  read_refs(fp, &patent.references, refstringtree, nrefstringtree);
	  
	  /* check if this patent is already included in patids */
	  newpat (patent.patid, pv, &occid);
	  if (!occid)
	    {
	      /* it is a new patent; load the patent data into pv */
	      setup_patdata(&patent, nrefsptr, pv);
	      (*npatents) ++;		/* update the number of entries */
	    }
	  else
	    {
	      /* it is a duplicate; ignore it */
	      fprintf (stderr, "Duplicate patent %s; ignored\n", patent.patid);
	    }
	  
	  freereferences(patent.references); /* free the data for this patent*/
	}
      else
	{
	  /* it is a patent outside the sample; ignore it */
	  fprintf (stderr, "Patent %s %d outside the %d-%d sample; ignored\n",
		   patid, patent.year, FIRSTYEAR, LASTYEAR);
	  fgl (fp);			/* read off the references */
	}
    }
  fclose (fp);
}



static void
read_refs(fp, references, refstringtree, nrefstringtree)
     /* read the ref list of one patent */
     FILE *fp;				/* patent data file */
     REFSTRUCT * *references;		/* list of refs */
     REFSTRINGSTRUCT * * refstringtree;	/* ptr to beginning of ref vector */
     int * nrefstringtree;		/* number of pat labels stored */
{
  REFSTRUCT
    * * lastreferenceptr,		/* pointer to last ref ptr in list */
    * newreference;			/* pointer to the newly created ref */
  
  /* read the reference list */
  *references = NULL;
  lastreferenceptr = references;

  /* read the first reference */
  for (newreference = read_reference (fp, refstringtree, nrefstringtree);
       newreference != NULL;	/* while it was successful */
       newreference = read_reference (fp, refstringtree, nrefstringtree))
    /* read another reference */
    {
      * lastreferenceptr = newreference;
      lastreferenceptr = &newreference->leftref; /* only leftref used (list) */
    }
}


static void
newpat (patid, pv, occid)
     /* check whether this patid is already in patids */
     char patid[];
     PATREFVECTORSTRUCT pv[MAXYEARS];
     int * occid;		        /* found in patids */
{
  int yeari;
  for (yeari = 0; yeari < MAXYEARS; yeari++)
    {
      /* check patids */
      *occid = check_idtree(&pv[yeari].patids, 0, 0, patid, UNIQUE, NOADD);
      if (*occid)
	break;				/* exit as soon as one found */
    }
}


static void
setup_patdata (patent, nrefsptr, pv)
     /* load patent data into those locations in pv where it is needed */
     PATENTSTRUCT * patent;		/* data for one patent entry */
     int *nrefsptr;			/* number of refs in pv */
     PATREFVECTORSTRUCT pv[MAXYEARS];   /* counts  */
{
  int 
    year,				/* current year */
    yeari;				/* pre year index */

  /* figure out the patent year */
  year = patent->year - FIRSTYEAR;

  /* add to year patid and reference vectors */
  collect_ids(&pv[year].patids, &pv[year].npatids,
	      nrefsptr, patent->patid);
  collect_refs(&pv[year].refs, &pv[year].nrefs,
	       nrefsptr, patent->references, UNIQUE);

  /* add to the pre patent id vectors */
  for (yeari = year + 1; yeari <= year + NPREYEARS; yeari++)
    if (yeari >=0 && yeari < MAXYEARS)
      collect_ids(&pv[yeari].patidpre, &pv[yeari].npatidpre,
		  nrefsptr, patent->patid);
  
  /* add to the pre reference vectors */
  for (yeari = year + 1; yeari <= year + NPREYEARS; yeari++)
    if (yeari >=0 && yeari < MAXYEARS)
      collect_refs(&pv[yeari].refspre, &pv[yeari].nrefspre,
		   nrefsptr, patent->references, NONUNIQUE);
}


static void
collect_ids(idtree, nidtree, nrefsptr, curpatid)
     /* add to the patent id vector */
     REFSTRUCT * * idtree;		/* ptr to start of vector */
     int * nidtree,			/* number of patids in vector */
         * nrefsptr;			/* total number of refs in pv */
     char curpatid[];			/* current patent label */
{
  /* check_idtree returns TRUE if the ref was already there and UNIQUE given */
  if (check_idtree (idtree, nidtree, nrefsptr, curpatid, UNIQUE, ADD))
    {
      /* should never get here; uniqueness checked before */
      fprintf (stderr, "Aborted: internal duplicate patent %s\n", curpatid);
      exit (EXIT_DATA_ERROR);
    }
}


static void
collect_refs(reftree, nreftree, nrefsptr, curpatrefs, unique)
     /* add to the reference vector */
     REFSTRUCT * * reftree,		/* ptr to start of vector */
        * curpatrefs;			/* list of patent data refs */
     int * nreftree,			/* number of elements in the vector */
         * nrefsptr;			/* number of total refs in pv */
     int unique;			/* whether items can be repeated */
{
  REFSTRUCT * curref;			/* traversal of patent refs */

  /* add the references to the tree if not there already */
  for (curref = curpatrefs;
       curref != NULL;
       curref = curref->leftref) /* traverse the list */
    check_idtree (reftree, nreftree, nrefsptr, curref->patref, unique, ADD);
}


static int
check_idtree (treeptr, ntree, nrefsptr, curpat, unique, add)
     /* check if the ref exists; if so, return TRUE;otherwise maybe add patid*/
     REFSTRUCT * * treeptr;		/* ptr to beginning of ref vector */
     int * ntree, *nrefsptr;		/* number of refs in the vector */
     char curpat[];			/* current ref label string */
     int unique;			/* whether items can be repeated */
     int add;				/* whether to add or just check */
{
  REFSTRUCT * * currefptr;		/* ref vector traversal */
  REFSTRUCT  * newptr;			/* ref vector traversal */
  int cmp = 0;				/* go left or right in the tree */

  /* find the ref if it already exists, and a place where it should go */
  for (currefptr = treeptr;		/* start from the beginning */
       *currefptr != NULL;		/* stop when at the end/leaves */
       currefptr = ((cmp>0) ? (&((*currefptr)->rightref)) : /* go right */
		              (&((*currefptr)->leftref))))  /* or left */
    {
      cmp = strcasecmp ((*currefptr)->patref, curpat); /* compare strings */
      if (!cmp && unique)		/* if it already exists, exit */
	return(TRUE);			/* ref was already in the tree */
    }

  /* ref was not found */
  if (add)				/* if new refs should be added */
    {
      newptr = (REFSTRUCT *) malloc (sizeof(REFSTRUCT));
      (*nrefsptr)++;			/* update the total pv ref count */
      newptr->leftref = NULL;
      newptr->rightref = NULL;
      *currefptr = newptr;		/* pointer from the previous element */
      newptr->patref = curpat;		/* store just the pointer */
      (*ntree)++;			/* update number of refs in the tree */
    }
  return(FALSE);			/* ref wasn't already in the tree */
}



/*************** print out parameters and patent counts ************/

static void
print_parameters(programname, npatents, nrefstringtree, nrefs)
     /* print info about this run at the top of the output file */
     char programname[];
     int  npatents,			/* number of pats in data */
          nrefstringtree,		/* number of strings */
          nrefs;			/* number of pv refs */
{
  FILE *fp;				/* output file */

  fp = open_outputfile(outfile);
  fprintf (fp, "%s %s %s\n", programname, inpfile, outfile);
  fprintf (fp, "totpatents=%d, FIRSTYEAR=%d, LASTYEAR=%d, NPREYEARS=%d\n",
	   npatents, FIRSTYEAR, LASTYEAR, NPREYEARS);
  if (fp != stdout)			/* don't close stdout! */
    fclose (fp);
}


/************************ depth/scope calculations ******************/

static void
depth_scope (pv, outfile)
     /* for each year, determine depth and scope */
     PATREFVECTORSTRUCT pv[MAXYEARS];   /* vector data */
     char outfile[];
{
  int yeari;		                /* year counter */
  double pdepth, rdepth;                /* patid and ref depths */
  FILE *fp;				/* output file */

  fp = open_outputfile(outfile);
  fprintf (fp, "\n%s\n", DSCOL);

  for (yeari=NPREYEARS; yeari<MAXYEARS; yeari++)
    /* only years with full pre */
    {
#if DEBUG
      /* print out the trees of which overlap is calculated */
      printf("\n%d:\n", yeari+FIRSTYEAR);
      printf("Patids:    ");
      printtree (pv[yeari].patids);
      printf("\nRefs:     ");
      printtree (pv[yeari].refs);
      printf("\nPatidpre:  ");
      printtree (pv[yeari].patidpre);
      printf("\nRefspre:  ");
      printtree (pv[yeari].refspre);
      printf("\n");
#endif
      /* calculate depth */
      pdepth = rdepth = 0;
      depth (pv[yeari].refs, pv[yeari].patidpre,
	     &pdepth, pv[yeari].nrefs);
      depth (pv[yeari].refs, pv[yeari].refspre,
	     &rdepth, pv[yeari].nrefs);
      pv[yeari].depth = pdepth + rdepth;
      
      /* calculate scope (the "2" in scope2 means that scope is calculated
	 based on 2 lists, i.e. patent ids and references) */
      scope2 (pv[yeari].refs,
	      pv[yeari].patidpre, pv[yeari].refspre,
	      &pv[yeari].scope, pv[yeari].nrefs);
    }

  /* print out the results */
  for (yeari=0; yeari<MAXYEARS; yeari++)
    {
      /* year and number of patents that year */
      fprintf(fp, "%4d  %8d", yeari+FIRSTYEAR, pv[yeari].npatids);
      
      /* depth and scope */
      if (yeari >= NPREYEARS)
	fprintf(fp, " %8.3f %8.3f", pv[yeari].depth, pv[yeari].scope);
      else
	fprintf(fp, " %8s %8s", UDF, UDF);

      fprintf(fp, "\n");
    }
  if (fp != stdout)			/* don't close stdout! */
    fclose (fp);
}


static void
depth (tree1ref, tree2, countdepth, ntree1)
     /* sum up how many times each element in tree1 occurs in tree2,
	divided by ntree1 */
     REFSTRUCT
       * tree1ref,			/* current pointer to tree 1*/
       * tree2;				/* beginning of tree 2 */
     double * countdepth;
     int ntree1;		        /* current count */
{
  REFSTRUCT
    * tree2ref;				/* tree 2 traversal */
  int cmp;				/* go left or right */

  if (tree1ref != NULL)			/* we are at a leaf of tree 1; exit */
    {
      for (tree2ref = tree2;		/* start from the top of tree 2*/
	   tree2ref != NULL;		/* until leaves */
	   tree2ref = ((cmp>0) ? tree2ref->rightref : /* go right */
		                 tree2ref->leftref))  /* or left */
	{
	  /* check if the labels are the same */
	  cmp = strcasecmp(tree2ref->patref, tree1ref->patref);
	  if (!cmp)			/* yes; update depth count */
	    (* countdepth) += 1.0 / ntree1; /* ntree1 >0 if tree1ref != NULL */
	}
      /* recursively continue each branch of tree 1 */
      depth (tree1ref->leftref, tree2, countdepth, ntree1);
      depth (tree1ref->rightref, tree2, countdepth, ntree1);
    }
}

static void
scope2 (tree1ref, tree2, tree3, countscope, ntree1)
     /* sum up how many elements of tree1 occur neither in tree2 nor tree3,
	divided by ntree1 */
     REFSTRUCT
       * tree1ref,			/* current pointer to tree 1*/
       * tree2,				/* beginning of tree 2 */
       * tree3;				/* beginning of tree 3 */
     double * countscope;
     int ntree1;		        /* current count */
{
  REFSTRUCT
    * tree2ref,				/* tree 2 traversal */
    * tree3ref;				/* tree 3 traversal */
  int cmp;				/* go left or right */

  if (tree1ref != NULL)			/* we are at a leaf of tree 1; exit */
    {
      for (tree2ref = tree2;		/* start from the top of tree 2 */
	   tree2ref != NULL;		/* until leaves */
	   tree2ref = ((cmp>0) ? tree2ref->rightref : /* go right */
		                 tree2ref->leftref))  /* or left */
	{
	  /* check if the labels are the same */
	  cmp = strcasecmp(tree2ref->patref, tree1ref->patref);
	  if (!cmp)			/* yes */
	    break;			/* check the next elem in tree 1 */
	}
      if (tree2ref == NULL)		/* didn't find in tree 2 */
	for (tree3ref = tree3;		/* start from the top of tree 3 */
	     tree3ref != NULL;		/* until leaves */
	     tree3ref = ((cmp>0) ? tree3ref->rightref : /* go right */
			 tree3ref->leftref))  /* or left */
	  {
	    /* check if the labels are the same */
	    cmp = strcasecmp(tree3ref->patref, tree1ref->patref);
	    if (!cmp)			/* yes */
	      break;			/* check the next elem in tree 1 */
	  }
      if (tree2ref == NULL && tree3ref == NULL) 
	(* countscope) += 1.0 / ntree1; /* ntree1 >0 if tree1ref!=NULL */
	
      /* recursively continue each branch of tree 1 */
      scope2 (tree1ref->leftref, tree2, tree3, countscope, ntree1);
      scope2 (tree1ref->rightref, tree2, tree3, countscope, ntree1);
    }
}



/************************ I/O and list/tree functions ******************/

static void
init_pv (pv)
      /* initialize the internal data storage */
     PATREFVECTORSTRUCT pv[MAXYEARS]; /* vector data */
{
  int yeari;

  for (yeari = 0; yeari < MAXYEARS; yeari++)
    {
      /* lists */
      pv[yeari].patids = NULL;
      pv[yeari].refs = NULL;
      pv[yeari].patidpre = NULL;
      pv[yeari].refspre = NULL;
      
      /* counts */
      pv[yeari].npatids = 0;
      pv[yeari].nrefs = 0;
      pv[yeari].npatidpre = 0;
      pv[yeari].nrefspre = 0;
      
      /* depth and scope */
      pv[yeari].depth = 0;
      pv[yeari].scope = 0;
    }
}


static REFSTRUCT * 
read_reference (fp, refstringtree, nrefstringtree)
     /* get rid of blanks and read one patent reference string */
     FILE *fp;				/* patent file */
     REFSTRINGSTRUCT * * refstringtree;	/* ptr to beginning of ref vector */
     int * nrefstringtree;		/* number of refs in the vector */
{
  int c;				/* read one character at a time */
  char patref[MAXPATREFL];		/* patent label string */
  REFSTRUCT * newreference;		/* pointer to new ref structure */
  
  /* read until end of file or line, or a non-blank character */
  for (c = getc (fp);
       c != EOF && c != '\n' && (c == ' ' || c == '\t' || c == '\r');
       c = getc (fp))
    ;
  if (c == '\n' || c == EOF)		/* end of line or file */
    return (NULL);
  else
    {
      /* create a new patent reference string */
      ungetc(c, fp);
      fscanf (fp, "%s", patref);
      newreference = (REFSTRUCT *) malloc (sizeof(REFSTRUCT));
      newreference->patref =		/* add in the tree of patlabelstrings*/
	add_if_not_in_stringtree (refstringtree, nrefstringtree, patref);
      newreference->leftref = NULL;
      return (newreference);
    }
}


static void
freereferences(references)
     /* free the memory used by the current patent ref vector */
     REFSTRUCT * references;		/* pointer to the patent refs */
{
  REFSTRUCT * nextref, * freeref;	/* traversing the list */

  for (nextref = references;		/* start from first ref */
       nextref != NULL;			/* until end of list */
       free(freeref))			/* free the current ref */
    {
      freeref = nextref;		/* save the pointer */
      nextref = nextref->leftref;	/* traverse the list */
    }
}


static char *
add_if_not_in_stringtree (treeptr, ntree, curpat)
  /* add the patent label in the tree of all labels if it isn't there already*/
     REFSTRINGSTRUCT * * treeptr;	/* ptr to the beginning of the tree */
     int * ntree;			/* number of labels so far */
     char curpat[];			/* patent label string */
{
  REFSTRINGSTRUCT * * currefptr;	/* ref vector traversal */
  REFSTRINGSTRUCT  * newptr;		/* ref vector traversal */
  int cmp = 0;				/* go left or right in the tree */

  for (currefptr = treeptr;		/* start from the beginning */
       *currefptr != NULL;		/* stop when at the end/leaves */
       currefptr = ((cmp>0) ? (&((*currefptr)->rightref)) : /* go right */
                               (&((*currefptr)->leftref)))) /* or left */
    {
      cmp = strcasecmp ((*currefptr)->patref, curpat); /* compare strings */
      if (!cmp)				/* if it exists, exit */
	return((*currefptr)->patref);	/* with pointer to the string */
    }
  /* it is a new reference; add to the tree */
  newptr = (REFSTRINGSTRUCT *) malloc (sizeof(REFSTRINGSTRUCT));
  newptr->leftref = NULL;
  newptr->rightref = NULL;
  *currefptr = newptr;			/* pointer from the previous elem */
  sprintf ( newptr->patref, "%s", curpat); /* store the string */
  (*ntree)++;				/* update number of labels */
  return(newptr->patref);		/* return pointer to new label */
}


static void
fgl (fp)
/* read until the end of line */
     FILE *fp;
{
  char c = '\0';

  while ((c = getc (fp)) != EOF && c != '\n')
    ;
}


#if DEBUG
static void
printtree (tree)
  /* print out the ref vector */
  REFSTRUCT * tree;			/* current ptr to the vector */
{
  if (tree != NULL)			/* terminate this branch */
    {
      printtree(tree->rightref);	/* traverse right */
      printf ("%s ", tree->patref);	/* print current node */
      printtree(tree->leftref);		/* traverse left (or straight list) */
    }
}
#endif

static FILE *
open_outputfile(filename)
     /* open the outputfile, or pipe to stdout */
     char filename[];
{
  /* print on the screen if stdout specified as the output file */
  if (!strcmp(filename, "stdout"))
    return(stdout);
  else
    return(open_file(filename, "a"));
}

static FILE *
open_file(filename, mode)
     /* check that the file can be opened and do it */
     char filename[];
     char mode[];			/* read or append */
{
  FILE *fp;
  if ((fp = fopen (filename, mode)) == NULL)
    {
      fprintf (stderr, "Cannot open %s\n", filename);
      exit (EXIT_FILE_ERROR);
    }
  return(fp);
}
