#include <ctype.h>
#include "index.h"

#define IsHeader(c)        ((c) == '-')
#define IsString(c)        ((c) == '"')
#define IsEOF(c)           ((c) == EOF)

static int NextToken ();
static int SkipToken ();
static char *ScanString ();
static char *ScanToken ();
static long ScanLong ();
static int CompareCanonical ();

INDEX *CreateIndex ()
    {
    INDEX *index;
    
    if ((index = (INDEX *) malloc (sizeof (INDEX))) == NULL)
        AllocError ("CreateIndex");
    index->n_entries = 0;
    index->n_principals = 0;
    index->root = NULL;
    
    return (index);
    }

INDEX *ReadIndex (index, file)
    INDEX *index;
    register FILE *file;
    {
    char buffer[512];
    char *identifier, *file_name;
    long offset, length;
    register int c;
    
    identifier = file_name = NULL;
    offset = 0;
    length = -1;

    /* skip header */
    if (!IsHeader (NextToken (file)))
        IOError ("ReadIndex - bad master index");
    while (!IsHeader (SkipToken (file)));
    SkipToken (file);
    
    /* scan successive records for this index (until next header) */
    while (IsString (NextToken (file)))
        {
        identifier = ScanString (file);
	c = NextToken (file);
        if (IsString (c) || IsHeader (c) || IsEOF (c))
                /* if no new file_name, use pointer to last one created */
            InsertInIndex (index, CreateEntry (synonym, identifier, file_name, offset, length));
        else
                /* next is not an ident or index header, must be file_name */
            {
            file_name = ScanToken (file);
            c = NextToken (file);
            offset = isdigit (c) ? ScanLong (file) : 0L;
            c = NextToken (file);
            length = isdigit (c) ? ScanLong (file) : -1L;
            InsertInIndex (index, CreateEntry (principal, identifier, file_name, offset, length));
            }
        }
    
    return (index);
    }

/*
Scan up to beginning of next token (i.e., run of non-blank
characters) in file.
*/

static int NextToken (file)
    register FILE *file;
    {
    register int c;
    
    while (c = getc (file), isspace (c));
    ungetc (c, file);
    
    return (c);
    }

/*
Scan off next token (run of non-blanks), if any, then scan
up to beginning of the following token.
*/

static int SkipToken (file)
    register FILE *file;
    {
    register int c;
    
    while (c = getc (file), !isspace (c) && c != EOF);
    while (c = getc (file), isspace (c));
    ungetc (c, file);
    
    return (c);
    }

/*
Scan off a sequence of characters delimited by double quotes,
placing those characters in a dynamically allocated buffer to be
returned.
*/

static char *ScanString (file)
    register FILE *file;
    {
    register char *bp, *string;
    register int c;
    char buffer[512];
    
    /* treat "" as a " embedded in the string; just scan through trailing quote */
    bp = buffer;
    getc (file);
    NextToken (file);	/* skip any leading blanks */
    while (((c = getc (file)) != EOF) &&
		((c != '"') || ((c = getc (file)) == '"')))
            *bp++ = c;
    ungetc (c, file);
    while ((bp > buffer) && isspace (*--bp));	/* strip any trailing blanks */
    *++bp = '\0';
    if ((string = malloc (bp - buffer + 1)) == NULL)
        AllocError ("ScanString");
    strcpy (string, buffer);
    
    return (string);
    }

/*
Scan off next token, placing it in a dynamically allocated buffer.
*/

static char *ScanToken (file)
    register FILE *file;
    {
    register char *bp, *string;
    register int c;
    char buffer[512];
    
    bp = buffer;
    while (c = getc (file), !isspace (c) && c != EOF)
        *bp++ = c;
    ungetc (c, file);
    *bp = '\0';
    if ((string = malloc (bp - buffer + 1)) == NULL)
        AllocError ("ScanToken");
    strcpy (string, buffer);
    
    return (string);
    }

/*
Scan off next sequence of numerics, returning result as a long integer.
*/

static long ScanLong (file)
    register FILE *file;
    {
    register long n;
    register int c;
    
    n = 0;
    while (c = getc (file), isdigit (c))
        n = 10 * n + (c - '0');
    ungetc (c, file);
    
    return (n);
    }

INDEX *InsertInIndex (index, entree)
    INDEX *index;
    ENTRY *entree;
    {
    InsertInTree (&(index->root), entree, CompareCanonical);
    index->n_entries++;
    if (TypeOfEntry (entree) == principal)
	index->n_principals++;
    
    return (index);
    }

ENTRY *FindInIndex (index, key)
    INDEX *index;
    char *key;
    {
    TREE *tree;
    
    if ((tree = FindInTree (index->root, key, CompareCanonical)) != NULL)
        return (EntryOfTree (tree));
    else
        return (NULL);
    }

/*
Compare two character strings, ignoring case, leading and trailing blanks,
and runs of embedded blanks, returning negative, 0, or positive for
s1 < s2, s1 == s2, or s1 > s2, respectively.
*/

static int CompareCanonical (s1, s2)
    char *s1, *s2;
    {
    register int c1, c2;

    /* remove leading whitespace */
    while (c1 = *s1, isspace (c1))
	s1++;
    while (c2 = *s2, isspace (c2))
	s2++;

    do
        {
	/* canonicalize next char of s1 */
	c1 = *s1++;
        if (isspace (c1))	/* if whitespace, compress multiple whitespace */
	    {
            while (c1 = *s1, isspace (c1))
		s1++;
	    if (c1 != '\0')	/* ignore trailing whitespace */
	        c1 = ' ';
	    }
	else if (isupper (c1))	/* if uppercase, make lowercase */
            c1 = tolower (c1);
	/* same for s2 */
	c2 = *s2++;
        if (isspace (c2))
	    {
            while (c2 = *s2, isspace (c2))
		s2++;
	    if (c2 != '\0')
	        c2 = ' ';
	    }
	else if (isupper (c2))
            c2 = tolower (c2);
        }
	while (c1 == c2 && c1 != '\0');

    return (c1 - c2);
    }

ENTRY *NthInIndex (index, n)
    INDEX *index;
    long n;
    {
    TREE *tree;
    
    if ((tree = NthInTree (index->root, n)) != NULL)
        return (EntryOfTree (tree));
    else
        return (NULL);
    }
