lexer.c 3.94 KB
Newer Older
dcw's avatar
dcw committed
1 2 3 4 5 6 7 8 9 10 11 12
/*
 *                            LEXER FOR THE
 *                            ===== === ===
 *
 *                      DATA DECLARATION BUILDER
 *                      ==== =========== =======
 *
 *      This module is the lexer used by the parser to
 *      interpret the high-level data declarations.
 *
 ******* Tokens:
 *
13
 *      tERROR tEOF tSEMI tID tEQ tOR tOPENBR tCOMMA tCLOSEBR
dcw's avatar
dcw committed
14
 *	tSTR tNUM tEXPORT tGLOBAL tBEGIN tOPENCURLY tCLOSECURLY tTYPE
dcw's avatar
dcw committed
15 16 17
 */
 

18
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "struct.h"
#include "lexer.h"


dcw's avatar
dcw committed
27
#ifdef DEBUGGING
dcw's avatar
dcw committed
28 29
/*
 * Printable names for the debug trace output.  The table is indexed
 * directly by token value (tokenname[ curtok ]), so the order of the
 * entries must match the numeric values of the tokens -- presumably the
 * declaration order in lexer.h; confirm whenever a token is added.
 * The table is never modified, hence const on both levels.
 */
static const char *const tokenname[] = {
	"tERROR", "tEOF", "tSEMI", "tID", "tEQ", "tOR", "tOPENBR",
	"tCOMMA", "tCLOSEBR", "tSTR", "tNUM", "tEXPORT", "tGLOBAL",
	"tBEGIN", "tOPENCURLY", "tCLOSECURLY", "tTYPE"
};
#endif


/* ----------------- Exported variables ---------------- */

38
/*
 * Size in bytes of the identifier/string buffers, including the
 * terminating NUL.  nexttok silently drops any characters of an
 * identifier or string beyond MAXIDSIZE-1.
 */
#define MAXIDSIZE 100
dcw's avatar
dcw committed
39 40 41

/* Current input line number, starting at 1; advanced by white_space
 * and readnextline as they consume input. */
int  lineno = 1;
/* Text of the most recently lexed tID or tSTR token (keywords are also
 * left here); NUL-terminated and NUL-padded to the full buffer size. */
char lexidval[ MAXIDSIZE ];
42
/* Decimal value of the most recently lexed tNUM token. */
int lexintval;
dcw's avatar
dcw committed
43 44 45 46 47 48 49
/* Stream that white_space, nexttok and readnextline read from.  Nothing
 * in this file assigns it -- presumably the caller opens the input and
 * sets it before the first call (TODO confirm against the caller). */
FILE *lexfile;


/* ----------------- Private variables  ---------------- */

/* TRUE after ungettok: the next nexttok call returns curtok again
 * instead of reading more input.  Only one token can be pushed back. */
static BOOL  havepushedtok = FALSE;
/* The token most recently produced by nexttok. */
static TOKEN curtok;
50 51
/* Private copy of lexidval, written by nexttok each time it lexes an
 * identifier or string.  Nothing in this file reads it back. */
static char curid[ MAXIDSIZE ];
/* Private copy of lexintval, written by nexttok each time it lexes a
 * number.  Nothing in this file reads it back. */
static int curint;
dcw's avatar
dcw committed
52 53 54 55


/* ----------------- Private procedures ---------------- */

Duncan White's avatar
Duncan White committed
56
static void white_space( void )
dcw's avatar
dcw committed
57 58 59 60
{
        int c;

        for(;;) {
dcw's avatar
dcw committed
61
                c = getc(lexfile);
dcw's avatar
dcw committed
62 63 64 65
		if( c == '/' || c == '#' ) /* comment to end of line */
		{
			while( (c=getc(lexfile)) != EOF && c != '\n' );
		}
dcw's avatar
dcw committed
66 67
                if( c == EOF ) break;
                if( c == '\n' ) lineno++;
dcw's avatar
dcw committed
68 69 70 71 72 73 74 75
                if( c != ' ' && c != '\t' && c != '\n' ) break;
        }
        ungetc( c, lexfile );
}


/* ----------------- Public procedures  ---------------- */

Duncan White's avatar
Duncan White committed
76
void ungettok( void )
dcw's avatar
dcw committed
77
{
78 79 80 81 82
        if( havepushedtok )
	{
		fprintf( stderr, "ungettok: can't push 2 tokens\n" );
		exit(1);
	}
dcw's avatar
dcw committed
83
        havepushedtok = TRUE;
dcw's avatar
dcw committed
84
#ifdef DEBUGGING
dcw's avatar
dcw committed
85 86 87 88 89
printf( "lexer: ungot token %s\n", tokenname[ curtok ] );
#endif
}


Duncan White's avatar
Duncan White committed
90
TOKEN nexttok( void )
dcw's avatar
dcw committed
91 92 93 94
{
	int c;
	int  pos;

95 96
        if( havepushedtok )
	{
dcw's avatar
dcw committed
97
                havepushedtok = FALSE;
98 99
        } else
	{
dcw's avatar
dcw committed
100 101
                white_space();
                c = getc(lexfile);
102
		switch( c )
dcw's avatar
dcw committed
103
		{
104 105
                case EOF: curtok = tEOF; break;
		case ';': curtok = tSEMI; break;
106 107
		case '{': curtok = tOPENCURLY; break;
		case '}': curtok = tCLOSECURLY; break;
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
                case '(': curtok = tOPENBR; break;
                case ')': curtok = tCLOSEBR; break;
                case ',': curtok = tCOMMA; break;
                case '|': curtok = tOR; break;
                case '=': curtok = tEQ; break;
		case '"':
			for( pos=0; (c=getc(lexfile)) != '\"'; )
			{
				if( pos<MAXIDSIZE-1 )
				{
					lexidval[pos++] = c;
				}
			}
			while( pos<MAXIDSIZE )
			{
				lexidval[pos++] = '\0';
			}
125 126
			strcpy( curid, lexidval );
			curtok = tSTR;
127 128 129 130 131
			break;
		default:
			if( isalpha( c ) )
			{
				curtok = tID;
132
				for( pos=0; isalpha(c)||isdigit(c)||c=='_'; )
133 134 135 136 137 138 139 140
				{
					if( pos<MAXIDSIZE-1 )
						lexidval[pos++] = c;
					c=getc(lexfile);
				}
				ungetc( c, lexfile );
				while( pos<MAXIDSIZE )
					lexidval[pos++] = '\0';
141
				strcpy( curid, lexidval );
142 143 144 145 146 147
				if( streq( curid, "EXPORT" ) )
				{
					curtok = tEXPORT;
				} else if( streq( curid, "GLOBAL" ) )
				{
					curtok = tGLOBAL;
dcw's avatar
dcw committed
148 149 150
				} else if( streq( curid, "BEGIN" ) )
				{
					curtok = tBEGIN;
151 152 153
				} else if( streq( curid, "TYPE" ) )
				{
					curtok = tTYPE;
154 155 156 157 158 159
				} else if( streq( curid, "OR" ) )
				{
					curtok = tOR;
				} else if( streq( curid, "or" ) )
				{
					curtok = tOR;
160
				}
161 162 163 164 165
			} else if( isdigit( c ) )
			{
				int t;

				curtok = tNUM;
166
				for( t=0; isdigit(c); )
167
				{
168
					t = t*10 + c-'0';
169 170 171
					c=getc(lexfile);
				}
				ungetc( c, lexfile );
172
				curint = lexintval = t;
173
			} else
dcw's avatar
dcw committed
174
			{
175 176 177
				curtok = tERROR;
			}
		}
dcw's avatar
dcw committed
178
        }
dcw's avatar
dcw committed
179
#ifdef DEBUGGING
dcw's avatar
dcw committed
180 181 182 183
printf( "returning token %s\n", tokenname[ curtok ] );
#endif
        return curtok;
}
184 185


Duncan White's avatar
Duncan White committed
186
BOOL readnextline( char *line )
187 188 189 190 191 192 193 194 195
{
	int c;
	char *s = line;

	while( (c=getc(lexfile)) != EOF && c != '\n' ) *s++ = c;
	*s = '\0';
	lineno++;
	return c != EOF;
}