lexer.c 3.89 KB
Newer Older
dcw's avatar
dcw committed
1 2 3 4 5 6 7 8 9 10 11 12
/*
 *                            LEXER FOR THE
 *                            ===== === ===
 *
 *                      DATA DECLARATION BUILDER
 *                      ==== =========== =======
 *
 *      This module is the lexer used by the parser to
 *      interpret the high-level data declarations.
 *
 ******* Tokens:
 *
 *      tERROR tEOF tSEMI tID tEQ tOR tOPENBR tCOMMA tCLOSEBR
 *      tSTR tNUM tEXPORT tGLOBAL tBEGIN tOPENCURLY tCLOSECURLY tTYPE
 */
 

18
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "struct.h"
#include "lexer.h"


dcw's avatar
dcw committed
24
#ifdef DEBUGGING
/*
 * Printable names for tokens, used only by the debugging trace output.
 * The table is indexed directly by a TOKEN value, so the order here is
 * assumed to match the order in which the tokens are declared
 * (presumably in lexer.h -- confirm when adding a token).
 */
static const char *const tokenname[] = {
	"tERROR",
	"tEOF",
	"tSEMI",
	"tID",
	"tEQ",
	"tOR",
	"tOPENBR",
	"tCOMMA",
	"tCLOSEBR",
	"tSTR",
	"tNUM",
	"tEXPORT",
	"tGLOBAL",
	"tBEGIN",
	"tOPENCURLY",
	"tCLOSECURLY",
	"tTYPE"
};
#endif


/* ----------------- Exported variables ---------------- */

35
/* Size of the identifier/string buffers, including the terminating NUL. */
#define MAXIDSIZE 100

/* Current input line number; starts at 1 and is advanced by the lexer. */
int  lineno = 1;

/*
 * Text of the most recently scanned identifier, keyword or string token.
 * Longer lexemes are truncated to MAXIDSIZE-1 characters.
 */
char lexidval[ MAXIDSIZE ];

/* Value of the most recently scanned number token. */
int lexintval;

/*
 * Stream the lexer reads from.  It is not assigned in this module, so the
 * caller is expected to set it before the first token is requested.
 */
FILE *lexfile;


/* ----------------- Private variables  ---------------- */

static BOOL  havepushedtok = FALSE;
static TOKEN curtok;
47 48
static char curid[ MAXIDSIZE ];
static int curint;
dcw's avatar
dcw committed
49 50 51 52 53 54 55 56 57


/* ----------------- Private procedures ---------------- */

/*
 * white_space: skip blanks, tabs, newlines and comments on lexfile.
 *
 * A '/' or '#' starts a comment that runs to the end of the line.
 * lineno is incremented for every newline consumed.  On return the
 * stream is positioned at the first character that is not white space
 * (that character is pushed back), or at end of file.
 */
static void white_space(void)
{
	int c;

	for(;;)
	{
		c = getc(lexfile);
		if( c == '/' || c == '#' )
		{
			/* comment: discard everything up to the newline or EOF */
			do
			{
				c = getc(lexfile);
			} while( c != EOF && c != '\n' );
		}
		if( c == EOF )
		{
			break;
		}
		if( c == '\n' )
		{
			lineno++;
		} else if( c != ' ' && c != '\t' )
		{
			break;
		}
	}

	/* ungetc(EOF) is defined to do nothing; make that explicit */
	if( c != EOF )
	{
		ungetc( c, lexfile );
	}
}


/* ----------------- Public procedures  ---------------- */

void ungettok()
{
75 76 77 78 79
        if( havepushedtok )
	{
		fprintf( stderr, "ungettok: can't push 2 tokens\n" );
		exit(1);
	}
dcw's avatar
dcw committed
80
        havepushedtok = TRUE;
dcw's avatar
dcw committed
81
#ifdef DEBUGGING
dcw's avatar
dcw committed
82 83 84 85 86 87 88 89 90 91
printf( "lexer: ungot token %s\n", tokenname[ curtok ] );
#endif
}


TOKEN nexttok()
{
	int c;
	int  pos;

92 93
        if( havepushedtok )
	{
dcw's avatar
dcw committed
94
                havepushedtok = FALSE;
95 96
        } else
	{
dcw's avatar
dcw committed
97 98
                white_space();
                c = getc(lexfile);
99
		switch( c )
dcw's avatar
dcw committed
100
		{
101 102
                case EOF: curtok = tEOF; break;
		case ';': curtok = tSEMI; break;
103 104
		case '{': curtok = tOPENCURLY; break;
		case '}': curtok = tCLOSECURLY; break;
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
                case '(': curtok = tOPENBR; break;
                case ')': curtok = tCLOSEBR; break;
                case ',': curtok = tCOMMA; break;
                case '|': curtok = tOR; break;
                case '=': curtok = tEQ; break;
		case '"':
			for( pos=0; (c=getc(lexfile)) != '\"'; )
			{
				if( pos<MAXIDSIZE-1 )
				{
					lexidval[pos++] = c;
				}
			}
			while( pos<MAXIDSIZE )
			{
				lexidval[pos++] = '\0';
			}
122 123
			strcpy( curid, lexidval );
			curtok = tSTR;
124 125 126 127 128
			break;
		default:
			if( isalpha( c ) )
			{
				curtok = tID;
129
				for( pos=0; isalpha(c)||isdigit(c)||c=='_'; )
130 131 132 133 134 135 136 137
				{
					if( pos<MAXIDSIZE-1 )
						lexidval[pos++] = c;
					c=getc(lexfile);
				}
				ungetc( c, lexfile );
				while( pos<MAXIDSIZE )
					lexidval[pos++] = '\0';
138
				strcpy( curid, lexidval );
139 140 141 142 143 144
				if( streq( curid, "EXPORT" ) )
				{
					curtok = tEXPORT;
				} else if( streq( curid, "GLOBAL" ) )
				{
					curtok = tGLOBAL;
dcw's avatar
dcw committed
145 146 147
				} else if( streq( curid, "BEGIN" ) )
				{
					curtok = tBEGIN;
148 149 150
				} else if( streq( curid, "TYPE" ) )
				{
					curtok = tTYPE;
151 152 153 154 155 156
				} else if( streq( curid, "OR" ) )
				{
					curtok = tOR;
				} else if( streq( curid, "or" ) )
				{
					curtok = tOR;
157
				}
158 159 160 161 162
			} else if( isdigit( c ) )
			{
				int t;

				curtok = tNUM;
163
				for( t=0; isdigit(c); )
164
				{
165
					t = t*10 + c-'0';
166 167 168
					c=getc(lexfile);
				}
				ungetc( c, lexfile );
169
				curint = lexintval = t;
170
			} else
dcw's avatar
dcw committed
171
			{
172 173 174
				curtok = tERROR;
			}
		}
dcw's avatar
dcw committed
175
        }
dcw's avatar
dcw committed
176
#ifdef DEBUGGING
dcw's avatar
dcw committed
177 178 179 180
printf( "returning token %s\n", tokenname[ curtok ] );
#endif
        return curtok;
}
181 182 183 184 185 186 187 188 189 190 191 192


/*
 * readnextline: read the rest of the current line from lexfile.
 *
 * Characters up to, but not including, the next newline are stored in
 * 'line', which is then NUL-terminated.  No length is passed in, so the
 * caller must supply a buffer large enough for the longest input line.
 * lineno is advanced when a newline is consumed.
 *
 * Returns non-zero if the line was ended by a newline, and zero if end
 * of file was reached first (any characters read before EOF are still
 * left in 'line').
 */
BOOL readnextline( char *line )
{
	char *out = line;
	int c;

	while( (c = getc(lexfile)) != EOF && c != '\n' )
	{
		*out++ = (char)c;
	}
	*out = '\0';

	/* only count a line when its newline was actually consumed */
	if( c == '\n' )
	{
		lineno++;
	}
	return c != EOF;
}