lexer.c 3.93 KB
Newer Older
dcw's avatar
dcw committed
1
2
3
4
5
6
7
8
9
10
11
12
/*
 *                            LEXER FOR THE
 *                            ===== === ===
 *
 *                      DATA DECLARATION BUILDER
 *                      ==== =========== =======
 *
 *      This module is the lexer used by the parser to
 *      interpret the high level data declarations.
 *
 ******* Tokens:
 *
 *      tERROR tEOF tSEMI tID tEQ tOR tOPENBR tCOMMA tCLOSEBR
 *      tSTR tNUM tEXPORT tGLOBAL tBEGIN tOPENCURLY tCLOSECURLY tTYPE
 */
 

18
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "struct.h"
#include "lexer.h"


dcw's avatar
dcw committed
27
#ifdef DEBUGGING
dcw's avatar
dcw committed
28
29
/*
 * Printable name of each token, indexed by TOKEN value.  Only compiled in
 * (and only used) when DEBUGGING is defined, for the lexer trace output.
 * The strings are literals and the table is never modified, so both the
 * pointers and the characters they point at are const.
 * NOTE(review): the order presumably mirrors the TOKEN declaration in
 * lexer.h - confirm whenever a token is added or reordered.
 */
static const char *const tokenname[] = {
	"tERROR", "tEOF", "tSEMI", "tID", "tEQ", "tOR", "tOPENBR",
	"tCOMMA", "tCLOSEBR", "tSTR", "tNUM", "tEXPORT", "tGLOBAL",
	"tBEGIN", "tOPENCURLY", "tCLOSECURLY", "tTYPE"
};
#endif


/* ----------------- Exported variables ---------------- */

38
#define MAXIDSIZE 100

/* Stream the lexer reads its input from. */
FILE *lexfile;

/* Input line counter; starts at 1 and is bumped as newlines are consumed. */
int  lineno = 1;

/*
 * Attribute of the most recent token: the text of an identifier or string
 * (at most MAXIDSIZE-1 characters, always NUL terminated) ...
 */
char lexidval[ MAXIDSIZE ];

/* ... or the value of a number. */
int lexintval;


/* ----------------- Private variables  ---------------- */

static BOOL  havepushedtok = FALSE;
static TOKEN curtok;
50
51
static char curid[ MAXIDSIZE ];
static int curint;
dcw's avatar
dcw committed
52
53
54
55
56
57
58
59
60


/* ----------------- Private procedures ---------------- */

/*
 * white_space: skip blanks, tabs, newlines and comments on lexfile.
 * A comment starts with '/' or '#' and runs to the end of the line.
 * lineno is incremented for every newline consumed.  The first character
 * that is not skipped is pushed back onto lexfile for the caller to read.
 */
static void white_space()
{
	int ch;

	do {
		ch = getc(lexfile);
		if( ch == '/' || ch == '#' )
		{
			/* discard the comment; ch ends up as '\n' or EOF */
			do {
				ch = getc(lexfile);
			} while( ch != EOF && ch != '\n' );
		}
		if( ch == '\n' )
		{
			lineno++;
		}
	} while( ch == ' ' || ch == '\t' || ch == '\n' );

	/* give back the character that stopped the scan (no-op for EOF) */
	ungetc( ch, lexfile );
}


/* ----------------- Public procedures  ---------------- */

void ungettok()
{
78
79
80
81
82
        if( havepushedtok )
	{
		fprintf( stderr, "ungettok: can't push 2 tokens\n" );
		exit(1);
	}
dcw's avatar
dcw committed
83
        havepushedtok = TRUE;
dcw's avatar
dcw committed
84
#ifdef DEBUGGING
dcw's avatar
dcw committed
85
86
87
88
89
90
91
92
93
94
printf( "lexer: ungot token %s\n", tokenname[ curtok ] );
#endif
}


TOKEN nexttok()
{
	int c;
	int  pos;

95
96
        if( havepushedtok )
	{
dcw's avatar
dcw committed
97
                havepushedtok = FALSE;
98
99
        } else
	{
dcw's avatar
dcw committed
100
101
                white_space();
                c = getc(lexfile);
102
		switch( c )
dcw's avatar
dcw committed
103
		{
104
105
                case EOF: curtok = tEOF; break;
		case ';': curtok = tSEMI; break;
106
107
		case '{': curtok = tOPENCURLY; break;
		case '}': curtok = tCLOSECURLY; break;
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
                case '(': curtok = tOPENBR; break;
                case ')': curtok = tCLOSEBR; break;
                case ',': curtok = tCOMMA; break;
                case '|': curtok = tOR; break;
                case '=': curtok = tEQ; break;
		case '"':
			for( pos=0; (c=getc(lexfile)) != '\"'; )
			{
				if( pos<MAXIDSIZE-1 )
				{
					lexidval[pos++] = c;
				}
			}
			while( pos<MAXIDSIZE )
			{
				lexidval[pos++] = '\0';
			}
125
126
			strcpy( curid, lexidval );
			curtok = tSTR;
127
128
129
130
131
			break;
		default:
			if( isalpha( c ) )
			{
				curtok = tID;
132
				for( pos=0; isalpha(c)||isdigit(c)||c=='_'; )
133
134
135
136
137
138
139
140
				{
					if( pos<MAXIDSIZE-1 )
						lexidval[pos++] = c;
					c=getc(lexfile);
				}
				ungetc( c, lexfile );
				while( pos<MAXIDSIZE )
					lexidval[pos++] = '\0';
141
				strcpy( curid, lexidval );
142
143
144
145
146
147
				if( streq( curid, "EXPORT" ) )
				{
					curtok = tEXPORT;
				} else if( streq( curid, "GLOBAL" ) )
				{
					curtok = tGLOBAL;
dcw's avatar
dcw committed
148
149
150
				} else if( streq( curid, "BEGIN" ) )
				{
					curtok = tBEGIN;
151
152
153
				} else if( streq( curid, "TYPE" ) )
				{
					curtok = tTYPE;
154
155
156
157
158
159
				} else if( streq( curid, "OR" ) )
				{
					curtok = tOR;
				} else if( streq( curid, "or" ) )
				{
					curtok = tOR;
160
				}
161
162
163
164
165
			} else if( isdigit( c ) )
			{
				int t;

				curtok = tNUM;
166
				for( t=0; isdigit(c); )
167
				{
168
					t = t*10 + c-'0';
169
170
171
					c=getc(lexfile);
				}
				ungetc( c, lexfile );
172
				curint = lexintval = t;
173
			} else
dcw's avatar
dcw committed
174
			{
175
176
177
				curtok = tERROR;
			}
		}
dcw's avatar
dcw committed
178
        }
dcw's avatar
dcw committed
179
#ifdef DEBUGGING
dcw's avatar
dcw committed
180
181
182
183
printf( "returning token %s\n", tokenname[ curtok ] );
#endif
        return curtok;
}
184
185
186
187
188
189
190
191
192
193
194
195


/*
 * readnextline: read the rest of the current input line from lexfile
 * into line, without the terminating newline, and NUL terminate it.
 * lineno is incremented only when a newline was actually consumed, so
 * hitting end of file no longer inflates the line count.
 * Returns TRUE if the line ended with a newline, FALSE if end of file was
 * reached first (line then holds whatever preceded EOF, possibly nothing).
 *
 * NOTE: no length is passed in, so the text cannot be bounded here; the
 * caller must supply a buffer big enough for the longest input line.
 */
BOOL readnextline( char *line )
{
	char *s = line;
	int c;

	while( (c = getc(lexfile)) != EOF && c != '\n' )
	{
		*s++ = (char)c;
	}
	*s = '\0';

	if( c == '\n' )
	{
		lineno++;
	}
	return c != EOF;
}