lexer.c 2.95 KB
Newer Older
dcw's avatar
dcw committed
1
2
3
4
5
6
7
8
9
10
11
12
/*
 *                            LEXER FOR THE
 *                            ===== === ===
 *
 *                      DATA DECLARATION BUILDER
 *                      ==== =========== =======
 *
 *      This module is the lexer used by the parser to
 *      interpret the high level data declarations.
 *
 ******* Tokens:
 *
 *      tERROR tEOF tSEMI tID tEQ tOR tOPENBR tCOMMA tCLOSEBR
 *      tSTR tNUM
 */
 

#include <dcw.h>
#include <ctype.h>
#include <limits.h>
#include "lexer.h"


#ifdef DEBUG
/*
 * Printable name of each token, indexed by the token value (the debug
 * trace prints tokenname[ curtok ]).  The order follows the token list in
 * the file header; presumably it must also match the order in which the
 * TOKEN values are declared in lexer.h -- confirm before reordering.
 * The table is never modified, hence const.
 */
static const char *const tokenname[] = {
        "tERROR", "tEOF", "tSEMI", "tID", "tEQ", "tOR", "tOPENBR",
        "tCOMMA", "tCLOSEBR", "tSTR", "tNUM"
};
#endif


/* ----------------- Exported variables ---------------- */

33
/* Capacity of the lexeme buffers, including the terminating NUL. */
#define MAXIDSIZE 100

/* Current input line number; starts at 1 and is advanced by the lexer
 * for every newline it skips as white space. */
int  lineno = 1;

/* Text of the most recently lexed tID or tSTR token, NUL-padded to the
 * full buffer size; longer lexemes are truncated to MAXIDSIZE-1 chars. */
char lexidval[ MAXIDSIZE ];

/* Value of the most recently lexed tNUM token. */
int  lexintval;

/* Stream the lexer reads from.  It is not assigned anywhere in the code
 * visible here -- presumably the caller sets it before the first call
 * to nexttok(). */
FILE *lexfile;


/* ----------------- Private variables  ---------------- */

static BOOL  havepushedtok = FALSE;
static TOKEN curtok;
45
46
static char curid[ MAXIDSIZE ];
static int curint;
dcw's avatar
dcw committed
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81


/* ----------------- Private procedures ---------------- */

/*
 * Skip blanks, tabs and newlines on lexfile, bumping lineno once for
 * every newline consumed.  The first character that is not white space
 * is pushed back so that the caller reads it next; at end of file the
 * push-back of EOF has no effect on the stream.
 */
static void white_space()
{
        int ch = getc(lexfile);

        while( ch == ' ' || ch == '\t' || ch == '\n' ) {
                if( ch == '\n' ) {
                        lineno++;
                }
                ch = getc(lexfile);
        }
        ungetc( ch, lexfile );
}


/* ----------------- Public procedures  ---------------- */

/*
 * Push the current token back: the next call to nexttok() returns
 * curtok again instead of reading more input.  Only one token of
 * push-back is supported; a second push without an intervening
 * nexttok() trips the assertion.
 */
void ungettok()
{
        ASSERT( ! havepushedtok, ("ungettok: can't push 2 tokens\n") );
#ifdef DEBUG
        printf( "lexer: ungot token %s\n", tokenname[ curtok ] );
#endif
        havepushedtok = TRUE;
}


TOKEN nexttok()
{
	int c;
	int  pos;

82
83
        if( havepushedtok )
	{
dcw's avatar
dcw committed
84
                havepushedtok = FALSE;
85
86
        } else
	{
dcw's avatar
dcw committed
87
88
                white_space();
                c = getc(lexfile);
89
		switch( c )
dcw's avatar
dcw committed
90
		{
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
                case EOF: curtok = tEOF; break;
		case ';': curtok = tSEMI; break;
                case '(': curtok = tOPENBR; break;
                case ')': curtok = tCLOSEBR; break;
                case ',': curtok = tCOMMA; break;
                case '|': curtok = tOR; break;
                case '=': curtok = tEQ; break;
		case '"':
			for( pos=0; (c=getc(lexfile)) != '\"'; )
			{
				if( pos<MAXIDSIZE-1 )
				{
					lexidval[pos++] = c;
				}
			}
			while( pos<MAXIDSIZE )
			{
				lexidval[pos++] = '\0';
			}
110
111
			strcpy( curid, lexidval );
			curtok = tSTR;
112
113
114
115
116
			break;
		default:
			if( isalpha( c ) )
			{
				curtok = tID;
117
				for( pos=0; isalpha(c)||isdigit(c)||c=='_'; )
118
119
120
121
122
123
124
125
				{
					if( pos<MAXIDSIZE-1 )
						lexidval[pos++] = c;
					c=getc(lexfile);
				}
				ungetc( c, lexfile );
				while( pos<MAXIDSIZE )
					lexidval[pos++] = '\0';
126
				strcpy( curid, lexidval );
127
128
129
130
131
			} else if( isdigit( c ) )
			{
				int t;

				curtok = tNUM;
132
				for( t=0; isdigit(c); )
133
				{
134
					t = t*10 + c-'0';
135
136
137
					c=getc(lexfile);
				}
				ungetc( c, lexfile );
138
				curint = lexintval = t;
139
			} else
dcw's avatar
dcw committed
140
			{
141
142
143
				curtok = tERROR;
			}
		}
dcw's avatar
dcw committed
144
145
146
147
148
149
        }
#ifdef DEBUG
printf( "returning token %s\n", tokenname[ curtok ] );
#endif
        return curtok;
}