/*
 *                            LEXER FOR THE
 *                            ===== === ===
 *
 *                      DATA DECLARATION BUILDER
 *                      ==== =========== =======
 *
 *      This module is the lexer used by the parser to
 *      interpret the high-level data declarations.
 *
 ******* Tokens:
 *
 *      tERROR tEOF tSEMI tID tEQ tOR tOPENBR tCOMMA tCLOSEBR
 *      tSTR tNUM
 */
 

#include <dcw.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include "lexer.h"


#ifdef DEBUGGING
/*
 * Printable token names for the debugging trace.  The table is indexed
 * directly by a TOKEN value (see the printf calls in ungettok() and
 * nexttok()), so the entries are assumed to be in the same order as the
 * TOKEN values declared in lexer.h - confirm if tokens are ever added.
 */
static const char *const tokenname[] = {
        "tERROR", "tEOF", "tSEMI", "tID", "tEQ", "tOR", "tOPENBR",
        "tCOMMA", "tCLOSEBR", "tSTR", "tNUM"
};
#endif


/* ----------------- Exported variables ---------------- */

/*
 * Size of the lexeme buffers, including the terminating NUL: at most
 * MAXIDSIZE-1 characters of an identifier or string are kept, any
 * further characters are read and discarded.
 */
#define MAXIDSIZE 100

/* Current input line number; starts at 1 and is advanced as newlines are consumed. */
int  lineno = 1;

/* Text of the most recently scanned tID or tSTR token, NUL padded to MAXIDSIZE. */
char lexidval[ MAXIDSIZE ];

/* Value of the most recently scanned tNUM token. */
int  lexintval;

/*
 * Stream the lexer reads from.  It is never assigned in this file, so it
 * must be set by the client before the first call to nexttok().
 */
FILE *lexfile;


/* ----------------- Private variables  ---------------- */

static BOOL  havepushedtok = FALSE;
static TOKEN curtok;
45
46
static char curid[ MAXIDSIZE ];
static int curint;
dcw's avatar
dcw committed
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70


/* ----------------- Private procedures ---------------- */

/*
 * white_space: discard spaces, tabs and newlines from lexfile, adding
 * one to lineno for every newline consumed.  The first character that
 * is not white space is pushed back so that the caller reads it next.
 */
static void white_space()
{
        int ch;

        while( (ch = getc(lexfile)) != EOF )
        {
                if( ch == '\n' )
                {
                        lineno++;
                } else if( ch != ' ' && ch != '\t' )
                {
                        break;
                }
        }

        /* At end of file ch is EOF; ungetc() then leaves the stream untouched. */
        ungetc( ch, lexfile );
}


/* ----------------- Public procedures  ---------------- */

/*
 * ungettok: push back the token most recently returned by nexttok(),
 * so that the next call to nexttok() returns it again.
 *
 * Only one token of push-back is supported: a second ungettok() with no
 * intervening nexttok() trips the ASSERT below (ASSERT comes from dcw.h;
 * presumably it reports the message and aborts - confirm there).
 */
void ungettok( void )
{
        ASSERT( ! havepushedtok, ("ungettok: can't push 2 tokens\n") );
        havepushedtok = TRUE;
#ifdef DEBUGGING
        printf( "lexer: ungot token %s\n", tokenname[ curtok ] );
#endif
}


TOKEN nexttok()
{
	int c;
	int  pos;

82
83
        if( havepushedtok )
	{
dcw's avatar
dcw committed
84
                havepushedtok = FALSE;
85
86
        } else
	{
dcw's avatar
dcw committed
87
88
                white_space();
                c = getc(lexfile);
89
		switch( c )
dcw's avatar
dcw committed
90
		{
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
                case EOF: curtok = tEOF; break;
		case ';': curtok = tSEMI; break;
                case '(': curtok = tOPENBR; break;
                case ')': curtok = tCLOSEBR; break;
                case ',': curtok = tCOMMA; break;
                case '|': curtok = tOR; break;
                case '=': curtok = tEQ; break;
		case '"':
			for( pos=0; (c=getc(lexfile)) != '\"'; )
			{
				if( pos<MAXIDSIZE-1 )
				{
					lexidval[pos++] = c;
				}
			}
			while( pos<MAXIDSIZE )
			{
				lexidval[pos++] = '\0';
			}
110
111
			strcpy( curid, lexidval );
			curtok = tSTR;
112
113
114
115
116
			break;
		default:
			if( isalpha( c ) )
			{
				curtok = tID;
117
				for( pos=0; isalpha(c)||isdigit(c)||c=='_'; )
118
119
120
121
122
123
124
125
				{
					if( pos<MAXIDSIZE-1 )
						lexidval[pos++] = c;
					c=getc(lexfile);
				}
				ungetc( c, lexfile );
				while( pos<MAXIDSIZE )
					lexidval[pos++] = '\0';
126
				strcpy( curid, lexidval );
127
128
129
130
131
			} else if( isdigit( c ) )
			{
				int t;

				curtok = tNUM;
132
				for( t=0; isdigit(c); )
133
				{
134
					t = t*10 + c-'0';
135
136
137
					c=getc(lexfile);
				}
				ungetc( c, lexfile );
138
				curint = lexintval = t;
139
			} else
dcw's avatar
dcw committed
140
			{
141
142
143
				curtok = tERROR;
			}
		}
dcw's avatar
dcw committed
144
        }
dcw's avatar
dcw committed
145
#ifdef DEBUGGING
dcw's avatar
dcw committed
146
147
148
149
printf( "returning token %s\n", tokenname[ curtok ] );
#endif
        return curtok;
}