lexer.c 2.77 KB
Newer Older
dcw's avatar
dcw committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
/*
 *                            LEXER FOR THE
 *                            ===== === ===
 *
 *                      DATA DECLARATION BUILDER
 *                      ==== =========== =======
 *
 *      This module is the lexer used by the parser to
 *      interpret the high level data declarations.
 *
 ******* Tokens:
 *
 *      tERROR tEOF tSEMI tID tEQ tOR tOPENBR tCOMMA tCLOSEBR tSTAR
 *
 ******* History
 *
 *      Version Who     Date            Comments
 *      ======= ===     ====            ========
 *
 *      1.0     dcw     2nd Jan 1988    Created
 */
 

#include <dcw.h>
#include <ctype.h>
#include "lexer.h"


#ifdef DEBUG
/*
 * Printable token names, indexed by the current token value in the
 * DEBUG trace messages of ungettok() and nexttok().
 * NOTE(review): the order presumably mirrors the TOKEN type declared
 * in lexer.h -- confirm, and keep both in step when adding a token.
 */
static char *tokenname[] = {
        "tERROR", "tEOF", "tSEMI", "tID", "tEQ", "tOR", "tOPENBR",
        "tCOMMA", "tCLOSEBR", "tSTAR"
};
#endif


/* ----------------- Exported variables ---------------- */

/* Size of lexidval in bytes; identifiers keep at most MAXIDSIZE-1 characters. */
#define MAXIDSIZE 32

/* Current input line: starts at 1, incremented by white_space() per '\n' read. */
int  lineno = 1;
/*
 * Text of the most recent tID token.  nexttok() silently drops any
 * characters beyond MAXIDSIZE-1 and fills the rest of the array with '\0'.
 */
char lexidval[ MAXIDSIZE ];
/*
 * Stream the lexer reads from.  It is never assigned in this file.
 * NOTE(review): presumably the caller sets it before the first
 * nexttok() call -- confirm against the parser.
 */
FILE *lexfile;


/* ----------------- Private variables  ---------------- */

/* TRUE while a token pushed back by ungettok() is waiting for nexttok(). */
static BOOL  havepushedtok = FALSE;
/* Token most recently produced by nexttok(); re-returned after ungettok(). */
static TOKEN curtok;


/* ----------------- Private procedures ---------------- */

/*
 * white_space: consume spaces, tabs and newlines from lexfile, counting
 * each newline in lineno.  The first character that is not white space
 * is pushed back so the caller reads it next.
 */
static void white_space()
{
        int ch;

        while( (ch = getc(lexfile)) != EOF ) {
                if( ch == '\n' ) {
                        lineno++;
                } else if( ch != ' ' && ch != '\t' ) {
                        break;
                }
        }

        /* at end of file ch is EOF, which ungetc() refuses harmlessly */
        ungetc( ch, lexfile );
}


/* ----------------- Public procedures  ---------------- */

/*
 * ungettok: push the current token back, so that the next call of
 * nexttok() returns it again without reading any input.  Only one
 * token may be pending at a time; a second push trips the ASSERT.
 */
void ungettok()
{
        ASSERT( ! havepushedtok, ("ungettok: can't push 2 tokens\n") );

        havepushedtok = TRUE;

#ifdef DEBUG
        printf( "lexer: ungot token %s\n", tokenname[ curtok ] );
#endif
}


/*
 * nexttok: return the next token.
 *
 * If a token is pending from ungettok(), that token is returned again
 * and no input is consumed.  Otherwise leading white space is skipped
 * and one token is read from lexfile:
 *
 *      end of file                     -> tEOF
 *      letter followed by letters and
 *      digits                          -> tID, text left in lexidval
 *      ; ( ) , | = *                   -> the matching punctuation token
 *      any other character             -> tERROR
 *
 * Identifier characters beyond MAXIDSIZE-1 are read but not stored.
 */
TOKEN nexttok()
{
        int ch;
        int len;

        if( havepushedtok ) {
                /* curtok still holds the pushed-back token */
                havepushedtok = FALSE;
        } else {
                white_space();
                ch = getc(lexfile);

                if( ch == EOF ) {
                        curtok = tEOF;
                } else if( isalpha( ch ) ) {
                        curtok = tID;
                        len = 0;
                        /* ch is known to be a letter on entry, so do/while is safe */
                        do {
                                if( len < MAXIDSIZE-1 ) {
                                        lexidval[len++] = ch;
                                }
                                ch = getc(lexfile);
                        } while( isalnum( ch ) );

                        /* ch ended the identifier: give it back to the stream */
                        ungetc( ch, lexfile );

                        /* NUL-fill the unused tail; this also terminates the string */
                        while( len < MAXIDSIZE ) {
                                lexidval[len++] = '\0';
                        }
                } else {
                        switch( ch ) {
                        case ';': curtok = tSEMI;    break;
                        case '(': curtok = tOPENBR;  break;
                        case ')': curtok = tCLOSEBR; break;
                        case ',': curtok = tCOMMA;   break;
                        case '|': curtok = tOR;      break;
                        case '=': curtok = tEQ;      break;
                        case '*': curtok = tSTAR;    break;
                        default:  curtok = tERROR;   break;
                        }
                }
        }

#ifdef DEBUG
        printf( "returning token %s\n", tokenname[ curtok ] );
#endif
        return curtok;
}