lexer.c 4.02 KB
Newer Older
dcw's avatar
dcw committed
1 2 3 4 5 6 7 8 9 10 11 12
/*
 *                            LEXER FOR THE
 *                            ===== === ===
 *
 *                      DATA DECLARATION BUILDER
 *                      ==== =========== =======
 *
 *      This module is the lexer used by the parser to
 *      interpret the high level data declarations.
 *
 ******* Tokens:
 *
13
 *      tERROR tEOF tSEMI tID tEQ tOR tOPENBR tCOMMA tCLOSEBR
dcw's avatar
dcw committed
14
 *	tSTR tNUM tEXPORT tGLOBAL tBEGIN tOPENCURLY tCLOSECURLY tTYPE tMINUS
dcw's avatar
dcw committed
15 16 17
 */
 

18
#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "struct.h"
#include "lexer.h"


dcw's avatar
dcw committed
28
#ifdef DEBUGGING
dcw's avatar
dcw committed
29 30
/*
 * Printable name of each token, used only by the DEBUGGING trace output.
 * The table is indexed directly by a TOKEN value (tokenname[ curtok ]), so
 * the entries must stay in the same order as the TOKEN values themselves
 * (presumably declared in lexer.h -- confirm when adding a token).
 * The strings are literals and are never modified, hence const throughout.
 */
static const char *const tokenname[] = {
	"tERROR",
	"tEOF",
	"tSEMI",
	"tID",
	"tEQ",
	"tOR",
	"tOPENBR",
	"tCOMMA",
	"tCLOSEBR",
	"tSTR",
	"tNUM",
	"tEXPORT",
	"tGLOBAL",
	"tBEGIN",
	"tOPENCURLY",
	"tCLOSECURLY",
	"tTYPE",
	"tMINUS",
};
#endif


/* ----------------- Exported variables ---------------- */

39
/* Size in bytes of the token text buffers, terminating NUL included. */
#define MAXIDSIZE 100

/* Stream the lexer reads from; it is not assigned anywhere in this file. */
FILE *lexfile;

/* Current input line number, counted from 1. */
int   lineno = 1;

/*
 * Text of the most recent identifier, keyword or string token; longer
 * text is truncated to MAXIDSIZE-1 characters.
 */
char  lexidval[ MAXIDSIZE ];

/* Value of the most recent tNUM token. */
int   lexintval;


/* ----------------- Private variables  ---------------- */

49
static bool  havepushedtok = false;
dcw's avatar
dcw committed
50
static TOKEN curtok;
51 52
static char curid[ MAXIDSIZE ];
static int curint;
dcw's avatar
dcw committed
53 54 55 56


/* ----------------- Private procedures ---------------- */

Duncan White's avatar
Duncan White committed
57
static void white_space( void )
dcw's avatar
dcw committed
58 59 60 61
{
        int c;

        for(;;) {
dcw's avatar
dcw committed
62
                c = getc(lexfile);
dcw's avatar
dcw committed
63 64 65 66
		if( c == '/' || c == '#' ) /* comment to end of line */
		{
			while( (c=getc(lexfile)) != EOF && c != '\n' );
		}
dcw's avatar
dcw committed
67 68
                if( c == EOF ) break;
                if( c == '\n' ) lineno++;
dcw's avatar
dcw committed
69 70 71 72 73 74 75 76
                if( c != ' ' && c != '\t' && c != '\n' ) break;
        }
        ungetc( c, lexfile );
}


/* ----------------- Public procedures  ---------------- */

Duncan White's avatar
Duncan White committed
77
void ungettok( void )
dcw's avatar
dcw committed
78
{
79 80 81 82 83
        if( havepushedtok )
	{
		fprintf( stderr, "ungettok: can't push 2 tokens\n" );
		exit(1);
	}
84
        havepushedtok = true;
dcw's avatar
dcw committed
85
#ifdef DEBUGGING
dcw's avatar
dcw committed
86 87 88 89 90
printf( "lexer: ungot token %s\n", tokenname[ curtok ] );
#endif
}


Duncan White's avatar
Duncan White committed
91
TOKEN nexttok( void )
dcw's avatar
dcw committed
92 93 94 95
{
	int c;
	int  pos;

96 97
        if( havepushedtok )
	{
98
                havepushedtok = false;
99 100
        } else
	{
dcw's avatar
dcw committed
101 102
                white_space();
                c = getc(lexfile);
103
		switch( c )
dcw's avatar
dcw committed
104
		{
105 106
                case EOF: curtok = tEOF; break;
		case ';': curtok = tSEMI; break;
107 108
		case '{': curtok = tOPENCURLY; break;
		case '}': curtok = tCLOSECURLY; break;
109 110 111 112
                case '(': curtok = tOPENBR; break;
                case ')': curtok = tCLOSEBR; break;
                case ',': curtok = tCOMMA; break;
                case '|': curtok = tOR; break;
113
                case '-': curtok = tMINUS; break;
114 115 116 117 118 119 120 121 122 123 124 125 126
                case '=': curtok = tEQ; break;
		case '"':
			for( pos=0; (c=getc(lexfile)) != '\"'; )
			{
				if( pos<MAXIDSIZE-1 )
				{
					lexidval[pos++] = c;
				}
			}
			while( pos<MAXIDSIZE )
			{
				lexidval[pos++] = '\0';
			}
127 128
			strcpy( curid, lexidval );
			curtok = tSTR;
129 130 131 132 133
			break;
		default:
			if( isalpha( c ) )
			{
				curtok = tID;
134
				for( pos=0; isalpha(c)||isdigit(c)||c=='_'; )
135 136 137 138 139 140 141 142
				{
					if( pos<MAXIDSIZE-1 )
						lexidval[pos++] = c;
					c=getc(lexfile);
				}
				ungetc( c, lexfile );
				while( pos<MAXIDSIZE )
					lexidval[pos++] = '\0';
143
				strcpy( curid, lexidval );
144 145 146 147 148 149
				if( streq( curid, "EXPORT" ) )
				{
					curtok = tEXPORT;
				} else if( streq( curid, "GLOBAL" ) )
				{
					curtok = tGLOBAL;
dcw's avatar
dcw committed
150 151 152
				} else if( streq( curid, "BEGIN" ) )
				{
					curtok = tBEGIN;
153 154 155
				} else if( streq( curid, "TYPE" ) )
				{
					curtok = tTYPE;
156 157 158 159 160 161
				} else if( streq( curid, "OR" ) )
				{
					curtok = tOR;
				} else if( streq( curid, "or" ) )
				{
					curtok = tOR;
162
				}
163 164 165 166 167
			} else if( isdigit( c ) )
			{
				int t;

				curtok = tNUM;
168
				for( t=0; isdigit(c); )
169
				{
170
					t = t*10 + c-'0';
171 172 173
					c=getc(lexfile);
				}
				ungetc( c, lexfile );
174
				curint = lexintval = t;
175
			} else
dcw's avatar
dcw committed
176
			{
177 178 179
				curtok = tERROR;
			}
		}
dcw's avatar
dcw committed
180
        }
dcw's avatar
dcw committed
181
#ifdef DEBUGGING
dcw's avatar
dcw committed
182 183 184 185
printf( "returning token %s\n", tokenname[ curtok ] );
#endif
        return curtok;
}
186 187


188
bool readnextline( char *line )
189 190 191 192 193 194 195 196 197
{
	int c;
	char *s = line;

	while( (c=getc(lexfile)) != EOF && c != '\n' ) *s++ = c;
	*s = '\0';
	lineno++;
	return c != EOF;
}