diff options
Diffstat (limited to 'jam-files/engine/scan.c')
-rw-r--r-- | jam-files/engine/scan.c | 418 |
1 files changed, 418 insertions, 0 deletions
diff --git a/jam-files/engine/scan.c b/jam-files/engine/scan.c new file mode 100644 index 00000000..11c44c0e --- /dev/null +++ b/jam-files/engine/scan.c @@ -0,0 +1,418 @@ +/* + * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc. + * + * This file is part of Jam - see jam.c for Copyright information. + */ + +#include "jam.h" +#include "lists.h" +#include "parse.h" +#include "scan.h" +#include "jamgram.h" +#include "jambase.h" +#include "newstr.h" + +/* + * scan.c - the jam yacc scanner + * + * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk. + * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc. + * Also handle tokens abutting EOF by remembering + * to return EOF now matter how many times yylex() + * reinvokes yyline(). + * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT. + * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is + * defined before Linux's yacc tries to redefine it. + */ + +struct keyword +{ + char * word; + int type; +} keywords[] = +{ +#include "jamgramtab.h" + { 0, 0 } +}; + +struct include +{ + struct include * next; /* next serial include file */ + char * string; /* pointer into current line */ + char * * strings; /* for yyfparse() -- text to parse */ + FILE * file; /* for yyfparse() -- file being read */ + char * fname; /* for yyfparse() -- file name */ + int line; /* line counter for error messages */ + char buf[ 512 ]; /* for yyfparse() -- line buffer */ +}; + +static struct include * incp = 0; /* current file; head of chain */ + +static int scanmode = SCAN_NORMAL; +static int anyerrors = 0; + + +static char * symdump( YYSTYPE * ); + +#define BIGGEST_TOKEN 10240 /* no single token can be larger */ + + +/* + * Set parser mode: normal, string, or keyword. + */ + +void yymode( int n ) +{ + scanmode = n; +} + + +void yyerror( char * s ) +{ + /* We use yylval instead of incp to access the error location information as + * the incp pointer will already be reset to 0 in case the error occurred at + * EOF. + * + * The two may differ only if we get an error while reading a lexical token + * spanning muliple lines, e.g. a multi-line string literal or action body, + * in which case yylval location information will hold the information about + * where this token started while incp will hold the information about where + * reading it broke. + * + * TODO: Test the theory about when yylval and incp location information are + * the same and when they differ. + */ + printf( "%s:%d: %s at %s\n", yylval.file, yylval.line, s, symdump( &yylval ) ); + ++anyerrors; +} + + +int yyanyerrors() +{ + return anyerrors != 0; +} + + +void yyfparse( char * s ) +{ + struct include * i = (struct include *)BJAM_MALLOC( sizeof( *i ) ); + + /* Push this onto the incp chain. */ + i->string = ""; + i->strings = 0; + i->file = 0; + i->fname = copystr( s ); + i->line = 0; + i->next = incp; + incp = i; + + /* If the filename is "+", it means use the internal jambase. */ + if ( !strcmp( s, "+" ) ) + i->strings = jambase; +} + + +/* + * yyline() - read new line and return first character. + * + * Fabricates a continuous stream of characters across include files, returning + * EOF at the bitter end. + */ + +int yyline() +{ + struct include * i = incp; + + if ( !incp ) + return EOF; + + /* Once we start reading from the input stream, we reset the include + * insertion point so that the next include file becomes the head of the + * list. + */ + + /* If there is more data in this line, return it. */ + if ( *i->string ) + return *i->string++; + + /* If we are reading from an internal string list, go to the next string. */ + if ( i->strings ) + { + if ( *i->strings ) + { + ++i->line; + i->string = *(i->strings++); + return *i->string++; + } + } + else + { + /* If necessary, open the file. */ + if ( !i->file ) + { + FILE * f = stdin; + if ( strcmp( i->fname, "-" ) && !( f = fopen( i->fname, "r" ) ) ) + perror( i->fname ); + i->file = f; + } + + /* If there is another line in this file, start it. */ + if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) ) + { + ++i->line; + i->string = i->buf; + return *i->string++; + } + } + + /* This include is done. Free it up and return EOF so yyparse() returns to + * parse_file(). + */ + + incp = i->next; + + /* Close file, free name. */ + if ( i->file && ( i->file != stdin ) ) + fclose( i->file ); + freestr( i->fname ); + BJAM_FREE( (char *)i ); + + return EOF; +} + + +/* + * yylex() - set yylval to current token; return its type. + * + * Macros to move things along: + * + * yychar() - return and advance character; invalid after EOF. + * yyprev() - back up one character; invalid before yychar(). + * + * yychar() returns a continuous stream of characters, until it hits the EOF of + * the current include file. + */ + +#define yychar() ( *incp->string ? *incp->string++ : yyline() ) +#define yyprev() ( incp->string-- ) + +int yylex() +{ + int c; + char buf[ BIGGEST_TOKEN ]; + char * b = buf; + + if ( !incp ) + goto eof; + + /* Get first character (whitespace or of token). */ + c = yychar(); + + if ( scanmode == SCAN_STRING ) + { + /* If scanning for a string (action's {}'s), look for the closing brace. + * We handle matching braces, if they match. + */ + + int nest = 1; + + while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) ) + { + if ( c == '{' ) + ++nest; + + if ( ( c == '}' ) && !--nest ) + break; + + *b++ = c; + + c = yychar(); + + /* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */ + if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) ) + --b; + } + + /* We ate the ending brace -- regurgitate it. */ + if ( c != EOF ) + yyprev(); + + /* Check for obvious errors. */ + if ( b == buf + sizeof( buf ) ) + { + yyerror( "action block too big" ); + goto eof; + } + + if ( nest ) + { + yyerror( "unmatched {} in action block" ); + goto eof; + } + + *b = 0; + yylval.type = STRING; + yylval.string = newstr( buf ); + yylval.file = incp->fname; + yylval.line = incp->line; + } + else + { + char * b = buf; + struct keyword * k; + int inquote = 0; + int notkeyword; + + /* Eat white space. */ + for ( ;; ) + { + /* Skip past white space. */ + while ( ( c != EOF ) && isspace( c ) ) + c = yychar(); + + /* Not a comment? */ + if ( c != '#' ) + break; + + /* Swallow up comment line. */ + while ( ( ( c = yychar() ) != EOF ) && ( c != '\n' ) ) ; + } + + /* c now points to the first character of a token. */ + if ( c == EOF ) + goto eof; + + yylval.file = incp->fname; + yylval.line = incp->line; + + /* While scanning the word, disqualify it for (expensive) keyword lookup + * when we can: $anything, "anything", \anything + */ + notkeyword = c == '$'; + + /* Look for white space to delimit word. "'s get stripped but preserve + * white space. \ protects next character. + */ + while + ( + ( c != EOF ) && + ( b < buf + sizeof( buf ) ) && + ( inquote || !isspace( c ) ) + ) + { + if ( c == '"' ) + { + /* begin or end " */ + inquote = !inquote; + notkeyword = 1; + } + else if ( c != '\\' ) + { + /* normal char */ + *b++ = c; + } + else if ( ( c = yychar() ) != EOF ) + { + /* \c */ + if (c == 'n') + c = '\n'; + else if (c == 'r') + c = '\r'; + else if (c == 't') + c = '\t'; + *b++ = c; + notkeyword = 1; + } + else + { + /* \EOF */ + break; + } + + c = yychar(); + } + + /* Check obvious errors. */ + if ( b == buf + sizeof( buf ) ) + { + yyerror( "string too big" ); + goto eof; + } + + if ( inquote ) + { + yyerror( "unmatched \" in string" ); + goto eof; + } + + /* We looked ahead a character - back up. */ + if ( c != EOF ) + yyprev(); + + /* Scan token table. Do not scan if it is obviously not a keyword or if + * it is an alphabetic when were looking for punctuation. + */ + + *b = 0; + yylval.type = ARG; + + if ( !notkeyword && !( isalpha( *buf ) && ( scanmode == SCAN_PUNCT ) ) ) + for ( k = keywords; k->word; ++k ) + if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) ) + { + yylval.type = k->type; + yylval.string = k->word; /* used by symdump */ + break; + } + + if ( yylval.type == ARG ) + yylval.string = newstr( buf ); + } + + if ( DEBUG_SCAN ) + printf( "scan %s\n", symdump( &yylval ) ); + + return yylval.type; + +eof: + /* We do not reset yylval.file & yylval.line here so unexpected EOF error + * messages would include correct error location information. + */ + yylval.type = EOF; + return yylval.type; +} + + +static char * symdump( YYSTYPE * s ) +{ + static char buf[ BIGGEST_TOKEN + 20 ]; + switch ( s->type ) + { + case EOF : sprintf( buf, "EOF" ); break; + case 0 : sprintf( buf, "unknown symbol %s", s->string ); break; + case ARG : sprintf( buf, "argument %s" , s->string ); break; + case STRING: sprintf( buf, "string \"%s\"" , s->string ); break; + default : sprintf( buf, "keyword %s" , s->string ); break; + } + return buf; +} + + +/* + * Get information about the current file and line, for those epsilon + * transitions that produce a parse. + */ + +void yyinput_stream( char * * name, int * line ) +{ + if ( incp ) + { + *name = incp->fname; + *line = incp->line; + } + else + { + *name = "(builtin)"; + *line = -1; + } +} |