From 925087356b853e2099c1b60d8b757d7aa02121a9 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- jam-files/engine/scan.c | 418 ------------------------------------------------ 1 file changed, 418 deletions(-) delete mode 100644 jam-files/engine/scan.c (limited to 'jam-files/engine/scan.c') diff --git a/jam-files/engine/scan.c b/jam-files/engine/scan.c deleted file mode 100644 index 11c44c0e..00000000 --- a/jam-files/engine/scan.c +++ /dev/null @@ -1,418 +0,0 @@ -/* - * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc. - * - * This file is part of Jam - see jam.c for Copyright information. - */ - -#include "jam.h" -#include "lists.h" -#include "parse.h" -#include "scan.h" -#include "jamgram.h" -#include "jambase.h" -#include "newstr.h" - -/* - * scan.c - the jam yacc scanner - * - * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk. - * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc. - * Also handle tokens abutting EOF by remembering - * to return EOF now matter how many times yylex() - * reinvokes yyline(). - * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT. - * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is - * defined before Linux's yacc tries to redefine it. - */ - -struct keyword -{ - char * word; - int type; -} keywords[] = -{ -#include "jamgramtab.h" - { 0, 0 } -}; - -struct include -{ - struct include * next; /* next serial include file */ - char * string; /* pointer into current line */ - char * * strings; /* for yyfparse() -- text to parse */ - FILE * file; /* for yyfparse() -- file being read */ - char * fname; /* for yyfparse() -- file name */ - int line; /* line counter for error messages */ - char buf[ 512 ]; /* for yyfparse() -- line buffer */ -}; - -static struct include * incp = 0; /* current file; head of chain */ - -static int scanmode = SCAN_NORMAL; -static int anyerrors = 0; - - -static char * symdump( YYSTYPE * ); - -#define BIGGEST_TOKEN 10240 /* no single token can be larger */ - - -/* - * Set parser mode: normal, string, or keyword. - */ - -void yymode( int n ) -{ - scanmode = n; -} - - -void yyerror( char * s ) -{ - /* We use yylval instead of incp to access the error location information as - * the incp pointer will already be reset to 0 in case the error occurred at - * EOF. - * - * The two may differ only if we get an error while reading a lexical token - * spanning muliple lines, e.g. a multi-line string literal or action body, - * in which case yylval location information will hold the information about - * where this token started while incp will hold the information about where - * reading it broke. - * - * TODO: Test the theory about when yylval and incp location information are - * the same and when they differ. - */ - printf( "%s:%d: %s at %s\n", yylval.file, yylval.line, s, symdump( &yylval ) ); - ++anyerrors; -} - - -int yyanyerrors() -{ - return anyerrors != 0; -} - - -void yyfparse( char * s ) -{ - struct include * i = (struct include *)BJAM_MALLOC( sizeof( *i ) ); - - /* Push this onto the incp chain. */ - i->string = ""; - i->strings = 0; - i->file = 0; - i->fname = copystr( s ); - i->line = 0; - i->next = incp; - incp = i; - - /* If the filename is "+", it means use the internal jambase. */ - if ( !strcmp( s, "+" ) ) - i->strings = jambase; -} - - -/* - * yyline() - read new line and return first character. - * - * Fabricates a continuous stream of characters across include files, returning - * EOF at the bitter end. - */ - -int yyline() -{ - struct include * i = incp; - - if ( !incp ) - return EOF; - - /* Once we start reading from the input stream, we reset the include - * insertion point so that the next include file becomes the head of the - * list. - */ - - /* If there is more data in this line, return it. */ - if ( *i->string ) - return *i->string++; - - /* If we are reading from an internal string list, go to the next string. */ - if ( i->strings ) - { - if ( *i->strings ) - { - ++i->line; - i->string = *(i->strings++); - return *i->string++; - } - } - else - { - /* If necessary, open the file. */ - if ( !i->file ) - { - FILE * f = stdin; - if ( strcmp( i->fname, "-" ) && !( f = fopen( i->fname, "r" ) ) ) - perror( i->fname ); - i->file = f; - } - - /* If there is another line in this file, start it. */ - if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) ) - { - ++i->line; - i->string = i->buf; - return *i->string++; - } - } - - /* This include is done. Free it up and return EOF so yyparse() returns to - * parse_file(). - */ - - incp = i->next; - - /* Close file, free name. */ - if ( i->file && ( i->file != stdin ) ) - fclose( i->file ); - freestr( i->fname ); - BJAM_FREE( (char *)i ); - - return EOF; -} - - -/* - * yylex() - set yylval to current token; return its type. - * - * Macros to move things along: - * - * yychar() - return and advance character; invalid after EOF. - * yyprev() - back up one character; invalid before yychar(). - * - * yychar() returns a continuous stream of characters, until it hits the EOF of - * the current include file. - */ - -#define yychar() ( *incp->string ? *incp->string++ : yyline() ) -#define yyprev() ( incp->string-- ) - -int yylex() -{ - int c; - char buf[ BIGGEST_TOKEN ]; - char * b = buf; - - if ( !incp ) - goto eof; - - /* Get first character (whitespace or of token). */ - c = yychar(); - - if ( scanmode == SCAN_STRING ) - { - /* If scanning for a string (action's {}'s), look for the closing brace. - * We handle matching braces, if they match. - */ - - int nest = 1; - - while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) ) - { - if ( c == '{' ) - ++nest; - - if ( ( c == '}' ) && !--nest ) - break; - - *b++ = c; - - c = yychar(); - - /* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */ - if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) ) - --b; - } - - /* We ate the ending brace -- regurgitate it. */ - if ( c != EOF ) - yyprev(); - - /* Check for obvious errors. */ - if ( b == buf + sizeof( buf ) ) - { - yyerror( "action block too big" ); - goto eof; - } - - if ( nest ) - { - yyerror( "unmatched {} in action block" ); - goto eof; - } - - *b = 0; - yylval.type = STRING; - yylval.string = newstr( buf ); - yylval.file = incp->fname; - yylval.line = incp->line; - } - else - { - char * b = buf; - struct keyword * k; - int inquote = 0; - int notkeyword; - - /* Eat white space. */ - for ( ;; ) - { - /* Skip past white space. */ - while ( ( c != EOF ) && isspace( c ) ) - c = yychar(); - - /* Not a comment? */ - if ( c != '#' ) - break; - - /* Swallow up comment line. */ - while ( ( ( c = yychar() ) != EOF ) && ( c != '\n' ) ) ; - } - - /* c now points to the first character of a token. */ - if ( c == EOF ) - goto eof; - - yylval.file = incp->fname; - yylval.line = incp->line; - - /* While scanning the word, disqualify it for (expensive) keyword lookup - * when we can: $anything, "anything", \anything - */ - notkeyword = c == '$'; - - /* Look for white space to delimit word. "'s get stripped but preserve - * white space. \ protects next character. - */ - while - ( - ( c != EOF ) && - ( b < buf + sizeof( buf ) ) && - ( inquote || !isspace( c ) ) - ) - { - if ( c == '"' ) - { - /* begin or end " */ - inquote = !inquote; - notkeyword = 1; - } - else if ( c != '\\' ) - { - /* normal char */ - *b++ = c; - } - else if ( ( c = yychar() ) != EOF ) - { - /* \c */ - if (c == 'n') - c = '\n'; - else if (c == 'r') - c = '\r'; - else if (c == 't') - c = '\t'; - *b++ = c; - notkeyword = 1; - } - else - { - /* \EOF */ - break; - } - - c = yychar(); - } - - /* Check obvious errors. */ - if ( b == buf + sizeof( buf ) ) - { - yyerror( "string too big" ); - goto eof; - } - - if ( inquote ) - { - yyerror( "unmatched \" in string" ); - goto eof; - } - - /* We looked ahead a character - back up. */ - if ( c != EOF ) - yyprev(); - - /* Scan token table. Do not scan if it is obviously not a keyword or if - * it is an alphabetic when were looking for punctuation. - */ - - *b = 0; - yylval.type = ARG; - - if ( !notkeyword && !( isalpha( *buf ) && ( scanmode == SCAN_PUNCT ) ) ) - for ( k = keywords; k->word; ++k ) - if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) ) - { - yylval.type = k->type; - yylval.string = k->word; /* used by symdump */ - break; - } - - if ( yylval.type == ARG ) - yylval.string = newstr( buf ); - } - - if ( DEBUG_SCAN ) - printf( "scan %s\n", symdump( &yylval ) ); - - return yylval.type; - -eof: - /* We do not reset yylval.file & yylval.line here so unexpected EOF error - * messages would include correct error location information. - */ - yylval.type = EOF; - return yylval.type; -} - - -static char * symdump( YYSTYPE * s ) -{ - static char buf[ BIGGEST_TOKEN + 20 ]; - switch ( s->type ) - { - case EOF : sprintf( buf, "EOF" ); break; - case 0 : sprintf( buf, "unknown symbol %s", s->string ); break; - case ARG : sprintf( buf, "argument %s" , s->string ); break; - case STRING: sprintf( buf, "string \"%s\"" , s->string ); break; - default : sprintf( buf, "keyword %s" , s->string ); break; - } - return buf; -} - - -/* - * Get information about the current file and line, for those epsilon - * transitions that produce a parse. - */ - -void yyinput_stream( char * * name, int * line ) -{ - if ( incp ) - { - *name = incp->fname; - *line = incp->line; - } - else - { - *name = "(builtin)"; - *line = -1; - } -} -- cgit v1.2.3