From 3faecf9a00512dcbc8712c4bca9adae72fb64410 Mon Sep 17 00:00:00 2001
From: Kenneth Heafield <github@kheafield.com>
Date: Sat, 12 May 2012 14:01:52 -0400
Subject: Give in and copy bjam into cdec source code

---
 jam-files/engine/scan.c | 418 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 418 insertions(+)
 create mode 100644 jam-files/engine/scan.c

(limited to 'jam-files/engine/scan.c')

diff --git a/jam-files/engine/scan.c b/jam-files/engine/scan.c
new file mode 100644
index 00000000..11c44c0e
--- /dev/null
+++ b/jam-files/engine/scan.c
@@ -0,0 +1,418 @@
+/*
+ * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
+ *
+ * This file is part of Jam - see jam.c for Copyright information.
+ */
+
+#include "jam.h"
+#include "lists.h"
+#include "parse.h"
+#include "scan.h"
+#include "jamgram.h"
+#include "jambase.h"
+#include "newstr.h"
+
+/*
+ * scan.c - the jam yacc scanner
+ *
+ * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk.
+ * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc.
+ *          Also handle tokens abutting EOF by remembering
+ *          to return EOF no matter how many times yylex()
+ *          reinvokes yyline().
+ * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT.
+ * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is
+ *          defined before Linux's yacc tries to redefine it.
+ */
+
+struct keyword
+{
+    char * word;  /* keyword text; null in the terminating entry */
+    int    type;  /* token type copied into yylval.type on a match */
+} keywords[] =
+{
+#include "jamgramtab.h"
+    { 0, 0 }  /* sentinel: the lookup loop in yylex() stops at a null word */
+};
+
+struct include
+{
+    struct include   * next;       /* previous head of chain; restored at EOF */
+    char             * string;     /* unread remainder of the current line */
+    char           * * strings;    /* remaining built-in lines; 0 for a file */
+    FILE             * file;       /* input stream; 0 until yyline() opens it */
+    char             * fname;      /* copystr()'d name; freed by yyline() */
+    int                line;       /* lines read so far, for error messages */
+    char               buf[ 512 ]; /* fgets() buffer that 'string' points into */
+};
+
+static struct include * incp = 0; /* current file; head of chain */
+
+static int scanmode = SCAN_NORMAL; /* set by yymode(); picks the yylex() path */
+static int anyerrors = 0;          /* number of yyerror() calls so far */
+
+
+static char * symdump( YYSTYPE * );
+
+#define BIGGEST_TOKEN 10240  /* yylex() buffer size; tokens must be shorter */
+
+
+/*
+ * yymode() - select how yylex() scans: SCAN_NORMAL, SCAN_STRING or SCAN_PUNCT.
+ */
+
+void yymode( int n )
+{
+    scanmode = n;  /* not validated; yylex() only tests for STRING and PUNCT */
+}
+
+
+void yyerror( char * s )
+{
+    /* Report a parse or scan error and bump the counter behind yyanyerrors().
+     *
+     * The location is taken from yylval rather than incp because incp may
+     * already have been reset to 0 when the error is detected at EOF.
+     *
+     * The two can disagree only for a token spanning multiple lines, such as
+     * a multi-line string literal or action body: yylval then records where
+     * the token started, while incp records where reading it broke off.
+     *
+     * TODO: Test the theory about when yylval and incp location information
+     * are the same and when they differ.
+     */
+    char * const token = symdump( &yylval );
+    printf( "%s:%d: %s at %s\n", yylval.file, yylval.line, s, token );
+    anyerrors += 1;
+}
+
+
+int yyanyerrors()
+{
+    return anyerrors ? 1 : 0;  /* 1 once yyerror() has been called, else 0 */
+}
+
+
+void yyfparse( char * s )
+{
+    /* Queue the file named s for scanning by pushing a new record onto the
+     * head of the incp chain. The file itself is opened lazily by yyline().
+     */
+    struct include * inc = (struct include *)BJAM_MALLOC( sizeof( *inc ) );
+
+    inc->next = incp;
+    inc->fname = copystr( s );
+    inc->file = 0;
+    inc->line = 0;
+    inc->string = "";  /* empty, so the first yychar() falls into yyline() */
+
+    /* The name "+" selects the built-in jambase instead of a real file. */
+    inc->strings = strcmp( s, "+" ) == 0 ? jambase : 0;
+    incp = inc;
+}
+
+
+/*
+ * yyline() - read new line and return first character.
+ *
+ * Fabricates a continuous stream of characters across include files, returning
+ * EOF each time an include is exhausted and whenever the chain is empty.
+ */
+
+int yyline()
+{
+    struct include * i = incp;
+
+    if ( !incp )
+        return EOF;
+
+    /* Characters are returned the way getc() does it, as unsigned char values
+     * converted to int. A plain char may be signed, in which case the byte
+     * 0xff would read as -1 (EOF) and other high bytes as negative values
+     * that must not be passed to isspace() and friends.
+     */
+
+    /* If there is more data in this line, return it. */
+    if ( *i->string )
+        return (unsigned char)*i->string++;
+
+    /* If we are reading from an internal string list, go to the next string. */
+    if ( i->strings )
+    {
+        if ( *i->strings )
+        {
+            ++i->line;
+            i->string = *(i->strings++);
+            return (unsigned char)*i->string++;
+        }
+    }
+    else
+    {
+        /* If necessary, open the file. */
+        if ( !i->file )
+        {
+            FILE * f = stdin;  /* the name "-" means standard input */
+            if ( strcmp( i->fname, "-" ) && !( f = fopen( i->fname, "r" ) ) )
+                perror( i->fname );  /* f is 0: handled as an empty include */
+            i->file = f;
+        }
+
+        /* If there is another line in this file, start it. */
+        if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
+        {
+            ++i->line;
+            i->string = i->buf;
+            return (unsigned char)*i->string++;
+        }
+    }
+
+    /* This include is done. Free it up and return EOF so yyparse() returns to
+     * parse_file().
+     */
+
+    incp = i->next;
+
+    /* Close file, free name. */
+    if ( i->file && ( i->file != stdin ) )
+        fclose( i->file );
+    freestr( i->fname );
+    BJAM_FREE( (char *)i );
+
+    return EOF;
+}
+
+
+/*
+ * yylex() - set yylval to current token; return its type.
+ *
+ * Macros to move things along:
+ *
+ *  yychar() - return and advance character, refilling through yyline() at the
+ *             end of a line; yields EOF at the end of the current include
+ *             file and is invalid after the final EOF, when incp is 0. A
+ *             character taken from the rest of the line is converted via
+ *             unsigned char so a byte such as 0xff is not mistaken for EOF.
+ *  yyprev() - back up one character; invalid before yychar().
+ */
+#define yychar() ( *incp->string ? (unsigned char)*incp->string++ : yyline() )
+#define yyprev() ( incp->string-- )
+
+/* Scan the next token into yylval and return its type. EOF is returned at the
+ * end of each include file and after any scan error. In SCAN_STRING mode the
+ * raw text up to the matching '}' becomes one STRING token; otherwise a
+ * whitespace-delimited word is returned as ARG or as a keyword's own type. */
+int yylex()
+{
+    int c;
+    char buf[ BIGGEST_TOKEN ];
+    char * b = buf;
+
+    if ( !incp )
+        goto eof;
+
+    /* Get first character (whitespace or of token). */
+    c = yychar();
+
+    if ( scanmode == SCAN_STRING )
+    {
+        /* If scanning for a string (action's {}'s), look for the closing brace.
+         * We handle matching braces, if they match.
+         */
+
+        int nest = 1;
+
+        while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) )
+        {
+            if ( c == '{' )
+                ++nest;
+
+            if ( ( c == '}' ) && !--nest )
+                break;
+
+            *b++ = c;
+
+            c = yychar();
+
+            /* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */
+            if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) )
+                --b;
+        }
+
+        /* We ate the ending brace -- regurgitate it. */
+        if ( c != EOF )
+            yyprev();
+
+        /* Check for obvious errors. */
+        if ( b == buf + sizeof( buf ) )
+        {
+            yyerror( "action block too big" );
+            goto eof;
+        }
+
+        if ( nest )
+        {
+            yyerror( "unmatched {} in action block" );
+            goto eof;
+        }
+
+        *b = 0;
+        yylval.type = STRING;
+        yylval.string = newstr( buf );
+        yylval.file = incp->fname;
+        yylval.line = incp->line;
+    }
+    else
+    {
+        struct keyword * k;
+        int inquote = 0;
+        int notkeyword;
+
+        /* Eat white space. The <ctype.h> classifiers are defined only for EOF
+         * and unsigned char values, hence the casts on c and *buf below. */
+        for ( ;; )
+        {
+            /* Skip past white space. */
+            while ( ( c != EOF ) && isspace( (unsigned char)c ) )
+                c = yychar();
+
+            /* Not a comment? */
+            if ( c != '#' )
+                break;
+
+            /* Swallow up comment line. */
+            while ( ( ( c = yychar() ) != EOF ) && ( c != '\n' ) ) ;
+        }
+
+        /* c now points to the first character of a token. */
+        if ( c == EOF )
+            goto eof;
+
+        yylval.file = incp->fname;
+        yylval.line = incp->line;
+
+        /* While scanning the word, disqualify it for (expensive) keyword lookup
+         * when we can: $anything, "anything", \anything
+         */
+        notkeyword = c == '$';
+
+        /* Look for white space to delimit word. "'s get stripped but preserve
+         * white space. \ protects next character.
+         */
+        while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) &&
+            ( inquote || !isspace( (unsigned char)c ) ) )
+        {
+            if ( c == '"' )
+            {
+                /* begin or end " */
+                inquote = !inquote;
+                notkeyword = 1;
+            }
+            else if ( c != '\\' )
+            {
+                /* normal char */
+                *b++ = c;
+            }
+            else if ( ( c = yychar() ) != EOF )
+            {
+                /* \c */
+                if (c == 'n')
+                    c = '\n';
+                else if (c == 'r')
+                    c = '\r';
+                else if (c == 't')
+                    c = '\t';
+                *b++ = c;
+                notkeyword = 1;
+            }
+            else
+            {
+                /* \EOF */
+                break;
+            }
+
+            c = yychar();
+        }
+
+        /* Check obvious errors. */
+        if ( b == buf + sizeof( buf ) )
+        {
+            yyerror( "string too big" );
+            goto eof;
+        }
+
+        if ( inquote )
+        {
+            yyerror( "unmatched \" in string" );
+            goto eof;
+        }
+
+        /* We looked ahead a character - back up. */
+        if ( c != EOF )
+            yyprev();
+
+        /* Scan token table. Do not scan if it is obviously not a keyword or if
+         * it is an alphabetic when we are looking for punctuation.
+         */
+
+        *b = 0;
+        yylval.type = ARG;
+
+        if ( !notkeyword && !( isalpha( (unsigned char)*buf ) && ( scanmode == SCAN_PUNCT ) ) )
+            for ( k = keywords; k->word; ++k )
+                if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) )
+                {
+                    yylval.type = k->type;
+                    yylval.string = k->word;  /* used by symdump */
+                    break;
+                }
+
+        if ( yylval.type == ARG )
+            yylval.string = newstr( buf );
+    }
+
+    if ( DEBUG_SCAN )
+        printf( "scan %s\n", symdump( &yylval ) );
+
+    return yylval.type;
+
+eof:
+    /* We do not reset yylval.file & yylval.line here so unexpected EOF error
+     * messages would include correct error location information.
+     */
+    yylval.type = EOF;
+    return yylval.type;
+}
+
+
+static char * symdump( YYSTYPE * s )
+{
+    /* Render *s into a static buffer that the next call overwrites. */
+    static char buf[ BIGGEST_TOKEN + 20 ];
+    int const type = s->type;
+
+    if      ( type == EOF    ) sprintf( buf, "EOF" );
+    else if ( type == 0      ) sprintf( buf, "unknown symbol %s", s->string );
+    else if ( type == ARG    ) sprintf( buf, "argument %s", s->string );
+    else if ( type == STRING ) sprintf( buf, "string \"%s\"", s->string );
+    else                       sprintf( buf, "keyword %s", s->string );
+    return buf;
+}
+
+
+/*
+ * yyinput_stream() - report the file name and line number currently being
+ * scanned, for those epsilon transitions that produce a parse.
+ */
+
+void yyinput_stream( char * * name, int * line )
+{
+    /* With no include active, report a placeholder name and line -1. */
+    if ( !incp )
+    {
+        *name = "(builtin)";
+        *line = -1;
+        return;
+    }
+
+    *name = incp->fname;  /* not a copy; yyline() frees it with the include */
+    *line = incp->line;
+}
-- 
cgit v1.2.3