diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-05-13 03:35:30 +0200 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-05-13 03:35:30 +0200 |
commit | 670a8f984fc6d8342180c59ae9e96b0b76f34d3d (patch) | |
tree | 9f2ce7eec1a77e56b3bb1ad0ad40f212d7a996b0 /jam-files/engine/expand.c | |
parent | eb3ee28dc0eb1d3e5ed01ba0df843be329ae450d (diff) | |
parent | 2f64af3e06a518b93f7ca2c30a9d0aeb2c947031 (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'jam-files/engine/expand.c')
-rw-r--r-- | jam-files/engine/expand.c | 733 |
1 files changed, 733 insertions, 0 deletions
diff --git a/jam-files/engine/expand.c b/jam-files/engine/expand.c new file mode 100644 index 00000000..d8e58827 --- /dev/null +++ b/jam-files/engine/expand.c @@ -0,0 +1,733 @@ +/* + * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc. + * + * This file is part of Jam - see jam.c for Copyright information. + */ + +# include "jam.h" +# include "lists.h" +# include "variable.h" +# include "expand.h" +# include "pathsys.h" +# include "newstr.h" +# include <assert.h> +# include <stdlib.h> +# include <limits.h> + +# ifdef OS_CYGWIN +# include <sys/cygwin.h> +# include <windows.h> +# endif + +/* + * expand.c - expand a buffer, given variable values + * + * External routines: + * + * var_expand() - variable-expand input string into list of strings + * + * Internal routines: + * + * var_edit_parse() - parse : modifiers into PATHNAME structure. + * var_edit_file() - copy input target name to output, modifying filename. + * var_edit_shift() - do upshift/downshift mods. + * + * 01/25/94 (seiwald) - $(X)$(UNDEF) was expanding like plain $(X) + * 04/13/94 (seiwald) - added shorthand L0 for null list pointer + * 01/11/01 (seiwald) - added support for :E=emptyvalue, :J=joinval + */ + +typedef struct +{ + PATHNAME f; /* :GDBSMR -- pieces */ + char parent; /* :P -- go to parent directory */ + char filemods; /* one of the above applied */ + char downshift; /* :L -- downshift result */ + char upshift; /* :U -- upshift result */ + char to_slashes; /* :T -- convert "\" to "/" */ + char to_windows; /* :W -- convert cygwin to native paths */ + PATHPART empty; /* :E -- default for empties */ + PATHPART join; /* :J -- join list with char */ +} VAR_EDITS ; + +static void var_edit_parse( char * mods, VAR_EDITS * edits ); +static void var_edit_file ( char * in, string * out, VAR_EDITS * edits ); +static void var_edit_shift( string * out, VAR_EDITS * edits ); + +#define MAGIC_COLON '\001' +#define MAGIC_LEFT '\002' +#define MAGIC_RIGHT '\003' + + +/* + * var_expand() - variable-expand input string into list of strings. + * + * Would just copy input to output, performing variable expansion, except that + * since variables can contain multiple values the result of variable expansion + * may contain multiple values (a list). Properly performs "product" operations + * that occur in "$(var1)xxx$(var2)" or even "$($(var2))". + * + * Returns a newly created list. + */ + +LIST * var_expand( LIST * l, char * in, char * end, LOL * lol, int cancopyin ) +{ + char out_buf[ MAXSYM ]; + string buf[ 1 ]; + string out1[ 1 ]; /* temporary buffer */ + size_t prefix_length; + char * out; + char * inp = in; + char * ov; /* for temp copy of variable in outbuf */ + int depth; + + if ( DEBUG_VAREXP ) + printf( "expand '%.*s'\n", end - in, in ); + + /* This gets a lot of cases: $(<) and $(>). */ + if + ( + ( in[ 0 ] == '$' ) && + ( in[ 1 ] == '(' ) && + ( in[ 3 ] == ')' ) && + ( in[ 4 ] == '\0' ) + ) + { + switch ( in[ 2 ] ) + { + case '<': return list_copy( l, lol_get( lol, 0 ) ); + case '>': return list_copy( l, lol_get( lol, 1 ) ); + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return list_copy( l, lol_get( lol, in[ 2 ] - '1' ) ); + } + } + else if ( in[0] == '$' && in[1] == '(' && in[2] == '1' && in[4] == ')' && + in[5] == '\0') { + + switch( in[3] ) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return list_copy( l, lol_get( lol, in[3]-'0'+10-1 ) ); + } + } + + /* Expand @() files, to single item plus accompanying file. */ + if ( ( in[ 0 ] == '@' ) && ( in[ 1 ] == '(' ) && ( *( end - 1 ) == ')' ) ) + { + /* We try the expansion until it fits within the propective output + * buffer. + */ + char * at_buf = 0; + int at_size = MAXJPATH; + int at_len = 0; + do + { + BJAM_FREE( at_buf ); + at_buf = (char *)BJAM_MALLOC_ATOMIC( at_size + 1 ); + at_len = var_string( in, at_buf, at_size, lol ); + at_size *= 2; + } + while ( ( at_len < 0 ) && ( at_size < INT_MAX / 2 ) ); + /* Return the result as a single item list. */ + if ( at_len > 0 ) + { + LIST * r; + string_copy( buf, at_buf ); + r = list_new( l, newstr( buf->value ) ); + string_free( buf ); + BJAM_FREE( at_buf ); + return r; + } + BJAM_FREE( at_buf ); + } + + /* Just try simple copy of in to out. */ + while ( in < end ) + if ( ( *in++ == '$' ) && ( *in == '(' ) ) + goto expand; + + /* No variables expanded - just add copy of input string to list. */ + + /* 'cancopyin' is an optimization: if the input was already a list item, we + * can use copystr() to put it on the new list. Otherwise, we use the slower + * newstr(). + */ + if ( cancopyin ) + return list_new( l, copystr( inp ) ); + + { + LIST * r; + string_new( buf ); + string_append_range( buf, inp, end ); + r = list_new( l, newstr( buf->value ) ); + string_free( buf ); + return r; + } + +expand: + string_new( buf ); + string_append_range( buf, inp, in - 1 ); /* Copy the part before '$'. */ + /* + * Input so far (ignore blanks): + * + * stuff-in-outbuf $(variable) remainder + * ^ ^ + * in end + * Output so far: + * + * stuff-in-outbuf $ + * ^ ^ + * out_buf out + * + * + * We just copied the $ of $(...), so back up one on the output. We now find + * the matching close paren, copying the variable and modifiers between the + * $( and ) temporarily into out_buf, so that we can replace :'s with + * MAGIC_COLON. This is necessary to avoid being confused by modifier values + * that are variables containing :'s. Ugly. + */ + + depth = 1; + inp = ++in; /* Skip over the '('. */ + + while ( ( in < end ) && depth ) + { + switch ( *in++ ) + { + case '(': ++depth; break; + case ')': --depth; break; + } + } + + /* + * Input so far (ignore blanks): + * + * stuff-in-outbuf $(variable) remainder + * ^ ^ ^ + * inp in end + */ + prefix_length = buf->size; + string_append_range( buf, inp, in - 1 ); + + out = buf->value + prefix_length; + for ( ov = out; ov < buf->value + buf->size; ++ov ) + { + switch ( *ov ) + { + case ':': *ov = MAGIC_COLON; break; + case '[': *ov = MAGIC_LEFT ; break; + case ']': *ov = MAGIC_RIGHT; break; + } + } + + /* + * Input so far (ignore blanks): + * + * stuff-in-outbuf $(variable) remainder + * ^ ^ + * in end + * Output so far: + * + * stuff-in-outbuf variable + * ^ ^ ^ + * out_buf out ov + * + * Later we will overwrite 'variable' in out_buf, but we will be done with + * it by then. 'variable' may be a multi-element list, so may each value for + * '$(variable element)', and so may 'remainder'. Thus we produce a product + * of three lists. + */ + { + LIST * variables = 0; + LIST * remainder = 0; + LIST * vars; + + /* Recursively expand variable name & rest of input. */ + if ( out < ov ) variables = var_expand( L0, out, ov, lol, 0 ); + if ( in < end ) remainder = var_expand( L0, in, end, lol, 0 ); + + /* Now produce the result chain. */ + + /* For each variable name. */ + for ( vars = variables; vars; vars = list_next( vars ) ) + { + LIST * value = 0; + LIST * evalue = 0; + char * colon; + char * bracket; + string variable[1]; + char * varname; + int sub1 = 0; + int sub2 = -1; + VAR_EDITS edits; + + /* Look for a : modifier in the variable name. Must copy into + * varname so we can modify it. + */ + string_copy( variable, vars->string ); + varname = variable->value; + + if ( ( colon = strchr( varname, MAGIC_COLON ) ) ) + { + string_truncate( variable, colon - varname ); + var_edit_parse( colon + 1, &edits ); + } + + /* Look for [x-y] subscripting. sub1 and sub2 are x and y. */ + if ( ( bracket = strchr( varname, MAGIC_LEFT ) ) ) + { + /* Make all syntax errors in [] subscripting result in the same + * behavior: silenty return an empty expansion (by setting sub2 + * = 0). Brute force parsing; May get moved into yacc someday. + */ + + char * s = bracket + 1; + + string_truncate( variable, bracket - varname ); + + do /* so we can use "break" */ + { + /* Allow negative indexes. */ + if ( !isdigit( *s ) && ( *s != '-' ) ) + { + sub2 = 0; + break; + } + sub1 = atoi( s ); + + /* Skip over the first symbol, which is either a digit or dash. */ + ++s; + while ( isdigit( *s ) ) ++s; + + if ( *s == MAGIC_RIGHT ) + { + sub2 = sub1; + break; + } + + if ( *s != '-' ) + { + sub2 = 0; + break; + } + + ++s; + + if ( *s == MAGIC_RIGHT ) + { + sub2 = -1; + break; + } + + if ( !isdigit( *s ) && ( *s != '-' ) ) + { + sub2 = 0; + break; + } + + /* First, compute the index of the last element. */ + sub2 = atoi( s ); + while ( isdigit( *++s ) ); + + if ( *s != MAGIC_RIGHT ) + sub2 = 0; + + } while ( 0 ); + + /* Anything but the end of the string, or the colon introducing + * a modifier is a syntax error. + */ + ++s; + if ( *s && ( *s != MAGIC_COLON ) ) + sub2 = 0; + + *bracket = '\0'; + } + + /* Get variable value, with special handling for $(<), $(>), $(n). + */ + if ( !varname[1] ) + { + if ( varname[0] == '<' ) + value = lol_get( lol, 0 ); + else if ( varname[0] == '>' ) + value = lol_get( lol, 1 ); + else if ( ( varname[0] >= '1' ) && ( varname[0] <= '9' ) ) + value = lol_get( lol, varname[0] - '1' ); + else if( varname[0] == '1' && varname[1] >= '0' && + varname[1] <= '9' && !varname[2] ) + value = lol_get( lol, varname[1] - '0' + 10 - 1 ); + } + + if ( !value ) + value = var_get( varname ); + + /* Handle negitive indexes: part two. */ + { + int length = list_length( value ); + + if ( sub1 < 0 ) + sub1 = length + sub1; + else + sub1 -= 1; + + if ( sub2 < 0 ) + sub2 = length + 1 + sub2 - sub1; + else + sub2 -= sub1; + /* The "sub2 < 0" test handles the semantic error of sub2 < + * sub1. + */ + if ( sub2 < 0 ) + sub2 = 0; + } + + /* The fast path: $(x) - just copy the variable value. This is only + * an optimization. + */ + if ( ( out == out_buf ) && !bracket && !colon && ( in == end ) ) + { + string_free( variable ); + l = list_copy( l, value ); + continue; + } + + /* Handle start subscript. */ + while ( ( sub1 > 0 ) && value ) + --sub1, value = list_next( value ); + + /* Empty w/ :E=default?. */ + if ( !value && colon && edits.empty.ptr ) + evalue = value = list_new( L0, newstr( edits.empty.ptr ) ); + + /* For each variable value. */ + string_new( out1 ); + for ( ; value; value = list_next( value ) ) + { + LIST * rem; + size_t postfix_start; + + /* Handle end subscript (length actually). */ + + if ( sub2 >= 0 && --sub2 < 0 ) + break; + + string_truncate( buf, prefix_length ); + + /* Apply : mods, if present */ + + if ( colon && edits.filemods ) + var_edit_file( value->string, out1, &edits ); + else + string_append( out1, value->string ); + + if ( colon && ( edits.upshift || edits.downshift || edits.to_slashes || edits.to_windows ) ) + var_edit_shift( out1, &edits ); + + /* Handle :J=joinval */ + /* If we have more values for this var, just keep appending them + * (using the join value) rather than creating separate LIST + * elements. + */ + if ( colon && edits.join.ptr && + ( list_next( value ) || list_next( vars ) ) ) + { + string_append( out1, edits.join.ptr ); + continue; + } + + string_append( buf, out1->value ); + string_free( out1 ); + string_new( out1 ); + + /* If no remainder, append result to output chain. */ + if ( in == end ) + { + l = list_new( l, newstr( buf->value ) ); + continue; + } + + /* For each remainder, append the complete string to the output + * chain. Remember the end of the variable expansion so we can + * just tack on each instance of 'remainder'. + */ + postfix_start = buf->size; + for ( rem = remainder; rem; rem = list_next( rem ) ) + { + string_truncate( buf, postfix_start ); + string_append( buf, rem->string ); + l = list_new( l, newstr( buf->value ) ); + } + } + string_free( out1 ); + + /* Toss used empty. */ + if ( evalue ) + list_free( evalue ); + + string_free( variable ); + } + + /* variables & remainder were gifts from var_expand and must be freed. */ + if ( variables ) list_free( variables ); + if ( remainder ) list_free( remainder ); + + if ( DEBUG_VAREXP ) + { + printf( "expanded to " ); + list_print( l ); + printf( "\n" ); + } + + string_free( buf ); + return l; + } +} + + +/* + * var_edit_parse() - parse : modifiers into PATHNAME structure + * + * The : modifiers in a $(varname:modifier) currently support replacing or + * omitting elements of a filename, and so they are parsed into a PATHNAME + * structure (which contains pointers into the original string). + * + * Modifiers of the form "X=value" replace the component X with the given value. + * Modifiers without the "=value" cause everything but the component X to be + * omitted. X is one of: + * + * G <grist> + * D directory name + * B base name + * S .suffix + * M (member) + * R root directory - prepended to whole path + * + * This routine sets: + * + * f->f_xxx.ptr = 0 + * f->f_xxx.len = 0 + * -> leave the original component xxx + * + * f->f_xxx.ptr = string + * f->f_xxx.len = strlen( string ) + * -> replace component xxx with string + * + * f->f_xxx.ptr = "" + * f->f_xxx.len = 0 + * -> omit component xxx + * + * var_edit_file() below and path_build() obligingly follow this convention. + */ + +static void var_edit_parse( char * mods, VAR_EDITS * edits ) +{ + int havezeroed = 0; + memset( (char *)edits, 0, sizeof( *edits ) ); + + while ( *mods ) + { + char * p; + PATHPART * fp; + + switch ( *mods++ ) + { + case 'L': edits->downshift = 1; continue; + case 'U': edits->upshift = 1; continue; + case 'P': edits->parent = edits->filemods = 1; continue; + case 'E': fp = &edits->empty; goto strval; + case 'J': fp = &edits->join; goto strval; + case 'G': fp = &edits->f.f_grist; goto fileval; + case 'R': fp = &edits->f.f_root; goto fileval; + case 'D': fp = &edits->f.f_dir; goto fileval; + case 'B': fp = &edits->f.f_base; goto fileval; + case 'S': fp = &edits->f.f_suffix; goto fileval; + case 'M': fp = &edits->f.f_member; goto fileval; + case 'T': edits->to_slashes = 1; continue; + case 'W': edits->to_windows = 1; continue; + default: + return; /* Should complain, but so what... */ + } + + fileval: + /* Handle :CHARS, where each char (without a following =) selects a + * particular file path element. On the first such char, we deselect all + * others (by setting ptr = "", len = 0) and for each char we select + * that element (by setting ptr = 0). + */ + edits->filemods = 1; + + if ( *mods != '=' ) + { + if ( !havezeroed++ ) + { + int i; + for ( i = 0; i < 6; ++i ) + { + edits->f.part[ i ].len = 0; + edits->f.part[ i ].ptr = ""; + } + } + + fp->ptr = 0; + continue; + } + + strval: + /* Handle :X=value, or :X */ + if ( *mods != '=' ) + { + fp->ptr = ""; + fp->len = 0; + } + else if ( ( p = strchr( mods, MAGIC_COLON ) ) ) + { + *p = 0; + fp->ptr = ++mods; + fp->len = p - mods; + mods = p + 1; + } + else + { + fp->ptr = ++mods; + fp->len = strlen( mods ); + mods += fp->len; + } + } +} + + +/* + * var_edit_file() - copy input target name to output, modifying filename. + */ + +static void var_edit_file( char * in, string * out, VAR_EDITS * edits ) +{ + PATHNAME pathname; + + /* Parse apart original filename, putting parts into "pathname". */ + path_parse( in, &pathname ); + + /* Replace any pathname with edits->f */ + if ( edits->f.f_grist .ptr ) pathname.f_grist = edits->f.f_grist; + if ( edits->f.f_root .ptr ) pathname.f_root = edits->f.f_root; + if ( edits->f.f_dir .ptr ) pathname.f_dir = edits->f.f_dir; + if ( edits->f.f_base .ptr ) pathname.f_base = edits->f.f_base; + if ( edits->f.f_suffix.ptr ) pathname.f_suffix = edits->f.f_suffix; + if ( edits->f.f_member.ptr ) pathname.f_member = edits->f.f_member; + + /* If requested, modify pathname to point to parent. */ + if ( edits->parent ) + path_parent( &pathname ); + + /* Put filename back together. */ + path_build( &pathname, out, 0 ); +} + + +/* + * var_edit_shift() - do upshift/downshift mods. + */ + +static void var_edit_shift( string * out, VAR_EDITS * edits ) +{ + /* Handle upshifting, downshifting and slash translation now. */ + char * p; + for ( p = out->value; *p; ++p) + { + if ( edits->upshift ) + *p = toupper( *p ); + else if ( edits->downshift ) + *p = tolower( *p ); + if ( edits->to_slashes && ( *p == '\\' ) ) + *p = '/'; +# ifdef OS_CYGWIN + if ( edits->to_windows ) + { + char result[ MAX_PATH + 1 ]; + cygwin_conv_to_win32_path( out->value, result ); + assert( strlen( result ) <= MAX_PATH ); + string_free( out ); + string_copy( out, result ); + } +# endif + } + out->size = p - out->value; +} + + +#ifndef NDEBUG +void var_expand_unit_test() +{ + LOL lol[ 1 ]; + LIST * l; + LIST * l2; + LIST * expected = list_new( list_new( L0, newstr( "axb" ) ), newstr( "ayb" ) ); + LIST * e2; + char axyb[] = "a$(xy)b"; + char azb[] = "a$($(z))b"; + char path[] = "$(p:W)"; + +# ifdef OS_CYGWIN + char cygpath[ 256 ]; + cygwin_conv_to_posix_path( "c:\\foo\\bar", cygpath ); +# else + char cygpath[] = "/cygdrive/c/foo/bar"; +# endif + + lol_init(lol); + var_set( "xy", list_new( list_new( L0, newstr( "x" ) ), newstr( "y" ) ), VAR_SET ); + var_set( "z", list_new( L0, newstr( "xy" ) ), VAR_SET ); + var_set( "p", list_new( L0, newstr( cygpath ) ), VAR_SET ); + + l = var_expand( 0, axyb, axyb + sizeof( axyb ) - 1, lol, 0 ); + for ( l2 = l, e2 = expected; l2 && e2; l2 = list_next( l2 ), e2 = list_next( e2 ) ) + assert( !strcmp( e2->string, l2->string ) ); + assert( l2 == 0 ); + assert( e2 == 0 ); + list_free( l ); + + l = var_expand( 0, azb, azb + sizeof( azb ) - 1, lol, 0 ); + for ( l2 = l, e2 = expected; l2 && e2; l2 = list_next( l2 ), e2 = list_next( e2 ) ) + assert( !strcmp( e2->string, l2->string ) ); + assert( l2 == 0 ); + assert( e2 == 0 ); + list_free( l ); + + l = var_expand( 0, path, path + sizeof( path ) - 1, lol, 0 ); + assert( l != 0 ); + assert( list_next( l ) == 0 ); +# ifdef OS_CYGWIN + /* On some installations of cygwin the drive letter is expanded to other + * case. This has been reported to be the case if cygwin has been installed + * to C:\ as opposed to C:\cygwin. Since case of the drive letter will not + * matter, we allow for both. + */ + assert( !strcmp( l->string, "c:\\foo\\bar" ) || + !strcmp( l->string, "C:\\foo\\bar" ) ); +# else + assert( !strcmp( l->string, cygpath ) ); +# endif + list_free( l ); + list_free( expected ); + lol_free( lol ); +} +#endif |