From e26434979adc33bd949566ba7bf02dff64e80a3e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- jam-files/boost-build/util/string.jam | 189 ---------------------------------- 1 file changed, 189 deletions(-) delete mode 100644 jam-files/boost-build/util/string.jam (limited to 'jam-files/boost-build/util/string.jam') diff --git a/jam-files/boost-build/util/string.jam b/jam-files/boost-build/util/string.jam deleted file mode 100644 index a39ed119..00000000 --- a/jam-files/boost-build/util/string.jam +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright 2002 Dave Abrahams -# Copyright 2002, 2003 Rene Rivera -# Distributed under the Boost Software License, Version 1.0. -# (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) - -import regex ; - - -# Characters considered whitespace, as a list. -.whitespace-chars = " " " " " -" ; - -# Characters considered whitespace, as a single string. -.whitespace = $(.whitespace-chars:J="") ; - - -# Returns the canonical set of whitespace characters, as a list. -# -rule whitespace-chars ( ) -{ - return $(.whitespace-chars) ; -} - - -# Returns the canonical set of whitespace characters, as a single string. -# -rule whitespace ( ) -{ - return $(.whitespace) ; -} - - -# Splits the given string into a list of strings composed of each character of -# the string in sequence. -# -rule chars ( - string # The string to split. - ) -{ - local result ; - while $(string) - { - local s = [ MATCH (.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.*) : $(string) ] ; - string = $(s[9]) ; - result += $(s[1-8]) ; - } - - # Trim off empty strings. - while $(result[1]) && ! $(result[-1]) - { - result = $(result[1--2]) ; - } - - return $(result) ; -} - - -# Apply a set of standard transformations to string to produce an abbreviation -# no more than 5 characters long. -# -rule abbreviate ( string ) -{ - local r = $(.abbreviated-$(string)) ; - if $(r) - { - return $(r) ; - } - # Anything less than 4 characters gets no abbreviation. - else if ! [ MATCH (....) : $(string) ] - { - .abbreviated-$(string) = $(string) ; - return $(string) ; - } - else - { - # Separate the initial letter in case it's a vowel. - local s1 = [ MATCH ^(.)(.*) : $(string) ] ; - - # Drop trailing "ing". - local s2 = [ MATCH ^(.*)ing$ : $(s1[2]) ] ; - s2 ?= $(s1[2]) ; - - # Reduce all doubled characters to one. - local last = "" ; - for local c in [ chars $(s2) ] - { - if $(c) != $(last) - { - r += $(c) ; - last = $(c) ; - } - } - s2 = $(r:J="") ; - - # Chop all vowels out of the remainder. - s2 = [ regex.replace $(s2) [AEIOUaeiou] "" ] ; - - # Shorten remaining consonants to 4 characters. - s2 = [ MATCH ^(.?.?.?.?) : $(s2) ] ; - - # Glue the initial character back on to the front. - s2 = $(s1[1])$(s2) ; - - .abbreviated-$(string) = $(s2) ; - return $(s2) ; - } -} - - -# Concatenates the given strings, inserting the given separator between each -# string. -# -rule join ( - strings * # The strings to join. - : separator ? # The optional separator. - ) -{ - separator ?= "" ; - return $(strings:J=$(separator)) ; -} - - -# Split a string into whitespace separated words. -# -rule words ( - string # The string to split. - : whitespace * # Optional, characters to consider as whitespace. - ) -{ - whitespace = $(whitespace:J="") ; - whitespace ?= $(.whitespace) ; - local w = ; - while $(string) - { - string = [ MATCH "^[$(whitespace)]*([^$(whitespace)]*)(.*)" : $(string) ] ; - if $(string[1]) && $(string[1]) != "" - { - w += $(string[1]) ; - } - string = $(string[2]) ; - } - return $(w) ; -} - - -# Check that the given string is composed entirely of whitespace. -# -rule is-whitespace ( - string ? # The string to test. - ) -{ - if ! $(string) { return true ; } - else if $(string) = "" { return true ; } - else if [ MATCH "^([$(.whitespace)]+)$" : $(string) ] { return true ; } - else { return ; } -} - -rule __test__ ( ) -{ - import assert ; - assert.result a b c : chars abc ; - - assert.result rntm : abbreviate runtime ; - assert.result ovrld : abbreviate overload ; - assert.result dbg : abbreviate debugging ; - assert.result async : abbreviate asynchronous ; - assert.result pop : abbreviate pop ; - assert.result aaa : abbreviate aaa ; - assert.result qck : abbreviate quack ; - assert.result sttc : abbreviate static ; - - # Check boundary cases. - assert.result a : chars a ; - assert.result : chars "" ; - assert.result a b c d e f g h : chars abcdefgh ; - assert.result a b c d e f g h i : chars abcdefghi ; - assert.result a b c d e f g h i j : chars abcdefghij ; - assert.result a b c d e f g h i j k : chars abcdefghijk ; - - assert.result a//b/c/d : join a "" b c d : / ; - assert.result abcd : join a "" b c d ; - - assert.result a b c : words "a b c" ; - - assert.true is-whitespace " " ; - assert.false is-whitespace " a b c " ; - assert.true is-whitespace "" ; - assert.true is-whitespace ; -} -- cgit v1.2.3