From 1a43362c3aa079688415ec89d67ee0f41210f9dd Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 17 Nov 2012 17:21:30 -0500 Subject: make meteor jar configurable at build time --- configure.ac | 23 ++++++++++++++++++++--- mteval/Makefile.am | 14 +++++++++++++- mteval/ns.cc | 17 +++++++++-------- 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/configure.ac b/configure.ac index cb132d66..233009ca 100644 --- a/configure.ac +++ b/configure.ac @@ -13,6 +13,7 @@ AC_LANG_CPLUSPLUS BOOST_REQUIRE([1.44]) BOOST_PROGRAM_OPTIONS BOOST_SYSTEM +BOOST_SERIALIZATION BOOST_TEST AM_PATH_PYTHON AC_CHECK_HEADER(dlfcn.h,AC_DEFINE(HAVE_DLFCN_H)) @@ -26,10 +27,24 @@ AM_CONDITIONAL([MPI], [test "x$mpi" = xyes]) if test "x$mpi" = xyes then - BOOST_SERIALIZATION AC_DEFINE([HAVE_MPI], [1], [flag for MPI]) - # TODO BOOST_MPI needs to be implemented - LIBS="$LIBS -lboost_mpi $BOOST_SERIALIZATION_LIBS" + LIBS="$LIBS -lboost_mpi" +fi + +AM_CONDITIONAL([HAVE_METEOR], false) +AC_ARG_WITH(meteor, + [AC_HELP_STRING([--with-meteor=PATH], [(optional) path to METEOR jar])], + [with_meteor=$withval], + [with_meteor=no] + ) + +if test "x$with_meteor" != 'xno' +then + AC_CHECK_FILE([$with_meteor], + [AC_DEFINE([HAVE_METEOR], [1], [flag for METEOR jar library])], + [AC_MSG_ERROR([Cannot find METEOR jar!])]) + AC_SUBST(METEOR_JAR,"${with_meteor}") + AM_CONDITIONAL([HAVE_METEOR], true) fi AM_CONDITIONAL([HAVE_CMPH], false) @@ -129,6 +144,8 @@ AC_CONFIG_FILES([mira/Makefile]) AC_CONFIG_FILES([dtrain/Makefile]) AC_CONFIG_FILES([example_extff/Makefile]) +AC_CONFIG_FILES([mteval/meteor_jar.cc]) + AC_CONFIG_FILES([python/setup.py]) AC_OUTPUT diff --git a/mteval/Makefile.am b/mteval/Makefile.am index 22550c99..5e9bba91 100644 --- a/mteval/Makefile.am +++ b/mteval/Makefile.am @@ -8,7 +8,19 @@ TESTS = scorer_test noinst_LIBRARIES = libmteval.a -libmteval_a_SOURCES = ter.cc comb_scorer.cc aer_scorer.cc scorer.cc external_scorer.cc ns.cc ns_ter.cc ns_ext.cc ns_comb.cc ns_docscorer.cc ns_cer.cc +libmteval_a_SOURCES = \ + aer_scorer.cc \ + comb_scorer.cc \ + external_scorer.cc \ + meteor_jar.cc \ + ns.cc \ + ns_cer.cc \ + ns_comb.cc \ + ns_docscorer.cc \ + ns_ext.cc \ + ns_ter.cc \ + scorer.cc \ + ter.cc fast_score_SOURCES = fast_score.cc fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz diff --git a/mteval/ns.cc b/mteval/ns.cc index f3a82ce0..7d73061c 100644 --- a/mteval/ns.cc +++ b/mteval/ns.cc @@ -19,6 +19,8 @@ using namespace std; map EvaluationMetric::instances_; +extern const char* meteor_jar_path; + SegmentEvaluator::~SegmentEvaluator() {} EvaluationMetric::~EvaluationMetric() {} @@ -235,13 +237,7 @@ struct BleuMetric : public EvaluationMetric { EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) { static bool is_first = true; - static string meteor_jar_path = "/cab0/tools/meteor-1.3/meteor-1.3.jar"; if (is_first) { - const char* ppath = getenv("METEOR_JAR"); - if (ppath) { - cerr << "METEOR_JAR environment variable set to " << ppath << endl; - meteor_jar_path = ppath; - } instances_["NULL"] = NULL; is_first = false; } @@ -259,11 +255,16 @@ EvaluationMetric* EvaluationMetric::Instance(const string& imetric_id) { } else if (metric_id == "TER") { m = new TERMetric; } else if (metric_id == "METEOR") { +#if HAVE_METEOR if (!FileExists(meteor_jar_path)) { - cerr << meteor_jar_path << " not found. Set METEOR_JAR environment variable.\n"; + cerr << meteor_jar_path << " not found!\n"; abort(); } - m = new ExternalMetric("METEOR", "java -Xmx1536m -jar " + meteor_jar_path + " - - -mira -lower -t tune -l en"); + m = new ExternalMetric("METEOR", string("java -Xmx1536m -jar ") + meteor_jar_path + " - - -mira -lower -t tune -l en"); +#else + cerr << "cdec was not built with the --with-meteor option." << endl; + abort(); +#endif } else if (metric_id.find("COMB:") == 0) { m = new CombinationMetric(metric_id); } else if (metric_id == "CER") { -- cgit v1.2.3 From 2de96e4b06fd0ff6131f3ec9630e9df330cf9b14 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 17 Nov 2012 17:46:06 -0500 Subject: remove some old stuff --- configure.ac | 45 --------------------------------------------- decoder/Makefile.am | 4 ---- 2 files changed, 49 deletions(-) diff --git a/configure.ac b/configure.ac index 233009ca..5f18beaa 100644 --- a/configure.ac +++ b/configure.ac @@ -68,25 +68,6 @@ then AM_CONDITIONAL([HAVE_CMPH], true) fi -AM_CONDITIONAL([HAVE_EIGEN], false) -AC_ARG_WITH(eigen, - [AC_HELP_STRING([--with-eigen=PATH], [(optional) path to Eigen linear algebra library])], - [with_eigen=$withval], - [with_eigen=no] - ) - -if test "x$with_eigen" != 'xno' -then - SAVE_CPPFLAGS="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I${with_eigen}" - - AC_CHECK_HEADER(Eigen/Dense, - [AC_DEFINE([HAVE_EIGEN], [1], [flag for Eigen linear algebra library])], - [AC_MSG_ERROR([Cannot find Eigen!])]) - - AM_CONDITIONAL([HAVE_EIGEN], true) -fi - #BOOST_THREADS CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" LDFLAGS="$LDFLAGS $BOOST_PROGRAM_OPTIONS_LDFLAGS $BOOST_SYSTEM_LDFLAGS" @@ -99,32 +80,6 @@ AC_CHECK_HEADER(google/dense_hash_map, AC_PROG_INSTALL -AM_CONDITIONAL([GLC], false) -AC_ARG_WITH(glc, - [AC_HELP_STRING([--with-glc=PATH], [(optional) path to Global Lexical Coherence package (Context CRF)])], - [with_glc=$withval], - [with_glc=no] - ) -FF_GLC="" - -if test "x$with_glc" != 'xno' -then - SAVE_CPPFLAGS="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I${with_glc} -I${with_glc}/cdec" - - #AC_CHECK_HEADER(ff_glc.h, - # [AC_DEFINE([HAVE_GLC], [], [flag for GLC])], - # [AC_MSG_ERROR([Cannot find GLC!])]) - - AC_DEFINE([HAVE_GLC], [], [flag for GLC]) - #LIB_RANDLM="-lrandlm" - #LDFLAGS="$LDFLAGS -L${with_glc}/lib" - #LIBS="$LIBS $LIB_RANDLM" - #FMTLIBS="$FMTLIBS libglc.a" - AC_SUBST(FF_GLC,"${with_glc}/cdec/ff_glc.cc") - AM_CONDITIONAL([GLC], true) -fi - CPPFLAGS="-DPIC -fPIC $CPPFLAGS -DHAVE_CONFIG_H" AC_CONFIG_FILES([Makefile]) diff --git a/decoder/Makefile.am b/decoder/Makefile.am index f8f427d3..6914fa0f 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -83,7 +83,3 @@ libcdec_a_SOURCES = \ json_parse.cc \ grammar.cc -if GLC - # Until we build GLC as a library... - libcdec_a_SOURCES += ff_glc.cc string_util.cc feature-factory.cc -endif -- cgit v1.2.3 From 7640d85b92f61016e0712825920c6a259329d79b Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 18 Nov 2012 11:31:21 -0500 Subject: more consistent naming, interface, fix compile error --- mteval/meteor_jar.cc.in | 3 + pro-train/Makefile.am | 11 - pro-train/README.shared-mem | 9 - pro-train/dist-pro.pl | 671 ------------------------------ pro-train/mr_pro_generate_mapper_input.pl | 18 - pro-train/mr_pro_map.cc | 201 --------- pro-train/mr_pro_reduce.cc | 286 ------------- pro/Makefile.am | 11 + pro/README.shared-mem | 9 + pro/mr_pro_generate_mapper_input.pl | 18 + pro/mr_pro_map.cc | 201 +++++++++ pro/mr_pro_reduce.cc | 286 +++++++++++++ pro/pro.pl | 555 ++++++++++++++++++++++++ 13 files changed, 1083 insertions(+), 1196 deletions(-) create mode 100644 mteval/meteor_jar.cc.in delete mode 100644 pro-train/Makefile.am delete mode 100644 pro-train/README.shared-mem delete mode 100755 pro-train/dist-pro.pl delete mode 100755 pro-train/mr_pro_generate_mapper_input.pl delete mode 100644 pro-train/mr_pro_map.cc delete mode 100644 pro-train/mr_pro_reduce.cc create mode 100644 pro/Makefile.am create mode 100644 pro/README.shared-mem create mode 100755 pro/mr_pro_generate_mapper_input.pl create mode 100644 pro/mr_pro_map.cc create mode 100644 pro/mr_pro_reduce.cc create mode 100755 pro/pro.pl diff --git a/mteval/meteor_jar.cc.in b/mteval/meteor_jar.cc.in new file mode 100644 index 00000000..fe45a72a --- /dev/null +++ b/mteval/meteor_jar.cc.in @@ -0,0 +1,3 @@ + +const char* meteor_jar_path = "@METEOR_JAR@"; + diff --git a/pro-train/Makefile.am b/pro-train/Makefile.am deleted file mode 100644 index 1e9d46b0..00000000 --- a/pro-train/Makefile.am +++ /dev/null @@ -1,11 +0,0 @@ -bin_PROGRAMS = \ - mr_pro_map \ - mr_pro_reduce - -mr_pro_map_SOURCES = mr_pro_map.cc -mr_pro_map_LDADD = $(top_srcdir)/training/libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz - -mr_pro_reduce_SOURCES = mr_pro_reduce.cc -mr_pro_reduce_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a -lz - -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training diff --git a/pro-train/README.shared-mem b/pro-train/README.shared-mem deleted file mode 100644 index 7728efc0..00000000 --- a/pro-train/README.shared-mem +++ /dev/null @@ -1,9 +0,0 @@ -If you want to run dist-vest.pl on a very large shared memory machine, do the -following: - - ./dist-vest.pl --use-make I --decode-nodes J --weights weights.init --source-file=dev.src --ref-files=dev.ref.* cdec.ini - -This will use I jobs for doing the line search and J jobs to run the decoder. Typically, since the -decoder must load grammars, language models, etc., J should be smaller than I, but this will depend -on the system you are running on and the complexity of the models used for decoding. - diff --git a/pro-train/dist-pro.pl b/pro-train/dist-pro.pl deleted file mode 100755 index 31258fa6..00000000 --- a/pro-train/dist-pro.pl +++ /dev/null @@ -1,671 +0,0 @@ -#!/usr/bin/env perl -use strict; -my @ORIG_ARGV=@ARGV; -use Cwd qw(getcwd); -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; } - -# Skip local config (used for distributing jobs) if we're running in local-only mode -use LocalConfig; -use Getopt::Long; -use IPC::Open2; -use POSIX ":sys_wait_h"; -my $QSUB_CMD = qsub_args(mert_memory()); -my $default_jobs = env_default_jobs(); - -my $VEST_DIR="$SCRIPT_DIR/../dpmert"; -require "$VEST_DIR/libcall.pl"; - -# Default settings -my $srcFile; -my $refFiles; -my $bin_dir = $SCRIPT_DIR; -die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; -my $FAST_SCORE="$bin_dir/../mteval/fast_score"; -die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; -my $MAPINPUT = "$bin_dir/mr_pro_generate_mapper_input.pl"; -my $MAPPER = "$bin_dir/mr_pro_map"; -my $REDUCER = "$bin_dir/mr_pro_reduce"; -my $parallelize = "$VEST_DIR/parallelize.pl"; -my $libcall = "$VEST_DIR/libcall.pl"; -my $sentserver = "$VEST_DIR/sentserver"; -my $sentclient = "$VEST_DIR/sentclient"; -my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm"; - -my $SCORER = $FAST_SCORE; -die "Can't find $MAPPER" unless -x $MAPPER; -my $cdec = "$bin_dir/../decoder/cdec"; -die "Can't find decoder in $cdec" unless -x $cdec; -die "Can't find $parallelize" unless -x $parallelize; -die "Can't find $libcall" unless -e $libcall; -my $decoder = $cdec; -my $lines_per_mapper = 30; -my $iteration = 1; -my $best_weights; -my $psi = 1; -my $default_max_iter = 30; -my $max_iterations = $default_max_iter; -my $jobs = $default_jobs; # number of decode nodes -my $pmem = "4g"; -my $disable_clean = 0; -my %seen_weights; -my $help = 0; -my $epsilon = 0.0001; -my $dryrun = 0; -my $last_score = -10000000; -my $metric = "ibm_bleu"; -my $dir; -my $iniFile; -my $weights; -my $use_make = 1; # use make to parallelize -my $useqsub = 0; -my $initial_weights; -my $pass_suffix = ''; -my $cpbin=1; - -# regularization strength -my $tune_regularizer = 0; -my $reg = 500; -my $reg_previous = 5000; - -# Process command-line options -Getopt::Long::Configure("no_auto_abbrev"); -if (GetOptions( - "jobs=i" => \$jobs, - "dont-clean" => \$disable_clean, - "pass-suffix=s" => \$pass_suffix, - "qsub" => \$useqsub, - "dry-run" => \$dryrun, - "epsilon=s" => \$epsilon, - "interpolate-with-weights=f" => \$psi, - "help" => \$help, - "weights=s" => \$initial_weights, - "tune-regularizer" => \$tune_regularizer, - "reg=f" => \$reg, - "reg-previous=f" => \$reg_previous, - "use-make=i" => \$use_make, - "max-iterations=i" => \$max_iterations, - "pmem=s" => \$pmem, - "cpbin!" => \$cpbin, - "ref-files=s" => \$refFiles, - "metric=s" => \$metric, - "source-file=s" => \$srcFile, - "workdir=s" => \$dir, -) == 0 || @ARGV!=1 || $help) { - print_help(); - exit; -} - -die "--tune-regularizer is no longer supported with --reg-previous and --reg. Please tune manually.\n" if $tune_regularizer; - -if ($useqsub) { - $use_make = 0; - die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); -} - -my @missing_args = (); -if (!defined $srcFile) { push @missing_args, "--source-file"; } -if (!defined $refFiles) { push @missing_args, "--ref-files"; } -if (!defined $initial_weights) { push @missing_args, "--weights"; } -die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args); - -if ($metric =~ /^(combi|ter)$/i) { - $lines_per_mapper = 5; -} - -($iniFile) = @ARGV; - - -sub write_config; -sub enseg; -sub print_help; - -my $nodelist; -my $host =check_output("hostname"); chomp $host; -my $bleu; -my $interval_count = 0; -my $logfile; -my $projected_score; - -# used in sorting scores -my $DIR_FLAG = '-r'; -if ($metric =~ /^ter$|^aer$/i) { - $DIR_FLAG = ''; -} - -my $refs_comma_sep = get_comma_sep_refs('r',$refFiles); - -unless ($dir){ - $dir = "protrain"; -} -unless ($dir =~ /^\//){ # convert relative path to absolute path - my $basedir = check_output("pwd"); - chomp $basedir; - $dir = "$basedir/$dir"; -} - - -# Initializations and helper functions -srand; - -my @childpids = (); -my @cleanupcmds = (); - -sub cleanup { - print STDERR "Cleanup...\n"; - for my $pid (@childpids){ unchecked_call("kill $pid"); } - for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); } - exit 1; -}; -# Always call cleanup, no matter how we exit -*CORE::GLOBAL::exit = - sub{ cleanup(); }; -$SIG{INT} = "cleanup"; -$SIG{TERM} = "cleanup"; -$SIG{HUP} = "cleanup"; - -my $decoderBase = check_output("basename $decoder"); chomp $decoderBase; -my $newIniFile = "$dir/$decoderBase.ini"; -my $inputFileName = "$dir/input"; -my $user = $ENV{"USER"}; - - -# process ini file --e $iniFile || die "Error: could not open $iniFile for reading\n"; -open(INI, $iniFile); - -use File::Basename qw(basename); -#pass bindir, refs to vars holding bin -sub modbin { - local $_; - my $bindir=shift; - check_call("mkdir -p $bindir"); - -d $bindir || die "couldn't make bindir $bindir"; - for (@_) { - my $src=$$_; - $$_="$bindir/".basename($src); - check_call("cp -p $src $$_"); - } -} -sub dirsize { - opendir ISEMPTY,$_[0]; - return scalar(readdir(ISEMPTY))-1; -} -my @allweights; -if ($dryrun){ - write_config(*STDERR); - exit 0; -} else { - if (-e $dir && dirsize($dir)>1 && -e "$dir/hgs" ){ # allow preexisting logfile, binaries, but not dist-pro.pl outputs - die "ERROR: working dir $dir already exists\n\n"; - } else { - -e $dir || mkdir $dir; - mkdir "$dir/hgs"; - modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$REDUCER,\$parallelize,\$sentserver,\$sentclient,\$libcall) if $cpbin; - mkdir "$dir/scripts"; - my $cmdfile="$dir/rerun-pro.sh"; - open CMD,'>',$cmdfile; - print CMD "cd ",&getcwd,"\n"; -# print CMD &escaped_cmdline,"\n"; #buggy - last arg is quoted. - my $cline=&cmdline."\n"; - print CMD $cline; - close CMD; - print STDERR $cline; - chmod(0755,$cmdfile); - check_call("cp $initial_weights $dir/weights.0"); - die "Can't find weights.0" unless (-e "$dir/weights.0"); - } - write_config(*STDERR); -} - - -# Generate initial files and values -check_call("cp $iniFile $newIniFile"); -$iniFile = $newIniFile; - -my $newsrc = "$dir/dev.input"; -enseg($srcFile, $newsrc); -$srcFile = $newsrc; -my $devSize = 0; -open F, "<$srcFile" or die "Can't read $srcFile: $!"; -while() { $devSize++; } -close F; - -unless($best_weights){ $best_weights = $weights; } -unless($projected_score){ $projected_score = 0.0; } -$seen_weights{$weights} = 1; - -my $random_seed = int(time / 1000); -my $lastWeightsFile; -my $lastPScore = 0; -# main optimization loop -while (1){ - print STDERR "\n\nITERATION $iteration\n==========\n"; - - if ($iteration > $max_iterations){ - print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n"; - last; - } - # iteration-specific files - my $runFile="$dir/run.raw.$iteration"; - my $onebestFile="$dir/1best.$iteration"; - my $logdir="$dir/logs.$iteration"; - my $decoderLog="$logdir/decoder.sentserver.log.$iteration"; - my $scorerLog="$logdir/scorer.log.$iteration"; - check_call("mkdir -p $logdir"); - - - #decode - print STDERR "RUNNING DECODER AT "; - print STDERR unchecked_output("date"); - my $im1 = $iteration - 1; - my $weightsFile="$dir/weights.$im1"; - push @allweights, "-w $dir/weights.$im1"; - `rm -f $dir/hgs/*.gz`; - my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; - my $pcmd; - if ($use_make) { - $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --"; - } else { - $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --"; - } - my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; - print STDERR "COMMAND:\n$cmd\n"; - check_bash_call($cmd); - my $num_hgs; - my $num_topbest; - my $retries = 0; - while($retries < 5) { - $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); - $num_topbest = check_output("wc -l < $runFile"); - print STDERR "NUMBER OF HGs: $num_hgs\n"; - print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n"; - if($devSize == $num_hgs && $devSize == $num_topbest) { - last; - } else { - print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n"; - sleep(3); - } - $retries++; - } - die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); - my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric"); - chomp $dec_score; - print STDERR "DECODER SCORE: $dec_score\n"; - - # save space - check_call("gzip -f $runFile"); - check_call("gzip -f $decoderLog"); - - # run optimizer - print STDERR "RUNNING OPTIMIZER AT "; - print STDERR unchecked_output("date"); - print STDERR " - GENERATE TRAINING EXEMPLARS\n"; - my $mergeLog="$logdir/prune-merge.log.$iteration"; - - my $score = 0; - my $icc = 0; - my $inweights="$dir/weights.$im1"; - $cmd="$MAPINPUT $dir/hgs > $dir/agenda.$im1"; - print STDERR "COMMAND:\n$cmd\n"; - check_call($cmd); - check_call("mkdir -p $dir/splag.$im1"); - $cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1 $dir/splag.$im1/mapinput."; - print STDERR "COMMAND:\n$cmd\n"; - check_call($cmd); - opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!"; - my @shards = grep { /^mapinput\./ } readdir(DIR); - closedir DIR; - die "No shards!" unless scalar @shards > 0; - my $joblist = ""; - my $nmappers = 0; - @cleanupcmds = (); - my %o2i = (); - my $first_shard = 1; - my $mkfile; # only used with makefiles - my $mkfilename; - if ($use_make) { - $mkfilename = "$dir/splag.$im1/domap.mk"; - open $mkfile, ">$mkfilename" or die "Couldn't write $mkfilename: $!"; - print $mkfile "all: $dir/splag.$im1/map.done\n\n"; - } - my @mkouts = (); # only used with makefiles - my @mapoutputs = (); - for my $shard (@shards) { - my $mapoutput = $shard; - my $client_name = $shard; - $client_name =~ s/mapinput.//; - $client_name = "pro.$client_name"; - $mapoutput =~ s/mapinput/mapoutput/; - push @mapoutputs, "$dir/splag.$im1/$mapoutput"; - $o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard"; - my $script = "$MAPPER -s $srcFile -m $metric $refs_comma_sep -w $inweights -K $dir/kbest < $dir/splag.$im1/$shard > $dir/splag.$im1/$mapoutput"; - if ($use_make) { - my $script_file = "$dir/scripts/map.$shard"; - open F, ">$script_file" or die "Can't write $script_file: $!"; - print F "#!/bin/bash\n"; - print F "$script\n"; - close F; - my $output = "$dir/splag.$im1/$mapoutput"; - push @mkouts, $output; - chmod(0755, $script_file) or die "Can't chmod $script_file: $!"; - if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } - print $mkfile "$output: $dir/splag.$im1/$shard\n\t$script_file\n\n"; - } else { - my $script_file = "$dir/scripts/map.$shard"; - open F, ">$script_file" or die "Can't write $script_file: $!"; - print F "$script\n"; - close F; - if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } - - $nmappers++; - my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; - my $jobid = check_output("$qcmd"); - chomp $jobid; - $jobid =~ s/^(\d+)(.*?)$/\1/g; - $jobid =~ s/^Your job (\d+) .*$/\1/; - push(@cleanupcmds, "qdel $jobid 2> /dev/null"); - print STDERR " $jobid"; - if ($joblist == "") { $joblist = $jobid; } - else {$joblist = $joblist . "\|" . $jobid; } - } - } - my @dev_outs = (); - my @devtest_outs = (); - if ($tune_regularizer) { - for (my $i = 0; $i < scalar @mapoutputs; $i++) { - if ($i % 3 == 1) { - push @devtest_outs, $mapoutputs[$i]; - } else { - push @dev_outs, $mapoutputs[$i]; - } - } - if (scalar @devtest_outs == 0) { - die "Not enough training instances for regularization tuning! Rerun without --tune-regularizer\n"; - } - } else { - @dev_outs = @mapoutputs; - } - if ($use_make) { - print $mkfile "$dir/splag.$im1/map.done: @mkouts\n\ttouch $dir/splag.$im1/map.done\n\n"; - close $mkfile; - my $mcmd = "make -j $jobs -f $mkfilename"; - print STDERR "\nExecuting: $mcmd\n"; - check_call($mcmd); - } else { - print STDERR "\nLaunched $nmappers mappers.\n"; - sleep 8; - print STDERR "Waiting for mappers to complete...\n"; - while ($nmappers > 0) { - sleep 5; - my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '"))); - $nmappers = scalar @livejobs; - } - print STDERR "All mappers complete.\n"; - } - my $tol = 0; - my $til = 0; - my $dev_test_file = "$dir/splag.$im1/devtest.gz"; - if ($tune_regularizer) { - my $cmd = "cat @devtest_outs | gzip > $dev_test_file"; - check_bash_call($cmd); - die "Can't find file $dev_test_file" unless -f $dev_test_file; - } - #print STDERR "MO: @mapoutputs\n"; - for my $mo (@mapoutputs) { - #my $olines = get_lines($mo); - #my $ilines = get_lines($o2i{$mo}); - #die "$mo: no training instances generated!" if $olines == 0; - } - print STDERR "\nRUNNING CLASSIFIER (REDUCER)\n"; - print STDERR unchecked_output("date"); - $cmd="cat @dev_outs | $REDUCER -w $dir/weights.$im1 -C $reg -y $reg_previous --interpolate_with_weights $psi"; - if ($tune_regularizer) { - $cmd .= " -T -t $dev_test_file"; - } - $cmd .= " > $dir/weights.$iteration"; - print STDERR "COMMAND:\n$cmd\n"; - check_bash_call($cmd); - $lastWeightsFile = "$dir/weights.$iteration"; - if ($tune_regularizer) { - open W, "<$lastWeightsFile" or die "Can't read $lastWeightsFile: $!"; - my $line = ; - close W; - my ($sharp, $label, $nreg) = split /\s|=/, $line; - print STDERR "REGULARIZATION STRENGTH ($label) IS $nreg\n"; - $reg = $nreg; - # only tune regularizer on first iteration? - $tune_regularizer = 0; - } - $lastPScore = $score; - $iteration++; - print STDERR "\n==========\n"; -} - -print STDERR "\nFINAL WEIGHTS: $lastWeightsFile\n(Use -w with the decoder)\n\n"; - -print STDOUT "$lastWeightsFile\n"; - -exit 0; - -sub get_lines { - my $fn = shift @_; - open FL, "<$fn" or die "Couldn't read $fn: $!"; - my $lc = 0; - while() { $lc++; } - return $lc; -} - -sub get_comma_sep_refs { - my ($r,$p) = @_; - my $o = check_output("echo $p"); - chomp $o; - my @files = split /\s+/, $o; - return "-$r " . join(" -$r ", @files); -} - -sub read_weights_file { - my ($file) = @_; - open F, "<$file" or die "Couldn't read $file: $!"; - my @r = (); - my $pm = -1; - while() { - next if /^#/; - next if /^\s*$/; - chomp; - if (/^(.+)\s+(.+)$/) { - my $m = $1; - my $w = $2; - die "Weights out of order: $m <= $pm" unless $m > $pm; - push @r, $w; - } else { - warn "Unexpected feature name in weight file: $_"; - } - } - close F; - return join ' ', @r; -} - -# subs -sub write_config { - my $fh = shift; - my $cleanup = "yes"; - if ($disable_clean) {$cleanup = "no";} - - print $fh "\n"; - print $fh "DECODER: $decoder\n"; - print $fh "INI FILE: $iniFile\n"; - print $fh "WORKING DIR: $dir\n"; - print $fh "SOURCE (DEV): $srcFile\n"; - print $fh "REFS (DEV): $refFiles\n"; - print $fh "EVAL METRIC: $metric\n"; - print $fh "MAX ITERATIONS: $max_iterations\n"; - print $fh "JOBS: $jobs\n"; - print $fh "HEAD NODE: $host\n"; - print $fh "PMEM (DECODING): $pmem\n"; - print $fh "CLEANUP: $cleanup\n"; -} - -sub update_weights_file { - my ($neww, $rfn, $rpts) = @_; - my @feats = @$rfn; - my @pts = @$rpts; - my $num_feats = scalar @feats; - my $num_pts = scalar @pts; - die "$num_feats (num_feats) != $num_pts (num_pts)" unless $num_feats == $num_pts; - open G, ">$neww" or die; - for (my $i = 0; $i < $num_feats; $i++) { - my $f = $feats[$i]; - my $lambda = $pts[$i]; - print G "$f $lambda\n"; - } - close G; -} - -sub enseg { - my $src = shift; - my $newsrc = shift; - open(SRC, $src); - open(NEWSRC, ">$newsrc"); - my $i=0; - while (my $line=){ - chomp $line; - if ($line =~ /^\s* tags, you must include a zero-based id attribute"; - } - } else { - print NEWSRC "$line\n"; - } - $i++; - } - close SRC; - close NEWSRC; - die "Empty dev set!" if ($i == 0); -} - -sub print_help { - - my $executable = check_output("basename $0"); chomp $executable; - print << "Help"; - -Usage: $executable [options] - - $executable [options] - Runs a complete PRO optimization using the ini file specified. - -Required: - - --ref-files - Dev set ref files. This option takes only a single string argument. - To use multiple files (including file globbing), this argument should - be quoted. - - --source-file - Dev set source file. - - --weights - Initial weights file (use empty file to start from 0) - -General options: - - --help - Print this message and exit. - - --max-iterations - Maximum number of iterations to run. If not specified, defaults - to $default_max_iter. - - --metric - Metric to optimize. - Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi - - --pass-suffix - If the decoder is doing multi-pass decoding, the pass suffix "2", - "3", etc., is used to control what iteration of weights is set. - - --workdir - Directory for intermediate and output files. If not specified, the - name is derived from the ini filename. Assuming that the ini - filename begins with the decoder name and ends with ini, the default - name of the working directory is inferred from the middle part of - the filename. E.g. an ini file named decoder.foo.ini would have - a default working directory name foo. - -Regularization options: - - --reg - l2 regularization strength [default=500]. The greater this value, - the closer to zero the weights will be. - - --reg-previous - l2 penalty for moving away from the weights from the previous - iteration. [default=5000]. The greater this value, the closer - to the previous iteration's weights the next iteration's weights - will be. - -Job control options: - - --jobs - Number of decoder processes to run in parallel. [default=$default_jobs] - - --qsub - Use qsub to run jobs in parallel (qsub must be configured in - environment/LocalEnvironment.pm) - - --pmem - Amount of physical memory requested for parallel decoding jobs - (used with qsub requests only) - -Deprecated options: - - --interpolate-with-weights - [deprecated] At each iteration the resulting weights are - interpolated with the weights from the previous iteration, with - this factor. [default=1.0, i.e., no effect] - -Help -} - -sub convert { - my ($str) = @_; - my @ps = split /;/, $str; - my %dict = (); - for my $p (@ps) { - my ($k, $v) = split /=/, $p; - $dict{$k} = $v; - } - return %dict; -} - - -sub cmdline { - return join ' ',($0,@ORIG_ARGV); -} - -#buggy: last arg gets quoted sometimes? -my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; -my $shell_escape_in_quote=qr{[\\"\$`!]}; - -sub escape_shell { - my ($arg)=@_; - return undef unless defined $arg; - if ($arg =~ /$is_shell_special/) { - $arg =~ s/($shell_escape_in_quote)/\\$1/g; - return "\"$arg\""; - } - return $arg; -} - -sub escaped_shell_args { - return map {local $_=$_;chomp;escape_shell($_)} @_; -} - -sub escaped_shell_args_str { - return join ' ',&escaped_shell_args(@_); -} - -sub escaped_cmdline { - return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); -} diff --git a/pro-train/mr_pro_generate_mapper_input.pl b/pro-train/mr_pro_generate_mapper_input.pl deleted file mode 100755 index b30fc4fd..00000000 --- a/pro-train/mr_pro_generate_mapper_input.pl +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -die "Usage: $0 HG_DIR\n" unless scalar @ARGV == 1; -my $d = shift @ARGV; -die "Can't find directory $d" unless -d $d; - -opendir(DIR, $d) or die "Can't read $d: $!"; -my @hgs = grep { /\.gz$/ } readdir(DIR); -closedir DIR; - -for my $hg (@hgs) { - my $file = $hg; - my $id = $hg; - $id =~ s/(\.json)?\.gz//; - print "$d/$file $id\n"; -} - diff --git a/pro-train/mr_pro_map.cc b/pro-train/mr_pro_map.cc deleted file mode 100644 index eef40b8a..00000000 --- a/pro-train/mr_pro_map.cc +++ /dev/null @@ -1,201 +0,0 @@ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "candidate_set.h" -#include "sampler.h" -#include "filelib.h" -#include "stringlib.h" -#include "weights.h" -#include "inside_outside.h" -#include "hg_io.h" -#include "ns.h" -#include "ns_docscorer.h" - -// This is Figure 4 (Algorithm Sampler) from Hopkins&May (2011) - -using namespace std; -namespace po = boost::program_options; - -boost::shared_ptr rng; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("reference,r",po::value >(), "[REQD] Reference translation (tokenized text)") - ("weights,w",po::value(), "[REQD] Weights files from current iterations") - ("kbest_repository,K",po::value()->default_value("./kbest"),"K-best list repository (directory)") - ("input,i",po::value()->default_value("-"), "Input file to map (- is STDIN)") - ("source,s",po::value()->default_value(""), "Source file (ignored, except for AER)") - ("evaluation_metric,m",po::value()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)") - ("kbest_size,k",po::value()->default_value(1500u), "Top k-hypotheses to extract") - ("candidate_pairs,G", po::value()->default_value(5000u), "Number of pairs to sample per hypothesis (Gamma)") - ("best_pairs,X", po::value()->default_value(50u), "Number of pairs, ranked by magnitude of objective delta, to retain (Xi)") - ("random_seed,S", po::value(), "Random seed (if not specified, /dev/random will be used)") - ("help,h", "Help"); - po::options_description dcmdline_options; - dcmdline_options.add(opts); - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - bool flag = false; - if (!conf->count("reference")) { - cerr << "Please specify one or more references using -r \n"; - flag = true; - } - if (!conf->count("weights")) { - cerr << "Please specify weights using -w \n"; - flag = true; - } - if (flag || conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -struct ThresholdAlpha { - explicit ThresholdAlpha(double t = 0.05) : threshold(t) {} - double operator()(double mag) const { - if (mag < threshold) return 0.0; else return 1.0; - } - const double threshold; -}; - -struct TrainingInstance { - TrainingInstance(const SparseVector& feats, bool positive, float diff) : x(feats), y(positive), gdiff(diff) {} - SparseVector x; -#undef DEBUGGING_PRO -#ifdef DEBUGGING_PRO - vector a; - vector b; -#endif - bool y; - float gdiff; -}; -#ifdef DEBUGGING_PRO -ostream& operator<<(ostream& os, const TrainingInstance& d) { - return os << d.gdiff << " y=" << d.y << "\tA:" << TD::GetString(d.a) << "\n\tB: " << TD::GetString(d.b) << "\n\tX: " << d.x; -} -#endif - -struct DiffOrder { - bool operator()(const TrainingInstance& a, const TrainingInstance& b) const { - return a.gdiff > b.gdiff; - } -}; - -void Sample(const unsigned gamma, - const unsigned xi, - const training::CandidateSet& J_i, - const EvaluationMetric* metric, - vector* pv) { - const bool invert_score = metric->IsErrorMetric(); - vector v1, v2; - float avg_diff = 0; - for (unsigned i = 0; i < gamma; ++i) { - const size_t a = rng->inclusive(0, J_i.size() - 1)(); - const size_t b = rng->inclusive(0, J_i.size() - 1)(); - if (a == b) continue; - float ga = metric->ComputeScore(J_i[a].eval_feats); - float gb = metric->ComputeScore(J_i[b].eval_feats); - bool positive = gb < ga; - if (invert_score) positive = !positive; - const float gdiff = fabs(ga - gb); - if (!gdiff) continue; - avg_diff += gdiff; - SparseVector xdiff = (J_i[a].fmap - J_i[b].fmap).erase_zeros(); - if (xdiff.empty()) { - cerr << "Empty diff:\n " << TD::GetString(J_i[a].ewords) << endl << "x=" << J_i[a].fmap << endl; - cerr << " " << TD::GetString(J_i[b].ewords) << endl << "x=" << J_i[b].fmap << endl; - continue; - } - v1.push_back(TrainingInstance(xdiff, positive, gdiff)); -#ifdef DEBUGGING_PRO - v1.back().a = J_i[a].hyp; - v1.back().b = J_i[b].hyp; - cerr << "N: " << v1.back() << endl; -#endif - } - avg_diff /= v1.size(); - - for (unsigned i = 0; i < v1.size(); ++i) { - double p = 1.0 / (1.0 + exp(-avg_diff - v1[i].gdiff)); - // cerr << "avg_diff=" << avg_diff << " gdiff=" << v1[i].gdiff << " p=" << p << endl; - if (rng->next() < p) v2.push_back(v1[i]); - } - vector::iterator mid = v2.begin() + xi; - if (xi > v2.size()) mid = v2.end(); - partial_sort(v2.begin(), mid, v2.end(), DiffOrder()); - copy(v2.begin(), mid, back_inserter(*pv)); -#ifdef DEBUGGING_PRO - if (v2.size() >= 5) { - for (int i =0; i < (mid - v2.begin()); ++i) { - cerr << v2[i] << endl; - } - cerr << pv->back() << endl; - } -#endif -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - if (conf.count("random_seed")) - rng.reset(new MT19937(conf["random_seed"].as())); - else - rng.reset(new MT19937); - const string evaluation_metric = conf["evaluation_metric"].as(); - - EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); - DocumentScorer ds(metric, conf["reference"].as >()); - cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl; - - Hypergraph hg; - string last_file; - ReadFile in_read(conf["input"].as()); - istream &in=*in_read.stream(); - const unsigned kbest_size = conf["kbest_size"].as(); - const unsigned gamma = conf["candidate_pairs"].as(); - const unsigned xi = conf["best_pairs"].as(); - string weightsf = conf["weights"].as(); - vector weights; - Weights::InitFromFile(weightsf, &weights); - string kbest_repo = conf["kbest_repository"].as(); - MkDirP(kbest_repo); - while(in) { - vector v; - string line; - getline(in, line); - if (line.empty()) continue; - istringstream is(line); - int sent_id; - string file; - // path-to-file (JSON) sent_id - is >> file >> sent_id; - ReadFile rf(file); - ostringstream os; - training::CandidateSet J_i; - os << kbest_repo << "/kbest." << sent_id << ".txt.gz"; - const string kbest_file = os.str(); - if (FileExists(kbest_file)) - J_i.ReadFromFile(kbest_file); - HypergraphIO::ReadFromJSON(rf.stream(), &hg); - hg.Reweight(weights); - J_i.AddKBestCandidates(hg, kbest_size, ds[sent_id]); - J_i.WriteToFile(kbest_file); - - Sample(gamma, xi, J_i, metric, &v); - for (unsigned i = 0; i < v.size(); ++i) { - const TrainingInstance& vi = v[i]; - cout << vi.y << "\t" << vi.x << endl; - cout << (!vi.y) << "\t" << (vi.x * -1.0) << endl; - } - } - return 0; -} - diff --git a/pro-train/mr_pro_reduce.cc b/pro-train/mr_pro_reduce.cc deleted file mode 100644 index 5ef9b470..00000000 --- a/pro-train/mr_pro_reduce.cc +++ /dev/null @@ -1,286 +0,0 @@ -#include -#include -#include -#include -#include - -#include -#include - -#include "filelib.h" -#include "weights.h" -#include "sparse_vector.h" -#include "optimize.h" -#include "liblbfgs/lbfgs++.h" - -using namespace std; -namespace po = boost::program_options; - -// since this is a ranking model, there should be equal numbers of -// positive and negative examples, so the bias should be 0 -static const double MAX_BIAS = 1e-10; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("weights,w", po::value(), "Weights from previous iteration (used as initialization and interpolation") - ("regularization_strength,C",po::value()->default_value(500.0), "l2 regularization strength") - ("l1",po::value()->default_value(0.0), "l1 regularization strength") - ("regularize_to_weights,y",po::value()->default_value(5000.0), "Differences in learned weights to previous weights are penalized with an l2 penalty with this strength; 0.0 = no effect") - ("memory_buffers,m",po::value()->default_value(100), "Number of memory buffers (LBFGS)") - ("min_reg,r",po::value()->default_value(0.01), "When tuning (-T) regularization strength, minimum regularization strenght") - ("max_reg,R",po::value()->default_value(1e6), "When tuning (-T) regularization strength, maximum regularization strenght") - ("testset,t",po::value(), "Optional held-out test set") - ("tune_regularizer,T", "Use the held out test set (-t) to tune the regularization strength") - ("interpolate_with_weights,p",po::value()->default_value(1.0), "[deprecated] Output weights are p*w + (1-p)*w_prev; 1.0 = no effect") - ("help,h", "Help"); - po::options_description dcmdline_options; - dcmdline_options.add(opts); - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -void ParseSparseVector(string& line, size_t cur, SparseVector* out) { - SparseVector& x = *out; - size_t last_start = cur; - size_t last_comma = string::npos; - while(cur <= line.size()) { - if (line[cur] == ' ' || cur == line.size()) { - if (!(cur > last_start && last_comma != string::npos && cur > last_comma)) { - cerr << "[ERROR] " << line << endl << " position = " << cur << endl; - exit(1); - } - const int fid = FD::Convert(line.substr(last_start, last_comma - last_start)); - if (cur < line.size()) line[cur] = 0; - const weight_t val = strtod(&line[last_comma + 1], NULL); - x.set_value(fid, val); - - last_comma = string::npos; - last_start = cur+1; - } else { - if (line[cur] == '=') - last_comma = cur; - } - ++cur; - } -} - -void ReadCorpus(istream* pin, vector > >* corpus) { - istream& in = *pin; - corpus->clear(); - bool flag = false; - int lc = 0; - string line; - SparseVector x; - while(getline(in, line)) { - ++lc; - if (lc % 1000 == 0) { cerr << '.'; flag = true; } - if (lc % 40000 == 0) { cerr << " [" << lc << "]\n"; flag = false; } - if (line.empty()) continue; - const size_t ks = line.find("\t"); - assert(string::npos != ks); - assert(ks == 1); - const bool y = line[0] == '1'; - x.clear(); - ParseSparseVector(line, ks + 1, &x); - corpus->push_back(make_pair(y, x)); - } - if (flag) cerr << endl; -} - -void GradAdd(const SparseVector& v, const double scale, weight_t* acc) { - for (SparseVector::const_iterator it = v.begin(); - it != v.end(); ++it) { - acc[it->first] += it->second * scale; - } -} - -double ApplyRegularizationTerms(const double C, - const double T, - const vector& weights, - const vector& prev_weights, - weight_t* g) { - double reg = 0; - for (size_t i = 0; i < weights.size(); ++i) { - const double prev_w_i = (i < prev_weights.size() ? prev_weights[i] : 0.0); - const double& w_i = weights[i]; - reg += C * w_i * w_i; - g[i] += 2 * C * w_i; - - const double diff_i = w_i - prev_w_i; - reg += T * diff_i * diff_i; - g[i] += 2 * T * diff_i; - } - return reg; -} - -double TrainingInference(const vector& x, - const vector > >& corpus, - weight_t* g = NULL) { - double cll = 0; - for (int i = 0; i < corpus.size(); ++i) { - const double dotprod = corpus[i].second.dot(x) + (x.size() ? x[0] : weight_t()); // x[0] is bias - double lp_false = dotprod; - double lp_true = -dotprod; - if (0 < lp_true) { - lp_true += log1p(exp(-lp_true)); - lp_false = log1p(exp(lp_false)); - } else { - lp_true = log1p(exp(lp_true)); - lp_false += log1p(exp(-lp_false)); - } - lp_true*=-1; - lp_false*=-1; - if (corpus[i].first) { // true label - cll -= lp_true; - if (g) { - // g -= corpus[i].second * exp(lp_false); - GradAdd(corpus[i].second, -exp(lp_false), g); - g[0] -= exp(lp_false); // bias - } - } else { // false label - cll -= lp_false; - if (g) { - // g += corpus[i].second * exp(lp_true); - GradAdd(corpus[i].second, exp(lp_true), g); - g[0] += exp(lp_true); // bias - } - } - } - return cll; -} - -struct ProLoss { - ProLoss(const vector > >& tr, - const vector > >& te, - const double c, - const double t, - const vector& px) : training(tr), testing(te), C(c), T(t), prev_x(px){} - double operator()(const vector& x, double* g) const { - fill(g, g + x.size(), 0.0); - double cll = TrainingInference(x, training, g); - tppl = 0; - if (testing.size()) - tppl = pow(2.0, TrainingInference(x, testing, g) / (log(2) * testing.size())); - double ppl = cll / log(2); - ppl /= training.size(); - ppl = pow(2.0, ppl); - double reg = ApplyRegularizationTerms(C, T, x, prev_x, g); - return cll + reg; - } - const vector > >& training, testing; - const double C, T; - const vector& prev_x; - mutable double tppl; -}; - -// return held-out log likelihood -double LearnParameters(const vector > >& training, - const vector > >& testing, - const double C, - const double C1, - const double T, - const unsigned memory_buffers, - const vector& prev_x, - vector* px) { - assert(px->size() == prev_x.size()); - ProLoss loss(training, testing, C, T, prev_x); - LBFGS lbfgs(px, loss, memory_buffers, C1); - lbfgs.MinimizeFunction(); - return loss.tppl; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - string line; - vector > > training, testing; - const bool tune_regularizer = conf.count("tune_regularizer"); - if (tune_regularizer && !conf.count("testset")) { - cerr << "--tune_regularizer requires --testset to be set\n"; - return 1; - } - const double min_reg = conf["min_reg"].as(); - const double max_reg = conf["max_reg"].as(); - double C = conf["regularization_strength"].as(); // will be overridden if parameter is tuned - double C1 = conf["l1"].as(); // will be overridden if parameter is tuned - const double T = conf["regularize_to_weights"].as(); - assert(C >= 0.0); - assert(min_reg >= 0.0); - assert(max_reg >= 0.0); - assert(max_reg > min_reg); - const double psi = conf["interpolate_with_weights"].as(); - if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; return 1; } - ReadCorpus(&cin, &training); - if (conf.count("testset")) { - ReadFile rf(conf["testset"].as()); - ReadCorpus(rf.stream(), &testing); - } - cerr << "Number of features: " << FD::NumFeats() << endl; - - vector x, prev_x; // x[0] is bias - if (conf.count("weights")) { - Weights::InitFromFile(conf["weights"].as(), &x); - x.resize(FD::NumFeats()); - prev_x = x; - } else { - x.resize(FD::NumFeats()); - prev_x = x; - } - cerr << " Number of features: " << x.size() << endl; - cerr << "Number of training examples: " << training.size() << endl; - cerr << "Number of testing examples: " << testing.size() << endl; - double tppl = 0.0; - vector > sp; - vector smoothed; - if (tune_regularizer) { - C = min_reg; - const double steps = 18; - double sweep_factor = exp((log(max_reg) - log(min_reg)) / steps); - cerr << "SWEEP FACTOR: " << sweep_factor << endl; - while(C < max_reg) { - cerr << "C=" << C << "\tT=" <(), prev_x, &x); - sp.push_back(make_pair(C, tppl)); - C *= sweep_factor; - } - smoothed.resize(sp.size(), 0); - smoothed[0] = sp[0].second; - smoothed.back() = sp.back().second; - for (int i = 1; i < sp.size()-1; ++i) { - double prev = sp[i-1].second; - double next = sp[i+1].second; - double cur = sp[i].second; - smoothed[i] = (prev*0.2) + cur * 0.6 + (0.2*next); - } - double best_ppl = 9999999; - unsigned best_i = 0; - for (unsigned i = 0; i < sp.size(); ++i) { - if (smoothed[i] < best_ppl) { - best_ppl = smoothed[i]; - best_i = i; - } - } - C = sp[best_i].first; - } // tune regularizer - tppl = LearnParameters(training, testing, C, C1, T, conf["memory_buffers"].as(), prev_x, &x); - if (conf.count("weights")) { - for (int i = 1; i < x.size(); ++i) { - x[i] = (x[i] * psi) + prev_x[i] * (1.0 - psi); - } - } - cout.precision(15); - cout << "# C=" << C << "\theld out perplexity="; - if (tppl) { cout << tppl << endl; } else { cout << "N/A\n"; } - if (sp.size()) { - cout << "# Parameter sweep:\n"; - for (int i = 0; i < sp.size(); ++i) { - cout << "# " << sp[i].first << "\t" << sp[i].second << "\t" << smoothed[i] << endl; - } - } - Weights::WriteToFile("-", x); - return 0; -} diff --git a/pro/Makefile.am b/pro/Makefile.am new file mode 100644 index 00000000..1e9d46b0 --- /dev/null +++ b/pro/Makefile.am @@ -0,0 +1,11 @@ +bin_PROGRAMS = \ + mr_pro_map \ + mr_pro_reduce + +mr_pro_map_SOURCES = mr_pro_map.cc +mr_pro_map_LDADD = $(top_srcdir)/training/libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz + +mr_pro_reduce_SOURCES = mr_pro_reduce.cc +mr_pro_reduce_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a -lz + +AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training diff --git a/pro/README.shared-mem b/pro/README.shared-mem new file mode 100644 index 00000000..7728efc0 --- /dev/null +++ b/pro/README.shared-mem @@ -0,0 +1,9 @@ +If you want to run dist-vest.pl on a very large shared memory machine, do the +following: + + ./dist-vest.pl --use-make I --decode-nodes J --weights weights.init --source-file=dev.src --ref-files=dev.ref.* cdec.ini + +This will use I jobs for doing the line search and J jobs to run the decoder. Typically, since the +decoder must load grammars, language models, etc., J should be smaller than I, but this will depend +on the system you are running on and the complexity of the models used for decoding. + diff --git a/pro/mr_pro_generate_mapper_input.pl b/pro/mr_pro_generate_mapper_input.pl new file mode 100755 index 00000000..b30fc4fd --- /dev/null +++ b/pro/mr_pro_generate_mapper_input.pl @@ -0,0 +1,18 @@ +#!/usr/bin/perl -w +use strict; + +die "Usage: $0 HG_DIR\n" unless scalar @ARGV == 1; +my $d = shift @ARGV; +die "Can't find directory $d" unless -d $d; + +opendir(DIR, $d) or die "Can't read $d: $!"; +my @hgs = grep { /\.gz$/ } readdir(DIR); +closedir DIR; + +for my $hg (@hgs) { + my $file = $hg; + my $id = $hg; + $id =~ s/(\.json)?\.gz//; + print "$d/$file $id\n"; +} + diff --git a/pro/mr_pro_map.cc b/pro/mr_pro_map.cc new file mode 100644 index 00000000..eef40b8a --- /dev/null +++ b/pro/mr_pro_map.cc @@ -0,0 +1,201 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "candidate_set.h" +#include "sampler.h" +#include "filelib.h" +#include "stringlib.h" +#include "weights.h" +#include "inside_outside.h" +#include "hg_io.h" +#include "ns.h" +#include "ns_docscorer.h" + +// This is Figure 4 (Algorithm Sampler) from Hopkins&May (2011) + +using namespace std; +namespace po = boost::program_options; + +boost::shared_ptr rng; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("reference,r",po::value >(), "[REQD] Reference translation (tokenized text)") + ("weights,w",po::value(), "[REQD] Weights files from current iterations") + ("kbest_repository,K",po::value()->default_value("./kbest"),"K-best list repository (directory)") + ("input,i",po::value()->default_value("-"), "Input file to map (- is STDIN)") + ("source,s",po::value()->default_value(""), "Source file (ignored, except for AER)") + ("evaluation_metric,m",po::value()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)") + ("kbest_size,k",po::value()->default_value(1500u), "Top k-hypotheses to extract") + ("candidate_pairs,G", po::value()->default_value(5000u), "Number of pairs to sample per hypothesis (Gamma)") + ("best_pairs,X", po::value()->default_value(50u), "Number of pairs, ranked by magnitude of objective delta, to retain (Xi)") + ("random_seed,S", po::value(), "Random seed (if not specified, /dev/random will be used)") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = false; + if (!conf->count("reference")) { + cerr << "Please specify one or more references using -r \n"; + flag = true; + } + if (!conf->count("weights")) { + cerr << "Please specify weights using -w \n"; + flag = true; + } + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +struct ThresholdAlpha { + explicit ThresholdAlpha(double t = 0.05) : threshold(t) {} + double operator()(double mag) const { + if (mag < threshold) return 0.0; else return 1.0; + } + const double threshold; +}; + +struct TrainingInstance { + TrainingInstance(const SparseVector& feats, bool positive, float diff) : x(feats), y(positive), gdiff(diff) {} + SparseVector x; +#undef DEBUGGING_PRO +#ifdef DEBUGGING_PRO + vector a; + vector b; +#endif + bool y; + float gdiff; +}; +#ifdef DEBUGGING_PRO +ostream& operator<<(ostream& os, const TrainingInstance& d) { + return os << d.gdiff << " y=" << d.y << "\tA:" << TD::GetString(d.a) << "\n\tB: " << TD::GetString(d.b) << "\n\tX: " << d.x; +} +#endif + +struct DiffOrder { + bool operator()(const TrainingInstance& a, const TrainingInstance& b) const { + return a.gdiff > b.gdiff; + } +}; + +void Sample(const unsigned gamma, + const unsigned xi, + const training::CandidateSet& J_i, + const EvaluationMetric* metric, + vector* pv) { + const bool invert_score = metric->IsErrorMetric(); + vector v1, v2; + float avg_diff = 0; + for (unsigned i = 0; i < gamma; ++i) { + const size_t a = rng->inclusive(0, J_i.size() - 1)(); + const size_t b = rng->inclusive(0, J_i.size() - 1)(); + if (a == b) continue; + float ga = metric->ComputeScore(J_i[a].eval_feats); + float gb = metric->ComputeScore(J_i[b].eval_feats); + bool positive = gb < ga; + if (invert_score) positive = !positive; + const float gdiff = fabs(ga - gb); + if (!gdiff) continue; + avg_diff += gdiff; + SparseVector xdiff = (J_i[a].fmap - J_i[b].fmap).erase_zeros(); + if (xdiff.empty()) { + cerr << "Empty diff:\n " << TD::GetString(J_i[a].ewords) << endl << "x=" << J_i[a].fmap << endl; + cerr << " " << TD::GetString(J_i[b].ewords) << endl << "x=" << J_i[b].fmap << endl; + continue; + } + v1.push_back(TrainingInstance(xdiff, positive, gdiff)); +#ifdef DEBUGGING_PRO + v1.back().a = J_i[a].hyp; + v1.back().b = J_i[b].hyp; + cerr << "N: " << v1.back() << endl; +#endif + } + avg_diff /= v1.size(); + + for (unsigned i = 0; i < v1.size(); ++i) { + double p = 1.0 / (1.0 + exp(-avg_diff - v1[i].gdiff)); + // cerr << "avg_diff=" << avg_diff << " gdiff=" << v1[i].gdiff << " p=" << p << endl; + if (rng->next() < p) v2.push_back(v1[i]); + } + vector::iterator mid = v2.begin() + xi; + if (xi > v2.size()) mid = v2.end(); + partial_sort(v2.begin(), mid, v2.end(), DiffOrder()); + copy(v2.begin(), mid, back_inserter(*pv)); +#ifdef DEBUGGING_PRO + if (v2.size() >= 5) { + for (int i =0; i < (mid - v2.begin()); ++i) { + cerr << v2[i] << endl; + } + cerr << pv->back() << endl; + } +#endif +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + if (conf.count("random_seed")) + rng.reset(new MT19937(conf["random_seed"].as())); + else + rng.reset(new MT19937); + const string evaluation_metric = conf["evaluation_metric"].as(); + + EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); + DocumentScorer ds(metric, conf["reference"].as >()); + cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl; + + Hypergraph hg; + string last_file; + ReadFile in_read(conf["input"].as()); + istream &in=*in_read.stream(); + const unsigned kbest_size = conf["kbest_size"].as(); + const unsigned gamma = conf["candidate_pairs"].as(); + const unsigned xi = conf["best_pairs"].as(); + string weightsf = conf["weights"].as(); + vector weights; + Weights::InitFromFile(weightsf, &weights); + string kbest_repo = conf["kbest_repository"].as(); + MkDirP(kbest_repo); + while(in) { + vector v; + string line; + getline(in, line); + if (line.empty()) continue; + istringstream is(line); + int sent_id; + string file; + // path-to-file (JSON) sent_id + is >> file >> sent_id; + ReadFile rf(file); + ostringstream os; + training::CandidateSet J_i; + os << kbest_repo << "/kbest." << sent_id << ".txt.gz"; + const string kbest_file = os.str(); + if (FileExists(kbest_file)) + J_i.ReadFromFile(kbest_file); + HypergraphIO::ReadFromJSON(rf.stream(), &hg); + hg.Reweight(weights); + J_i.AddKBestCandidates(hg, kbest_size, ds[sent_id]); + J_i.WriteToFile(kbest_file); + + Sample(gamma, xi, J_i, metric, &v); + for (unsigned i = 0; i < v.size(); ++i) { + const TrainingInstance& vi = v[i]; + cout << vi.y << "\t" << vi.x << endl; + cout << (!vi.y) << "\t" << (vi.x * -1.0) << endl; + } + } + return 0; +} + diff --git a/pro/mr_pro_reduce.cc b/pro/mr_pro_reduce.cc new file mode 100644 index 00000000..5ef9b470 --- /dev/null +++ b/pro/mr_pro_reduce.cc @@ -0,0 +1,286 @@ +#include +#include +#include +#include +#include + +#include +#include + +#include "filelib.h" +#include "weights.h" +#include "sparse_vector.h" +#include "optimize.h" +#include "liblbfgs/lbfgs++.h" + +using namespace std; +namespace po = boost::program_options; + +// since this is a ranking model, there should be equal numbers of +// positive and negative examples, so the bias should be 0 +static const double MAX_BIAS = 1e-10; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("weights,w", po::value(), "Weights from previous iteration (used as initialization and interpolation") + ("regularization_strength,C",po::value()->default_value(500.0), "l2 regularization strength") + ("l1",po::value()->default_value(0.0), "l1 regularization strength") + ("regularize_to_weights,y",po::value()->default_value(5000.0), "Differences in learned weights to previous weights are penalized with an l2 penalty with this strength; 0.0 = no effect") + ("memory_buffers,m",po::value()->default_value(100), "Number of memory buffers (LBFGS)") + ("min_reg,r",po::value()->default_value(0.01), "When tuning (-T) regularization strength, minimum regularization strenght") + ("max_reg,R",po::value()->default_value(1e6), "When tuning (-T) regularization strength, maximum regularization strenght") + ("testset,t",po::value(), "Optional held-out test set") + ("tune_regularizer,T", "Use the held out test set (-t) to tune the regularization strength") + ("interpolate_with_weights,p",po::value()->default_value(1.0), "[deprecated] Output weights are p*w + (1-p)*w_prev; 1.0 = no effect") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +void ParseSparseVector(string& line, size_t cur, SparseVector* out) { + SparseVector& x = *out; + size_t last_start = cur; + size_t last_comma = string::npos; + while(cur <= line.size()) { + if (line[cur] == ' ' || cur == line.size()) { + if (!(cur > last_start && last_comma != string::npos && cur > last_comma)) { + cerr << "[ERROR] " << line << endl << " position = " << cur << endl; + exit(1); + } + const int fid = FD::Convert(line.substr(last_start, last_comma - last_start)); + if (cur < line.size()) line[cur] = 0; + const weight_t val = strtod(&line[last_comma + 1], NULL); + x.set_value(fid, val); + + last_comma = string::npos; + last_start = cur+1; + } else { + if (line[cur] == '=') + last_comma = cur; + } + ++cur; + } +} + +void ReadCorpus(istream* pin, vector > >* corpus) { + istream& in = *pin; + corpus->clear(); + bool flag = false; + int lc = 0; + string line; + SparseVector x; + while(getline(in, line)) { + ++lc; + if (lc % 1000 == 0) { cerr << '.'; flag = true; } + if (lc % 40000 == 0) { cerr << " [" << lc << "]\n"; flag = false; } + if (line.empty()) continue; + const size_t ks = line.find("\t"); + assert(string::npos != ks); + assert(ks == 1); + const bool y = line[0] == '1'; + x.clear(); + ParseSparseVector(line, ks + 1, &x); + corpus->push_back(make_pair(y, x)); + } + if (flag) cerr << endl; +} + +void GradAdd(const SparseVector& v, const double scale, weight_t* acc) { + for (SparseVector::const_iterator it = v.begin(); + it != v.end(); ++it) { + acc[it->first] += it->second * scale; + } +} + +double ApplyRegularizationTerms(const double C, + const double T, + const vector& weights, + const vector& prev_weights, + weight_t* g) { + double reg = 0; + for (size_t i = 0; i < weights.size(); ++i) { + const double prev_w_i = (i < prev_weights.size() ? prev_weights[i] : 0.0); + const double& w_i = weights[i]; + reg += C * w_i * w_i; + g[i] += 2 * C * w_i; + + const double diff_i = w_i - prev_w_i; + reg += T * diff_i * diff_i; + g[i] += 2 * T * diff_i; + } + return reg; +} + +double TrainingInference(const vector& x, + const vector > >& corpus, + weight_t* g = NULL) { + double cll = 0; + for (int i = 0; i < corpus.size(); ++i) { + const double dotprod = corpus[i].second.dot(x) + (x.size() ? x[0] : weight_t()); // x[0] is bias + double lp_false = dotprod; + double lp_true = -dotprod; + if (0 < lp_true) { + lp_true += log1p(exp(-lp_true)); + lp_false = log1p(exp(lp_false)); + } else { + lp_true = log1p(exp(lp_true)); + lp_false += log1p(exp(-lp_false)); + } + lp_true*=-1; + lp_false*=-1; + if (corpus[i].first) { // true label + cll -= lp_true; + if (g) { + // g -= corpus[i].second * exp(lp_false); + GradAdd(corpus[i].second, -exp(lp_false), g); + g[0] -= exp(lp_false); // bias + } + } else { // false label + cll -= lp_false; + if (g) { + // g += corpus[i].second * exp(lp_true); + GradAdd(corpus[i].second, exp(lp_true), g); + g[0] += exp(lp_true); // bias + } + } + } + return cll; +} + +struct ProLoss { + ProLoss(const vector > >& tr, + const vector > >& te, + const double c, + const double t, + const vector& px) : training(tr), testing(te), C(c), T(t), prev_x(px){} + double operator()(const vector& x, double* g) const { + fill(g, g + x.size(), 0.0); + double cll = TrainingInference(x, training, g); + tppl = 0; + if (testing.size()) + tppl = pow(2.0, TrainingInference(x, testing, g) / (log(2) * testing.size())); + double ppl = cll / log(2); + ppl /= training.size(); + ppl = pow(2.0, ppl); + double reg = ApplyRegularizationTerms(C, T, x, prev_x, g); + return cll + reg; + } + const vector > >& training, testing; + const double C, T; + const vector& prev_x; + mutable double tppl; +}; + +// return held-out log likelihood +double LearnParameters(const vector > >& training, + const vector > >& testing, + const double C, + const double C1, + const double T, + const unsigned memory_buffers, + const vector& prev_x, + vector* px) { + assert(px->size() == prev_x.size()); + ProLoss loss(training, testing, C, T, prev_x); + LBFGS lbfgs(px, loss, memory_buffers, C1); + lbfgs.MinimizeFunction(); + return loss.tppl; +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + string line; + vector > > training, testing; + const bool tune_regularizer = conf.count("tune_regularizer"); + if (tune_regularizer && !conf.count("testset")) { + cerr << "--tune_regularizer requires --testset to be set\n"; + return 1; + } + const double min_reg = conf["min_reg"].as(); + const double max_reg = conf["max_reg"].as(); + double C = conf["regularization_strength"].as(); // will be overridden if parameter is tuned + double C1 = conf["l1"].as(); // will be overridden if parameter is tuned + const double T = conf["regularize_to_weights"].as(); + assert(C >= 0.0); + assert(min_reg >= 0.0); + assert(max_reg >= 0.0); + assert(max_reg > min_reg); + const double psi = conf["interpolate_with_weights"].as(); + if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; return 1; } + ReadCorpus(&cin, &training); + if (conf.count("testset")) { + ReadFile rf(conf["testset"].as()); + ReadCorpus(rf.stream(), &testing); + } + cerr << "Number of features: " << FD::NumFeats() << endl; + + vector x, prev_x; // x[0] is bias + if (conf.count("weights")) { + Weights::InitFromFile(conf["weights"].as(), &x); + x.resize(FD::NumFeats()); + prev_x = x; + } else { + x.resize(FD::NumFeats()); + prev_x = x; + } + cerr << " Number of features: " << x.size() << endl; + cerr << "Number of training examples: " << training.size() << endl; + cerr << "Number of testing examples: " << testing.size() << endl; + double tppl = 0.0; + vector > sp; + vector smoothed; + if (tune_regularizer) { + C = min_reg; + const double steps = 18; + double sweep_factor = exp((log(max_reg) - log(min_reg)) / steps); + cerr << "SWEEP FACTOR: " << sweep_factor << endl; + while(C < max_reg) { + cerr << "C=" << C << "\tT=" <(), prev_x, &x); + sp.push_back(make_pair(C, tppl)); + C *= sweep_factor; + } + smoothed.resize(sp.size(), 0); + smoothed[0] = sp[0].second; + smoothed.back() = sp.back().second; + for (int i = 1; i < sp.size()-1; ++i) { + double prev = sp[i-1].second; + double next = sp[i+1].second; + double cur = sp[i].second; + smoothed[i] = (prev*0.2) + cur * 0.6 + (0.2*next); + } + double best_ppl = 9999999; + unsigned best_i = 0; + for (unsigned i = 0; i < sp.size(); ++i) { + if (smoothed[i] < best_ppl) { + best_ppl = smoothed[i]; + best_i = i; + } + } + C = sp[best_i].first; + } // tune regularizer + tppl = LearnParameters(training, testing, C, C1, T, conf["memory_buffers"].as(), prev_x, &x); + if (conf.count("weights")) { + for (int i = 1; i < x.size(); ++i) { + x[i] = (x[i] * psi) + prev_x[i] * (1.0 - psi); + } + } + cout.precision(15); + cout << "# C=" << C << "\theld out perplexity="; + if (tppl) { cout << tppl << endl; } else { cout << "N/A\n"; } + if (sp.size()) { + cout << "# Parameter sweep:\n"; + for (int i = 0; i < sp.size(); ++i) { + cout << "# " << sp[i].first << "\t" << sp[i].second << "\t" << smoothed[i] << endl; + } + } + Weights::WriteToFile("-", x); + return 0; +} diff --git a/pro/pro.pl b/pro/pro.pl new file mode 100755 index 00000000..891b7e4c --- /dev/null +++ b/pro/pro.pl @@ -0,0 +1,555 @@ +#!/usr/bin/env perl +use strict; +use File::Basename qw(basename); +my @ORIG_ARGV=@ARGV; +use Cwd qw(getcwd); +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; } + +# Skip local config (used for distributing jobs) if we're running in local-only mode +use LocalConfig; +use Getopt::Long; +use IPC::Open2; +use POSIX ":sys_wait_h"; +my $QSUB_CMD = qsub_args(mert_memory()); +my $default_jobs = env_default_jobs(); + +my $VEST_DIR="$SCRIPT_DIR/../dpmert"; +require "$VEST_DIR/libcall.pl"; + +# Default settings +my $srcFile; +my $refFiles; +my $bin_dir = $SCRIPT_DIR; +die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; +my $FAST_SCORE="$bin_dir/../mteval/fast_score"; +die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; +my $MAPINPUT = "$bin_dir/mr_pro_generate_mapper_input.pl"; +my $MAPPER = "$bin_dir/mr_pro_map"; +my $REDUCER = "$bin_dir/mr_pro_reduce"; +my $parallelize = "$VEST_DIR/parallelize.pl"; +my $libcall = "$VEST_DIR/libcall.pl"; +my $sentserver = "$VEST_DIR/sentserver"; +my $sentclient = "$VEST_DIR/sentclient"; +my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm"; + +my $SCORER = $FAST_SCORE; +die "Can't find $MAPPER" unless -x $MAPPER; +my $cdec = "$bin_dir/../decoder/cdec"; +die "Can't find decoder in $cdec" unless -x $cdec; +die "Can't find $parallelize" unless -x $parallelize; +die "Can't find $libcall" unless -e $libcall; +my $decoder = $cdec; +my $lines_per_mapper = 30; +my $iteration = 1; +my $best_weights; +my $psi = 1; +my $default_max_iter = 30; +my $max_iterations = $default_max_iter; +my $jobs = $default_jobs; # number of decode nodes +my $pmem = "4g"; +my $disable_clean = 0; +my %seen_weights; +my $help = 0; +my $epsilon = 0.0001; +my $dryrun = 0; +my $last_score = -10000000; +my $metric = "ibm_bleu"; +my $dir; +my $iniFile; +my $weights; +my $use_make = 1; # use make to parallelize +my $useqsub = 0; +my $initial_weights; +my $pass_suffix = ''; +my $devset; + +# regularization strength +my $reg = 500; +my $reg_previous = 5000; + +# Process command-line options +if (GetOptions( + "config=s" => \$iniFile, + "weights=s" => \$initial_weights, + "devset=s" => \$devset, + "jobs=i" => \$jobs, + "metric=s" => \$metric, + "pass-suffix=s" => \$pass_suffix, + "qsub" => \$useqsub, + "help" => \$help, + "reg=f" => \$reg, + "reg-previous=f" => \$reg_previous, + "output-dir=s" => \$dir, +) == 0 || @ARGV!=0 || $help) { + print_help(); + exit; +} + +if ($useqsub) { + $use_make = 0; + die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); +} + +my @missing_args = (); +if (!defined $iniFile) { push @missing_args, "--config"; } +if (!defined $devset) { push @missing_args, "--devset"; } +if (!defined $initial_weights) { push @missing_args, "--weights"; } +die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args); + +if ($metric =~ /^(combi|ter)$/i) { + $lines_per_mapper = 5; +} + +my $host =check_output("hostname"); chomp $host; +my $bleu; +my $interval_count = 0; +my $logfile; +my $projected_score; + +# used in sorting scores +my $DIR_FLAG = '-r'; +if ($metric =~ /^ter$|^aer$/i) { + $DIR_FLAG = ''; +} + +unless ($dir){ + $dir = 'pro'; +} +unless ($dir =~ /^\//){ # convert relative path to absolute path + my $basedir = check_output("pwd"); + chomp $basedir; + $dir = "$basedir/$dir"; +} + +# Initializations and helper functions +srand; + +my @childpids = (); +my @cleanupcmds = (); + +sub cleanup { + print STDERR "Cleanup...\n"; + for my $pid (@childpids){ unchecked_call("kill $pid"); } + for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); } + exit 1; +}; +# Always call cleanup, no matter how we exit +*CORE::GLOBAL::exit = + sub{ cleanup(); }; +$SIG{INT} = "cleanup"; +$SIG{TERM} = "cleanup"; +$SIG{HUP} = "cleanup"; + +my $decoderBase = check_output("basename $decoder"); chomp $decoderBase; +my $newIniFile = "$dir/$decoderBase.ini"; +my $inputFileName = "$dir/input"; +my $user = $ENV{"USER"}; + + +# process ini file +-e $iniFile || die "Error: could not open $iniFile for reading\n"; +open(INI, $iniFile); + +if (-e $dir) { + die "ERROR: working dir $dir already exists\n\n"; +} else { + mkdir "$dir" or die "Can't mkdir $dir: $!"; + mkdir "$dir/hgs" or die; + mkdir "$dir/scripts" or die; + print STDERR <) { $devSize++; } +close F; + +unless($best_weights){ $best_weights = $weights; } +unless($projected_score){ $projected_score = 0.0; } +$seen_weights{$weights} = 1; + +my $random_seed = int(time / 1000); +my $lastWeightsFile; +my $lastPScore = 0; +# main optimization loop +my @allweights; +while (1){ + print STDERR "\n\nITERATION $iteration\n==========\n"; + + if ($iteration > $max_iterations){ + print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n"; + last; + } + # iteration-specific files + my $runFile="$dir/run.raw.$iteration"; + my $onebestFile="$dir/1best.$iteration"; + my $logdir="$dir/logs.$iteration"; + my $decoderLog="$logdir/decoder.sentserver.log.$iteration"; + my $scorerLog="$logdir/scorer.log.$iteration"; + check_call("mkdir -p $logdir"); + + + #decode + print STDERR "RUNNING DECODER AT "; + print STDERR unchecked_output("date"); + my $im1 = $iteration - 1; + my $weightsFile="$dir/weights.$im1"; + push @allweights, "-w $dir/weights.$im1"; + `rm -f $dir/hgs/*.gz`; + my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; + my $pcmd; + if ($use_make) { + $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --"; + } else { + $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --"; + } + my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; + print STDERR "COMMAND:\n$cmd\n"; + check_bash_call($cmd); + my $num_hgs; + my $num_topbest; + my $retries = 0; + while($retries < 5) { + $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); + $num_topbest = check_output("wc -l < $runFile"); + print STDERR "NUMBER OF HGs: $num_hgs\n"; + print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n"; + if($devSize == $num_hgs && $devSize == $num_topbest) { + last; + } else { + print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n"; + sleep(3); + } + $retries++; + } + die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); + my $dec_score = check_output("cat $runFile | $SCORER -r $refs -m $metric"); + chomp $dec_score; + print STDERR "DECODER SCORE: $dec_score\n"; + + # save space + check_call("gzip -f $runFile"); + check_call("gzip -f $decoderLog"); + + # run optimizer + print STDERR "RUNNING OPTIMIZER AT "; + print STDERR unchecked_output("date"); + print STDERR " - GENERATE TRAINING EXEMPLARS\n"; + my $mergeLog="$logdir/prune-merge.log.$iteration"; + + my $score = 0; + my $icc = 0; + my $inweights="$dir/weights.$im1"; + $cmd="$MAPINPUT $dir/hgs > $dir/agenda.$im1"; + print STDERR "COMMAND:\n$cmd\n"; + check_call($cmd); + check_call("mkdir -p $dir/splag.$im1"); + $cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1 $dir/splag.$im1/mapinput."; + print STDERR "COMMAND:\n$cmd\n"; + check_call($cmd); + opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!"; + my @shards = grep { /^mapinput\./ } readdir(DIR); + closedir DIR; + die "No shards!" unless scalar @shards > 0; + my $joblist = ""; + my $nmappers = 0; + @cleanupcmds = (); + my %o2i = (); + my $first_shard = 1; + my $mkfile; # only used with makefiles + my $mkfilename; + if ($use_make) { + $mkfilename = "$dir/splag.$im1/domap.mk"; + open $mkfile, ">$mkfilename" or die "Couldn't write $mkfilename: $!"; + print $mkfile "all: $dir/splag.$im1/map.done\n\n"; + } + my @mkouts = (); # only used with makefiles + my @mapoutputs = (); + for my $shard (@shards) { + my $mapoutput = $shard; + my $client_name = $shard; + $client_name =~ s/mapinput.//; + $client_name = "pro.$client_name"; + $mapoutput =~ s/mapinput/mapoutput/; + push @mapoutputs, "$dir/splag.$im1/$mapoutput"; + $o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard"; + my $script = "$MAPPER -s $srcFile -m $metric -r $refs -w $inweights -K $dir/kbest < $dir/splag.$im1/$shard > $dir/splag.$im1/$mapoutput"; + if ($use_make) { + my $script_file = "$dir/scripts/map.$shard"; + open F, ">$script_file" or die "Can't write $script_file: $!"; + print F "#!/bin/bash\n"; + print F "$script\n"; + close F; + my $output = "$dir/splag.$im1/$mapoutput"; + push @mkouts, $output; + chmod(0755, $script_file) or die "Can't chmod $script_file: $!"; + if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } + print $mkfile "$output: $dir/splag.$im1/$shard\n\t$script_file\n\n"; + } else { + my $script_file = "$dir/scripts/map.$shard"; + open F, ">$script_file" or die "Can't write $script_file: $!"; + print F "$script\n"; + close F; + if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } + + $nmappers++; + my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; + my $jobid = check_output("$qcmd"); + chomp $jobid; + $jobid =~ s/^(\d+)(.*?)$/\1/g; + $jobid =~ s/^Your job (\d+) .*$/\1/; + push(@cleanupcmds, "qdel $jobid 2> /dev/null"); + print STDERR " $jobid"; + if ($joblist == "") { $joblist = $jobid; } + else {$joblist = $joblist . "\|" . $jobid; } + } + } + my @dev_outs = (); + my @devtest_outs = (); + @dev_outs = @mapoutputs; + if ($use_make) { + print $mkfile "$dir/splag.$im1/map.done: @mkouts\n\ttouch $dir/splag.$im1/map.done\n\n"; + close $mkfile; + my $mcmd = "make -j $jobs -f $mkfilename"; + print STDERR "\nExecuting: $mcmd\n"; + check_call($mcmd); + } else { + print STDERR "\nLaunched $nmappers mappers.\n"; + sleep 8; + print STDERR "Waiting for mappers to complete...\n"; + while ($nmappers > 0) { + sleep 5; + my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '"))); + $nmappers = scalar @livejobs; + } + print STDERR "All mappers complete.\n"; + } + my $tol = 0; + my $til = 0; + my $dev_test_file = "$dir/splag.$im1/devtest.gz"; + print STDERR "\nRUNNING CLASSIFIER (REDUCER)\n"; + print STDERR unchecked_output("date"); + $cmd="cat @dev_outs | $REDUCER -w $dir/weights.$im1 -C $reg -y $reg_previous --interpolate_with_weights $psi"; + $cmd .= " > $dir/weights.$iteration"; + print STDERR "COMMAND:\n$cmd\n"; + check_bash_call($cmd); + $lastWeightsFile = "$dir/weights.$iteration"; + $lastPScore = $score; + $iteration++; + print STDERR "\n==========\n"; +} + + +check_call("cp $lastWeightsFile $dir/weights.final"); +print STDERR "\nFINAL WEIGHTS: $dir/weights.final\n(Use -w with the decoder)\n\n"; +print STDOUT "$dir/weights.final\n"; + +exit 0; + +sub read_weights_file { + my ($file) = @_; + open F, "<$file" or die "Couldn't read $file: $!"; + my @r = (); + my $pm = -1; + while() { + next if /^#/; + next if /^\s*$/; + chomp; + if (/^(.+)\s+(.+)$/) { + my $m = $1; + my $w = $2; + die "Weights out of order: $m <= $pm" unless $m > $pm; + push @r, $w; + } else { + warn "Unexpected feature name in weight file: $_"; + } + } + close F; + return join ' ', @r; +} + +sub enseg { + my $src = shift; + my $newsrc = shift; + open(SRC, $src); + open(NEWSRC, ">$newsrc"); + my $i=0; + while (my $line=){ + chomp $line; + if ($line =~ /^\s* tags, you must include a zero-based id attribute"; + } + } else { + print NEWSRC "$line\n"; + } + $i++; + } + close SRC; + close NEWSRC; + die "Empty dev set!" if ($i == 0); +} + +sub print_help { + + my $executable = basename($0); chomp $executable; + print << "Help"; + +Usage: $executable [options] + + $executable [options] + Runs a complete PRO optimization using the ini file specified. + +Required: + + --config + Decoder configuration file. + + --devset + Dev set source and reference data. + + --weights + Initial weights file (use empty file to start from 0) + +General options: + + --help + Print this message and exit. + + --max-iterations + Maximum number of iterations to run. If not specified, defaults + to $default_max_iter. + + --metric + Metric to optimize. + Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi + + --pass-suffix + If the decoder is doing multi-pass decoding, the pass suffix "2", + "3", etc., is used to control what iteration of weights is set. + + --workdir + Directory for intermediate and output files. If not specified, the + name is derived from the ini filename. Assuming that the ini + filename begins with the decoder name and ends with ini, the default + name of the working directory is inferred from the middle part of + the filename. E.g. an ini file named decoder.foo.ini would have + a default working directory name foo. + +Regularization options: + + --reg + l2 regularization strength [default=500]. The greater this value, + the closer to zero the weights will be. + + --reg-previous + l2 penalty for moving away from the weights from the previous + iteration. [default=5000]. The greater this value, the closer + to the previous iteration's weights the next iteration's weights + will be. + +Job control options: + + --jobs + Number of decoder processes to run in parallel. [default=$default_jobs] + + --qsub + Use qsub to run jobs in parallel (qsub must be configured in + environment/LocalEnvironment.pm) + + --pmem + Amount of physical memory requested for parallel decoding jobs + (used with qsub requests only) + +Deprecated options: + + --interpolate-with-weights + [deprecated] At each iteration the resulting weights are + interpolated with the weights from the previous iteration, with + this factor. [default=1.0, i.e., no effect] + +Help +} + +sub convert { + my ($str) = @_; + my @ps = split /;/, $str; + my %dict = (); + for my $p (@ps) { + my ($k, $v) = split /=/, $p; + $dict{$k} = $v; + } + return %dict; +} + + +sub cmdline { + return join ' ',($0,@ORIG_ARGV); +} + +#buggy: last arg gets quoted sometimes? +my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; +my $shell_escape_in_quote=qr{[\\"\$`!]}; + +sub escape_shell { + my ($arg)=@_; + return undef unless defined $arg; + if ($arg =~ /$is_shell_special/) { + $arg =~ s/($shell_escape_in_quote)/\\$1/g; + return "\"$arg\""; + } + return $arg; +} + +sub escaped_shell_args { + return map {local $_=$_;chomp;escape_shell($_)} @_; +} + +sub escaped_shell_args_str { + return join ' ',&escaped_shell_args(@_); +} + +sub escaped_cmdline { + return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); +} + +sub split_devset { + my ($infile, $outsrc, $outref) = @_; + open F, "<$infile" or die "Can't read $infile: $!"; + open S, ">$outsrc" or die "Can't write $outsrc: $!"; + open R, ">$outref" or die "Can't write $outref: $!"; + while() { + chomp; + my ($src, @refs) = split /\s*\|\|\|\s*/; + die "Malformed devset line: $_\n" unless scalar @refs > 0; + print S "$src\n"; + print R join(' ||| ', @refs) . "\n"; + } + close R; + close S; + close F; +} + -- cgit v1.2.3 From 25c4de41139d7c6a095758d4e0e270877377661e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 18 Nov 2012 11:33:03 -0500 Subject: fix part 2 --- Makefile.am | 2 +- configure.ac | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.am b/Makefile.am index fefc470d..7ca7268a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -13,7 +13,7 @@ SUBDIRS = \ mira \ dtrain \ dpmert \ - pro-train \ + pro \ rampion \ minrisk \ example_extff diff --git a/configure.ac b/configure.ac index 5f18beaa..34e138c2 100644 --- a/configure.ac +++ b/configure.ac @@ -89,7 +89,7 @@ AC_CONFIG_FILES([decoder/Makefile]) AC_CONFIG_FILES([training/Makefile]) AC_CONFIG_FILES([training/liblbfgs/Makefile]) AC_CONFIG_FILES([dpmert/Makefile]) -AC_CONFIG_FILES([pro-train/Makefile]) +AC_CONFIG_FILES([pro/Makefile]) AC_CONFIG_FILES([rampion/Makefile]) AC_CONFIG_FILES([minrisk/Makefile]) AC_CONFIG_FILES([klm/util/Makefile]) -- cgit v1.2.3 From 7c4665949fb93fb3de402e4ce1d19bef67850d05 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 18 Nov 2012 11:38:03 -0500 Subject: fix reference to serialization --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 34e138c2..09fc5c5b 100644 --- a/configure.ac +++ b/configure.ac @@ -70,9 +70,9 @@ fi #BOOST_THREADS CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" -LDFLAGS="$LDFLAGS $BOOST_PROGRAM_OPTIONS_LDFLAGS $BOOST_SYSTEM_LDFLAGS" +LDFLAGS="$LDFLAGS $BOOST_PROGRAM_OPTIONS_LDFLAGS $BOOST_SERIALIZATION_LDFLAGS $BOOST_SYSTEM_LDFLAGS" # $BOOST_THREAD_LDFLAGS" -LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS $BOOST_SYSTEM_LIBS" +LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS $BOOST_SERIALIZATION_LIBS $BOOST_SYSTEM_LIBS" # $BOOST_THREAD_LIBS" AC_CHECK_HEADER(google/dense_hash_map, -- cgit v1.2.3 From 1b8181bf0d6e9137e6b9ccdbe414aec37377a1a9 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 18 Nov 2012 13:35:42 -0500 Subject: major restructure of the training code --- .gitignore | 28 +- Makefile.am | 7 +- configure.ac | 32 +- dpmert/Makefile.am | 33 - dpmert/README.shared-mem | 9 - dpmert/ces.cc | 90 -- dpmert/ces.h | 16 - dpmert/decode-and-evaluate.pl | 246 ---- dpmert/divide_refs.py | 15 - dpmert/dpmert.pl | 617 --------- dpmert/error_surface.cc | 42 - dpmert/error_surface.h | 24 - dpmert/libcall.pl | 71 - dpmert/line_mediator.pl | 116 -- dpmert/line_optimizer.cc | 114 -- dpmert/line_optimizer.h | 48 - dpmert/lo_test.cc | 229 --- dpmert/mert_geometry.cc | 185 --- dpmert/mert_geometry.h | 81 -- dpmert/mr_dpmert_generate_mapper_input.cc | 81 -- dpmert/mr_dpmert_map.cc | 112 -- dpmert/mr_dpmert_reduce.cc | 77 -- dpmert/parallelize.pl | 423 ------ dpmert/sentclient.c | 76 - dpmert/sentserver.c | 515 ------- dpmert/sentserver.h | 6 - dpmert/test_aer/README | 8 - dpmert/test_aer/cdec.ini | 3 - dpmert/test_aer/corpus.src | 3 - dpmert/test_aer/grammar | 12 - dpmert/test_aer/ref.0 | 3 - dpmert/test_aer/weights | 13 - dpmert/test_data/0.json.gz | Bin 13709 -> 0 bytes dpmert/test_data/1.json.gz | Bin 204803 -> 0 bytes dpmert/test_data/c2e.txt.0 | 2 - dpmert/test_data/c2e.txt.1 | 2 - dpmert/test_data/c2e.txt.2 | 2 - dpmert/test_data/c2e.txt.3 | 2 - dpmert/test_data/re.txt.0 | 5 - dpmert/test_data/re.txt.1 | 5 - dpmert/test_data/re.txt.2 | 5 - dpmert/test_data/re.txt.3 | 5 - dtrain/Makefile.am | 7 - dtrain/README.md | 48 - dtrain/dtrain.cc | 657 --------- dtrain/dtrain.h | 97 -- dtrain/hstreaming/avg.rb | 32 - dtrain/hstreaming/cdec.ini | 22 - dtrain/hstreaming/dtrain.ini | 15 - dtrain/hstreaming/dtrain.sh | 9 - dtrain/hstreaming/hadoop-streaming-job.sh | 30 - dtrain/hstreaming/lplp.rb | 131 -- dtrain/hstreaming/red-test | 9 - dtrain/kbestget.h | 152 -- dtrain/ksampler.h | 61 - dtrain/pairsampling.h | 149 -- dtrain/parallelize.rb | 79 -- dtrain/parallelize/test/cdec.ini | 22 - dtrain/parallelize/test/dtrain.ini | 15 - dtrain/parallelize/test/in | 10 - dtrain/parallelize/test/refs | 10 - dtrain/score.cc | 254 ---- dtrain/score.h | 212 --- dtrain/test/example/README | 8 - dtrain/test/example/cdec.ini | 25 - dtrain/test/example/dtrain.ini | 22 - dtrain/test/example/expected-output | 89 -- dtrain/test/parallelize/cdec.ini | 22 - dtrain/test/parallelize/dtrain.ini | 15 - dtrain/test/parallelize/in | 10 - dtrain/test/parallelize/refs | 10 - dtrain/test/toy/cdec.ini | 2 - dtrain/test/toy/dtrain.ini | 12 - dtrain/test/toy/input | 2 - minrisk/Makefile.am | 6 - minrisk/minrisk.pl | 540 -------- minrisk/minrisk_generate_input.pl | 18 - minrisk/minrisk_optimize.cc | 197 --- mira/Makefile.am | 6 - mira/kbest_mira.cc | 309 ----- pro/Makefile.am | 11 - pro/README.shared-mem | 9 - pro/mr_pro_generate_mapper_input.pl | 18 - pro/mr_pro_map.cc | 201 --- pro/mr_pro_reduce.cc | 286 ---- pro/pro.pl | 555 -------- rampion/Makefile.am | 6 - rampion/rampion.pl | 540 -------- rampion/rampion_cccp.cc | 168 --- rampion/rampion_generate_input.pl | 18 - training/Makefile.am | 100 +- training/add-model1-features-to-scfg.pl | 93 -- training/candidate_set.cc | 169 --- training/candidate_set.h | 60 - training/cllh_observer.cc | 52 - training/cllh_observer.h | 26 - training/collapse_weights.cc | 110 -- training/crf/Makefile.am | 27 + training/crf/cllh_observer.cc | 52 + training/crf/cllh_observer.h | 26 + training/crf/mpi_batch_optimize.cc | 372 +++++ training/crf/mpi_compute_cllh.cc | 134 ++ training/crf/mpi_extract_features.cc | 151 ++ training/crf/mpi_extract_reachable.cc | 163 +++ training/crf/mpi_flex_optimize.cc | 386 ++++++ training/crf/mpi_online_optimize.cc | 374 +++++ training/dep-reorder/conll2reordering-forest.pl | 65 - training/dep-reorder/george.conll | 4 - training/dep-reorder/scripts/conll2simplecfg.pl | 57 - training/dpmert/Makefile.am | 25 + training/dpmert/ces.cc | 90 ++ training/dpmert/ces.h | 16 + training/dpmert/divide_refs.py | 15 + training/dpmert/dpmert.pl | 618 +++++++++ training/dpmert/error_surface.cc | 42 + training/dpmert/error_surface.h | 24 + training/dpmert/line_mediator.pl | 116 ++ training/dpmert/line_optimizer.cc | 114 ++ training/dpmert/line_optimizer.h | 48 + training/dpmert/lo_test.cc | 229 +++ training/dpmert/mert_geometry.cc | 185 +++ training/dpmert/mert_geometry.h | 81 ++ training/dpmert/mr_dpmert_generate_mapper_input.cc | 81 ++ training/dpmert/mr_dpmert_map.cc | 112 ++ training/dpmert/mr_dpmert_reduce.cc | 77 ++ training/dpmert/test_aer/README | 8 + training/dpmert/test_aer/cdec.ini | 3 + training/dpmert/test_aer/corpus.src | 3 + training/dpmert/test_aer/grammar | 12 + training/dpmert/test_aer/ref.0 | 3 + training/dpmert/test_aer/weights | 13 + training/dpmert/test_data/0.json.gz | Bin 0 -> 13709 bytes training/dpmert/test_data/1.json.gz | Bin 0 -> 204803 bytes training/dpmert/test_data/c2e.txt.0 | 2 + training/dpmert/test_data/c2e.txt.1 | 2 + training/dpmert/test_data/c2e.txt.2 | 2 + training/dpmert/test_data/c2e.txt.3 | 2 + training/dpmert/test_data/re.txt.0 | 5 + training/dpmert/test_data/re.txt.1 | 5 + training/dpmert/test_data/re.txt.2 | 5 + training/dpmert/test_data/re.txt.3 | 5 + training/dtrain/Makefile.am | 7 + training/dtrain/README.md | 48 + training/dtrain/dtrain.cc | 657 +++++++++ training/dtrain/dtrain.h | 97 ++ training/dtrain/hstreaming/avg.rb | 32 + training/dtrain/hstreaming/cdec.ini | 22 + training/dtrain/hstreaming/dtrain.ini | 15 + training/dtrain/hstreaming/dtrain.sh | 9 + training/dtrain/hstreaming/hadoop-streaming-job.sh | 30 + training/dtrain/hstreaming/lplp.rb | 131 ++ training/dtrain/hstreaming/red-test | 9 + training/dtrain/kbestget.h | 152 ++ training/dtrain/ksampler.h | 61 + training/dtrain/pairsampling.h | 149 ++ training/dtrain/parallelize.rb | 79 ++ training/dtrain/parallelize/test/cdec.ini | 22 + training/dtrain/parallelize/test/dtrain.ini | 15 + training/dtrain/parallelize/test/in | 10 + training/dtrain/parallelize/test/refs | 10 + training/dtrain/score.cc | 254 ++++ training/dtrain/score.h | 212 +++ training/dtrain/test/example/README | 8 + training/dtrain/test/example/cdec.ini | 25 + training/dtrain/test/example/dtrain.ini | 22 + training/dtrain/test/example/expected-output | 89 ++ training/dtrain/test/parallelize/cdec.ini | 22 + training/dtrain/test/parallelize/dtrain.ini | 15 + training/dtrain/test/parallelize/in | 10 + training/dtrain/test/parallelize/refs | 10 + training/dtrain/test/toy/cdec.ini | 2 + training/dtrain/test/toy/dtrain.ini | 12 + training/dtrain/test/toy/input | 2 + training/entropy.cc | 41 - training/entropy.h | 22 - training/fast_align.cc | 281 ---- training/feature_expectations.cc | 232 ---- training/grammar_convert.cc | 348 ----- training/lbfgs.h | 1459 -------------------- training/lbfgs_test.cc | 117 -- training/lbl_model.cc | 421 ------ training/minrisk/Makefile.am | 6 + training/minrisk/minrisk.pl | 540 ++++++++ training/minrisk/minrisk_generate_input.pl | 18 + training/minrisk/minrisk_optimize.cc | 197 +++ training/mira/Makefile.am | 6 + training/mira/kbest_mira.cc | 309 +++++ training/mpi_batch_optimize.cc | 372 ----- training/mpi_compute_cllh.cc | 134 -- training/mpi_em_optimize.cc | 389 ------ training/mpi_extract_features.cc | 151 -- training/mpi_extract_reachable.cc | 163 --- training/mpi_flex_optimize.cc | 386 ------ training/mpi_online_optimize.cc | 374 ----- training/mr_em_adapted_reduce.cc | 173 --- training/mr_em_map_adapter.cc | 160 --- training/mr_optimize_reduce.cc | 231 ---- training/mr_reduce_to_weights.cc | 109 -- training/online_optimizer.cc | 16 - training/online_optimizer.h | 129 -- training/optimize.cc | 102 -- training/optimize.h | 92 -- training/optimize_test.cc | 118 -- training/pro/Makefile.am | 11 + training/pro/mr_pro_generate_mapper_input.pl | 18 + training/pro/mr_pro_map.cc | 201 +++ training/pro/mr_pro_reduce.cc | 286 ++++ training/pro/pro.pl | 555 ++++++++ training/rampion/Makefile.am | 6 + training/rampion/rampion.pl | 540 ++++++++ training/rampion/rampion_cccp.cc | 168 +++ training/rampion/rampion_generate_input.pl | 18 + training/risk.cc | 45 - training/risk.h | 26 - training/ttables.cc | 31 - training/ttables.h | 101 -- training/utils/candidate_set.cc | 169 +++ training/utils/candidate_set.h | 60 + training/utils/decode-and-evaluate.pl | 246 ++++ training/utils/entropy.cc | 41 + training/utils/entropy.h | 22 + training/utils/grammar_convert.cc | 348 +++++ training/utils/lbfgs.h | 1459 ++++++++++++++++++++ training/utils/lbfgs_test.cc | 117 ++ training/utils/libcall.pl | 71 + training/utils/online_optimizer.cc | 16 + training/utils/online_optimizer.h | 129 ++ training/utils/optimize.cc | 102 ++ training/utils/optimize.h | 92 ++ training/utils/optimize_test.cc | 118 ++ training/utils/parallelize.pl | 423 ++++++ training/utils/risk.cc | 45 + training/utils/risk.h | 26 + training/utils/sentclient.c | 76 + training/utils/sentserver.c | 515 +++++++ training/utils/sentserver.h | 6 + word-aligner/Makefile.am | 6 + word-aligner/fast_align.cc | 281 ++++ word-aligner/makefiles/makefile.grammars | 2 +- word-aligner/paste-parallel-files.pl | 35 - word-aligner/ttables.cc | 31 + word-aligner/ttables.h | 101 ++ 242 files changed, 13304 insertions(+), 15426 deletions(-) delete mode 100644 dpmert/Makefile.am delete mode 100644 dpmert/README.shared-mem delete mode 100644 dpmert/ces.cc delete mode 100644 dpmert/ces.h delete mode 100755 dpmert/decode-and-evaluate.pl delete mode 100755 dpmert/divide_refs.py delete mode 100755 dpmert/dpmert.pl delete mode 100644 dpmert/error_surface.cc delete mode 100644 dpmert/error_surface.h delete mode 100644 dpmert/libcall.pl delete mode 100755 dpmert/line_mediator.pl delete mode 100644 dpmert/line_optimizer.cc delete mode 100644 dpmert/line_optimizer.h delete mode 100644 dpmert/lo_test.cc delete mode 100644 dpmert/mert_geometry.cc delete mode 100644 dpmert/mert_geometry.h delete mode 100644 dpmert/mr_dpmert_generate_mapper_input.cc delete mode 100644 dpmert/mr_dpmert_map.cc delete mode 100644 dpmert/mr_dpmert_reduce.cc delete mode 100755 dpmert/parallelize.pl delete mode 100644 dpmert/sentclient.c delete mode 100644 dpmert/sentserver.c delete mode 100644 dpmert/sentserver.h delete mode 100644 dpmert/test_aer/README delete mode 100644 dpmert/test_aer/cdec.ini delete mode 100644 dpmert/test_aer/corpus.src delete mode 100644 dpmert/test_aer/grammar delete mode 100644 dpmert/test_aer/ref.0 delete mode 100644 dpmert/test_aer/weights delete mode 100644 dpmert/test_data/0.json.gz delete mode 100644 dpmert/test_data/1.json.gz delete mode 100644 dpmert/test_data/c2e.txt.0 delete mode 100644 dpmert/test_data/c2e.txt.1 delete mode 100644 dpmert/test_data/c2e.txt.2 delete mode 100644 dpmert/test_data/c2e.txt.3 delete mode 100644 dpmert/test_data/re.txt.0 delete mode 100644 dpmert/test_data/re.txt.1 delete mode 100644 dpmert/test_data/re.txt.2 delete mode 100644 dpmert/test_data/re.txt.3 delete mode 100644 dtrain/Makefile.am delete mode 100644 dtrain/README.md delete mode 100644 dtrain/dtrain.cc delete mode 100644 dtrain/dtrain.h delete mode 100755 dtrain/hstreaming/avg.rb delete mode 100644 dtrain/hstreaming/cdec.ini delete mode 100644 dtrain/hstreaming/dtrain.ini delete mode 100755 dtrain/hstreaming/dtrain.sh delete mode 100755 dtrain/hstreaming/hadoop-streaming-job.sh delete mode 100755 dtrain/hstreaming/lplp.rb delete mode 100644 dtrain/hstreaming/red-test delete mode 100644 dtrain/kbestget.h delete mode 100644 dtrain/ksampler.h delete mode 100644 dtrain/pairsampling.h delete mode 100755 dtrain/parallelize.rb delete mode 100644 dtrain/parallelize/test/cdec.ini delete mode 100644 dtrain/parallelize/test/dtrain.ini delete mode 100644 dtrain/parallelize/test/in delete mode 100644 dtrain/parallelize/test/refs delete mode 100644 dtrain/score.cc delete mode 100644 dtrain/score.h delete mode 100644 dtrain/test/example/README delete mode 100644 dtrain/test/example/cdec.ini delete mode 100644 dtrain/test/example/dtrain.ini delete mode 100644 dtrain/test/example/expected-output delete mode 100644 dtrain/test/parallelize/cdec.ini delete mode 100644 dtrain/test/parallelize/dtrain.ini delete mode 100644 dtrain/test/parallelize/in delete mode 100644 dtrain/test/parallelize/refs delete mode 100644 dtrain/test/toy/cdec.ini delete mode 100644 dtrain/test/toy/dtrain.ini delete mode 100644 dtrain/test/toy/input delete mode 100644 minrisk/Makefile.am delete mode 100755 minrisk/minrisk.pl delete mode 100755 minrisk/minrisk_generate_input.pl delete mode 100644 minrisk/minrisk_optimize.cc delete mode 100644 mira/Makefile.am delete mode 100644 mira/kbest_mira.cc delete mode 100644 pro/Makefile.am delete mode 100644 pro/README.shared-mem delete mode 100755 pro/mr_pro_generate_mapper_input.pl delete mode 100644 pro/mr_pro_map.cc delete mode 100644 pro/mr_pro_reduce.cc delete mode 100755 pro/pro.pl delete mode 100644 rampion/Makefile.am delete mode 100755 rampion/rampion.pl delete mode 100644 rampion/rampion_cccp.cc delete mode 100755 rampion/rampion_generate_input.pl delete mode 100755 training/add-model1-features-to-scfg.pl delete mode 100644 training/candidate_set.cc delete mode 100644 training/candidate_set.h delete mode 100644 training/cllh_observer.cc delete mode 100644 training/cllh_observer.h delete mode 100644 training/collapse_weights.cc create mode 100644 training/crf/Makefile.am create mode 100644 training/crf/cllh_observer.cc create mode 100644 training/crf/cllh_observer.h create mode 100644 training/crf/mpi_batch_optimize.cc create mode 100644 training/crf/mpi_compute_cllh.cc create mode 100644 training/crf/mpi_extract_features.cc create mode 100644 training/crf/mpi_extract_reachable.cc create mode 100644 training/crf/mpi_flex_optimize.cc create mode 100644 training/crf/mpi_online_optimize.cc delete mode 100755 training/dep-reorder/conll2reordering-forest.pl delete mode 100644 training/dep-reorder/george.conll delete mode 100755 training/dep-reorder/scripts/conll2simplecfg.pl create mode 100644 training/dpmert/Makefile.am create mode 100644 training/dpmert/ces.cc create mode 100644 training/dpmert/ces.h create mode 100755 training/dpmert/divide_refs.py create mode 100755 training/dpmert/dpmert.pl create mode 100644 training/dpmert/error_surface.cc create mode 100644 training/dpmert/error_surface.h create mode 100755 training/dpmert/line_mediator.pl create mode 100644 training/dpmert/line_optimizer.cc create mode 100644 training/dpmert/line_optimizer.h create mode 100644 training/dpmert/lo_test.cc create mode 100644 training/dpmert/mert_geometry.cc create mode 100644 training/dpmert/mert_geometry.h create mode 100644 training/dpmert/mr_dpmert_generate_mapper_input.cc create mode 100644 training/dpmert/mr_dpmert_map.cc create mode 100644 training/dpmert/mr_dpmert_reduce.cc create mode 100644 training/dpmert/test_aer/README create mode 100644 training/dpmert/test_aer/cdec.ini create mode 100644 training/dpmert/test_aer/corpus.src create mode 100644 training/dpmert/test_aer/grammar create mode 100644 training/dpmert/test_aer/ref.0 create mode 100644 training/dpmert/test_aer/weights create mode 100644 training/dpmert/test_data/0.json.gz create mode 100644 training/dpmert/test_data/1.json.gz create mode 100644 training/dpmert/test_data/c2e.txt.0 create mode 100644 training/dpmert/test_data/c2e.txt.1 create mode 100644 training/dpmert/test_data/c2e.txt.2 create mode 100644 training/dpmert/test_data/c2e.txt.3 create mode 100644 training/dpmert/test_data/re.txt.0 create mode 100644 training/dpmert/test_data/re.txt.1 create mode 100644 training/dpmert/test_data/re.txt.2 create mode 100644 training/dpmert/test_data/re.txt.3 create mode 100644 training/dtrain/Makefile.am create mode 100644 training/dtrain/README.md create mode 100644 training/dtrain/dtrain.cc create mode 100644 training/dtrain/dtrain.h create mode 100755 training/dtrain/hstreaming/avg.rb create mode 100644 training/dtrain/hstreaming/cdec.ini create mode 100644 training/dtrain/hstreaming/dtrain.ini create mode 100755 training/dtrain/hstreaming/dtrain.sh create mode 100755 training/dtrain/hstreaming/hadoop-streaming-job.sh create mode 100755 training/dtrain/hstreaming/lplp.rb create mode 100644 training/dtrain/hstreaming/red-test create mode 100644 training/dtrain/kbestget.h create mode 100644 training/dtrain/ksampler.h create mode 100644 training/dtrain/pairsampling.h create mode 100755 training/dtrain/parallelize.rb create mode 100644 training/dtrain/parallelize/test/cdec.ini create mode 100644 training/dtrain/parallelize/test/dtrain.ini create mode 100644 training/dtrain/parallelize/test/in create mode 100644 training/dtrain/parallelize/test/refs create mode 100644 training/dtrain/score.cc create mode 100644 training/dtrain/score.h create mode 100644 training/dtrain/test/example/README create mode 100644 training/dtrain/test/example/cdec.ini create mode 100644 training/dtrain/test/example/dtrain.ini create mode 100644 training/dtrain/test/example/expected-output create mode 100644 training/dtrain/test/parallelize/cdec.ini create mode 100644 training/dtrain/test/parallelize/dtrain.ini create mode 100644 training/dtrain/test/parallelize/in create mode 100644 training/dtrain/test/parallelize/refs create mode 100644 training/dtrain/test/toy/cdec.ini create mode 100644 training/dtrain/test/toy/dtrain.ini create mode 100644 training/dtrain/test/toy/input delete mode 100644 training/entropy.cc delete mode 100644 training/entropy.h delete mode 100644 training/fast_align.cc delete mode 100644 training/feature_expectations.cc delete mode 100644 training/grammar_convert.cc delete mode 100644 training/lbfgs.h delete mode 100644 training/lbfgs_test.cc delete mode 100644 training/lbl_model.cc create mode 100644 training/minrisk/Makefile.am create mode 100755 training/minrisk/minrisk.pl create mode 100755 training/minrisk/minrisk_generate_input.pl create mode 100644 training/minrisk/minrisk_optimize.cc create mode 100644 training/mira/Makefile.am create mode 100644 training/mira/kbest_mira.cc delete mode 100644 training/mpi_batch_optimize.cc delete mode 100644 training/mpi_compute_cllh.cc delete mode 100644 training/mpi_em_optimize.cc delete mode 100644 training/mpi_extract_features.cc delete mode 100644 training/mpi_extract_reachable.cc delete mode 100644 training/mpi_flex_optimize.cc delete mode 100644 training/mpi_online_optimize.cc delete mode 100644 training/mr_em_adapted_reduce.cc delete mode 100644 training/mr_em_map_adapter.cc delete mode 100644 training/mr_optimize_reduce.cc delete mode 100644 training/mr_reduce_to_weights.cc delete mode 100644 training/online_optimizer.cc delete mode 100644 training/online_optimizer.h delete mode 100644 training/optimize.cc delete mode 100644 training/optimize.h delete mode 100644 training/optimize_test.cc create mode 100644 training/pro/Makefile.am create mode 100755 training/pro/mr_pro_generate_mapper_input.pl create mode 100644 training/pro/mr_pro_map.cc create mode 100644 training/pro/mr_pro_reduce.cc create mode 100755 training/pro/pro.pl create mode 100644 training/rampion/Makefile.am create mode 100755 training/rampion/rampion.pl create mode 100644 training/rampion/rampion_cccp.cc create mode 100755 training/rampion/rampion_generate_input.pl delete mode 100644 training/risk.cc delete mode 100644 training/risk.h delete mode 100644 training/ttables.cc delete mode 100644 training/ttables.h create mode 100644 training/utils/candidate_set.cc create mode 100644 training/utils/candidate_set.h create mode 100755 training/utils/decode-and-evaluate.pl create mode 100644 training/utils/entropy.cc create mode 100644 training/utils/entropy.h create mode 100644 training/utils/grammar_convert.cc create mode 100644 training/utils/lbfgs.h create mode 100644 training/utils/lbfgs_test.cc create mode 100644 training/utils/libcall.pl create mode 100644 training/utils/online_optimizer.cc create mode 100644 training/utils/online_optimizer.h create mode 100644 training/utils/optimize.cc create mode 100644 training/utils/optimize.h create mode 100644 training/utils/optimize_test.cc create mode 100755 training/utils/parallelize.pl create mode 100644 training/utils/risk.cc create mode 100644 training/utils/risk.h create mode 100644 training/utils/sentclient.c create mode 100644 training/utils/sentserver.c create mode 100644 training/utils/sentserver.h create mode 100644 word-aligner/Makefile.am create mode 100644 word-aligner/fast_align.cc delete mode 100755 word-aligner/paste-parallel-files.pl create mode 100644 word-aligner/ttables.cc create mode 100644 word-aligner/ttables.h diff --git a/.gitignore b/.gitignore index aa2e64eb..c6023822 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +example_extff/ff_example.lo +example_extff/libff_example.la +mteval/meteor_jar.cc *.a *.aux *.bbl @@ -176,4 +179,27 @@ utils/reconstruct_weights utils/small_vector_test utils/ts utils/weights_test -utils/unigram_pyp_lm +training/crf/mpi_batch_optimize +training/crf/mpi_compute_cllh +training/crf/mpi_extract_features +training/crf/mpi_extract_reachable +training/crf/mpi_flex_optimize +training/crf/mpi_online_optimize +training/dpmert/lo_test +training/dpmert/mr_dpmert_generate_mapper_input +training/dpmert/mr_dpmert_map +training/dpmert/mr_dpmert_reduce +training/dpmert/sentclient +training/dpmert/sentserver +training/dtrain/dtrain +training/minrisk/minrisk_optimize +training/mira/kbest_mira +training/pro/mr_pro_map +training/pro/mr_pro_reduce +training/rampion/rampion_cccp +training/utils/Makefile.am +training/utils/lbfgs_test +training/utils/optimize_test +training/utils/sentclient +training/utils/sentserver +word-aligner/fast_align diff --git a/Makefile.am b/Makefile.am index 7ca7268a..dbf604a1 100644 --- a/Makefile.am +++ b/Makefile.am @@ -10,12 +10,7 @@ SUBDIRS = \ decoder \ training \ training/liblbfgs \ - mira \ - dtrain \ - dpmert \ - pro \ - rampion \ - minrisk \ + word-aligner \ example_extff #gi/pyp-topics/src gi/clda/src gi/posterior-regularisation/prjava diff --git a/configure.ac b/configure.ac index 09fc5c5b..366112a3 100644 --- a/configure.ac +++ b/configure.ac @@ -82,26 +82,34 @@ AC_PROG_INSTALL CPPFLAGS="-DPIC -fPIC $CPPFLAGS -DHAVE_CONFIG_H" +# core cdec stuff AC_CONFIG_FILES([Makefile]) AC_CONFIG_FILES([utils/Makefile]) AC_CONFIG_FILES([mteval/Makefile]) +AC_CONFIG_FILES([mteval/meteor_jar.cc]) AC_CONFIG_FILES([decoder/Makefile]) -AC_CONFIG_FILES([training/Makefile]) -AC_CONFIG_FILES([training/liblbfgs/Makefile]) -AC_CONFIG_FILES([dpmert/Makefile]) -AC_CONFIG_FILES([pro/Makefile]) -AC_CONFIG_FILES([rampion/Makefile]) -AC_CONFIG_FILES([minrisk/Makefile]) +AC_CONFIG_FILES([python/setup.py]) +AC_CONFIG_FILES([word-aligner/Makefile]) + +# KenLM stuff AC_CONFIG_FILES([klm/util/Makefile]) AC_CONFIG_FILES([klm/lm/Makefile]) AC_CONFIG_FILES([klm/search/Makefile]) -AC_CONFIG_FILES([mira/Makefile]) -AC_CONFIG_FILES([dtrain/Makefile]) -AC_CONFIG_FILES([example_extff/Makefile]) -AC_CONFIG_FILES([mteval/meteor_jar.cc]) - -AC_CONFIG_FILES([python/setup.py]) +# training stuff +AC_CONFIG_FILES([training/Makefile]) +AC_CONFIG_FILES([training/utils/Makefile]) +AC_CONFIG_FILES([training/liblbfgs/Makefile]) +AC_CONFIG_FILES([training/crf/Makefile]) +AC_CONFIG_FILES([training/dpmert/Makefile]) +AC_CONFIG_FILES([training/pro/Makefile]) +AC_CONFIG_FILES([training/rampion/Makefile]) +AC_CONFIG_FILES([training/minrisk/Makefile]) +AC_CONFIG_FILES([training/mira/Makefile]) +AC_CONFIG_FILES([training/dtrain/Makefile]) + +# external feature function example code +AC_CONFIG_FILES([example_extff/Makefile]) AC_OUTPUT diff --git a/dpmert/Makefile.am b/dpmert/Makefile.am deleted file mode 100644 index 00768271..00000000 --- a/dpmert/Makefile.am +++ /dev/null @@ -1,33 +0,0 @@ -bin_PROGRAMS = \ - mr_dpmert_map \ - mr_dpmert_reduce \ - mr_dpmert_generate_mapper_input \ - sentserver \ - sentclient - -noinst_PROGRAMS = \ - lo_test -TESTS = lo_test - -sentserver_SOURCES = sentserver.c -sentserver_LDFLAGS = -pthread - -sentclient_SOURCES = sentclient.c -sentclient_LDFLAGS = -pthread - -mr_dpmert_generate_mapper_input_SOURCES = mr_dpmert_generate_mapper_input.cc line_optimizer.cc -mr_dpmert_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz - -# nbest2hg_SOURCES = nbest2hg.cc -# nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst -lz - -mr_dpmert_map_SOURCES = mert_geometry.cc ces.cc error_surface.cc mr_dpmert_map.cc line_optimizer.cc -mr_dpmert_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz - -mr_dpmert_reduce_SOURCES = error_surface.cc ces.cc mr_dpmert_reduce.cc line_optimizer.cc mert_geometry.cc -mr_dpmert_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz - -lo_test_SOURCES = lo_test.cc ces.cc mert_geometry.cc error_surface.cc line_optimizer.cc -lo_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz - -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/dpmert/README.shared-mem b/dpmert/README.shared-mem deleted file mode 100644 index 7728efc0..00000000 --- a/dpmert/README.shared-mem +++ /dev/null @@ -1,9 +0,0 @@ -If you want to run dist-vest.pl on a very large shared memory machine, do the -following: - - ./dist-vest.pl --use-make I --decode-nodes J --weights weights.init --source-file=dev.src --ref-files=dev.ref.* cdec.ini - -This will use I jobs for doing the line search and J jobs to run the decoder. Typically, since the -decoder must load grammars, language models, etc., J should be smaller than I, but this will depend -on the system you are running on and the complexity of the models used for decoding. - diff --git a/dpmert/ces.cc b/dpmert/ces.cc deleted file mode 100644 index 157b2d17..00000000 --- a/dpmert/ces.cc +++ /dev/null @@ -1,90 +0,0 @@ -#include "ces.h" - -#include -#include -#include - -// TODO, if AER is to be optimized again, we will need this -// #include "aligner.h" -#include "lattice.h" -#include "mert_geometry.h" -#include "error_surface.h" -#include "ns.h" - -using namespace std; - -const bool minimize_segments = true; // if adjacent segments have equal scores, merge them - -void ComputeErrorSurface(const SegmentEvaluator& ss, - const ConvexHull& ve, - ErrorSurface* env, - const EvaluationMetric* metric, - const Hypergraph& hg) { - vector prev_trans; - const vector >& ienv = ve.GetSortedSegs(); - env->resize(ienv.size()); - SufficientStats prev_score; // defaults to 0 - int j = 0; - for (unsigned i = 0; i < ienv.size(); ++i) { - const MERTPoint& seg = *ienv[i]; - vector trans; -#if 0 - if (type == AER) { - vector edges(hg.edges_.size(), false); - seg.CollectEdgesUsed(&edges); // get the set of edges in the viterbi - // alignment - ostringstream os; - const string* psrc = ss.GetSource(); - if (psrc == NULL) { - cerr << "AER scoring in VEST requires source, but it is missing!\n"; - abort(); - } - size_t pos = psrc->rfind(" ||| "); - if (pos == string::npos) { - cerr << "Malformed source for AER: expected |||\nINPUT: " << *psrc << endl; - abort(); - } - Lattice src; - Lattice ref; - LatticeTools::ConvertTextOrPLF(psrc->substr(0, pos), &src); - LatticeTools::ConvertTextOrPLF(psrc->substr(pos + 5), &ref); - AlignerTools::WriteAlignment(src, ref, hg, &os, true, 0, &edges); - string tstr = os.str(); - TD::ConvertSentence(tstr.substr(tstr.rfind(" ||| ") + 5), &trans); - } else { -#endif - seg.ConstructTranslation(&trans); - //} - //cerr << "Scoring: " << TD::GetString(trans) << endl; - if (trans == prev_trans) { - if (!minimize_segments) { - ErrorSegment& out = (*env)[j]; - out.delta.fields.clear(); - out.x = seg.x; - ++j; - } - //cerr << "Identical translation, skipping scoring\n"; - } else { - SufficientStats score; - ss.Evaluate(trans, &score); - // cerr << "score= " << score->ComputeScore() << "\n"; - //string x1; score.Encode(&x1); cerr << "STATS: " << x1 << endl; - const SufficientStats delta = score - prev_score; - //string x2; delta.Encode(&x2); cerr << "DELTA: " << x2 << endl; - //string xx; delta.Encode(&xx); cerr << xx << endl; - prev_trans.swap(trans); - prev_score = score; - if ((!minimize_segments) || (!delta.IsAdditiveIdentity())) { - ErrorSegment& out = (*env)[j]; - out.delta = delta; - out.x = seg.x; - ++j; - } - } - } - // cerr << " In segments: " << ienv.size() << endl; - // cerr << "Out segments: " << j << endl; - assert(j > 0); - env->resize(j); -} - diff --git a/dpmert/ces.h b/dpmert/ces.h deleted file mode 100644 index e4fa2080..00000000 --- a/dpmert/ces.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _CES_H_ -#define _CES_H_ - -class ConvexHull; -class Hypergraph; -class SegmentEvaluator; -class ErrorSurface; -class EvaluationMetric; - -void ComputeErrorSurface(const SegmentEvaluator& ss, - const ConvexHull& convex_hull, - ErrorSurface* es, - const EvaluationMetric* metric, - const Hypergraph& hg); - -#endif diff --git a/dpmert/decode-and-evaluate.pl b/dpmert/decode-and-evaluate.pl deleted file mode 100755 index fe765d00..00000000 --- a/dpmert/decode-and-evaluate.pl +++ /dev/null @@ -1,246 +0,0 @@ -#!/usr/bin/env perl -use strict; -my @ORIG_ARGV=@ARGV; -use Cwd qw(getcwd); -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; } - -# Skip local config (used for distributing jobs) if we're running in local-only mode -use LocalConfig; -use Getopt::Long; -use File::Basename qw(basename); -my $QSUB_CMD = qsub_args(mert_memory()); - -require "libcall.pl"; - -# Default settings -my $default_jobs = env_default_jobs(); -my $bin_dir = $SCRIPT_DIR; -die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; -my $FAST_SCORE="$bin_dir/../mteval/fast_score"; -die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; -my $parallelize = "$bin_dir/parallelize.pl"; -my $libcall = "$bin_dir/libcall.pl"; -my $sentserver = "$bin_dir/sentserver"; -my $sentclient = "$bin_dir/sentclient"; -my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm"; - -my $SCORER = $FAST_SCORE; -my $cdec = "$bin_dir/../decoder/cdec"; -die "Can't find decoder in $cdec" unless -x $cdec; -die "Can't find $parallelize" unless -x $parallelize; -die "Can't find $libcall" unless -e $libcall; -my $decoder = $cdec; -my $jobs = $default_jobs; # number of decode nodes -my $pmem = "9g"; -my $help = 0; -my $config; -my $test_set; -my $weights; -my $use_make = 1; -my $useqsub; -my $cpbin=1; -# Process command-line options -if (GetOptions( - "jobs=i" => \$jobs, - "help" => \$help, - "qsub" => \$useqsub, - "input=s" => \$test_set, - "config=s" => \$config, - "weights=s" => \$weights, -) == 0 || @ARGV!=0 || $help) { - print_help(); - exit; -} - -if ($useqsub) { - $use_make = 0; - die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); -} - -my @missing_args = (); - -if (!defined $test_set) { push @missing_args, "--input"; } -if (!defined $config) { push @missing_args, "--config"; } -if (!defined $weights) { push @missing_args, "--weights"; } -die "Please specify missing arguments: " . join (', ', @missing_args) . "\nUse --help for more information.\n" if (@missing_args); - -my @tf = localtime(time); -my $tname = basename($test_set); -$tname =~ s/\.(sgm|sgml|xml)$//i; -my $dir = "eval.$tname." . sprintf('%d%02d%02d-%02d%02d%02d', 1900+$tf[5], $tf[4], $tf[3], $tf[2], $tf[1], $tf[0]); - -my $time = unchecked_output("date"); - -check_call("mkdir -p $dir"); - -split_devset($test_set, "$dir/test.input.raw", "$dir/test.refs"); -my $refs = "-r $dir/test.refs"; -my $newsrc = "$dir/test.input"; -enseg("$dir/test.input.raw", $newsrc); -my $src_file = $newsrc; -open F, "<$src_file" or die "Can't read $src_file: $!"; close F; - -my $test_trans="$dir/test.trans"; -my $logdir="$dir/logs"; -my $decoderLog="$logdir/decoder.sentserver.log"; -check_call("mkdir -p $logdir"); - -#decode -print STDERR "RUNNING DECODER AT "; -print STDERR unchecked_output("date"); -my $decoder_cmd = "$decoder -c $config --weights $weights"; -my $pcmd; -if ($use_make) { - $pcmd = "cat $src_file | $parallelize --workdir $dir --use-fork -p $pmem -e $logdir -j $jobs --"; -} else { - $pcmd = "cat $src_file | $parallelize --workdir $dir -p $pmem -e $logdir -j $jobs --"; -} -my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $test_trans"; -check_bash_call($cmd); -print STDERR "DECODER COMPLETED AT "; -print STDERR unchecked_output("date"); -print STDERR "\nOUTPUT: $test_trans\n\n"; -my $bleu = check_output("cat $test_trans | $SCORER $refs -m ibm_bleu"); -chomp $bleu; -print STDERR "BLEU: $bleu\n"; -my $ter = check_output("cat $test_trans | $SCORER $refs -m ter"); -chomp $ter; -print STDERR " TER: $ter\n"; -open TR, ">$dir/test.scores" or die "Can't write $dir/test.scores: $!"; -print TR <$newsrc"); - my $i=0; - while (my $line=){ - chomp $line; - if ($line =~ /^\s* tags, you must include a zero-based id attribute"; - } - } else { - print NEWSRC "$line\n"; - } - $i++; - } - close SRC; - close NEWSRC; -} - -sub print_help { - my $executable = basename($0); chomp $executable; - print << "Help"; - -Usage: $executable [options] - - $executable --config cdec.ini --weights weights.txt [--jobs N] [--qsub] - -Options: - - --help - Print this message and exit. - - --config - A path to the cdec.ini file. - - --weights - A file specifying feature weights. - - --dir - Directory for intermediate and output files. - -Job control options: - - --jobs - Number of decoder processes to run in parallel. [default=$default_jobs] - - --qsub - Use qsub to run jobs in parallel (qsub must be configured in - environment/LocalEnvironment.pm) - - --pmem - Amount of physical memory requested for parallel decoding jobs - (used with qsub requests only) - -Help -} - -sub convert { - my ($str) = @_; - my @ps = split /;/, $str; - my %dict = (); - for my $p (@ps) { - my ($k, $v) = split /=/, $p; - $dict{$k} = $v; - } - return %dict; -} - - - -sub cmdline { - return join ' ',($0,@ORIG_ARGV); -} - -#buggy: last arg gets quoted sometimes? -my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; -my $shell_escape_in_quote=qr{[\\"\$`!]}; - -sub escape_shell { - my ($arg)=@_; - return undef unless defined $arg; - if ($arg =~ /$is_shell_special/) { - $arg =~ s/($shell_escape_in_quote)/\\$1/g; - return "\"$arg\""; - } - return $arg; -} - -sub escaped_shell_args { - return map {local $_=$_;chomp;escape_shell($_)} @_; -} - -sub escaped_shell_args_str { - return join ' ',&escaped_shell_args(@_); -} - -sub escaped_cmdline { - return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); -} - -sub split_devset { - my ($infile, $outsrc, $outref) = @_; - open F, "<$infile" or die "Can't read $infile: $!"; - open S, ">$outsrc" or die "Can't write $outsrc: $!"; - open R, ">$outref" or die "Can't write $outref: $!"; - while() { - chomp; - my ($src, @refs) = split /\s*\|\|\|\s*/; - die "Malformed devset line: $_\n" unless scalar @refs > 0; - print S "$src\n"; - print R join(' ||| ', @refs) . "\n"; - } - close R; - close S; - close F; -} - diff --git a/dpmert/divide_refs.py b/dpmert/divide_refs.py deleted file mode 100755 index b478f918..00000000 --- a/dpmert/divide_refs.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env python -import sys - -(numRefs, outPrefix) = sys.argv[1:] -numRefs = int(numRefs) - -outs = [open(outPrefix+str(i), "w") for i in range(numRefs)] - -i = 0 -for line in sys.stdin: - outs[i].write(line) - i = (i + 1) % numRefs - -for out in outs: - out.close() diff --git a/dpmert/dpmert.pl b/dpmert/dpmert.pl deleted file mode 100755 index c4f98870..00000000 --- a/dpmert/dpmert.pl +++ /dev/null @@ -1,617 +0,0 @@ -#!/usr/bin/env perl -use strict; -my @ORIG_ARGV=@ARGV; -use Cwd qw(getcwd); -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; } - -# Skip local config (used for distributing jobs) if we're running in local-only mode -use LocalConfig; -use Getopt::Long; -use File::Basename qw(basename); -require "libcall.pl"; - -my $QSUB_CMD = qsub_args(mert_memory()); - -# Default settings -my $srcFile; # deprecated -my $refFiles; # deprecated -my $default_jobs = env_default_jobs(); -my $bin_dir = $SCRIPT_DIR; -die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; -my $FAST_SCORE="$bin_dir/../mteval/fast_score"; -die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; -my $MAPINPUT = "$bin_dir/mr_dpmert_generate_mapper_input"; -my $MAPPER = "$bin_dir/mr_dpmert_map"; -my $REDUCER = "$bin_dir/mr_dpmert_reduce"; -my $parallelize = "$bin_dir/parallelize.pl"; -my $libcall = "$bin_dir/libcall.pl"; -my $sentserver = "$bin_dir/sentserver"; -my $sentclient = "$bin_dir/sentclient"; -my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm"; - -my $SCORER = $FAST_SCORE; -die "Can't find $MAPPER" unless -x $MAPPER; -my $cdec = "$bin_dir/../decoder/cdec"; -die "Can't find decoder in $cdec" unless -x $cdec; -die "Can't find $parallelize" unless -x $parallelize; -die "Can't find $libcall" unless -e $libcall; -my $decoder = $cdec; -my $lines_per_mapper = 200; -my $rand_directions = 15; -my $iteration = 1; -my $best_weights; -my $max_iterations = 15; -my $optimization_iters = 6; -my $jobs = $default_jobs; # number of decode nodes -my $pmem = "9g"; -my $disable_clean = 0; -my %seen_weights; -my $help = 0; -my $epsilon = 0.0001; -my $last_score = -10000000; -my $metric = "ibm_bleu"; -my $dir; -my $iniFile; -my $weights; -my $initialWeights; -my $bleu_weight=1; -my $use_make = 1; # use make to parallelize line search -my $useqsub; -my $pass_suffix = ''; -my $devset; -# Process command-line options -if (GetOptions( - "config=s" => \$iniFile, - "weights=s" => \$initialWeights, - "devset=s" => \$devset, - "jobs=i" => \$jobs, - "pass-suffix=s" => \$pass_suffix, - "help" => \$help, - "qsub" => \$useqsub, - "iterations=i" => \$max_iterations, - "pmem=s" => \$pmem, - "random-directions=i" => \$rand_directions, - "metric=s" => \$metric, - "source-file=s" => \$srcFile, - "output-dir=s" => \$dir, -) == 0 || @ARGV!=0 || $help) { - print_help(); - exit; -} - -if ($useqsub) { - $use_make = 0; - die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); -} - -my @missing_args = (); -if (defined $srcFile || defined $refFiles) { - die <) { $devSize++; } -close F; - -unless($best_weights){ $best_weights = $weights; } -unless($projected_score){ $projected_score = 0.0; } -$seen_weights{$weights} = 1; - -my $random_seed = int(time / 1000); -my $lastWeightsFile; -my $lastPScore = 0; -# main optimization loop -while (1){ - print STDERR "\n\nITERATION $iteration\n==========\n"; - - if ($iteration > $max_iterations){ - print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n"; - last; - } - # iteration-specific files - my $runFile="$dir/run.raw.$iteration"; - my $onebestFile="$dir/1best.$iteration"; - my $logdir="$dir/logs.$iteration"; - my $decoderLog="$logdir/decoder.sentserver.log.$iteration"; - my $scorerLog="$logdir/scorer.log.$iteration"; - check_call("mkdir -p $logdir"); - - - #decode - print STDERR "RUNNING DECODER AT "; - print STDERR unchecked_output("date"); - my $im1 = $iteration - 1; - my $weightsFile="$dir/weights.$im1"; - my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; - my $pcmd; - if ($use_make) { - $pcmd = "cat $srcFile | $parallelize --workdir $dir --use-fork -p $pmem -e $logdir -j $jobs --"; - } else { - $pcmd = "cat $srcFile | $parallelize --workdir $dir -p $pmem -e $logdir -j $jobs --"; - } - my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; - print STDERR "COMMAND:\n$cmd\n"; - check_bash_call($cmd); - my $num_hgs; - my $num_topbest; - my $retries = 0; - while($retries < 5) { - $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); - $num_topbest = check_output("wc -l < $runFile"); - print STDERR "NUMBER OF HGs: $num_hgs\n"; - print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n"; - if($devSize == $num_hgs && $devSize == $num_topbest) { - last; - } else { - print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n"; - sleep(3); - } - $retries++; - } - die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); - my $dec_score = check_output("cat $runFile | $SCORER $refs -m $metric"); - chomp $dec_score; - print STDERR "DECODER SCORE: $dec_score\n"; - - # save space - check_call("gzip -f $runFile"); - check_call("gzip -f $decoderLog"); - - # run optimizer - print STDERR "RUNNING OPTIMIZER AT "; - print STDERR unchecked_output("date"); - my $mergeLog="$logdir/prune-merge.log.$iteration"; - - my $score = 0; - my $icc = 0; - my $inweights="$dir/weights.$im1"; - for (my $opt_iter=1; $opt_iter<$optimization_iters; $opt_iter++) { - print STDERR "\nGENERATE OPTIMIZATION STRATEGY (OPT-ITERATION $opt_iter/$optimization_iters)\n"; - print STDERR unchecked_output("date"); - $icc++; - $cmd="$MAPINPUT -w $inweights -r $dir/hgs -s $devSize -d $rand_directions > $dir/agenda.$im1-$opt_iter"; - print STDERR "COMMAND:\n$cmd\n"; - check_call($cmd); - check_call("mkdir -p $dir/splag.$im1"); - $cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1-$opt_iter $dir/splag.$im1/mapinput."; - print STDERR "COMMAND:\n$cmd\n"; - check_call($cmd); - opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!"; - my @shards = grep { /^mapinput\./ } readdir(DIR); - closedir DIR; - die "No shards!" unless scalar @shards > 0; - my $joblist = ""; - my $nmappers = 0; - my @mapoutputs = (); - @cleanupcmds = (); - my %o2i = (); - my $first_shard = 1; - my $mkfile; # only used with makefiles - my $mkfilename; - if ($use_make) { - $mkfilename = "$dir/splag.$im1/domap.mk"; - open $mkfile, ">$mkfilename" or die "Couldn't write $mkfilename: $!"; - print $mkfile "all: $dir/splag.$im1/map.done\n\n"; - } - my @mkouts = (); # only used with makefiles - for my $shard (@shards) { - my $mapoutput = $shard; - my $client_name = $shard; - $client_name =~ s/mapinput.//; - $client_name = "dpmert.$client_name"; - $mapoutput =~ s/mapinput/mapoutput/; - push @mapoutputs, "$dir/splag.$im1/$mapoutput"; - $o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard"; - my $script = "$MAPPER -s $srcFile -m $metric $refs < $dir/splag.$im1/$shard | sort -t \$'\\t' -k 1 > $dir/splag.$im1/$mapoutput"; - if ($use_make) { - my $script_file = "$dir/scripts/map.$shard"; - open F, ">$script_file" or die "Can't write $script_file: $!"; - print F "#!/bin/bash\n"; - print F "$script\n"; - close F; - my $output = "$dir/splag.$im1/$mapoutput"; - push @mkouts, $output; - chmod(0755, $script_file) or die "Can't chmod $script_file: $!"; - if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } - print $mkfile "$output: $dir/splag.$im1/$shard\n\t$script_file\n\n"; - } else { - my $script_file = "$dir/scripts/map.$shard"; - open F, ">$script_file" or die "Can't write $script_file: $!"; - print F "$script\n"; - close F; - if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } - - $nmappers++; - my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; - my $jobid = check_output("$qcmd"); - chomp $jobid; - $jobid =~ s/^(\d+)(.*?)$/\1/g; - $jobid =~ s/^Your job (\d+) .*$/\1/; - push(@cleanupcmds, "qdel $jobid 2> /dev/null"); - print STDERR " $jobid"; - if ($joblist == "") { $joblist = $jobid; } - else {$joblist = $joblist . "\|" . $jobid; } - } - } - if ($use_make) { - print $mkfile "$dir/splag.$im1/map.done: @mkouts\n\ttouch $dir/splag.$im1/map.done\n\n"; - close $mkfile; - my $mcmd = "make -j $jobs -f $mkfilename"; - print STDERR "\nExecuting: $mcmd\n"; - check_call($mcmd); - } else { - print STDERR "\nLaunched $nmappers mappers.\n"; - sleep 8; - print STDERR "Waiting for mappers to complete...\n"; - while ($nmappers > 0) { - sleep 5; - my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '"))); - $nmappers = scalar @livejobs; - } - print STDERR "All mappers complete.\n"; - } - my $tol = 0; - my $til = 0; - for my $mo (@mapoutputs) { - my $olines = get_lines($mo); - my $ilines = get_lines($o2i{$mo}); - $tol += $olines; - $til += $ilines; - die "$mo: output lines ($olines) doesn't match input lines ($ilines)" unless $olines==$ilines; - } - print STDERR "Results for $tol/$til lines\n"; - print STDERR "\nSORTING AND RUNNING VEST REDUCER\n"; - print STDERR unchecked_output("date"); - $cmd="sort -t \$'\\t' -k 1 @mapoutputs | $REDUCER -m $metric > $dir/redoutput.$im1"; - print STDERR "COMMAND:\n$cmd\n"; - check_bash_call($cmd); - $cmd="sort -nk3 $DIR_FLAG '-t|' $dir/redoutput.$im1 | head -1"; - # sort returns failure even when it doesn't fail for some reason - my $best=unchecked_output("$cmd"); chomp $best; - print STDERR "$best\n"; - my ($oa, $x, $xscore) = split /\|/, $best; - $score = $xscore; - print STDERR "PROJECTED SCORE: $score\n"; - if (abs($x) < $epsilon) { - print STDERR "\nOPTIMIZER: no score improvement: abs($x) < $epsilon\n"; - last; - } - my $psd = $score - $last_score; - $last_score = $score; - if (abs($psd) < $epsilon) { - print STDERR "\nOPTIMIZER: no score improvement: abs($psd) < $epsilon\n"; - last; - } - my ($origin, $axis) = split /\s+/, $oa; - - my %ori = convert($origin); - my %axi = convert($axis); - - my $finalFile="$dir/weights.$im1-$opt_iter"; - open W, ">$finalFile" or die "Can't write: $finalFile: $!"; - my $norm = 0; - for my $k (sort keys %ori) { - my $dd = $ori{$k} + $axi{$k} * $x; - $norm += $dd * $dd; - } - $norm = sqrt($norm); - $norm = 1; - for my $k (sort keys %ori) { - my $v = ($ori{$k} + $axi{$k} * $x) / $norm; - print W "$k $v\n"; - } - check_call("rm $dir/splag.$im1/*"); - $inweights = $finalFile; - } - $lastWeightsFile = "$dir/weights.$iteration"; - check_call("cp $inweights $lastWeightsFile"); - if ($icc < 2) { - print STDERR "\nREACHED STOPPING CRITERION: score change too little\n"; - last; - } - $lastPScore = $score; - $iteration++; - print STDERR "\n==========\n"; -} - -check_call("cp $lastWeightsFile $dir/weights.final"); -print STDERR "\nFINAL WEIGHTS: $dir/weights.final\n(Use -w with the decoder)\n\n"; -print STDOUT "$dir/weights.final\n"; -exit 0; - - -sub get_lines { - my $fn = shift @_; - open FL, "<$fn" or die "Couldn't read $fn: $!"; - my $lc = 0; - while() { $lc++; } - return $lc; -} - -sub read_weights_file { - my ($file) = @_; - open F, "<$file" or die "Couldn't read $file: $!"; - my @r = (); - my $pm = -1; - while() { - next if /^#/; - next if /^\s*$/; - chomp; - if (/^(.+)\s+(.+)$/) { - my $m = $1; - my $w = $2; - die "Weights out of order: $m <= $pm" unless $m > $pm; - push @r, $w; - } else { - warn "Unexpected feature name in weight file: $_"; - } - } - close F; - return join ' ', @r; -} - -sub update_weights_file { - my ($neww, $rfn, $rpts) = @_; - my @feats = @$rfn; - my @pts = @$rpts; - my $num_feats = scalar @feats; - my $num_pts = scalar @pts; - die "$num_feats (num_feats) != $num_pts (num_pts)" unless $num_feats == $num_pts; - open G, ">$neww" or die; - for (my $i = 0; $i < $num_feats; $i++) { - my $f = $feats[$i]; - my $lambda = $pts[$i]; - print G "$f $lambda\n"; - } - close G; -} - -sub enseg { - my $src = shift; - my $newsrc = shift; - open(SRC, $src); - open(NEWSRC, ">$newsrc"); - my $i=0; - while (my $line=){ - chomp $line; - if ($line =~ /^\s* tags, you must include a zero-based id attribute"; - } - } else { - print NEWSRC "$line\n"; - } - $i++; - } - close SRC; - close NEWSRC; -} - -sub print_help { - - my $executable = basename($0); chomp $executable; - print << "Help"; - -Usage: $executable [options] - - $executable [options] - Runs a complete MERT optimization. Required options are --weights, - --devset, and --config. - -Options: - - --config [-c ] - The decoder configuration file. - - --devset [-d ] - The source *and* references for the development set. - - --weights [-w ] - A file specifying initial feature weights. The format is - FeatureName_1 value1 - FeatureName_2 value2 - **All and only the weights listed in will be optimized!** - - --metric - Metric to optimize. - Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi - - --iterations - Maximum number of iterations to run. If not specified, defaults - to 10. - - --pass-suffix - If the decoder is doing multi-pass decoding, the pass suffix "2", - "3", etc., is used to control what iteration of weights is set. - - --rand-directions - MERT will attempt to optimize along all of the principle directions, - set this parameter to explore other directions. Defaults to 5. - - --output-dir - Directory for intermediate and output files. - - --help - Print this message and exit. - -Job control options: - - --jobs - Number of decoder processes to run in parallel. [default=$default_jobs] - - --qsub - Use qsub to run jobs in parallel (qsub must be configured in - environment/LocalEnvironment.pm) - - --pmem - Amount of physical memory requested for parallel decoding jobs - (used with qsub requests only) - -Help -} - -sub convert { - my ($str) = @_; - my @ps = split /;/, $str; - my %dict = (); - for my $p (@ps) { - my ($k, $v) = split /=/, $p; - $dict{$k} = $v; - } - return %dict; -} - - - -sub cmdline { - return join ' ',($0,@ORIG_ARGV); -} - -#buggy: last arg gets quoted sometimes? -my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; -my $shell_escape_in_quote=qr{[\\"\$`!]}; - -sub escape_shell { - my ($arg)=@_; - return undef unless defined $arg; - if ($arg =~ /$is_shell_special/) { - $arg =~ s/($shell_escape_in_quote)/\\$1/g; - return "\"$arg\""; - } - return $arg; -} - -sub escaped_shell_args { - return map {local $_=$_;chomp;escape_shell($_)} @_; -} - -sub escaped_shell_args_str { - return join ' ',&escaped_shell_args(@_); -} - -sub escaped_cmdline { - return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); -} - -sub split_devset { - my ($infile, $outsrc, $outref) = @_; - open F, "<$infile" or die "Can't read $infile: $!"; - open S, ">$outsrc" or die "Can't write $outsrc: $!"; - open R, ">$outref" or die "Can't write $outref: $!"; - while() { - chomp; - my ($src, @refs) = split /\s*\|\|\|\s*/; - die "Malformed devset line: $_\n" unless scalar @refs > 0; - print S "$src\n"; - print R join(' ||| ', @refs) . "\n"; - } - close R; - close S; - close F; -} - diff --git a/dpmert/error_surface.cc b/dpmert/error_surface.cc deleted file mode 100644 index 515b67f8..00000000 --- a/dpmert/error_surface.cc +++ /dev/null @@ -1,42 +0,0 @@ -#include "error_surface.h" - -#include -#include - -using namespace std; - -ErrorSurface::~ErrorSurface() {} - -void ErrorSurface::Serialize(std::string* out) const { - const int segments = this->size(); - ostringstream os(ios::binary); - os.write((const char*)&segments,sizeof(segments)); - for (int i = 0; i < segments; ++i) { - const ErrorSegment& cur = (*this)[i]; - string senc; - cur.delta.Encode(&senc); - assert(senc.size() < 1024); - unsigned char len = senc.size(); - os.write((const char*)&cur.x, sizeof(cur.x)); - os.write((const char*)&len, sizeof(len)); - os.write((const char*)&senc[0], len); - } - *out = os.str(); -} - -void ErrorSurface::Deserialize(const std::string& in) { - istringstream is(in, ios::binary); - int segments; - is.read((char*)&segments, sizeof(segments)); - this->resize(segments); - for (int i = 0; i < segments; ++i) { - ErrorSegment& cur = (*this)[i]; - unsigned char len; - is.read((char*)&cur.x, sizeof(cur.x)); - is.read((char*)&len, sizeof(len)); - string senc(len, '\0'); assert(senc.size() == len); - is.read((char*)&senc[0], len); - cur.delta = SufficientStats(senc); - } -} - diff --git a/dpmert/error_surface.h b/dpmert/error_surface.h deleted file mode 100644 index bb65847b..00000000 --- a/dpmert/error_surface.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef _ERROR_SURFACE_H_ -#define _ERROR_SURFACE_H_ - -#include -#include - -#include "ns.h" - -class Score; - -struct ErrorSegment { - double x; - SufficientStats delta; - ErrorSegment() : x(0), delta() {} -}; - -class ErrorSurface : public std::vector { - public: - ~ErrorSurface(); - void Serialize(std::string* out) const; - void Deserialize(const std::string& in); -}; - -#endif diff --git a/dpmert/libcall.pl b/dpmert/libcall.pl deleted file mode 100644 index c7d0f128..00000000 --- a/dpmert/libcall.pl +++ /dev/null @@ -1,71 +0,0 @@ -use IPC::Open3; -use Symbol qw(gensym); - -$DUMMY_STDERR = gensym(); -$DUMMY_STDIN = gensym(); - -# Run the command and ignore failures -sub unchecked_call { - system("@_") -} - -# Run the command and return its output, if any ignoring failures -sub unchecked_output { - return `@_` -} - -# WARNING: Do not use this for commands that will return large amounts -# of stdout or stderr -- they might block indefinitely -sub check_output { - print STDERR "Executing and gathering output: @_\n"; - - my $pid = open3($DUMMY_STDIN, \*PH, $DUMMY_STDERR, @_); - my $proc_output = ""; - while( ) { - $proc_output .= $_; - } - waitpid($pid, 0); - # TODO: Grab signal that the process died from - my $child_exit_status = $? >> 8; - if($child_exit_status == 0) { - return $proc_output; - } else { - print STDERR "ERROR: Execution of @_ failed.\n"; - exit(1); - } -} - -# Based on Moses' safesystem sub -sub check_call { - print STDERR "Executing: @_\n"; - system(@_); - my $exitcode = $? >> 8; - if($exitcode == 0) { - return 0; - } elsif ($? == -1) { - print STDERR "ERROR: Failed to execute: @_\n $!\n"; - exit(1); - - } elsif ($? & 127) { - printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n", - ($? & 127), ($? & 128) ? 'with' : 'without'; - exit(1); - - } else { - print STDERR "Failed with exit code: $exitcode\n" if $exitcode; - exit($exitcode); - } -} - -sub check_bash_call { - my @args = ( "bash", "-auxeo", "pipefail", "-c", "@_"); - check_call(@args); -} - -sub check_bash_output { - my @args = ( "bash", "-auxeo", "pipefail", "-c", "@_"); - return check_output(@args); -} - -# perl module weirdness... -return 1; diff --git a/dpmert/line_mediator.pl b/dpmert/line_mediator.pl deleted file mode 100755 index bc2bb24c..00000000 --- a/dpmert/line_mediator.pl +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/perl -w -#hooks up two processes, 2nd of which has one line of output per line of input, expected by the first, which starts off the communication - -# if you don't know how to fork/exec in a C program, this could be helpful under limited cirmustances (would be ok to liaise with sentserver) - -#WARNING: because it waits for the result from command 2 after sending every line, and especially if command 1 does the same, using sentserver as command 2 won't actually buy you any real parallelism. - -use strict; -use IPC::Open2; -use POSIX qw(pipe dup2 STDIN_FILENO STDOUT_FILENO); - -my $quiet=!$ENV{DEBUG}; -$quiet=1 if $ENV{QUIET}; -sub info { - local $,=' '; - print STDERR @_ unless $quiet; -} - -my $mode='CROSS'; -my $ser='DIRECT'; -$mode='PIPE' if $ENV{PIPE}; -$mode='SNAKE' if $ENV{SNAKE}; -$mode='CROSS' if $ENV{CROSS}; -$ser='SERIAL' if $ENV{SERIAL}; -$ser='DIRECT' if $ENV{DIRECT}; -$ser='SERIAL' if $mode eq 'SNAKE'; -info("mode: $mode\n"); -info("connection: $ser\n"); - - -my @c1; -if (scalar @ARGV) { - do { - push @c1,shift - } while scalar @ARGV && $c1[$#c1] ne '--'; -} -pop @c1; -my @c2=@ARGV; -@ARGV=(); -(scalar @c1 && scalar @c2) || die qq{ -usage: $0 cmd1 args -- cmd2 args -all options are environment variables. -DEBUG=1 env var enables debugging output. -CROSS=1 hooks up two processes, 2nd of which has one line of output per line of input, expected by the first, which starts off the communication. crosses stdin/stderr of cmd1 and cmd2 line by line (both must flush on newline and output. cmd1 initiates the conversation (sends the first line). default: attempts to cross stdin/stdout of c1 and c2 directly (via two unidirectional posix pipes created before fork). -SERIAL=1: (no parallelism possible) but lines exchanged are logged if DEBUG. -if SNAKE then stdin -> c1 -> c2 -> c1 -> stdout. -if PIPE then stdin -> c1 -> c2 -> stdout (same as shell c1|c2, but with SERIAL you can see the intermediate in real time; you could do similar with c1 | tee /dev/fd/2 |c2. -DIRECT=1 (default) will override SERIAL=1. -CROSS=1 (default) will override SNAKE or PIPE. -}; - -info("1 cmd:",@c1,"\n"); -info("2 cmd:",@c2,"\n"); - -sub lineto { - select $_[0]; - $|=1; - shift; - print @_; -} - -if ($ser eq 'SERIAL') { - my ($R1,$W1,$R2,$W2); - my $c1p=open2($R1,$W1,@c1); # Open2 R W backward from Open3. - my $c2p=open2($R2,$W2,@c2); - if ($mode eq 'CROSS') { - while(<$R1>) { - info("1:",$_); - lineto($W2,$_); - last unless defined ($_=<$R2>); - info("1|2:",$_); - lineto($W1,$_); - } - } else { - my $snake=$mode eq 'SNAKE'; - while() { - info("IN:",$_); - lineto($W1,$_); - last unless defined ($_=<$R1>); - info("IN|1:",$_); - lineto($W2,$_); - last unless defined ($_=<$R2>); - info("IN|1|2:",$_); - if ($snake) { - lineto($W1,$_); - last unless defined ($_=<$R1>); - info("IN|1|2|1:",$_); - } - lineto(*STDOUT,$_); - } - } -} else { - info("DIRECT mode\n"); - my @rw1=POSIX::pipe(); - my @rw2=POSIX::pipe(); - my $pid=undef; - $SIG{CHLD} = sub { wait }; - while (not defined ($pid=fork())) { - sleep 1; - } - my $pipe = $mode eq 'PIPE'; - unless ($pipe) { - POSIX::close(STDOUT_FILENO); - POSIX::close(STDIN_FILENO); - } - if ($pid) { - POSIX::dup2($rw1[1],STDOUT_FILENO); - POSIX::dup2($rw2[0],STDIN_FILENO) unless $pipe; - exec @c1; - } else { - POSIX::dup2($rw2[1],STDOUT_FILENO) unless $pipe; - POSIX::dup2($rw1[0],STDIN_FILENO); - exec @c2; - } - while (wait()!=-1) {} -} diff --git a/dpmert/line_optimizer.cc b/dpmert/line_optimizer.cc deleted file mode 100644 index 9cf33502..00000000 --- a/dpmert/line_optimizer.cc +++ /dev/null @@ -1,114 +0,0 @@ -#include "line_optimizer.h" - -#include -#include - -#include "sparse_vector.h" -#include "ns.h" - -using namespace std; - -typedef ErrorSurface::const_iterator ErrorIter; - -// sort by increasing x-ints -struct IntervalComp { - bool operator() (const ErrorIter& a, const ErrorIter& b) const { - return a->x < b->x; - } -}; - -double LineOptimizer::LineOptimize( - const EvaluationMetric* metric, - const vector& surfaces, - const LineOptimizer::ScoreType type, - float* best_score, - const double epsilon) { - // cerr << "MIN=" << MINIMIZE_SCORE << " MAX=" << MAXIMIZE_SCORE << " MINE=" << type << endl; - vector all_ints; - for (vector::const_iterator i = surfaces.begin(); - i != surfaces.end(); ++i) { - const ErrorSurface& surface = *i; - for (ErrorIter j = surface.begin(); j != surface.end(); ++j) - all_ints.push_back(j); - } - sort(all_ints.begin(), all_ints.end(), IntervalComp()); - double last_boundary = all_ints.front()->x; - SufficientStats acc; - float& cur_best_score = *best_score; - cur_best_score = (type == MAXIMIZE_SCORE ? - -numeric_limits::max() : numeric_limits::max()); - bool left_edge = true; - double pos = numeric_limits::quiet_NaN(); - for (vector::iterator i = all_ints.begin(); - i != all_ints.end(); ++i) { - const ErrorSegment& seg = **i; - if (seg.x - last_boundary > epsilon) { - float sco = metric->ComputeScore(acc); - if ((type == MAXIMIZE_SCORE && sco > cur_best_score) || - (type == MINIMIZE_SCORE && sco < cur_best_score) ) { - cur_best_score = sco; - if (left_edge) { - pos = seg.x - 0.1; - left_edge = false; - } else { - pos = last_boundary + (seg.x - last_boundary) / 2; - } - //cerr << "NEW BEST: " << pos << " (score=" << cur_best_score << ")\n"; - } - // string xx = metric->DetailedScore(acc); cerr << "---- " << xx; -#undef SHOW_ERROR_SURFACES -#ifdef SHOW_ERROR_SURFACES - cerr << "x=" << seg.x << "\ts=" << sco << "\n"; -#endif - last_boundary = seg.x; - } - // cerr << "x-boundary=" << seg.x << "\n"; - //string x2; acc.Encode(&x2); cerr << " ACC: " << x2 << endl; - //string x1; seg.delta.Encode(&x1); cerr << " DELTA: " << x1 << endl; - acc += seg.delta; - } - float sco = metric->ComputeScore(acc); - if ((type == MAXIMIZE_SCORE && sco > cur_best_score) || - (type == MINIMIZE_SCORE && sco < cur_best_score) ) { - cur_best_score = sco; - if (left_edge) { - pos = 0; - } else { - pos = last_boundary + 1000.0; - } - } - return pos; -} - -void LineOptimizer::RandomUnitVector(const vector& features_to_optimize, - SparseVector* axis, - RandomNumberGenerator* rng) { - axis->clear(); - for (int i = 0; i < features_to_optimize.size(); ++i) - axis->set_value(features_to_optimize[i], rng->NextNormal(0.0,1.0)); - (*axis) /= axis->l2norm(); -} - -void LineOptimizer::CreateOptimizationDirections( - const vector& features_to_optimize, - int additional_random_directions, - RandomNumberGenerator* rng, - vector >* dirs - , bool include_orthogonal - ) { - dirs->clear(); - typedef SparseVector Dir; - vector &out=*dirs; - int i=0; - if (include_orthogonal) - for (;i - -#include "sparse_vector.h" -#include "error_surface.h" -#include "sampler.h" - -class EvaluationMetric; -class Weights; - -struct LineOptimizer { - - // use MINIMIZE_SCORE for things like TER, WER - // MAXIMIZE_SCORE for things like BLEU - enum ScoreType { MAXIMIZE_SCORE, MINIMIZE_SCORE }; - - // merge all the error surfaces together into a global - // error surface and find (the middle of) the best segment - static double LineOptimize( - const EvaluationMetric* metric, - const std::vector& envs, - const LineOptimizer::ScoreType type, - float* best_score, - const double epsilon = 1.0/65536.0); - - // return a random vector of length 1 where all dimensions - // not listed in dimensions will be 0. - static void RandomUnitVector(const std::vector& dimensions, - SparseVector* axis, - RandomNumberGenerator* rng); - - // generate a list of directions to optimize; the list will - // contain the orthogonal vectors corresponding to the dimensions in - // primary and then additional_random_directions directions in those - // dimensions as well. All vectors will be length 1. - static void CreateOptimizationDirections( - const std::vector& primary, - int additional_random_directions, - RandomNumberGenerator* rng, - std::vector >* dirs - , bool include_primary=true - ); - -}; - -#endif diff --git a/dpmert/lo_test.cc b/dpmert/lo_test.cc deleted file mode 100644 index 95a08d3d..00000000 --- a/dpmert/lo_test.cc +++ /dev/null @@ -1,229 +0,0 @@ -#define BOOST_TEST_MODULE LineOptimizerTest -#include -#include - -#include -#include -#include - -#include - -#include "ns.h" -#include "ns_docscorer.h" -#include "ces.h" -#include "fdict.h" -#include "hg.h" -#include "kbest.h" -#include "hg_io.h" -#include "filelib.h" -#include "inside_outside.h" -#include "viterbi.h" -#include "mert_geometry.h" -#include "line_optimizer.h" - -using namespace std; - -const char* ref11 = "australia reopens embassy in manila"; -const char* ref12 = "( afp , manila , january 2 ) australia reopened its embassy in the philippines today , which was shut down about seven weeks ago due to what was described as a specific threat of a terrorist attack ."; -const char* ref21 = "australia reopened manila embassy"; -const char* ref22 = "( agence france-presse , manila , 2nd ) - australia reopened its embassy in the philippines today . the embassy was closed seven weeks ago after what was described as a specific threat of a terrorist attack ."; -const char* ref31 = "australia to reopen embassy in manila"; -const char* ref32 = "( afp report from manila , january 2 ) australia reopened its embassy in the philippines today . seven weeks ago , the embassy was shut down due to so - called confirmed terrorist attack threats ."; -const char* ref41 = "australia to re - open its embassy to manila"; -const char* ref42 = "( afp , manila , thursday ) australia reopens its embassy to manila , which was closed for the so - called \" clear \" threat of terrorist attack 7 weeks ago ."; - -BOOST_AUTO_TEST_CASE( TestCheckNaN) { - double x = 0; - double y = 0; - double z = x / y; - BOOST_CHECK_EQUAL(true, std::isnan(z)); -} - -BOOST_AUTO_TEST_CASE(TestConvexHull) { - boost::shared_ptr a1(new MERTPoint(-1, 0)); - boost::shared_ptr b1(new MERTPoint(1, 0)); - boost::shared_ptr a2(new MERTPoint(-1, 1)); - boost::shared_ptr b2(new MERTPoint(1, -1)); - vector > sa; sa.push_back(a1); sa.push_back(b1); - vector > sb; sb.push_back(a2); sb.push_back(b2); - ConvexHull a(sa); - cerr << a << endl; - ConvexHull b(sb); - ConvexHull c = a; - c *= b; - cerr << a << " (*) " << b << " = " << c << endl; - BOOST_CHECK_EQUAL(3, c.size()); -} - -BOOST_AUTO_TEST_CASE(TestConvexHullInside) { - const string json = "{\"rules\":[1,\"[X] ||| a\",2,\"[X] ||| A [1]\",3,\"[X] ||| c\",4,\"[X] ||| C [1]\",5,\"[X] ||| [1] B [2]\",6,\"[X] ||| [1] b [2]\",7,\"[X] ||| X [1]\",8,\"[X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.8,1,-0.1],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[1,-1],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[2],\"feats\":[0,-0.2,1,-0.1],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[1,3],\"feats\":[0,-1.2,1,-0.2],\"rule\":5},{\"tail\":[1,3],\"feats\":[0,-0.5,1,-1.3],\"rule\":6}],\"node\":{\"in_edges\":[4,5]},\"edges\":[{\"tail\":[4],\"feats\":[0,-0.5,1,-0.8],\"rule\":7},{\"tail\":[4],\"feats\":[0,-0.7,1,-0.9],\"rule\":8}],\"node\":{\"in_edges\":[6,7]}}"; - Hypergraph hg; - istringstream instr(json); - HypergraphIO::ReadFromJSON(&instr, &hg); - SparseVector wts; - wts.set_value(FD::Convert("f1"), 0.4); - wts.set_value(FD::Convert("f2"), 1.0); - hg.Reweight(wts); - vector, prob_t> > list; - std::vector > features; - KBest::KBestDerivations, ESentenceTraversal> kbest(hg, 10); - for (int i = 0; i < 10; ++i) { - const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = - kbest.LazyKthBest(hg.nodes_.size() - 1, i); - if (!d) break; - cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl; - } - SparseVector dir; dir.set_value(FD::Convert("f1"), 1.0); - ConvexHullWeightFunction wf(wts, dir); - ConvexHull env = Inside(hg, NULL, wf); - cerr << env << endl; - const vector >& segs = env.GetSortedSegs(); - dir *= segs[1]->x; - wts += dir; - hg.Reweight(wts); - KBest::KBestDerivations, ESentenceTraversal> kbest2(hg, 10); - for (int i = 0; i < 10; ++i) { - const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = - kbest2.LazyKthBest(hg.nodes_.size() - 1, i); - if (!d) break; - cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl; - } - for (unsigned i = 0; i < segs.size(); ++i) { - cerr << "seg=" << i << endl; - vector trans; - segs[i]->ConstructTranslation(&trans); - cerr << TD::GetString(trans) << endl; - } -} - -BOOST_AUTO_TEST_CASE( TestS1) { - int fPhraseModel_0 = FD::Convert("PhraseModel_0"); - int fPhraseModel_1 = FD::Convert("PhraseModel_1"); - int fPhraseModel_2 = FD::Convert("PhraseModel_2"); - int fLanguageModel = FD::Convert("LanguageModel"); - int fWordPenalty = FD::Convert("WordPenalty"); - int fPassThrough = FD::Convert("PassThrough"); - SparseVector wts; - wts.set_value(fWordPenalty, 4.25); - wts.set_value(fLanguageModel, -1.1165); - wts.set_value(fPhraseModel_0, -0.96); - wts.set_value(fPhraseModel_1, -0.65); - wts.set_value(fPhraseModel_2, -0.77); - wts.set_value(fPassThrough, -10.0); - - vector to_optimize; - to_optimize.push_back(fWordPenalty); - to_optimize.push_back(fLanguageModel); - to_optimize.push_back(fPhraseModel_0); - to_optimize.push_back(fPhraseModel_1); - to_optimize.push_back(fPhraseModel_2); - - std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); - - Hypergraph hg; - ReadFile rf(path + "/0.json.gz"); - HypergraphIO::ReadFromJSON(rf.stream(), &hg); - hg.Reweight(wts); - - Hypergraph hg2; - ReadFile rf2(path + "/1.json.gz"); - HypergraphIO::ReadFromJSON(rf2.stream(), &hg2); - hg2.Reweight(wts); - - vector > refs1(4); - TD::ConvertSentence(ref11, &refs1[0]); - TD::ConvertSentence(ref21, &refs1[1]); - TD::ConvertSentence(ref31, &refs1[2]); - TD::ConvertSentence(ref41, &refs1[3]); - vector > refs2(4); - TD::ConvertSentence(ref12, &refs2[0]); - TD::ConvertSentence(ref22, &refs2[1]); - TD::ConvertSentence(ref32, &refs2[2]); - TD::ConvertSentence(ref42, &refs2[3]); - vector envs(2); - - RandomNumberGenerator rng; - - vector > axes; // directions to search - LineOptimizer::CreateOptimizationDirections( - to_optimize, - 10, - &rng, - &axes); - assert(axes.size() == 10 + to_optimize.size()); - for (unsigned i = 0; i < axes.size(); ++i) - cerr << axes[i] << endl; - const SparseVector& axis = axes[0]; - - cerr << "Computing Viterbi envelope using inside algorithm...\n"; - cerr << "axis: " << axis << endl; - clock_t t_start=clock(); - ConvexHullWeightFunction wf(wts, axis); // wts = starting point, axis = search direction - envs[0] = Inside(hg, NULL, wf); - envs[1] = Inside(hg2, NULL, wf); - - vector es(2); - EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU"); - boost::shared_ptr scorer1 = metric->CreateSegmentEvaluator(refs1); - boost::shared_ptr scorer2 = metric->CreateSegmentEvaluator(refs2); - ComputeErrorSurface(*scorer1, envs[0], &es[0], metric, hg); - ComputeErrorSurface(*scorer2, envs[1], &es[1], metric, hg2); - cerr << envs[0].size() << " " << envs[1].size() << endl; - cerr << es[0].size() << " " << es[1].size() << endl; - envs.clear(); - clock_t t_env=clock(); - float score; - double m = LineOptimizer::LineOptimize(metric,es, LineOptimizer::MAXIMIZE_SCORE, &score); - clock_t t_opt=clock(); - cerr << "line optimizer returned: " << m << " (SCORE=" << score << ")\n"; - BOOST_CHECK_CLOSE(0.48719698, score, 1e-5); - SparseVector res = axis; - res *= m; - res += wts; - cerr << "res: " << res << endl; - cerr << "ENVELOPE PROCESSING=" << (static_cast(t_env - t_start) / 1000.0) << endl; - cerr << " LINE OPTIMIZATION=" << (static_cast(t_opt - t_env) / 1000.0) << endl; - hg.Reweight(res); - hg2.Reweight(res); - vector t1,t2; - ViterbiESentence(hg, &t1); - ViterbiESentence(hg2, &t2); - cerr << TD::GetString(t1) << endl; - cerr << TD::GetString(t2) << endl; -} - -BOOST_AUTO_TEST_CASE(TestZeroOrigin) { - const string json = "{\"rules\":[1,\"[X7] ||| blA ||| without ||| LHSProb=3.92173 LexE2F=2.90799 LexF2E=1.85003 GenerativeProb=10.5381 RulePenalty=1 XFE=2.77259 XEF=0.441833 LabelledEF=2.63906 LabelledFE=4.96981 LogRuleCount=0.693147\",2,\"[X7] ||| blA ||| except ||| LHSProb=4.92173 LexE2F=3.90799 LexF2E=1.85003 GenerativeProb=11.5381 RulePenalty=1 XFE=2.77259 XEF=1.44183 LabelledEF=2.63906 LabelledFE=4.96981 LogRuleCount=1.69315\",3,\"[S] ||| [X7,1] ||| [1] ||| GlueTop=1\",4,\"[X28] ||| EnwAn ||| title ||| LHSProb=3.96802 LexE2F=2.22462 LexF2E=1.83258 GenerativeProb=10.0863 RulePenalty=1 XFE=0 XEF=1.20397 LabelledEF=1.20397 LabelledFE=-1.98341e-08 LogRuleCount=1.09861\",5,\"[X0] ||| EnwAn ||| funny ||| LHSProb=3.98479 LexE2F=1.79176 LexF2E=3.21888 GenerativeProb=11.1681 RulePenalty=1 XFE=0 XEF=2.30259 LabelledEF=2.30259 LabelledFE=0 LogRuleCount=0 SingletonRule=1\",6,\"[X8] ||| [X7,1] EnwAn ||| entitled [1] ||| LHSProb=3.82533 LexE2F=3.21888 LexF2E=2.52573 GenerativeProb=11.3276 RulePenalty=1 XFE=1.20397 XEF=1.20397 LabelledEF=2.30259 LabelledFE=2.30259 LogRuleCount=0 SingletonRule=1\",7,\"[S] ||| [S,1] [X28,2] ||| [1] [2] ||| Glue=1\",8,\"[S] ||| [S,1] [X0,2] ||| [1] [2] ||| Glue=1\",9,\"[S] ||| [X8,1] ||| [1] ||| GlueTop=1\",10,\"[Goal] ||| [S,1] ||| [1]\"],\"features\":[\"PassThrough\",\"Glue\",\"GlueTop\",\"LanguageModel\",\"WordPenalty\",\"LHSProb\",\"LexE2F\",\"LexF2E\",\"GenerativeProb\",\"RulePenalty\",\"XFE\",\"XEF\",\"LabelledEF\",\"LabelledFE\",\"LogRuleCount\",\"SingletonRule\"],\"edges\":[{\"tail\":[],\"spans\":[0,1,-1,-1],\"feats\":[5,3.92173,6,2.90799,7,1.85003,8,10.5381,9,1,10,2.77259,11,0.441833,12,2.63906,13,4.96981,14,0.693147],\"rule\":1},{\"tail\":[],\"spans\":[0,1,-1,-1],\"feats\":[5,4.92173,6,3.90799,7,1.85003,8,11.5381,9,1,10,2.77259,11,1.44183,12,2.63906,13,4.96981,14,1.69315],\"rule\":2}],\"node\":{\"in_edges\":[0,1],\"cat\":\"X7\"},\"edges\":[{\"tail\":[0],\"spans\":[0,1,-1,-1],\"feats\":[2,1],\"rule\":3}],\"node\":{\"in_edges\":[2],\"cat\":\"S\"},\"edges\":[{\"tail\":[],\"spans\":[1,2,-1,-1],\"feats\":[5,3.96802,6,2.22462,7,1.83258,8,10.0863,9,1,11,1.20397,12,1.20397,13,-1.98341e-08,14,1.09861],\"rule\":4}],\"node\":{\"in_edges\":[3],\"cat\":\"X28\"},\"edges\":[{\"tail\":[],\"spans\":[1,2,-1,-1],\"feats\":[5,3.98479,6,1.79176,7,3.21888,8,11.1681,9,1,11,2.30259,12,2.30259,15,1],\"rule\":5}],\"node\":{\"in_edges\":[4],\"cat\":\"X0\"},\"edges\":[{\"tail\":[0],\"spans\":[0,2,-1,-1],\"feats\":[5,3.82533,6,3.21888,7,2.52573,8,11.3276,9,1,10,1.20397,11,1.20397,12,2.30259,13,2.30259,15,1],\"rule\":6}],\"node\":{\"in_edges\":[5],\"cat\":\"X8\"},\"edges\":[{\"tail\":[1,2],\"spans\":[0,2,-1,-1],\"feats\":[1,1],\"rule\":7},{\"tail\":[1,3],\"spans\":[0,2,-1,-1],\"feats\":[1,1],\"rule\":8},{\"tail\":[4],\"spans\":[0,2,-1,-1],\"feats\":[2,1],\"rule\":9}],\"node\":{\"in_edges\":[6,7,8],\"cat\":\"S\"},\"edges\":[{\"tail\":[5],\"spans\":[0,2,-1,-1],\"feats\":[],\"rule\":10}],\"node\":{\"in_edges\":[9],\"cat\":\"Goal\"}}"; - Hypergraph hg; - istringstream instr(json); - HypergraphIO::ReadFromJSON(&instr, &hg); - SparseVector wts; - wts.set_value(FD::Convert("PassThrough"), -0.929201533002898); - hg.Reweight(wts); - - vector, prob_t> > list; - std::vector > features; - KBest::KBestDerivations, ESentenceTraversal> kbest(hg, 10); - for (int i = 0; i < 10; ++i) { - const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = - kbest.LazyKthBest(hg.nodes_.size() - 1, i); - if (!d) break; - cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl; - } - - SparseVector axis; axis.set_value(FD::Convert("Glue"),1.0); - ConvexHullWeightFunction wf(wts, axis); // wts = starting point, axis = search direction - vector envs(1); - envs[0] = Inside(hg, NULL, wf); - - vector > mr(4); - TD::ConvertSentence("untitled", &mr[0]); - TD::ConvertSentence("with no title", &mr[1]); - TD::ConvertSentence("without a title", &mr[2]); - TD::ConvertSentence("without title", &mr[3]); - EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU"); - boost::shared_ptr scorer1 = metric->CreateSegmentEvaluator(mr); - vector es(1); - ComputeErrorSurface(*scorer1, envs[0], &es[0], metric, hg); -} - diff --git a/dpmert/mert_geometry.cc b/dpmert/mert_geometry.cc deleted file mode 100644 index d6973658..00000000 --- a/dpmert/mert_geometry.cc +++ /dev/null @@ -1,185 +0,0 @@ -#include "mert_geometry.h" - -#include -#include - -using namespace std; - -ConvexHull::ConvexHull(int i) { - if (i == 0) { - // do nothing - <> - } else if (i == 1) { - points.push_back(boost::shared_ptr(new MERTPoint(0, 0, 0, boost::shared_ptr(), boost::shared_ptr()))); - assert(this->IsMultiplicativeIdentity()); - } else { - cerr << "Only can create ConvexHull semiring 0 and 1 with this constructor!\n"; - abort(); - } -} - -const ConvexHull ConvexHullWeightFunction::operator()(const Hypergraph::Edge& e) const { - const double m = direction.dot(e.feature_values_); - const double b = origin.dot(e.feature_values_); - MERTPoint* point = new MERTPoint(m, b, e); - return ConvexHull(1, point); -} - -ostream& operator<<(ostream& os, const ConvexHull& env) { - os << '<'; - const vector >& points = env.GetSortedSegs(); - for (int i = 0; i < points.size(); ++i) - os << (i==0 ? "" : "|") << "x=" << points[i]->x << ",b=" << points[i]->b << ",m=" << points[i]->m << ",p1=" << points[i]->p1 << ",p2=" << points[i]->p2; - return os << '>'; -} - -#define ORIGINAL_MERT_IMPLEMENTATION 1 -#ifdef ORIGINAL_MERT_IMPLEMENTATION - -struct SlopeCompare { - bool operator() (const boost::shared_ptr& a, const boost::shared_ptr& b) const { - return a->m < b->m; - } -}; - -const ConvexHull& ConvexHull::operator+=(const ConvexHull& other) { - if (!other.is_sorted) other.Sort(); - if (points.empty()) { - points = other.points; - return *this; - } - is_sorted = false; - int j = points.size(); - points.resize(points.size() + other.points.size()); - for (int i = 0; i < other.points.size(); ++i) - points[j++] = other.points[i]; - assert(j == points.size()); - return *this; -} - -void ConvexHull::Sort() const { - sort(points.begin(), points.end(), SlopeCompare()); - const int k = points.size(); - int j = 0; - for (int i = 0; i < k; ++i) { - MERTPoint l = *points[i]; - l.x = kMinusInfinity; - // cerr << "m=" << l.m << endl; - if (0 < j) { - if (points[j-1]->m == l.m) { // lines are parallel - if (l.b <= points[j-1]->b) continue; - --j; - } - while(0 < j) { - l.x = (l.b - points[j-1]->b) / (points[j-1]->m - l.m); - if (points[j-1]->x < l.x) break; - --j; - } - if (0 == j) l.x = kMinusInfinity; - } - *points[j++] = l; - } - points.resize(j); - is_sorted = true; -} - -const ConvexHull& ConvexHull::operator*=(const ConvexHull& other) { - if (other.IsMultiplicativeIdentity()) { return *this; } - if (this->IsMultiplicativeIdentity()) { (*this) = other; return *this; } - - if (!is_sorted) Sort(); - if (!other.is_sorted) other.Sort(); - - if (this->IsEdgeEnvelope()) { -// if (other.size() > 1) -// cerr << *this << " (TIMES) " << other << endl; - boost::shared_ptr edge_parent = points[0]; - const double& edge_b = edge_parent->b; - const double& edge_m = edge_parent->m; - points.clear(); - for (int i = 0; i < other.points.size(); ++i) { - const MERTPoint& p = *other.points[i]; - const double m = p.m + edge_m; - const double b = p.b + edge_b; - const double& x = p.x; // x's don't change with * - points.push_back(boost::shared_ptr(new MERTPoint(x, m, b, edge_parent, other.points[i]))); - assert(points.back()->p1->edge); - } -// if (other.size() > 1) -// cerr << " = " << *this << endl; - } else { - vector > new_points; - int this_i = 0; - int other_i = 0; - const int this_size = points.size(); - const int other_size = other.points.size(); - double cur_x = kMinusInfinity; // moves from left to right across the - // real numbers, stopping for all inter- - // sections - double this_next_val = (1 < this_size ? points[1]->x : kPlusInfinity); - double other_next_val = (1 < other_size ? other.points[1]->x : kPlusInfinity); - while (this_i < this_size && other_i < other_size) { - const MERTPoint& this_point = *points[this_i]; - const MERTPoint& other_point= *other.points[other_i]; - const double m = this_point.m + other_point.m; - const double b = this_point.b + other_point.b; - - new_points.push_back(boost::shared_ptr(new MERTPoint(cur_x, m, b, points[this_i], other.points[other_i]))); - int comp = 0; - if (this_next_val < other_next_val) comp = -1; else - if (this_next_val > other_next_val) comp = 1; - if (0 == comp) { // the next values are equal, advance both indices - ++this_i; - ++other_i; - cur_x = this_next_val; // could be other_next_val (they're equal!) - this_next_val = (this_i+1 < this_size ? points[this_i+1]->x : kPlusInfinity); - other_next_val = (other_i+1 < other_size ? other.points[other_i+1]->x : kPlusInfinity); - } else { // advance the i with the lower x, update cur_x - if (-1 == comp) { - ++this_i; - cur_x = this_next_val; - this_next_val = (this_i+1 < this_size ? points[this_i+1]->x : kPlusInfinity); - } else { - ++other_i; - cur_x = other_next_val; - other_next_val = (other_i+1 < other_size ? other.points[other_i+1]->x : kPlusInfinity); - } - } - } - points.swap(new_points); - } - //cerr << "Multiply: result=" << (*this) << endl; - return *this; -} - -// recursively construct translation -void MERTPoint::ConstructTranslation(vector* trans) const { - const MERTPoint* cur = this; - vector > ant_trans; - while(!cur->edge) { - ant_trans.resize(ant_trans.size() + 1); - cur->p2->ConstructTranslation(&ant_trans.back()); - cur = cur->p1.get(); - } - size_t ant_size = ant_trans.size(); - vector*> pants(ant_size); - assert(ant_size == cur->edge->tail_nodes_.size()); - --ant_size; - for (int i = 0; i < pants.size(); ++i) pants[ant_size - i] = &ant_trans[i]; - cur->edge->rule_->ESubstitute(pants, trans); -} - -void MERTPoint::CollectEdgesUsed(std::vector* edges_used) const { - if (edge) { - assert(edge->id_ < edges_used->size()); - (*edges_used)[edge->id_] = true; - } - if (p1) p1->CollectEdgesUsed(edges_used); - if (p2) p2->CollectEdgesUsed(edges_used); -} - -#else - -// THIS IS THE NEW FASTER IMPLEMENTATION OF THE MERT SEMIRING OPERATIONS - -#endif - diff --git a/dpmert/mert_geometry.h b/dpmert/mert_geometry.h deleted file mode 100644 index a8b6959e..00000000 --- a/dpmert/mert_geometry.h +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef _MERT_GEOMETRY_H_ -#define _MERT_GEOMETRY_H_ - -#include -#include -#include - -#include "hg.h" -#include "sparse_vector.h" - -static const double kMinusInfinity = -std::numeric_limits::infinity(); -static const double kPlusInfinity = std::numeric_limits::infinity(); - -struct MERTPoint { - MERTPoint() : x(), m(), b(), edge() {} - MERTPoint(double _m, double _b) : - x(kMinusInfinity), m(_m), b(_b), edge() {} - MERTPoint(double _x, double _m, double _b, const boost::shared_ptr& p1_, const boost::shared_ptr& p2_) : - x(_x), m(_m), b(_b), p1(p1_), p2(p2_), edge() {} - MERTPoint(double _m, double _b, const Hypergraph::Edge& edge) : - x(kMinusInfinity), m(_m), b(_b), edge(&edge) {} - - double x; // x intersection with previous segment in env, or -inf if none - double m; // this line's slope - double b; // intercept with y-axis - - // we keep a pointer to the "parents" of this segment so we can reconstruct - // the Viterbi translation corresponding to this segment - boost::shared_ptr p1; - boost::shared_ptr p2; - - // only MERTPoints created from an edge using the ConvexHullWeightFunction - // have rules - // TRulePtr rule; - const Hypergraph::Edge* edge; - - // recursively recover the Viterbi translation that will result from setting - // the weights to origin + axis * x, where x is any value from this->x up - // until the next largest x in the containing ConvexHull - void ConstructTranslation(std::vector* trans) const; - void CollectEdgesUsed(std::vector* edges_used) const; -}; - -// this is the semiring value type, -// it defines constructors for 0, 1, and the operations + and * -struct ConvexHull { - // create semiring zero - ConvexHull() : is_sorted(true) {} // zero - // for debugging: - ConvexHull(const std::vector >& s) : points(s) { Sort(); } - // create semiring 1 or 0 - explicit ConvexHull(int i); - ConvexHull(int n, MERTPoint* point) : is_sorted(true), points(n, boost::shared_ptr(point)) {} - const ConvexHull& operator+=(const ConvexHull& other); - const ConvexHull& operator*=(const ConvexHull& other); - bool IsMultiplicativeIdentity() const { - return size() == 1 && (points[0]->b == 0.0 && points[0]->m == 0.0) && (!points[0]->edge) && (!points[0]->p1) && (!points[0]->p2); } - const std::vector >& GetSortedSegs() const { - if (!is_sorted) Sort(); - return points; - } - size_t size() const { return points.size(); } - - private: - bool IsEdgeEnvelope() const { - return points.size() == 1 && points[0]->edge; } - void Sort() const; - mutable bool is_sorted; - mutable std::vector > points; -}; -std::ostream& operator<<(std::ostream& os, const ConvexHull& env); - -struct ConvexHullWeightFunction { - ConvexHullWeightFunction(const SparseVector& ori, - const SparseVector& dir) : origin(ori), direction(dir) {} - const ConvexHull operator()(const Hypergraph::Edge& e) const; - const SparseVector origin; - const SparseVector direction; -}; - -#endif diff --git a/dpmert/mr_dpmert_generate_mapper_input.cc b/dpmert/mr_dpmert_generate_mapper_input.cc deleted file mode 100644 index 199cd23a..00000000 --- a/dpmert/mr_dpmert_generate_mapper_input.cc +++ /dev/null @@ -1,81 +0,0 @@ -#include -#include - -#include -#include - -#include "filelib.h" -#include "weights.h" -#include "line_optimizer.h" - -using namespace std; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("dev_set_size,s",po::value(),"[REQD] Development set size (# of parallel sentences)") - ("forest_repository,r",po::value(),"[REQD] Path to forest repository") - ("weights,w",po::value(),"[REQD] Current feature weights file") - ("optimize_feature,o",po::value >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)") - ("random_directions,d",po::value()->default_value(20),"Number of random directions to run the line optimizer in") - ("help,h", "Help"); - po::options_description dcmdline_options; - dcmdline_options.add(opts); - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - bool flag = false; - if (conf->count("dev_set_size") == 0) { - cerr << "Please specify the size of the development set using -d N\n"; - flag = true; - } - if (conf->count("weights") == 0) { - cerr << "Please specify the starting-point weights using -w \n"; - flag = true; - } - if (conf->count("forest_repository") == 0) { - cerr << "Please specify the forest repository location using -r \n"; - flag = true; - } - if (flag || conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -int main(int argc, char** argv) { - RandomNumberGenerator rng; - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - vector features; - SparseVector origin; - vector w; - Weights::InitFromFile(conf["weights"].as(), &w, &features); - Weights::InitSparseVector(w, &origin); - const string forest_repository = conf["forest_repository"].as(); - if (!DirectoryExists(forest_repository)) { - cerr << "Forest repository directory " << forest_repository << " not found!\n"; - return 1; - } - if (conf.count("optimize_feature") > 0) - features=conf["optimize_feature"].as >(); - vector > directions; - vector fids(features.size()); - for (unsigned i = 0; i < features.size(); ++i) - fids[i] = FD::Convert(features[i]); - LineOptimizer::CreateOptimizationDirections( - fids, - conf["random_directions"].as(), - &rng, - &directions); - unsigned dev_set_size = conf["dev_set_size"].as(); - for (unsigned i = 0; i < dev_set_size; ++i) { - for (unsigned j = 0; j < directions.size(); ++j) { - cout << forest_repository << '/' << i << ".json.gz " << i << ' '; - print(cout, origin, "=", ";"); - cout << ' '; - print(cout, directions[j], "=", ";"); - cout << endl; - } - } - return 0; -} diff --git a/dpmert/mr_dpmert_map.cc b/dpmert/mr_dpmert_map.cc deleted file mode 100644 index d1efcf96..00000000 --- a/dpmert/mr_dpmert_map.cc +++ /dev/null @@ -1,112 +0,0 @@ -#include -#include -#include -#include - -#include -#include - -#include "ns.h" -#include "ns_docscorer.h" -#include "ces.h" -#include "filelib.h" -#include "stringlib.h" -#include "sparse_vector.h" -#include "mert_geometry.h" -#include "inside_outside.h" -#include "error_surface.h" -#include "b64tools.h" -#include "hg_io.h" - -using namespace std; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("reference,r",po::value >(), "[REQD] Reference translation (tokenized text)") - ("source,s",po::value(), "Source file (ignored, except for AER)") - ("evaluation_metric,m",po::value()->default_value("ibm_bleu"), "Evaluation metric being optimized") - ("input,i",po::value()->default_value("-"), "Input file to map (- is STDIN)") - ("help,h", "Help"); - po::options_description dcmdline_options; - dcmdline_options.add(opts); - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - bool flag = false; - if (!conf->count("reference")) { - cerr << "Please specify one or more references using -r \n"; - flag = true; - } - if (flag || conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -bool ReadSparseVectorString(const string& s, SparseVector* v) { -#if 0 - // this should work, but untested. - std::istringstream i(s); - i>>*v; -#else - vector fields; - Tokenize(s, ';', &fields); - if (fields.empty()) return false; - for (unsigned i = 0; i < fields.size(); ++i) { - vector pair(2); - Tokenize(fields[i], '=', &pair); - if (pair.size() != 2) { - cerr << "Error parsing vector string: " << fields[i] << endl; - return false; - } - v->set_value(FD::Convert(pair[0]), atof(pair[1].c_str())); - } - return true; -#endif -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - const string evaluation_metric = conf["evaluation_metric"].as(); - EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); - DocumentScorer ds(metric, conf["reference"].as >()); - cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl; - Hypergraph hg; - string last_file; - ReadFile in_read(conf["input"].as()); - istream &in=*in_read.stream(); - while(in) { - string line; - getline(in, line); - if (line.empty()) continue; - istringstream is(line); - int sent_id; - string file, s_origin, s_direction; - // path-to-file (JSON) sent_ed starting-point search-direction - is >> file >> sent_id >> s_origin >> s_direction; - SparseVector origin; - ReadSparseVectorString(s_origin, &origin); - SparseVector direction; - ReadSparseVectorString(s_direction, &direction); - // cerr << "File: " << file << "\nDir: " << direction << "\n X: " << origin << endl; - if (last_file != file) { - last_file = file; - ReadFile rf(file); - HypergraphIO::ReadFromJSON(rf.stream(), &hg); - } - const ConvexHullWeightFunction wf(origin, direction); - const ConvexHull hull = Inside(hg, NULL, wf); - - ErrorSurface es; - ComputeErrorSurface(*ds[sent_id], hull, &es, metric, hg); - //cerr << "Viterbi envelope has " << ve.size() << " segments\n"; - // cerr << "Error surface has " << es.size() << " segments\n"; - string val; - es.Serialize(&val); - cout << 'M' << ' ' << s_origin << ' ' << s_direction << '\t'; - B64::b64encode(val.c_str(), val.size(), &cout); - cout << endl << flush; - } - return 0; -} diff --git a/dpmert/mr_dpmert_reduce.cc b/dpmert/mr_dpmert_reduce.cc deleted file mode 100644 index 31512a03..00000000 --- a/dpmert/mr_dpmert_reduce.cc +++ /dev/null @@ -1,77 +0,0 @@ -#include -#include -#include -#include - -#include -#include - -#include "sparse_vector.h" -#include "error_surface.h" -#include "line_optimizer.h" -#include "b64tools.h" -#include "stringlib.h" - -using namespace std; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("evaluation_metric,m",po::value(), "Evaluation metric (IBM_BLEU, etc.)") - ("help,h", "Help"); - po::options_description dcmdline_options; - dcmdline_options.add(opts); - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - bool flag = conf->count("evaluation_metric") == 0; - if (flag || conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - const string evaluation_metric = conf["evaluation_metric"].as(); - EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); - LineOptimizer::ScoreType opt_type = LineOptimizer::MAXIMIZE_SCORE; - if (metric->IsErrorMetric()) - opt_type = LineOptimizer::MINIMIZE_SCORE; - - vector esv; - string last_key, line, key, val; - while(getline(cin, line)) { - size_t ks = line.find("\t"); - assert(string::npos != ks); - assert(ks > 2); - key = line.substr(2, ks - 2); - val = line.substr(ks + 1); - if (key != last_key) { - if (!last_key.empty()) { - float score; - double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score); - cout << last_key << "|" << x << "|" << score << endl; - } - last_key.swap(key); - esv.clear(); - } - if (val.size() % 4 != 0) { - cerr << "B64 encoding error 1! Skipping.\n"; - continue; - } - string encoded(val.size() / 4 * 3, '\0'); - if (!B64::b64decode(reinterpret_cast(&val[0]), val.size(), &encoded[0], encoded.size())) { - cerr << "B64 encoding error 2! Skipping.\n"; - continue; - } - esv.push_back(ErrorSurface()); - esv.back().Deserialize(encoded); - } - if (!esv.empty()) { - float score; - double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score); - cout << last_key << "|" << x << "|" << score << endl; - } - return 0; -} diff --git a/dpmert/parallelize.pl b/dpmert/parallelize.pl deleted file mode 100755 index d2ebaeea..00000000 --- a/dpmert/parallelize.pl +++ /dev/null @@ -1,423 +0,0 @@ -#!/usr/bin/env perl - -# Author: Adam Lopez -# -# This script takes a command that processes input -# from stdin one-line-at-time, and parallelizes it -# on the cluster using David Chiang's sentserver/ -# sentclient architecture. -# -# Prerequisites: the command *must* read each line -# without waiting for subsequent lines of input -# (for instance, a command which must read all lines -# of input before processing will not work) and -# return it to the output *without* buffering -# multiple lines. - -#TODO: if -j 1, run immediately, not via sentserver? possible differences in environment might make debugging harder - -#ANNOYANCE: if input is shorter than -j n lines, or at the very last few lines, repeatedly sleeps. time cut down to 15s from 60s - -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; } -use LocalConfig; - -use Cwd qw/ abs_path cwd getcwd /; -use File::Temp qw/ tempfile /; -use Getopt::Long; -use IPC::Open2; -use strict; -use POSIX ":sys_wait_h"; - -use File::Basename; -my $myDir = dirname(__FILE__); -print STDERR __FILE__." -> $myDir\n"; -push(@INC, $myDir); -require "libcall.pl"; - -my $tailn=5; # +0 = concatenate all the client logs. 5 = last 5 lines -my $recycle_clients; # spawn new clients when previous ones terminate -my $stay_alive; # dont let server die when having zero clients -my $joblist = ""; -my $errordir=""; -my $multiline; -my $workdir = '.'; -my $numnodes = 8; -my $user = $ENV{"USER"}; -my $pmem = "9g"; -my $basep=50300; -my $randp=300; -my $tryp=50; -my $no_which; -my $no_cd; - -my $DEBUG=$ENV{DEBUG}; -print STDERR "DEBUG=$DEBUG output enabled.\n" if $DEBUG; -my $verbose = 1; -sub verbose { - if ($verbose) { - print STDERR @_,"\n"; - } -} -sub debug { - if ($DEBUG) { - my ($package, $filename, $line) = caller; - print STDERR "DEBUG: $filename($line): ",join(' ',@_),"\n"; - } -} -my $is_shell_special=qr.[ \t\n\\><|&;"'`~*?{}$!()].; -my $shell_escape_in_quote=qr.[\\"\$`!].; -sub escape_shell { - my ($arg)=@_; - return undef unless defined $arg; - return '""' unless $arg; - if ($arg =~ /$is_shell_special/) { - $arg =~ s/($shell_escape_in_quote)/\\$1/g; - return "\"$arg\""; - } - return $arg; -} -sub preview_files { - my ($l,$skipempty,$footer,$n)=@_; - $n=$tailn unless defined $n; - my @f=grep { ! ($skipempty && -z $_) } @$l; - my $fn=join(' ',map {escape_shell($_)} @f); - my $cmd="tail -n $n $fn"; - unchecked_output("$cmd").($footer?"\nNONEMPTY FILES:\n$fn\n":""); -} -sub prefix_dirname($) { - #like `dirname but if ends in / then return the whole thing - local ($_)=@_; - if (/\/$/) { - $_; - } else { - s#/[^/]$##; - $_ ? $_ : ''; - } -} -sub ensure_final_slash($) { - local ($_)=@_; - m#/$# ? $_ : ($_."/"); -} -sub extend_path($$;$$) { - my ($base,$ext,$mkdir,$baseisdir)=@_; - if (-d $base) { - $base.="/"; - } else { - my $dir; - if ($baseisdir) { - $dir=$base; - $base.='/' unless $base =~ /\/$/; - } else { - $dir=prefix_dirname($base); - } - my @cmd=("/bin/mkdir","-p",$dir); - check_call(@cmd) if $mkdir; - } - return $base.$ext; -} - -my $abscwd=abs_path(&getcwd); -sub print_help; - -my $use_fork; -my @pids; - -# Process command-line options -unless (GetOptions( - "stay-alive" => \$stay_alive, - "recycle-clients" => \$recycle_clients, - "error-dir=s" => \$errordir, - "multi-line" => \$multiline, - "workdir=s" => \$workdir, - "use-fork" => \$use_fork, - "verbose" => \$verbose, - "jobs=i" => \$numnodes, - "pmem=s" => \$pmem, - "baseport=i" => \$basep, -# "iport=i" => \$randp, #for short name -i - "no-which!" => \$no_which, - "no-cd!" => \$no_cd, - "tailn=s" => \$tailn, -) && scalar @ARGV){ - print_help(); - die "bad options."; -} - -my $cmd = ""; -my $prog=shift; -if ($no_which) { - $cmd=$prog; -} else { - $cmd=check_output("which $prog"); - chomp $cmd; - die "$prog not found - $cmd" unless $cmd; -} -#$cmd=abs_path($cmd); -for my $arg (@ARGV) { - $cmd .= " ".escape_shell($arg); -} -die "Please specify a command to parallelize\n" if $cmd eq ''; - -my $cdcmd=$no_cd ? '' : ("cd ".escape_shell($abscwd)."\n"); - -my $executable = $cmd; -$executable =~ s/^\s*(\S+)($|\s.*)/$1/; -$executable=check_output("basename $executable"); -chomp $executable; - - -print STDERR "Parallelizing ($numnodes ways): $cmd\n\n"; - -# create -e dir and save .sh -use File::Temp qw/tempdir/; -unless ($errordir) { - $errordir=tempdir("$executable.XXXXXX",CLEANUP=>1); -} -if ($errordir) { - my $scriptfile=extend_path("$errordir/","$executable.sh",1,1); - -d $errordir || die "should have created -e dir $errordir"; - open SF,">",$scriptfile || die; - print SF "$cdcmd$cmd\n"; - close SF; - chmod 0755,$scriptfile; - $errordir=abs_path($errordir); - &verbose("-e dir: $errordir"); -} - -# set cleanup handler -my @cleanup_cmds; -sub cleanup; -sub cleanup_and_die; -$SIG{INT} = "cleanup_and_die"; -$SIG{TERM} = "cleanup_and_die"; -$SIG{HUP} = "cleanup_and_die"; - -# other subs: -sub numof_live_jobs; -sub launch_job_on_node; - - -# vars -my $mydir = check_output("dirname $0"); chomp $mydir; -my $sentserver = "$mydir/sentserver"; -my $sentclient = "$mydir/sentclient"; -my $host = check_output("hostname"); -chomp $host; - - -# find open port -srand; -my $port = 50300+int(rand($randp)); -my $endp=$port+$tryp; -sub listening_port_lines { - my $quiet=$verbose?'':'2>/dev/null'; - return unchecked_output("netstat -a -n $quiet | grep LISTENING | grep -i tcp"); -} -my $netstat=&listening_port_lines; - -if ($verbose){ print STDERR "Testing port $port...";} - -while ($netstat=~/$port/ || &listening_port_lines=~/$port/){ - if ($verbose){ print STDERR "port is busy\n";} - $port++; - if ($port > $endp){ - die "Unable to find open port\n"; - } - if ($verbose){ print STDERR "Testing port $port... "; } -} -if ($verbose){ - print STDERR "port $port is available\n"; -} - -my $key = int(rand()*1000000); - -my $multiflag = ""; -if ($multiline){ $multiflag = "-m"; print STDERR "expecting multiline output.\n"; } -my $stay_alive_flag = ""; -if ($stay_alive){ $stay_alive_flag = "--stay-alive"; print STDERR "staying alive while no clients are connected.\n"; } - -my $node_count = 0; -my $script = ""; -# fork == one thread runs the sentserver, while the -# other spawns the sentclient commands. -my $pid = fork; -if ($pid == 0) { # child - sleep 8; # give other thread time to start sentserver - $script = "$cdcmd$sentclient $host:$port:$key $cmd"; - - if ($verbose){ - print STDERR "Client script:\n====\n"; - print STDERR $script; - print STDERR "====\n"; - } - for (my $jobn=0; $jobn<$numnodes; $jobn++){ - launch_job(); - } - if ($recycle_clients) { - my $ret; - my $livejobs; - while (1) { - $ret = waitpid($pid, WNOHANG); - #print STDERR "waitpid $pid ret = $ret \n"; - last if ($ret != 0); - $livejobs = numof_live_jobs(); - if ($numnodes >= $livejobs ) { # a client terminated, OR # lines of input was less than -j - print STDERR "num of requested nodes = $numnodes; num of currently live jobs = $livejobs; Client terminated - launching another.\n"; - launch_job(); - } else { - sleep 15; - } - } - } - print STDERR "CHILD PROCESSES SPAWNED ... WAITING\n"; - for my $p (@pids) { - waitpid($p, 0); - } -} else { -# my $todo = "$sentserver -k $key $multiflag $port "; - my $todo = "$sentserver -k $key $multiflag $port $stay_alive_flag "; - if ($verbose){ print STDERR "Running: $todo\n"; } - check_call($todo); - print STDERR "Call to $sentserver returned.\n"; - cleanup(); - exit(0); -} - -sub numof_live_jobs { - if ($use_fork) { - die "not implemented"; - } else { - # We can probably continue decoding if the qstat error is only temporary - my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat"))); - return ($#livejobs + 1); - } -} -my (@errors,@outs,@cmds); - -sub launch_job { - if ($use_fork) { return launch_job_fork(); } - my $errorfile = "/dev/null"; - my $outfile = "/dev/null"; - $node_count++; - my $clientname = $executable; - $clientname =~ s/^(.{4}).*$/$1/; - $clientname = "$clientname.$node_count"; - if ($errordir){ - $errorfile = "$errordir/$clientname.ER"; - $outfile = "$errordir/$clientname.OU"; - push @errors,$errorfile; - push @outs,$outfile; - } - my $todo = qsub_args($pmem) . " -N $clientname -o $outfile -e $errorfile"; - push @cmds,$todo; - - print STDERR "Running: $todo\n"; - local(*QOUT, *QIN); - open2(\*QOUT, \*QIN, $todo) or die "Failed to open2: $!"; - print QIN $script; - close QIN; - while (my $jobid=){ - chomp $jobid; - if ($verbose){ print STDERR "Launched client job: $jobid"; } - $jobid =~ s/^(\d+)(.*?)$/\1/g; - $jobid =~ s/^Your job (\d+) .*$/\1/; - print STDERR " short job id $jobid\n"; - if ($verbose){ - print STDERR "cd: $abscwd\n"; - print STDERR "cmd: $cmd\n"; - } - if ($joblist == "") { $joblist = $jobid; } - else {$joblist = $joblist . "\|" . $jobid; } - my $cleanfn="qdel $jobid 2> /dev/null"; - push(@cleanup_cmds, $cleanfn); - } - close QOUT; -} - -sub launch_job_fork { - my $errorfile = "/dev/null"; - my $outfile = "/dev/null"; - $node_count++; - my $clientname = $executable; - $clientname =~ s/^(.{4}).*$/$1/; - $clientname = "$clientname.$node_count"; - if ($errordir){ - $errorfile = "$errordir/$clientname.ER"; - $outfile = "$errordir/$clientname.OU"; - push @errors,$errorfile; - push @outs,$outfile; - } - my $pid = fork; - if ($pid == 0) { - my ($fh, $scr_name) = get_temp_script(); - print $fh $script; - close $fh; - my $todo = "/bin/bash -xeo pipefail $scr_name 1> $outfile 2> $errorfile"; - print STDERR "EXEC: $todo\n"; - my $out = check_output("$todo"); - unlink $scr_name or warn "Failed to remove $scr_name"; - exit 0; - } else { - push @pids, $pid; - } -} - -sub get_temp_script { - my ($fh, $filename) = tempfile( "$workdir/workXXXX", SUFFIX => '.sh'); - return ($fh, $filename); -} - -sub cleanup_and_die { - cleanup(); - die "\n"; -} - -sub cleanup { - print STDERR "Cleaning up...\n"; - for $cmd (@cleanup_cmds){ - print STDERR " Cleanup command: $cmd\n"; - eval $cmd; - } - print STDERR "outputs:\n",preview_files(\@outs,1),"\n"; - print STDERR "errors:\n",preview_files(\@errors,1),"\n"; - print STDERR "cmd:\n",$cmd,"\n"; - print STDERR " cat $errordir/*.ER\nfor logs.\n"; - print STDERR "Cleanup finished.\n"; -} - -sub print_help -{ - my $name = check_output("basename $0"); chomp $name; - print << "Help"; - -usage: $name [options] - - Automatic black-box parallelization of commands. - -options: - - --use-fork - Instead of using qsub, use fork. - - -e, --error-dir - Retain output files from jobs in , rather - than silently deleting them. - - -m, --multi-line - Expect that command may produce multiple output - lines for a single input line. $name makes a - reasonable attempt to obtain all output before - processing additional inputs. However, use of this - option is inherently unsafe. - - -v, --verbose - Print diagnostic informatoin on stderr. - - -j, --jobs - Number of jobs to use. - - -p, --pmem - pmem setting for each job. - -Help -} diff --git a/dpmert/sentclient.c b/dpmert/sentclient.c deleted file mode 100644 index 91d994ab..00000000 --- a/dpmert/sentclient.c +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright (c) 2001 by David Chiang. All rights reserved.*/ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sentserver.h" - -int main (int argc, char *argv[]) { - int sock, port; - char *s, *key; - struct hostent *hp; - struct sockaddr_in server; - int errors = 0; - - if (argc < 3) { - fprintf(stderr, "Usage: sentclient host[:port[:key]] command [args ...]\n"); - exit(1); - } - - s = strchr(argv[1], ':'); - key = NULL; - - if (s == NULL) { - port = DEFAULT_PORT; - } else { - *s = '\0'; - s+=1; - /* dumb hack */ - key = strchr(s, ':'); - if (key != NULL){ - *key = '\0'; - key += 1; - } - port = atoi(s); - } - - sock = socket(AF_INET, SOCK_STREAM, 0); - - hp = gethostbyname(argv[1]); - if (hp == NULL) { - fprintf(stderr, "unknown host %s\n", argv[1]); - exit(1); - } - - bzero((char *)&server, sizeof(server)); - bcopy(hp->h_addr, (char *)&server.sin_addr, hp->h_length); - server.sin_family = hp->h_addrtype; - server.sin_port = htons(port); - - while (connect(sock, (struct sockaddr *)&server, sizeof(server)) < 0) { - perror("connect()"); - sleep(1); - errors++; - if (errors > 5) - exit(1); - } - - close(0); - close(1); - dup2(sock, 0); - dup2(sock, 1); - - if (key != NULL){ - write(1, key, strlen(key)); - write(1, "\n", 1); - } - - execvp(argv[2], argv+2); - return 0; -} diff --git a/dpmert/sentserver.c b/dpmert/sentserver.c deleted file mode 100644 index c20b4fa6..00000000 --- a/dpmert/sentserver.c +++ /dev/null @@ -1,515 +0,0 @@ -/* Copyright (c) 2001 by David Chiang. All rights reserved.*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sentserver.h" - -#define MAX_CLIENTS 64 - -struct clientinfo { - int s; - struct sockaddr_in sin; -}; - -struct line { - int id; - char *s; - int status; - struct line *next; -} *head, **ptail; - -int n_sent = 0, n_received=0, n_flushed=0; - -#define STATUS_RUNNING 0 -#define STATUS_ABORTED 1 -#define STATUS_FINISHED 2 - -pthread_mutex_t queue_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_mutex_t clients_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_mutex_t input_mutex = PTHREAD_MUTEX_INITIALIZER; - -int n_clients = 0; -int s; -int expect_multiline_output = 0; -int log_mutex = 0; -int stay_alive = 0; /* dont panic and die with zero clients */ - -void queue_finish(struct line *node, char *s, int fid); -char * read_line(int fd, int multiline); -void done (int code); - -struct line * queue_get(int fid) { - struct line *cur; - char *s, *synch; - - if (log_mutex) fprintf(stderr, "Getting for data for fid %d\n", fid); - if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid); - pthread_mutex_lock(&queue_mutex); - - /* First, check for aborted sentences. */ - - if (log_mutex) fprintf(stderr, " Checking queue for aborted jobs (fid %d)\n", fid); - for (cur = head; cur != NULL; cur = cur->next) { - if (cur->status == STATUS_ABORTED) { - cur->status = STATUS_RUNNING; - - if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid); - pthread_mutex_unlock(&queue_mutex); - - return cur; - } - } - if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid); - pthread_mutex_unlock(&queue_mutex); - - /* Otherwise, read a new one. */ - if (log_mutex) fprintf(stderr, "Locking input mutex (%d)\n", fid); - if (log_mutex) fprintf(stderr, " Reading input for new data (fid %d)\n", fid); - pthread_mutex_lock(&input_mutex); - s = read_line(0,0); - - while (s) { - if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid); - pthread_mutex_lock(&queue_mutex); - if (log_mutex) fprintf(stderr, "Unlocking input mutex (%d)\n", fid); - pthread_mutex_unlock(&input_mutex); - - cur = malloc(sizeof (struct line)); - cur->id = n_sent; - cur->s = s; - cur->next = NULL; - - *ptail = cur; - ptail = &cur->next; - - n_sent++; - - if (strcmp(s,"===SYNCH===\n")==0){ - fprintf(stderr, "Received ===SYNCH=== signal (fid %d)\n", fid); - // Note: queue_finish calls free(cur->s). - // Therefore we need to create a new string here. - synch = malloc((strlen("===SYNCH===\n")+2) * sizeof (char)); - synch = strcpy(synch, s); - - if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid); - pthread_mutex_unlock(&queue_mutex); - queue_finish(cur, synch, fid); /* handles its own lock */ - - if (log_mutex) fprintf(stderr, "Locking input mutex (%d)\n", fid); - if (log_mutex) fprintf(stderr, " Reading input for new data (fid %d)\n", fid); - pthread_mutex_lock(&input_mutex); - - s = read_line(0,0); - } else { - if (log_mutex) fprintf(stderr, " Received new data %d (fid %d)\n", cur->id, fid); - cur->status = STATUS_RUNNING; - if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid); - pthread_mutex_unlock(&queue_mutex); - return cur; - } - } - - if (log_mutex) fprintf(stderr, "Unlocking input mutex (%d)\n", fid); - pthread_mutex_unlock(&input_mutex); - /* Only way to reach this point: no more output */ - - if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid); - pthread_mutex_lock(&queue_mutex); - if (head == NULL) { - fprintf(stderr, "Reached end of file. Exiting.\n"); - done(0); - } else - ptail = NULL; /* This serves as a signal that there is no more input */ - if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid); - pthread_mutex_unlock(&queue_mutex); - - return NULL; -} - -void queue_panic() { - struct line *next; - while (head && head->status == STATUS_FINISHED) { - /* Write out finished sentences */ - if (head->status == STATUS_FINISHED) { - fputs(head->s, stdout); - fflush(stdout); - } - /* Write out blank line for unfinished sentences */ - if (head->status == STATUS_ABORTED) { - fputs("\n", stdout); - fflush(stdout); - } - /* By defition, there cannot be any RUNNING sentences, since - function is only called when n_clients == 0 */ - free(head->s); - next = head->next; - free(head); - head = next; - n_flushed++; - } - fclose(stdout); - fprintf(stderr, "All clients died. Panicking, flushing completed sentences and exiting.\n"); - done(1); -} - -void queue_abort(struct line *node, int fid) { - if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid); - pthread_mutex_lock(&queue_mutex); - node->status = STATUS_ABORTED; - if (n_clients == 0) { - if (stay_alive) { - fprintf(stderr, "Warning! No live clients detected! Staying alive, will retry soon.\n"); - } else { - queue_panic(); - } - } - if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid); - pthread_mutex_unlock(&queue_mutex); -} - - -void queue_print() { - struct line *cur; - - fprintf(stderr, " Queue\n"); - - for (cur = head; cur != NULL; cur = cur->next) { - switch(cur->status) { - case STATUS_RUNNING: - fprintf(stderr, " %d running ", cur->id); break; - case STATUS_ABORTED: - fprintf(stderr, " %d aborted ", cur->id); break; - case STATUS_FINISHED: - fprintf(stderr, " %d finished ", cur->id); break; - - } - fprintf(stderr, "\n"); - //fprintf(stderr, cur->s); - } -} - -void queue_finish(struct line *node, char *s, int fid) { - struct line *next; - if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid); - pthread_mutex_lock(&queue_mutex); - - free(node->s); - node->s = s; - node->status = STATUS_FINISHED; - n_received++; - - /* Flush out finished nodes */ - while (head && head->status == STATUS_FINISHED) { - - if (log_mutex) fprintf(stderr, " Flushing finished node %d\n", head->id); - - fputs(head->s, stdout); - fflush(stdout); - if (log_mutex) fprintf(stderr, " Flushed node %d\n", head->id); - free(head->s); - - next = head->next; - free(head); - - head = next; - - n_flushed++; - - if (head == NULL) { /* empty queue */ - if (ptail == NULL) { /* This can only happen if set in queue_get as signal that there is no more input. */ - fprintf(stderr, "All sentences finished. Exiting.\n"); - done(0); - } else /* ptail pointed at something which was just popped off the stack -- reset to head*/ - ptail = &head; - } - } - - if (log_mutex) fprintf(stderr, " Flushing output %d\n", head->id); - fflush(stdout); - fprintf(stderr, "%d sentences sent, %d sentences finished, %d sentences flushed\n", n_sent, n_received, n_flushed); - - if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid); - pthread_mutex_unlock(&queue_mutex); - -} - -char * read_line(int fd, int multiline) { - int size = 80; - char errorbuf[100]; - char *s = malloc(size+2); - int result, errors=0; - int i = 0; - - result = read(fd, s+i, 1); - - while (1) { - if (result < 0) { - perror("read()"); - sprintf(errorbuf, "Error code: %d\n", errno); - fprintf(stderr, errorbuf); - errors++; - if (errors > 5) { - free(s); - return NULL; - } else { - sleep(1); /* retry after delay */ - } - } else if (result == 0) { - break; - } else if (multiline==0 && s[i] == '\n') { - break; - } else { - if (s[i] == '\n'){ - /* if we've reached this point, - then multiline must be 1, and we're - going to poll the fd for an additional - line of data. The basic design is to - run a select on the filedescriptor fd. - Select will return under two conditions: - if there is data on the fd, or if a - timeout is reached. We'll select on this - fd. If select returns because there's data - ready, keep going; else assume there's no - more and return the data we already have. - */ - - fd_set set; - FD_ZERO(&set); - FD_SET(fd, &set); - - struct timeval timeout; - timeout.tv_sec = 3; // number of seconds for timeout - timeout.tv_usec = 0; - - int ready = select(FD_SETSIZE, &set, NULL, NULL, &timeout); - if (ready<1){ - break; // no more data, stop looping - } - } - i++; - - if (i == size) { - size = size*2; - s = realloc(s, size+2); - } - } - - result = read(fd, s+i, 1); - } - - if (result == 0 && i == 0) { /* end of file */ - free(s); - return NULL; - } - - s[i] = '\n'; - s[i+1] = '\0'; - - return s; -} - -void * new_client(void *arg) { - struct clientinfo *client = (struct clientinfo *)arg; - struct line *cur; - int result; - char *s; - char errorbuf[100]; - - pthread_mutex_lock(&clients_mutex); - n_clients++; - pthread_mutex_unlock(&clients_mutex); - - fprintf(stderr, "Client connected (%d connected)\n", n_clients); - - for (;;) { - - cur = queue_get(client->s); - - if (cur) { - /* fprintf(stderr, "Sending to client: %s", cur->s); */ - fprintf(stderr, "Sending data %d to client (fid %d)\n", cur->id, client->s); - result = write(client->s, cur->s, strlen(cur->s)); - if (result < strlen(cur->s)){ - perror("write()"); - sprintf(errorbuf, "Error code: %d\n", errno); - fprintf(stderr, errorbuf); - - pthread_mutex_lock(&clients_mutex); - n_clients--; - pthread_mutex_unlock(&clients_mutex); - - fprintf(stderr, "Client died (%d connected)\n", n_clients); - queue_abort(cur, client->s); - - close(client->s); - free(client); - - pthread_exit(NULL); - } - } else { - close(client->s); - pthread_mutex_lock(&clients_mutex); - n_clients--; - pthread_mutex_unlock(&clients_mutex); - fprintf(stderr, "Client dismissed (%d connected)\n", n_clients); - pthread_exit(NULL); - } - - s = read_line(client->s,expect_multiline_output); - if (s) { - /* fprintf(stderr, "Client (fid %d) returned: %s", client->s, s); */ - fprintf(stderr, "Client (fid %d) returned data %d\n", client->s, cur->id); -// queue_print(); - queue_finish(cur, s, client->s); - } else { - pthread_mutex_lock(&clients_mutex); - n_clients--; - pthread_mutex_unlock(&clients_mutex); - - fprintf(stderr, "Client died (%d connected)\n", n_clients); - queue_abort(cur, client->s); - - close(client->s); - free(client); - - pthread_exit(NULL); - } - - } - return 0; -} - -void done (int code) { - close(s); - exit(code); -} - - - -int main (int argc, char *argv[]) { - struct sockaddr_in sin, from; - int g; - socklen_t len; - struct clientinfo *client; - int port; - int opt; - int errors = 0; - int argi; - char *key = NULL, *client_key; - int use_key = 0; - /* the key stuff here doesn't provide any - real measure of security, it's mainly to keep - jobs from bumping into each other. */ - - pthread_t tid; - port = DEFAULT_PORT; - - for (argi=1; argi < argc; argi++){ - if (strcmp(argv[argi], "-m")==0){ - expect_multiline_output = 1; - } else if (strcmp(argv[argi], "-k")==0){ - argi++; - if (argi == argc){ - fprintf(stderr, "Key must be specified after -k\n"); - exit(1); - } - key = argv[argi]; - use_key = 1; - } else if (strcmp(argv[argi], "--stay-alive")==0){ - stay_alive = 1; /* dont panic and die with zero clients */ - } else { - port = atoi(argv[argi]); - } - } - - /* Initialize data structures */ - head = NULL; - ptail = &head; - - /* Set up listener */ - s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - opt = 1; - setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); - - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = htonl(INADDR_ANY); - sin.sin_port = htons(port); - while (bind(s, (struct sockaddr *) &sin, sizeof(sin)) < 0) { - perror("bind()"); - sleep(1); - errors++; - if (errors > 100) - exit(1); - } - - len = sizeof(sin); - getsockname(s, (struct sockaddr *) &sin, &len); - - fprintf(stderr, "Listening on port %hu\n", ntohs(sin.sin_port)); - - while (listen(s, MAX_CLIENTS) < 0) { - perror("listen()"); - sleep(1); - errors++; - if (errors > 100) - exit(1); - } - - for (;;) { - len = sizeof(from); - g = accept(s, (struct sockaddr *)&from, &len); - if (g < 0) { - perror("accept()"); - sleep(1); - continue; - } - client = malloc(sizeof(struct clientinfo)); - client->s = g; - bcopy(&from, &client->sin, len); - - if (use_key){ - fd_set set; - FD_ZERO(&set); - FD_SET(client->s, &set); - - struct timeval timeout; - timeout.tv_sec = 3; // number of seconds for timeout - timeout.tv_usec = 0; - - int ready = select(FD_SETSIZE, &set, NULL, NULL, &timeout); - if (ready<1){ - fprintf(stderr, "Prospective client failed to respond with correct key.\n"); - close(client->s); - free(client); - } else { - client_key = read_line(client->s,0); - client_key[strlen(client_key)-1]='\0'; /* chop trailing newline */ - if (strcmp(key, client_key)==0){ - pthread_create(&tid, NULL, new_client, client); - } else { - fprintf(stderr, "Prospective client failed to respond with correct key.\n"); - close(client->s); - free(client); - } - free(client_key); - } - } else { - pthread_create(&tid, NULL, new_client, client); - } - } - -} - - - diff --git a/dpmert/sentserver.h b/dpmert/sentserver.h deleted file mode 100644 index cd17a546..00000000 --- a/dpmert/sentserver.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef SENTSERVER_H -#define SENTSERVER_H - -#define DEFAULT_PORT 50000 - -#endif diff --git a/dpmert/test_aer/README b/dpmert/test_aer/README deleted file mode 100644 index 819b2e32..00000000 --- a/dpmert/test_aer/README +++ /dev/null @@ -1,8 +0,0 @@ -To run the test: - -../dist-vest.pl --local --metric aer cdec.ini --source-file corpus.src --ref-files=ref.0 --weights weights - -This will optimize the parameters of the tiny lexical translation model -so as to minimize the AER of the Viterbi alignment on the development -set in corpus.src according to the reference alignments in ref.0. - diff --git a/dpmert/test_aer/cdec.ini b/dpmert/test_aer/cdec.ini deleted file mode 100644 index 08187848..00000000 --- a/dpmert/test_aer/cdec.ini +++ /dev/null @@ -1,3 +0,0 @@ -formalism=lextrans -grammar=grammar -aligner=true diff --git a/dpmert/test_aer/corpus.src b/dpmert/test_aer/corpus.src deleted file mode 100644 index 31b23971..00000000 --- a/dpmert/test_aer/corpus.src +++ /dev/null @@ -1,3 +0,0 @@ -el gato negro ||| the black cat -el gato ||| the cat -el libro ||| the book diff --git a/dpmert/test_aer/grammar b/dpmert/test_aer/grammar deleted file mode 100644 index 9d857824..00000000 --- a/dpmert/test_aer/grammar +++ /dev/null @@ -1,12 +0,0 @@ -el ||| cat ||| F1=1 -el ||| the ||| F2=1 -el ||| black ||| F3=1 -el ||| book ||| F11=1 -gato ||| cat ||| F4=1 NN=1 -gato ||| black ||| F5=1 -gato ||| the ||| F6=1 -negro ||| the ||| F7=1 -negro ||| cat ||| F8=1 -negro ||| black ||| F9=1 -libro ||| the ||| F10=1 -libro ||| book ||| F12=1 NN=1 diff --git a/dpmert/test_aer/ref.0 b/dpmert/test_aer/ref.0 deleted file mode 100644 index 734a9c5b..00000000 --- a/dpmert/test_aer/ref.0 +++ /dev/null @@ -1,3 +0,0 @@ -0-0 1-2 2-1 -0-0 1-1 -0-0 1-1 diff --git a/dpmert/test_aer/weights b/dpmert/test_aer/weights deleted file mode 100644 index afc9282e..00000000 --- a/dpmert/test_aer/weights +++ /dev/null @@ -1,13 +0,0 @@ -F1 0.1 -F2 -.5980815 -F3 0.24235 -F4 0.625 -F5 0.4514 -F6 0.112316 -F7 -0.123415 -F8 -0.25390285 -F9 -0.23852 -F10 0.646 -F11 0.413141 -F12 0.343216 -NN -0.1215 diff --git a/dpmert/test_data/0.json.gz b/dpmert/test_data/0.json.gz deleted file mode 100644 index 30f8dd77..00000000 Binary files a/dpmert/test_data/0.json.gz and /dev/null differ diff --git a/dpmert/test_data/1.json.gz b/dpmert/test_data/1.json.gz deleted file mode 100644 index c82cc179..00000000 Binary files a/dpmert/test_data/1.json.gz and /dev/null differ diff --git a/dpmert/test_data/c2e.txt.0 b/dpmert/test_data/c2e.txt.0 deleted file mode 100644 index 12c4abe9..00000000 --- a/dpmert/test_data/c2e.txt.0 +++ /dev/null @@ -1,2 +0,0 @@ -australia reopens embassy in manila -( afp , manila , january 2 ) australia reopened its embassy in the philippines today , which was shut down about seven weeks ago due to what was described as a specific threat of a terrorist attack . diff --git a/dpmert/test_data/c2e.txt.1 b/dpmert/test_data/c2e.txt.1 deleted file mode 100644 index 4ac12df1..00000000 --- a/dpmert/test_data/c2e.txt.1 +++ /dev/null @@ -1,2 +0,0 @@ -australia reopened manila embassy -( agence france-presse , manila , 2nd ) - australia reopened its embassy in the philippines today . the embassy was closed seven weeks ago after what was described as a specific threat of a terrorist attack . diff --git a/dpmert/test_data/c2e.txt.2 b/dpmert/test_data/c2e.txt.2 deleted file mode 100644 index 2f67b72f..00000000 --- a/dpmert/test_data/c2e.txt.2 +++ /dev/null @@ -1,2 +0,0 @@ -australia to reopen embassy in manila -( afp report from manila , january 2 ) australia reopened its embassy in the philippines today . seven weeks ago , the embassy was shut down due to so-called confirmed terrorist attack threats . diff --git a/dpmert/test_data/c2e.txt.3 b/dpmert/test_data/c2e.txt.3 deleted file mode 100644 index 5483cef6..00000000 --- a/dpmert/test_data/c2e.txt.3 +++ /dev/null @@ -1,2 +0,0 @@ -australia to re - open its embassy to manila -( afp , manila , thursday ) australia reopens its embassy to manila , which was closed for the so-called " clear " threat of terrorist attack 7 weeks ago . diff --git a/dpmert/test_data/re.txt.0 b/dpmert/test_data/re.txt.0 deleted file mode 100644 index 86eff087..00000000 --- a/dpmert/test_data/re.txt.0 +++ /dev/null @@ -1,5 +0,0 @@ -erdogan states turkey to reject any pressures to urge it to recognize cyprus -ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara will reject any pressure by the european union to urge it to recognize cyprus . this comes two weeks before the summit of european union state and government heads who will decide whether or nor membership negotiations with ankara should be opened . -erdogan told " ntv " television station that " the european union cannot address us by imposing new conditions on us with regard to cyprus . -we will discuss this dossier in the course of membership negotiations . " -he added " let me be clear , i cannot sidestep turkey , this is something we cannot accept . " diff --git a/dpmert/test_data/re.txt.1 b/dpmert/test_data/re.txt.1 deleted file mode 100644 index 2140f198..00000000 --- a/dpmert/test_data/re.txt.1 +++ /dev/null @@ -1,5 +0,0 @@ -erdogan confirms turkey will resist any pressure to recognize cyprus -ankara 12 - 1 ( afp ) - the turkish head of government , recep tayyip erdogan , announced today ( wednesday ) that ankara would resist any pressure the european union might exercise in order to force it into recognizing cyprus . this comes two weeks before a summit of european union heads of state and government , who will decide whether or not to open membership negotiations with ankara . -erdogan said to the ntv television channel : " the european union cannot engage with us through imposing new conditions on us with regard to cyprus . -we shall discuss this issue in the course of the membership negotiations . " -he added : " let me be clear - i cannot confine turkey . this is something we do not accept . " diff --git a/dpmert/test_data/re.txt.2 b/dpmert/test_data/re.txt.2 deleted file mode 100644 index 94e46286..00000000 --- a/dpmert/test_data/re.txt.2 +++ /dev/null @@ -1,5 +0,0 @@ -erdogan confirms that turkey will reject any pressures to encourage it to recognize cyprus -ankara , 12 / 1 ( afp ) - the turkish prime minister recep tayyip erdogan declared today , wednesday , that ankara will reject any pressures that the european union may apply on it to encourage to recognize cyprus . this comes two weeks before a summit of the heads of countries and governments of the european union , who will decide on whether or not to start negotiations on joining with ankara . -erdogan told the ntv television station that " it is not possible for the european union to talk to us by imposing new conditions on us regarding cyprus . -we shall discuss this dossier during the negotiations on joining . " -and he added , " let me be clear . turkey's arm should not be twisted ; this is something we cannot accept . " diff --git a/dpmert/test_data/re.txt.3 b/dpmert/test_data/re.txt.3 deleted file mode 100644 index f87c3308..00000000 --- a/dpmert/test_data/re.txt.3 +++ /dev/null @@ -1,5 +0,0 @@ -erdogan stresses that turkey will reject all pressures to force it to recognize cyprus -ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara would refuse all pressures applied on it by the european union to force it to recognize cyprus . that came two weeks before the summit of the presidents and prime ministers of the european union , who would decide on whether to open negotiations on joining with ankara or not . -erdogan said to " ntv " tv station that the " european union can not communicate with us by imposing on us new conditions related to cyprus . -we will discuss this file during the negotiations on joining . " -he added , " let me be clear . turkey's arm should not be twisted . this is unacceptable to us . " diff --git a/dtrain/Makefile.am b/dtrain/Makefile.am deleted file mode 100644 index ca9581f5..00000000 --- a/dtrain/Makefile.am +++ /dev/null @@ -1,7 +0,0 @@ -bin_PROGRAMS = dtrain - -dtrain_SOURCES = dtrain.cc score.cc -dtrain_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz - -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval - diff --git a/dtrain/README.md b/dtrain/README.md deleted file mode 100644 index 7edabbf1..00000000 --- a/dtrain/README.md +++ /dev/null @@ -1,48 +0,0 @@ -This is a simple (and parallelizable) tuning method for cdec -which is able to train the weights of very many (sparse) features. -It was used here: - "Joint Feature Selection in Distributed Stochastic - Learning for Large-Scale Discriminative Training in - SMT" -(Simianer, Riezler, Dyer; ACL 2012) - - -Building --------- -Builds when building cdec, see ../BUILDING . -To build only parts needed for dtrain do -``` - autoreconf -ifv - ./configure [--disable-gtest] - cd dtrain/; make -``` - -Running -------- -To run this on a dev set locally: -``` - #define DTRAIN_LOCAL -``` -otherwise remove that line or undef, then recompile. You need a single -grammar file or input annotated with per-sentence grammars (psg) as you -would use with cdec. Additionally you need to give dtrain a file with -references (--refs) when running locally. - -The input for use with hadoop streaming looks like this: -``` - \t\t\t -``` -To convert a psg to this format you need to replace all "\n" -by "\t". Make sure there are no tabs in your data. - -For an example of local usage (with the 'distributed' format) -the see test/example/ . This expects dtrain to be built without -DTRAIN_LOCAL. - -Legal ------ -Copyright (c) 2012 by Patrick Simianer - -See the file ../LICENSE.txt for the licensing terms that this software is -released under. - diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc deleted file mode 100644 index 18286668..00000000 --- a/dtrain/dtrain.cc +++ /dev/null @@ -1,657 +0,0 @@ -#include "dtrain.h" - - -bool -dtrain_init(int argc, char** argv, po::variables_map* cfg) -{ - po::options_description ini("Configuration File Options"); - ini.add_options() - ("input", po::value()->default_value("-"), "input file") - ("output", po::value()->default_value("-"), "output weights file, '-' for STDOUT") - ("input_weights", po::value(), "input weights file (e.g. from previous iteration)") - ("decoder_config", po::value(), "configuration file for cdec") - ("print_weights", po::value(), "weights to print on each iteration") - ("stop_after", po::value()->default_value(0), "stop after X input sentences") - ("tmp", po::value()->default_value("/tmp"), "temp dir to use") - ("keep", po::value()->zero_tokens(), "keep weights files for each iteration") - ("hstreaming", po::value(), "run in hadoop streaming mode, arg is a task id") - ("epochs", po::value()->default_value(10), "# of iterations T (per shard)") - ("k", po::value()->default_value(100), "how many translations to sample") - ("sample_from", po::value()->default_value("kbest"), "where to sample translations from: 'kbest', 'forest'") - ("filter", po::value()->default_value("uniq"), "filter kbest list: 'not', 'uniq'") - ("pair_sampling", po::value()->default_value("XYX"), "how to sample pairs: 'all', 'XYX' or 'PRO'") - ("hi_lo", po::value()->default_value(0.1), "hi and lo (X) for XYX (default 0.1), <= 0.5") - ("pair_threshold", po::value()->default_value(0.), "bleu [0,1] threshold to filter pairs") - ("N", po::value()->default_value(4), "N for Ngrams (BLEU)") - ("scorer", po::value()->default_value("stupid_bleu"), "scoring: bleu, stupid_, smooth_, approx_, lc_") - ("learning_rate", po::value()->default_value(1.0), "learning rate") - ("gamma", po::value()->default_value(0.), "gamma for SVM (0 for perceptron)") - ("select_weights", po::value()->default_value("last"), "output best, last, avg weights ('VOID' to throw away)") - ("rescale", po::value()->zero_tokens(), "rescale weight vector after each input") - ("l1_reg", po::value()->default_value("none"), "apply l1 regularization as in 'Tsuroka et al' (2010)") - ("l1_reg_strength", po::value(), "l1 regularization strength") - ("fselect", po::value()->default_value(-1), "select top x percent (or by threshold) of features after each epoch NOT IMPLEMENTED") // TODO - ("approx_bleu_d", po::value()->default_value(0.9), "discount for approx. BLEU") - ("scale_bleu_diff", po::value()->zero_tokens(), "learning rate <- bleu diff of a misranked pair") - ("loss_margin", po::value()->default_value(0.), "update if no error in pref pair but model scores this near") - ("max_pairs", po::value()->default_value(std::numeric_limits::max()), "max. # of pairs per Sent.") -#ifdef DTRAIN_LOCAL - ("refs,r", po::value(), "references in local mode") -#endif - ("noup", po::value()->zero_tokens(), "do not update weights"); - po::options_description cl("Command Line Options"); - cl.add_options() - ("config,c", po::value(), "dtrain config file") - ("quiet,q", po::value()->zero_tokens(), "be quiet") - ("verbose,v", po::value()->zero_tokens(), "be verbose"); - cl.add(ini); - po::store(parse_command_line(argc, argv, cl), *cfg); - if (cfg->count("config")) { - ifstream ini_f((*cfg)["config"].as().c_str()); - po::store(po::parse_config_file(ini_f, ini), *cfg); - } - po::notify(*cfg); - if (!cfg->count("decoder_config")) { - cerr << cl << endl; - return false; - } - if (cfg->count("hstreaming") && (*cfg)["output"].as() != "-") { - cerr << "When using 'hstreaming' the 'output' param should be '-'." << endl; - return false; - } -#ifdef DTRAIN_LOCAL - if ((*cfg)["input"].as() == "-") { - cerr << "Can't use stdin as input with this binary. Recompile without DTRAIN_LOCAL" << endl; - return false; - } -#endif - if ((*cfg)["sample_from"].as() != "kbest" - && (*cfg)["sample_from"].as() != "forest") { - cerr << "Wrong 'sample_from' param: '" << (*cfg)["sample_from"].as() << "', use 'kbest' or 'forest'." << endl; - return false; - } - if ((*cfg)["sample_from"].as() == "kbest" && (*cfg)["filter"].as() != "uniq" && - (*cfg)["filter"].as() != "not") { - cerr << "Wrong 'filter' param: '" << (*cfg)["filter"].as() << "', use 'uniq' or 'not'." << endl; - return false; - } - if ((*cfg)["pair_sampling"].as() != "all" && (*cfg)["pair_sampling"].as() != "XYX" && - (*cfg)["pair_sampling"].as() != "PRO") { - cerr << "Wrong 'pair_sampling' param: '" << (*cfg)["pair_sampling"].as() << "'." << endl; - return false; - } - if(cfg->count("hi_lo") && (*cfg)["pair_sampling"].as() != "XYX") { - cerr << "Warning: hi_lo only works with pair_sampling XYX." << endl; - } - if((*cfg)["hi_lo"].as() > 0.5 || (*cfg)["hi_lo"].as() < 0.01) { - cerr << "hi_lo must lie in [0.01, 0.5]" << endl; - return false; - } - if ((*cfg)["pair_threshold"].as() < 0) { - cerr << "The threshold must be >= 0!" << endl; - return false; - } - if ((*cfg)["select_weights"].as() != "last" && (*cfg)["select_weights"].as() != "best" && - (*cfg)["select_weights"].as() != "avg" && (*cfg)["select_weights"].as() != "VOID") { - cerr << "Wrong 'select_weights' param: '" << (*cfg)["select_weights"].as() << "', use 'last' or 'best'." << endl; - return false; - } - return true; -} - -int -main(int argc, char** argv) -{ - // handle most parameters - po::variables_map cfg; - if (!dtrain_init(argc, argv, &cfg)) exit(1); // something is wrong - bool quiet = false; - if (cfg.count("quiet")) quiet = true; - bool verbose = false; - if (cfg.count("verbose")) verbose = true; - bool noup = false; - if (cfg.count("noup")) noup = true; - bool hstreaming = false; - string task_id; - if (cfg.count("hstreaming")) { - hstreaming = true; - quiet = true; - task_id = cfg["hstreaming"].as(); - cerr.precision(17); - } - bool rescale = false; - if (cfg.count("rescale")) rescale = true; - HSReporter rep(task_id); - bool keep = false; - if (cfg.count("keep")) keep = true; - - const unsigned k = cfg["k"].as(); - const unsigned N = cfg["N"].as(); - const unsigned T = cfg["epochs"].as(); - const unsigned stop_after = cfg["stop_after"].as(); - const string filter_type = cfg["filter"].as(); - const string sample_from = cfg["sample_from"].as(); - const string pair_sampling = cfg["pair_sampling"].as(); - const score_t pair_threshold = cfg["pair_threshold"].as(); - const string select_weights = cfg["select_weights"].as(); - const float hi_lo = cfg["hi_lo"].as(); - const score_t approx_bleu_d = cfg["approx_bleu_d"].as(); - const unsigned max_pairs = cfg["max_pairs"].as(); - weight_t loss_margin = cfg["loss_margin"].as(); - if (loss_margin > 9998.) loss_margin = std::numeric_limits::max(); - bool scale_bleu_diff = false; - if (cfg.count("scale_bleu_diff")) scale_bleu_diff = true; - bool average = false; - if (select_weights == "avg") - average = true; - vector print_weights; - if (cfg.count("print_weights")) - boost::split(print_weights, cfg["print_weights"].as(), boost::is_any_of(" ")); - - // setup decoder - register_feature_functions(); - SetSilent(true); - ReadFile ini_rf(cfg["decoder_config"].as()); - if (!quiet) - cerr << setw(25) << "cdec cfg " << "'" << cfg["decoder_config"].as() << "'" << endl; - Decoder decoder(ini_rf.stream()); - - // scoring metric/scorer - string scorer_str = cfg["scorer"].as(); - LocalScorer* scorer; - if (scorer_str == "bleu") { - scorer = dynamic_cast(new BleuScorer); - } else if (scorer_str == "stupid_bleu") { - scorer = dynamic_cast(new StupidBleuScorer); - } else if (scorer_str == "smooth_bleu") { - scorer = dynamic_cast(new SmoothBleuScorer); - } else if (scorer_str == "sum_bleu") { - scorer = dynamic_cast(new SumBleuScorer); - } else if (scorer_str == "sumexp_bleu") { - scorer = dynamic_cast(new SumExpBleuScorer); - } else if (scorer_str == "sumwhatever_bleu") { - scorer = dynamic_cast(new SumWhateverBleuScorer); - } else if (scorer_str == "approx_bleu") { - scorer = dynamic_cast(new ApproxBleuScorer(N, approx_bleu_d)); - } else if (scorer_str == "lc_bleu") { - scorer = dynamic_cast(new LinearBleuScorer(N)); - } else { - cerr << "Don't know scoring metric: '" << scorer_str << "', exiting." << endl; - exit(1); - } - vector bleu_weights; - scorer->Init(N, bleu_weights); - - // setup decoder observer - MT19937 rng; // random number generator, only for forest sampling - HypSampler* observer; - if (sample_from == "kbest") - observer = dynamic_cast(new KBestGetter(k, filter_type)); - else - observer = dynamic_cast(new KSampler(k, &rng)); - observer->SetScorer(scorer); - - // init weights - vector& dense_weights = decoder.CurrentWeightVector(); - SparseVector lambdas, cumulative_penalties, w_average; - if (cfg.count("input_weights")) Weights::InitFromFile(cfg["input_weights"].as(), &dense_weights); - Weights::InitSparseVector(dense_weights, &lambdas); - - // meta params for perceptron, SVM - weight_t eta = cfg["learning_rate"].as(); - weight_t gamma = cfg["gamma"].as(); - - // l1 regularization - bool l1naive = false; - bool l1clip = false; - bool l1cumul = false; - weight_t l1_reg = 0; - if (cfg["l1_reg"].as() != "none") { - string s = cfg["l1_reg"].as(); - if (s == "naive") l1naive = true; - else if (s == "clip") l1clip = true; - else if (s == "cumul") l1cumul = true; - l1_reg = cfg["l1_reg_strength"].as(); - } - - // output - string output_fn = cfg["output"].as(); - // input - string input_fn = cfg["input"].as(); - ReadFile input(input_fn); - // buffer input for t > 0 - vector src_str_buf; // source strings (decoder takes only strings) - vector > ref_ids_buf; // references as WordID vecs - // where temp files go - string tmp_path = cfg["tmp"].as(); -#ifdef DTRAIN_LOCAL - string refs_fn = cfg["refs"].as(); - ReadFile refs(refs_fn); -#else - string grammar_buf_fn = gettmpf(tmp_path, "dtrain-grammars"); - ogzstream grammar_buf_out; - grammar_buf_out.open(grammar_buf_fn.c_str()); -#endif - - unsigned in_sz = std::numeric_limits::max(); // input index, input size - vector > all_scores; - score_t max_score = 0.; - unsigned best_it = 0; - float overall_time = 0.; - - // output cfg - if (!quiet) { - cerr << _p5; - cerr << endl << "dtrain" << endl << "Parameters:" << endl; - cerr << setw(25) << "k " << k << endl; - cerr << setw(25) << "N " << N << endl; - cerr << setw(25) << "T " << T << endl; - cerr << setw(25) << "scorer '" << scorer_str << "'" << endl; - if (scorer_str == "approx_bleu") - cerr << setw(25) << "approx. B discount " << approx_bleu_d << endl; - cerr << setw(25) << "sample from " << "'" << sample_from << "'" << endl; - if (sample_from == "kbest") - cerr << setw(25) << "filter " << "'" << filter_type << "'" << endl; - if (!scale_bleu_diff) cerr << setw(25) << "learning rate " << eta << endl; - else cerr << setw(25) << "learning rate " << "bleu diff" << endl; - cerr << setw(25) << "gamma " << gamma << endl; - cerr << setw(25) << "loss margin " << loss_margin << endl; - cerr << setw(25) << "pairs " << "'" << pair_sampling << "'" << endl; - if (pair_sampling == "XYX") - cerr << setw(25) << "hi lo " << hi_lo << endl; - cerr << setw(25) << "pair threshold " << pair_threshold << endl; - cerr << setw(25) << "select weights " << "'" << select_weights << "'" << endl; - if (cfg.count("l1_reg")) - cerr << setw(25) << "l1 reg " << l1_reg << " '" << cfg["l1_reg"].as() << "'" << endl; - if (rescale) - cerr << setw(25) << "rescale " << rescale << endl; - cerr << setw(25) << "max pairs " << max_pairs << endl; - cerr << setw(25) << "cdec cfg " << "'" << cfg["decoder_config"].as() << "'" << endl; - cerr << setw(25) << "input " << "'" << input_fn << "'" << endl; -#ifdef DTRAIN_LOCAL - cerr << setw(25) << "refs " << "'" << refs_fn << "'" << endl; -#endif - cerr << setw(25) << "output " << "'" << output_fn << "'" << endl; - if (cfg.count("input_weights")) - cerr << setw(25) << "weights in " << "'" << cfg["input_weights"].as() << "'" << endl; - if (stop_after > 0) - cerr << setw(25) << "stop_after " << stop_after << endl; - if (!verbose) cerr << "(a dot represents " << DTRAIN_DOTS << " inputs)" << endl; - } - - - for (unsigned t = 0; t < T; t++) // T epochs - { - - if (hstreaming) cerr << "reporter:status:Iteration #" << t+1 << " of " << T << endl; - - time_t start, end; - time(&start); -#ifndef DTRAIN_LOCAL - igzstream grammar_buf_in; - if (t > 0) grammar_buf_in.open(grammar_buf_fn.c_str()); -#endif - score_t score_sum = 0.; - score_t model_sum(0); - unsigned ii = 0, rank_errors = 0, margin_violations = 0, npairs = 0, f_count = 0, list_sz = 0; - if (!quiet) cerr << "Iteration #" << t+1 << " of " << T << "." << endl; - - while(true) - { - - string in; - bool next = false, stop = false; // next iteration or premature stop - if (t == 0) { - if(!getline(*input, in)) next = true; - } else { - if (ii == in_sz) next = true; // stop if we reach the end of our input - } - // stop after X sentences (but still go on for those) - if (stop_after > 0 && stop_after == ii && !next) stop = true; - - // produce some pretty output - if (!quiet && !verbose) { - if (ii == 0) cerr << " "; - if ((ii+1) % (DTRAIN_DOTS) == 0) { - cerr << "."; - cerr.flush(); - } - if ((ii+1) % (20*DTRAIN_DOTS) == 0) { - cerr << " " << ii+1 << endl; - if (!next && !stop) cerr << " "; - } - if (stop) { - if (ii % (20*DTRAIN_DOTS) != 0) cerr << " " << ii << endl; - cerr << "Stopping after " << stop_after << " input sentences." << endl; - } else { - if (next) { - if (ii % (20*DTRAIN_DOTS) != 0) cerr << " " << ii << endl; - } - } - } - - // next iteration - if (next || stop) break; - - // weights - lambdas.init_vector(&dense_weights); - - // getting input - vector ref_ids; // reference as vector -#ifndef DTRAIN_LOCAL - vector in_split; // input: sid\tsrc\tref\tpsg - if (t == 0) { - // handling input - split_in(in, in_split); - if (hstreaming && ii == 0) cerr << "reporter:counter:" << task_id << ",First ID," << in_split[0] << endl; - // getting reference - vector ref_tok; - boost::split(ref_tok, in_split[2], boost::is_any_of(" ")); - register_and_convert(ref_tok, ref_ids); - ref_ids_buf.push_back(ref_ids); - // process and set grammar - bool broken_grammar = true; // ignore broken grammars - for (string::iterator it = in.begin(); it != in.end(); it++) { - if (!isspace(*it)) { - broken_grammar = false; - break; - } - } - if (broken_grammar) { - cerr << "Broken grammar for " << ii+1 << "! Ignoring this input." << endl; - continue; - } - boost::replace_all(in, "\t", "\n"); - in += "\n"; - grammar_buf_out << in << DTRAIN_GRAMMAR_DELIM << " " << in_split[0] << endl; - decoder.AddSupplementalGrammarFromString(in); - src_str_buf.push_back(in_split[1]); - // decode - observer->SetRef(ref_ids); - decoder.Decode(in_split[1], observer); - } else { - // get buffered grammar - string grammar_str; - while (true) { - string rule; - getline(grammar_buf_in, rule); - if (boost::starts_with(rule, DTRAIN_GRAMMAR_DELIM)) break; - grammar_str += rule + "\n"; - } - decoder.AddSupplementalGrammarFromString(grammar_str); - // decode - observer->SetRef(ref_ids_buf[ii]); - decoder.Decode(src_str_buf[ii], observer); - } -#else - if (t == 0) { - string r_; - getline(*refs, r_); - vector ref_tok; - boost::split(ref_tok, r_, boost::is_any_of(" ")); - register_and_convert(ref_tok, ref_ids); - ref_ids_buf.push_back(ref_ids); - src_str_buf.push_back(in); - } else { - ref_ids = ref_ids_buf[ii]; - } - observer->SetRef(ref_ids); - if (t == 0) - decoder.Decode(in, observer); - else - decoder.Decode(src_str_buf[ii], observer); -#endif - - // get (scored) samples - vector* samples = observer->GetSamples(); - - if (verbose) { - cerr << "--- ref for " << ii << ": "; - if (t > 0) printWordIDVec(ref_ids_buf[ii]); - else printWordIDVec(ref_ids); - cerr << endl; - for (unsigned u = 0; u < samples->size(); u++) { - cerr << _p2 << _np << "[" << u << ". '"; - printWordIDVec((*samples)[u].w); - cerr << "'" << endl; - cerr << "SCORE=" << (*samples)[u].score << ",model="<< (*samples)[u].model << endl; - cerr << "F{" << (*samples)[u].f << "} ]" << endl << endl; - } - } - - score_sum += (*samples)[0].score; // stats for 1best - model_sum += (*samples)[0].model; - - f_count += observer->get_f_count(); - list_sz += observer->get_sz(); - - // weight updates - if (!noup) { - // get pairs - vector > pairs; - if (pair_sampling == "all") - all_pairs(samples, pairs, pair_threshold, max_pairs); - if (pair_sampling == "XYX") - partXYX(samples, pairs, pair_threshold, max_pairs, hi_lo); - if (pair_sampling == "PRO") - PROsampling(samples, pairs, pair_threshold, max_pairs); - npairs += pairs.size(); - - for (vector >::iterator it = pairs.begin(); - it != pairs.end(); it++) { -#ifdef DTRAIN_FASTER_PERCEPTRON - bool rank_error = true; // pair sampling already did this for us - rank_errors++; - score_t margin = std::numeric_limits::max(); -#else - bool rank_error = it->first.model <= it->second.model; - if (rank_error) rank_errors++; - score_t margin = fabs(fabs(it->first.model) - fabs(it->second.model)); - if (!rank_error && margin < loss_margin) margin_violations++; -#endif - if (scale_bleu_diff) eta = it->first.score - it->second.score; - if (rank_error || margin < loss_margin) { - SparseVector diff_vec = it->first.f - it->second.f; - lambdas.plus_eq_v_times_s(diff_vec, eta); - if (gamma) - lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta*(1./npairs)); - } - } - - // l1 regularization - if (l1naive) { - for (unsigned d = 0; d < lambdas.size(); d++) { - weight_t v = lambdas.get(d); - lambdas.set_value(d, v - sign(v) * l1_reg); - } - } else if (l1clip) { - for (unsigned d = 0; d < lambdas.size(); d++) { - if (lambdas.nonzero(d)) { - weight_t v = lambdas.get(d); - if (v > 0) { - lambdas.set_value(d, max(0., v - l1_reg)); - } else { - lambdas.set_value(d, min(0., v + l1_reg)); - } - } - } - } else if (l1cumul) { - weight_t acc_penalty = (ii+1) * l1_reg; // ii is the index of the current input - for (unsigned d = 0; d < lambdas.size(); d++) { - if (lambdas.nonzero(d)) { - weight_t v = lambdas.get(d); - weight_t penalty = 0; - if (v > 0) { - penalty = max(0., v-(acc_penalty + cumulative_penalties.get(d))); - } else { - penalty = min(0., v+(acc_penalty - cumulative_penalties.get(d))); - } - lambdas.set_value(d, penalty); - cumulative_penalties.set_value(d, cumulative_penalties.get(d)+penalty); - } - } - } - - } - - if (rescale) lambdas /= lambdas.l2norm(); - - ++ii; - - if (hstreaming) { - rep.update_counter("Seen #"+boost::lexical_cast(t+1), 1u); - rep.update_counter("Seen", 1u); - } - - } // input loop - - if (average) w_average += lambdas; - - if (scorer_str == "approx_bleu" || scorer_str == "lc_bleu") scorer->Reset(); - - if (t == 0) { - in_sz = ii; // remember size of input (# lines) - if (hstreaming) { - rep.update_counter("|Input|", ii); - rep.update_gcounter("|Input|", ii); - rep.update_gcounter("Shards", 1u); - } - } - -#ifndef DTRAIN_LOCAL - if (t == 0) { - grammar_buf_out.close(); - } else { - grammar_buf_in.close(); - } -#endif - - // print some stats - score_t score_avg = score_sum/(score_t)in_sz; - score_t model_avg = model_sum/(score_t)in_sz; - score_t score_diff, model_diff; - if (t > 0) { - score_diff = score_avg - all_scores[t-1].first; - model_diff = model_avg - all_scores[t-1].second; - } else { - score_diff = score_avg; - model_diff = model_avg; - } - - unsigned nonz = 0; - if (!quiet || hstreaming) nonz = (unsigned)lambdas.num_nonzero(); - - if (!quiet) { - cerr << _p5 << _p << "WEIGHTS" << endl; - for (vector::iterator it = print_weights.begin(); it != print_weights.end(); it++) { - cerr << setw(18) << *it << " = " << lambdas.get(FD::Convert(*it)) << endl; - } - cerr << " ---" << endl; - cerr << _np << " 1best avg score: " << score_avg; - cerr << _p << " (" << score_diff << ")" << endl; - cerr << _np << " 1best avg model score: " << model_avg; - cerr << _p << " (" << model_diff << ")" << endl; - cerr << " avg # pairs: "; - cerr << _np << npairs/(float)in_sz << endl; - cerr << " avg # rank err: "; - cerr << rank_errors/(float)in_sz << endl; -#ifndef DTRAIN_FASTER_PERCEPTRON - cerr << " avg # margin viol: "; - cerr << margin_violations/(float)in_sz << endl; -#endif - cerr << " non0 feature count: " << nonz << endl; - cerr << " avg list sz: " << list_sz/(float)in_sz << endl; - cerr << " avg f count: " << f_count/(float)list_sz << endl; - } - - if (hstreaming) { - rep.update_counter("Score 1best avg #"+boost::lexical_cast(t+1), (unsigned)(score_avg*DTRAIN_SCALE)); - rep.update_counter("Model 1best avg #"+boost::lexical_cast(t+1), (unsigned)(model_avg*DTRAIN_SCALE)); - rep.update_counter("Pairs avg #"+boost::lexical_cast(t+1), (unsigned)((npairs/(weight_t)in_sz)*DTRAIN_SCALE)); - rep.update_counter("Rank errors avg #"+boost::lexical_cast(t+1), (unsigned)((rank_errors/(weight_t)in_sz)*DTRAIN_SCALE)); - rep.update_counter("Margin violations avg #"+boost::lexical_cast(t+1), (unsigned)((margin_violations/(weight_t)in_sz)*DTRAIN_SCALE)); - rep.update_counter("Non zero feature count #"+boost::lexical_cast(t+1), nonz); - rep.update_gcounter("Non zero feature count #"+boost::lexical_cast(t+1), nonz); - } - - pair remember; - remember.first = score_avg; - remember.second = model_avg; - all_scores.push_back(remember); - if (score_avg > max_score) { - max_score = score_avg; - best_it = t; - } - time (&end); - float time_diff = difftime(end, start); - overall_time += time_diff; - if (!quiet) { - cerr << _p2 << _np << "(time " << time_diff/60. << " min, "; - cerr << time_diff/in_sz << " s/S)" << endl; - } - if (t+1 != T && !quiet) cerr << endl; - - if (noup) break; - - // write weights to file - if (select_weights == "best" || keep) { - lambdas.init_vector(&dense_weights); - string w_fn = "weights." + boost::lexical_cast(t) + ".gz"; - Weights::WriteToFile(w_fn, dense_weights, true); - } - - } // outer loop - - if (average) w_average /= (weight_t)T; - -#ifndef DTRAIN_LOCAL - unlink(grammar_buf_fn.c_str()); -#endif - - if (!noup) { - if (!quiet) cerr << endl << "Writing weights file to '" << output_fn << "' ..." << endl; - if (select_weights == "last" || average) { // last, average - WriteFile of(output_fn); // works with '-' - ostream& o = *of.stream(); - o.precision(17); - o << _np; - if (average) { - for (SparseVector::iterator it = w_average.begin(); it != w_average.end(); ++it) { - if (it->second == 0) continue; - o << FD::Convert(it->first) << '\t' << it->second << endl; - } - } else { - for (SparseVector::iterator it = lambdas.begin(); it != lambdas.end(); ++it) { - if (it->second == 0) continue; - o << FD::Convert(it->first) << '\t' << it->second << endl; - } - } - } else if (select_weights == "VOID") { // do nothing with the weights - } else { // best - if (output_fn != "-") { - CopyFile("weights."+boost::lexical_cast(best_it)+".gz", output_fn); - } else { - ReadFile bestw("weights."+boost::lexical_cast(best_it)+".gz"); - string o; - cout.precision(17); - cout << _np; - while(getline(*bestw, o)) cout << o << endl; - } - if (!keep) { - for (unsigned i = 0; i < T; i++) { - string s = "weights." + boost::lexical_cast(i) + ".gz"; - unlink(s.c_str()); - } - } - } - if (output_fn == "-" && hstreaming) cout << "__SHARD_COUNT__\t1" << endl; - if (!quiet) cerr << "done" << endl; - } - - if (!quiet) { - cerr << _p5 << _np << endl << "---" << endl << "Best iteration: "; - cerr << best_it+1 << " [SCORE '" << scorer_str << "'=" << max_score << "]." << endl; - cerr << "This took " << overall_time/60. << " min." << endl; - } -} - diff --git a/dtrain/dtrain.h b/dtrain/dtrain.h deleted file mode 100644 index 4b6f415c..00000000 --- a/dtrain/dtrain.h +++ /dev/null @@ -1,97 +0,0 @@ -#ifndef _DTRAIN_H_ -#define _DTRAIN_H_ - -#undef DTRAIN_FASTER_PERCEPTRON // only look at misranked pairs - // DO NOT USE WITH SVM! -//#define DTRAIN_LOCAL -#define DTRAIN_DOTS 10 // after how many inputs to display a '.' -#define DTRAIN_GRAMMAR_DELIM "########EOS########" -#define DTRAIN_SCALE 100000 - - -#include -#include -#include - -#include -#include - -#include "ksampler.h" -#include "pairsampling.h" - -#include "filelib.h" - - -using namespace std; -using namespace dtrain; -namespace po = boost::program_options; - -inline void register_and_convert(const vector& strs, vector& ids) -{ - vector::const_iterator it; - for (it = strs.begin(); it < strs.end(); it++) - ids.push_back(TD::Convert(*it)); -} - -inline string gettmpf(const string path, const string infix) -{ - char fn[path.size() + infix.size() + 8]; - strcpy(fn, path.c_str()); - strcat(fn, "/"); - strcat(fn, infix.c_str()); - strcat(fn, "-XXXXXX"); - if (!mkstemp(fn)) { - cerr << "Cannot make temp file in" << path << " , exiting." << endl; - exit(1); - } - return string(fn); -} - -inline void split_in(string& s, vector& parts) -{ - unsigned f = 0; - for(unsigned i = 0; i < 3; i++) { - unsigned e = f; - f = s.find("\t", f+1); - if (e != 0) parts.push_back(s.substr(e+1, f-e-1)); - else parts.push_back(s.substr(0, f)); - } - s.erase(0, f+1); -} - -struct HSReporter -{ - string task_id_; - - HSReporter(string task_id) : task_id_(task_id) {} - - inline void update_counter(string name, unsigned amount) { - cerr << "reporter:counter:" << task_id_ << "," << name << "," << amount << endl; - } - inline void update_gcounter(string name, unsigned amount) { - cerr << "reporter:counter:Global," << name << "," << amount << endl; - } -}; - -inline ostream& _np(ostream& out) { return out << resetiosflags(ios::showpos); } -inline ostream& _p(ostream& out) { return out << setiosflags(ios::showpos); } -inline ostream& _p2(ostream& out) { return out << setprecision(2); } -inline ostream& _p5(ostream& out) { return out << setprecision(5); } - -inline void printWordIDVec(vector& v) -{ - for (unsigned i = 0; i < v.size(); i++) { - cerr << TD::Convert(v[i]); - if (i < v.size()-1) cerr << " "; - } -} - -template -inline T sign(T z) -{ - if (z == 0) return 0; - return z < 0 ? -1 : +1; -} - -#endif - diff --git a/dtrain/hstreaming/avg.rb b/dtrain/hstreaming/avg.rb deleted file mode 100755 index 2599c732..00000000 --- a/dtrain/hstreaming/avg.rb +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env ruby -# first arg may be an int of custom shard count - -shard_count_key = "__SHARD_COUNT__" - -STDIN.set_encoding 'utf-8' -STDOUT.set_encoding 'utf-8' - -w = {} -c = {} -w.default = 0 -c.default = 0 -while line = STDIN.gets - key, val = line.split /\s/ - w[key] += val.to_f - c[key] += 1 -end - -if ARGV.size == 0 - shard_count = w["__SHARD_COUNT__"] -else - shard_count = ARGV[0].to_f -end -w.each_key { |k| - if k == shard_count_key - next - else - puts "#{k}\t#{w[k]/shard_count}" - #puts "# #{c[k]}" - end -} - diff --git a/dtrain/hstreaming/cdec.ini b/dtrain/hstreaming/cdec.ini deleted file mode 100644 index d4f5cecd..00000000 --- a/dtrain/hstreaming/cdec.ini +++ /dev/null @@ -1,22 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -scfg_max_span_limit=15 -intersection_strategy=cube_pruning -cubepruning_pop_limit=30 -feature_function=WordPenalty -feature_function=KLanguageModel nc-wmt11.en.srilm.gz -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -#feature_function=RuleIdentityFeatures -#feature_function=RuleNgramFeatures -#feature_function=RuleShape -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures diff --git a/dtrain/hstreaming/dtrain.ini b/dtrain/hstreaming/dtrain.ini deleted file mode 100644 index a2c219a1..00000000 --- a/dtrain/hstreaming/dtrain.ini +++ /dev/null @@ -1,15 +0,0 @@ -input=- -output=- -decoder_config=cdec.ini -tmp=/var/hadoop/mapred/local/ -epochs=1 -k=100 -N=4 -learning_rate=0.0001 -gamma=0 -scorer=stupid_bleu -sample_from=kbest -filter=uniq -pair_sampling=XYX -pair_threshold=0 -select_weights=last diff --git a/dtrain/hstreaming/dtrain.sh b/dtrain/hstreaming/dtrain.sh deleted file mode 100755 index 877ff94c..00000000 --- a/dtrain/hstreaming/dtrain.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -# script to run dtrain with a task id - -pushd . &>/dev/null -cd .. -ID=$(basename $(pwd)) # attempt_... -popd &>/dev/null -./dtrain -c dtrain.ini --hstreaming $ID - diff --git a/dtrain/hstreaming/hadoop-streaming-job.sh b/dtrain/hstreaming/hadoop-streaming-job.sh deleted file mode 100755 index 92419956..00000000 --- a/dtrain/hstreaming/hadoop-streaming-job.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/sh - -EXP=a_simple_test - -# change these vars to fit your hadoop installation -HADOOP_HOME=/usr/lib/hadoop-0.20 -JAR=contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar -HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR" - - IN=input_on_hdfs -OUT=output_weights_on_hdfs - -# you can -reducer to NONE if you want to -# do feature selection/averaging locally (e.g. to -# keep weights of all epochs) -$HSTREAMING \ - -mapper "dtrain.sh" \ - -reducer "ruby lplp.rb l2 select_k 100000" \ - -input $IN \ - -output $OUT \ - -file dtrain.sh \ - -file lplp.rb \ - -file ../dtrain \ - -file dtrain.ini \ - -file cdec.ini \ - -file ../test/example/nc-wmt11.en.srilm.gz \ - -jobconf mapred.reduce.tasks=30 \ - -jobconf mapred.max.map.failures.percent=0 \ - -jobconf mapred.job.name="dtrain $EXP" - diff --git a/dtrain/hstreaming/lplp.rb b/dtrain/hstreaming/lplp.rb deleted file mode 100755 index f0cd58c5..00000000 --- a/dtrain/hstreaming/lplp.rb +++ /dev/null @@ -1,131 +0,0 @@ -# lplp.rb - -# norms -def l0(feature_column, n) - if feature_column.size >= n then return 1 else return 0 end -end - -def l1(feature_column, n=-1) - return feature_column.map { |i| i.abs }.reduce { |sum,i| sum+i } -end - -def l2(feature_column, n=-1) - return Math.sqrt feature_column.map { |i| i.abs2 }.reduce { |sum,i| sum+i } -end - -def linfty(feature_column, n=-1) - return feature_column.map { |i| i.abs }.max -end - -# stats -def median(feature_column, n) - return feature_column.concat(0.step(n-feature_column.size-1).map{|i|0}).sort[feature_column.size/2] -end - -def mean(feature_column, n) - return feature_column.reduce { |sum, i| sum+i } / n -end - -# selection -def select_k(weights, norm_fun, n, k=10000) - weights.sort{|a,b| norm_fun.call(b[1], n) <=> norm_fun.call(a[1], n)}.each { |p| - puts "#{p[0]}\t#{mean(p[1], n)}" - k -= 1 - if k == 0 then break end - } -end - -def cut(weights, norm_fun, n, epsilon=0.0001) - weights.each { |k,v| - if norm_fun.call(v, n).abs >= epsilon - puts "#{k}\t#{mean(v, n)}" - end - } -end - -# test -def _test() - puts - w = {} - w["a"] = [1, 2, 3] - w["b"] = [1, 2] - w["c"] = [66] - w["d"] = [10, 20, 30] - n = 3 - puts w.to_s - puts - puts "select_k" - puts "l0 expect ad" - select_k(w, method(:l0), n, 2) - puts "l1 expect cd" - select_k(w, method(:l1), n, 2) - puts "l2 expect c" - select_k(w, method(:l2), n, 1) - puts - puts "cut" - puts "l1 expect cd" - cut(w, method(:l1), n, 7) - puts - puts "median" - a = [1,2,3,4,5] - puts a.to_s - puts median(a, 5) - puts - puts "#{median(a, 7)} <- that's because we add missing 0s:" - puts a.concat(0.step(7-a.size-1).map{|i|0}).to_s - puts - puts "mean expect bc" - w.clear - w["a"] = [2] - w["b"] = [2.1] - w["c"] = [2.2] - cut(w, method(:mean), 1, 2.05) - exit -end -#_test() - -# actually do something -def usage() - puts "lplp.rb [n] < " - puts " l0...: norms for selection" - puts "select_k: only output top k (according to the norm of their column vector) features" - puts " cut: output features with weight >= threshold" - puts " n: if we do not have a shard count use this number for averaging" - exit -end - -if ARGV.size < 3 then usage end -norm_fun = method(ARGV[0].to_sym) -type = ARGV[1] -x = ARGV[2].to_f - -shard_count_key = "__SHARD_COUNT__" - -STDIN.set_encoding 'utf-8' -STDOUT.set_encoding 'utf-8' - -w = {} -shard_count = 0 -while line = STDIN.gets - key, val = line.split /\s+/ - if key == shard_count_key - shard_count += 1 - next - end - if w.has_key? key - w[key].push val.to_f - else - w[key] = [val.to_f] - end -end - -if ARGV.size == 4 then shard_count = ARGV[3].to_f end - -if type == 'cut' - cut(w, norm_fun, shard_count, x) -elsif type == 'select_k' - select_k(w, norm_fun, shard_count, x) -else - puts "oh oh" -end - diff --git a/dtrain/hstreaming/red-test b/dtrain/hstreaming/red-test deleted file mode 100644 index 2623d697..00000000 --- a/dtrain/hstreaming/red-test +++ /dev/null @@ -1,9 +0,0 @@ -a 1 -b 2 -c 3.5 -a 1 -b 2 -c 3.5 -d 1 -e 2 -__SHARD_COUNT__ 2 diff --git a/dtrain/kbestget.h b/dtrain/kbestget.h deleted file mode 100644 index dd8882e1..00000000 --- a/dtrain/kbestget.h +++ /dev/null @@ -1,152 +0,0 @@ -#ifndef _DTRAIN_KBESTGET_H_ -#define _DTRAIN_KBESTGET_H_ - -#include "kbest.h" // cdec -#include "sentence_metadata.h" - -#include "verbose.h" -#include "viterbi.h" -#include "ff_register.h" -#include "decoder.h" -#include "weights.h" -#include "logval.h" - -using namespace std; - -namespace dtrain -{ - - -typedef double score_t; - -struct ScoredHyp -{ - vector w; - SparseVector f; - score_t model; - score_t score; - unsigned rank; -}; - -struct LocalScorer -{ - unsigned N_; - vector w_; - - virtual score_t - Score(vector& hyp, vector& ref, const unsigned rank, const unsigned src_len)=0; - - void Reset() {} // only for approx bleu - - inline void - Init(unsigned N, vector weights) - { - assert(N > 0); - N_ = N; - if (weights.empty()) for (unsigned i = 0; i < N_; i++) w_.push_back(1./N_); - else w_ = weights; - } - - inline score_t - brevity_penalty(const unsigned hyp_len, const unsigned ref_len) - { - if (hyp_len > ref_len) return 1; - return exp(1 - (score_t)ref_len/hyp_len); - } -}; - -struct HypSampler : public DecoderObserver -{ - LocalScorer* scorer_; - vector* ref_; - unsigned f_count_, sz_; - virtual vector* GetSamples()=0; - inline void SetScorer(LocalScorer* scorer) { scorer_ = scorer; } - inline void SetRef(vector& ref) { ref_ = &ref; } - inline unsigned get_f_count() { return f_count_; } - inline unsigned get_sz() { return sz_; } -}; -//////////////////////////////////////////////////////////////////////////////// - - - - -struct KBestGetter : public HypSampler -{ - const unsigned k_; - const string filter_type_; - vector s_; - unsigned src_len_; - - KBestGetter(const unsigned k, const string filter_type) : - k_(k), filter_type_(filter_type) {} - - virtual void - NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) - { - src_len_ = smeta.GetSourceLength(); - KBestScored(*hg); - } - - vector* GetSamples() { return &s_; } - - void - KBestScored(const Hypergraph& forest) - { - if (filter_type_ == "uniq") { - KBestUnique(forest); - } else if (filter_type_ == "not") { - KBestNoFilter(forest); - } - } - - void - KBestUnique(const Hypergraph& forest) - { - s_.clear(); sz_ = f_count_ = 0; - KBest::KBestDerivations, ESentenceTraversal, - KBest::FilterUnique, prob_t, EdgeProb> kbest(forest, k_); - for (unsigned i = 0; i < k_; ++i) { - const KBest::KBestDerivations, ESentenceTraversal, KBest::FilterUnique, - prob_t, EdgeProb>::Derivation* d = - kbest.LazyKthBest(forest.nodes_.size() - 1, i); - if (!d) break; - ScoredHyp h; - h.w = d->yield; - h.f = d->feature_values; - h.model = log(d->score); - h.rank = i; - h.score = scorer_->Score(h.w, *ref_, i, src_len_); - s_.push_back(h); - sz_++; - f_count_ += h.f.size(); - } - } - - void - KBestNoFilter(const Hypergraph& forest) - { - s_.clear(); sz_ = f_count_ = 0; - KBest::KBestDerivations, ESentenceTraversal> kbest(forest, k_); - for (unsigned i = 0; i < k_; ++i) { - const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = - kbest.LazyKthBest(forest.nodes_.size() - 1, i); - if (!d) break; - ScoredHyp h; - h.w = d->yield; - h.f = d->feature_values; - h.model = log(d->score); - h.rank = i; - h.score = scorer_->Score(h.w, *ref_, i, src_len_); - s_.push_back(h); - sz_++; - f_count_ += h.f.size(); - } - } -}; - - -} // namespace - -#endif - diff --git a/dtrain/ksampler.h b/dtrain/ksampler.h deleted file mode 100644 index bc2f56cd..00000000 --- a/dtrain/ksampler.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _DTRAIN_KSAMPLER_H_ -#define _DTRAIN_KSAMPLER_H_ - -#include "hg_sampler.h" // cdec -#include "kbestget.h" -#include "score.h" - -namespace dtrain -{ - -bool -cmp_hyp_by_model_d(ScoredHyp a, ScoredHyp b) -{ - return a.model > b.model; -} - -struct KSampler : public HypSampler -{ - const unsigned k_; - vector s_; - MT19937* prng_; - score_t (*scorer)(NgramCounts&, const unsigned, const unsigned, unsigned, vector); - unsigned src_len_; - - explicit KSampler(const unsigned k, MT19937* prng) : - k_(k), prng_(prng) {} - - virtual void - NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) - { - src_len_ = smeta.GetSourceLength(); - ScoredSamples(*hg); - } - - vector* GetSamples() { return &s_; } - - void ScoredSamples(const Hypergraph& forest) { - s_.clear(); sz_ = f_count_ = 0; - std::vector samples; - HypergraphSampler::sample_hypotheses(forest, k_, prng_, &samples); - for (unsigned i = 0; i < k_; ++i) { - ScoredHyp h; - h.w = samples[i].words; - h.f = samples[i].fmap; - h.model = log(samples[i].model_score); - h.rank = i; - h.score = scorer_->Score(h.w, *ref_, i, src_len_); - s_.push_back(h); - sz_++; - f_count_ += h.f.size(); - } - sort(s_.begin(), s_.end(), cmp_hyp_by_model_d); - for (unsigned i = 0; i < s_.size(); i++) s_[i].rank = i; - } -}; - - -} // namespace - -#endif - diff --git a/dtrain/pairsampling.h b/dtrain/pairsampling.h deleted file mode 100644 index 84be1efb..00000000 --- a/dtrain/pairsampling.h +++ /dev/null @@ -1,149 +0,0 @@ -#ifndef _DTRAIN_PAIRSAMPLING_H_ -#define _DTRAIN_PAIRSAMPLING_H_ - -namespace dtrain -{ - - -bool -accept_pair(score_t a, score_t b, score_t threshold) -{ - if (fabs(a - b) < threshold) return false; - return true; -} - -bool -cmp_hyp_by_score_d(ScoredHyp a, ScoredHyp b) -{ - return a.score > b.score; -} - -inline void -all_pairs(vector* s, vector >& training, score_t threshold, unsigned max, float _unused=1) -{ - sort(s->begin(), s->end(), cmp_hyp_by_score_d); - unsigned sz = s->size(); - bool b = false; - unsigned count = 0; - for (unsigned i = 0; i < sz-1; i++) { - for (unsigned j = i+1; j < sz; j++) { - if (threshold > 0) { - if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) - training.push_back(make_pair((*s)[i], (*s)[j])); - } else { - if ((*s)[i].score != (*s)[j].score) - training.push_back(make_pair((*s)[i], (*s)[j])); - } - if (++count == max) { - b = true; - break; - } - } - if (b) break; - } -} - -/* - * multipartite ranking - * sort (descending) by bleu - * compare top X to middle Y and low X - * cmp middle Y to low X - */ - -inline void -partXYX(vector* s, vector >& training, score_t threshold, unsigned max, float hi_lo) -{ - unsigned sz = s->size(); - if (sz < 2) return; - sort(s->begin(), s->end(), cmp_hyp_by_score_d); - unsigned sep = round(sz*hi_lo); - unsigned sep_hi = sep; - if (sz > 4) while (sep_hi < sz && (*s)[sep_hi-1].score == (*s)[sep_hi].score) ++sep_hi; - else sep_hi = 1; - bool b = false; - unsigned count = 0; - for (unsigned i = 0; i < sep_hi; i++) { - for (unsigned j = sep_hi; j < sz; j++) { -#ifdef DTRAIN_FASTER_PERCEPTRON - if ((*s)[i].model <= (*s)[j].model) { -#endif - if (threshold > 0) { - if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) - training.push_back(make_pair((*s)[i], (*s)[j])); - } else { - if ((*s)[i].score != (*s)[j].score) - training.push_back(make_pair((*s)[i], (*s)[j])); - } - if (++count == max) { - b = true; - break; - } -#ifdef DTRAIN_FASTER_PERCEPTRON - } -#endif - } - if (b) break; - } - unsigned sep_lo = sz-sep; - while (sep_lo > 0 && (*s)[sep_lo-1].score == (*s)[sep_lo].score) --sep_lo; - for (unsigned i = sep_hi; i < sz-sep_lo; i++) { - for (unsigned j = sz-sep_lo; j < sz; j++) { -#ifdef DTRAIN_FASTER_PERCEPTRON - if ((*s)[i].model <= (*s)[j].model) { -#endif - if (threshold > 0) { - if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) - training.push_back(make_pair((*s)[i], (*s)[j])); - } else { - if ((*s)[i].score != (*s)[j].score) - training.push_back(make_pair((*s)[i], (*s)[j])); - } - if (++count == max) return; -#ifdef DTRAIN_FASTER_PERCEPTRON - } -#endif - } - } -} - -/* - * pair sampling as in - * 'Tuning as Ranking' (Hopkins & May, 2011) - * count = 5000 - * threshold = 5% BLEU (0.05 for param 3) - * cut = top 50 - */ -bool -_PRO_cmp_pair_by_diff_d(pair a, pair b) -{ - return (fabs(a.first.score - a.second.score)) > (fabs(b.first.score - b.second.score)); -} -inline void -PROsampling(vector* s, vector >& training, score_t threshold, unsigned max, float _unused=1) -{ - unsigned max_count = 5000, count = 0, sz = s->size(); - bool b = false; - for (unsigned i = 0; i < sz-1; i++) { - for (unsigned j = i+1; j < sz; j++) { - if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) { - training.push_back(make_pair((*s)[i], (*s)[j])); - if (++count == max_count) { - b = true; - break; - } - } - } - if (b) break; - } - if (training.size() > 50) { - sort(training.begin(), training.end(), _PRO_cmp_pair_by_diff_d); - training.erase(training.begin()+50, training.end()); - } - return; -} - - -} // namespace - -#endif - diff --git a/dtrain/parallelize.rb b/dtrain/parallelize.rb deleted file mode 100755 index 1d277ff6..00000000 --- a/dtrain/parallelize.rb +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env ruby - - -if ARGV.size != 5 - STDERR.write "Usage: " - STDERR.write "ruby parallelize.rb <#shards> \n" - exit -end - -dtrain_bin = '/home/pks/bin/dtrain_local' -ruby = '/usr/bin/ruby' -lplp_rb = '/home/pks/mt/cdec-dtrain/dtrain/hstreaming/lplp.rb' -lplp_args = 'l2 select_k 100000' -gzip = '/bin/gzip' - -num_shards = ARGV[0].to_i -input = ARGV[1] -refs = ARGV[2] -epochs = ARGV[3].to_i -ini = ARGV[4] - - -`mkdir work` - -def make_shards(input, refs, num_shards) - lc = `wc -l #{input}`.split.first.to_i - shard_sz = lc / num_shards - leftover = lc % num_shards - in_f = File.new input, 'r' - refs_f = File.new refs, 'r' - shard_in_files = [] - shard_refs_files = [] - 0.upto(num_shards-1) { |shard| - shard_in = File.new "work/shard.#{shard}.in", 'w+' - shard_refs = File.new "work/shard.#{shard}.refs", 'w+' - 0.upto(shard_sz-1) { |i| - shard_in.write in_f.gets - shard_refs.write refs_f.gets - } - shard_in_files << shard_in - shard_refs_files << shard_refs - } - while leftover > 0 - shard_in_files[-1].write in_f.gets - shard_refs_files[-1].write refs_f.gets - leftover -= 1 - end - (shard_in_files + shard_refs_files).each do |f| f.close end - in_f.close - refs_f.close -end - -make_shards input, refs, num_shards - -0.upto(epochs-1) { |epoch| - pids = [] - input_weights = '' - if epoch > 0 then input_weights = "--input_weights work/weights.#{epoch-1}" end - weights_files = [] - 0.upto(num_shards-1) { |shard| - pids << Kernel.fork { - `#{dtrain_bin} -c #{ini}\ - --input work/shard.#{shard}.in\ - --refs work/shard.#{shard}.refs #{input_weights}\ - --output work/weights.#{shard}.#{epoch}\ - &> work/out.#{shard}.#{epoch}` - } - weights_files << "work/weights.#{shard}.#{epoch}" - } - pids.each { |pid| Process.wait(pid) } - cat = File.new('work/weights_cat', 'w+') - weights_files.each { |f| cat.write File.new(f, 'r').read } - cat.close - `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat &> work/weights.#{epoch}` -} - -`rm work/weights_cat` -`#{gzip} work/*` - diff --git a/dtrain/parallelize/test/cdec.ini b/dtrain/parallelize/test/cdec.ini deleted file mode 100644 index 72e99dc5..00000000 --- a/dtrain/parallelize/test/cdec.ini +++ /dev/null @@ -1,22 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -intersection_strategy=cube_pruning -cubepruning_pop_limit=200 -scfg_max_span_limit=15 -feature_function=WordPenalty -feature_function=KLanguageModel /stor/dat/wmt12/en/news_only/m/wmt12.news.en.3.kenv5 -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -#feature_function=RuleIdentityFeatures -#feature_function=RuleNgramFeatures -#feature_function=RuleShape -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures diff --git a/dtrain/parallelize/test/dtrain.ini b/dtrain/parallelize/test/dtrain.ini deleted file mode 100644 index 03f9d240..00000000 --- a/dtrain/parallelize/test/dtrain.ini +++ /dev/null @@ -1,15 +0,0 @@ -k=100 -N=4 -learning_rate=0.0001 -gamma=0 -loss_margin=0 -epochs=1 -scorer=stupid_bleu -sample_from=kbest -filter=uniq -pair_sampling=XYX -hi_lo=0.1 -select_weights=last -print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough -tmp=/tmp -decoder_config=cdec.ini diff --git a/dtrain/parallelize/test/in b/dtrain/parallelize/test/in deleted file mode 100644 index a312809f..00000000 --- a/dtrain/parallelize/test/in +++ /dev/null @@ -1,10 +0,0 @@ -barack obama erhält als vierter us @-@ präsident den frieden nobelpreis -der amerikanische präsident barack obama kommt für 26 stunden nach oslo , norwegen , um hier als vierter us @-@ präsident in der geschichte den frieden nobelpreis entgegen zunehmen . -darüber hinaus erhält er das diplom sowie die medaille und einen scheck über 1,4 mio. dollar für seine außer gewöhnlichen bestrebungen um die intensivierung der welt diplomatie und zusammen arbeit unter den völkern . -der chef des weißen hauses kommt morgen zusammen mit seiner frau michelle in der nordwegischen metropole an und wird die ganze zeit beschäftigt sein . -zunächst stattet er dem nobel @-@ institut einen besuch ab , wo er überhaupt zum ersten mal mit den fünf ausschuss mitglieder zusammen trifft , die ihn im oktober aus 172 leuten und 33 organisationen gewählt haben . -das präsidenten paar hat danach ein treffen mit dem norwegischen könig harald v. und königin sonja eingeplant . -nachmittags erreicht dann der besuch seinen höhepunkt mit der zeremonie , bei der obama den prestige preis übernimmt . -diesen erhält er als der vierte us @-@ präsident , aber erst als der dritte , der den preis direkt im amt entgegen nimmt . -das weiße haus avisierte schon , dass obama bei der übernahme des preises über den afghanistan krieg sprechen wird . -der präsident will diesem thema nicht ausweichen , weil er weiß , dass er den preis als ein präsident übernimmt , der zur zeit krieg in zwei ländern führt . diff --git a/dtrain/parallelize/test/refs b/dtrain/parallelize/test/refs deleted file mode 100644 index 4d3128cb..00000000 --- a/dtrain/parallelize/test/refs +++ /dev/null @@ -1,10 +0,0 @@ -barack obama becomes the fourth american president to receive the nobel peace prize -the american president barack obama will fly into oslo , norway for 26 hours to receive the nobel peace prize , the fourth american president in history to do so . -he will receive a diploma , medal and cheque for 1.4 million dollars for his exceptional efforts to improve global diplomacy and encourage international cooperation , amongst other things . -the head of the white house will be flying into the norwegian city in the morning with his wife michelle and will have a busy schedule . -first , he will visit the nobel institute , where he will have his first meeting with the five committee members who selected him from 172 people and 33 organisations . -the presidential couple then has a meeting scheduled with king harald v and queen sonja of norway . -then , in the afternoon , the visit will culminate in a grand ceremony , at which obama will receive the prestigious award . -he will be the fourth american president to be awarded the prize , and only the third to have received it while actually in office . -the white house has stated that , when he accepts the prize , obama will speak about the war in afghanistan . -the president does not want to skirt around this topic , as he realises that he is accepting the prize as a president whose country is currently at war in two countries . diff --git a/dtrain/score.cc b/dtrain/score.cc deleted file mode 100644 index 34fc86a9..00000000 --- a/dtrain/score.cc +++ /dev/null @@ -1,254 +0,0 @@ -#include "score.h" - -namespace dtrain -{ - - -/* - * bleu - * - * as in "BLEU: a Method for Automatic Evaluation - * of Machine Translation" - * (Papineni et al. '02) - * - * NOTE: 0 if for one n \in {1..N} count is 0 - */ -score_t -BleuScorer::Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len) -{ - if (hyp_len == 0 || ref_len == 0) return 0.; - unsigned M = N_; - vector v = w_; - if (ref_len < N_) { - M = ref_len; - for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); - } - score_t sum = 0; - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) return 0.; - sum += v[i] * log((score_t)counts.clipped_[i]/counts.sum_[i]); - } - return brevity_penalty(hyp_len, ref_len) * exp(sum); -} - -score_t -BleuScorer::Score(vector& hyp, vector& ref, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, ref, N_); - return Bleu(counts, hyp_len, ref_len); -} - -/* - * 'stupid' bleu - * - * as in "ORANGE: a Method for Evaluating - * Automatic Evaluation Metrics - * for Machine Translation" - * (Lin & Och '04) - * - * NOTE: 0 iff no 1gram match - */ -score_t -StupidBleuScorer::Score(vector& hyp, vector& ref, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, ref, N_); - unsigned M = N_; - vector v = w_; - if (ref_len < N_) { - M = ref_len; - for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); - } - score_t sum = 0, add = 0; - for (unsigned i = 0; i < M; i++) { - if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.; - if (i == 1) add = 1; - sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add))); - } - return brevity_penalty(hyp_len, ref_len) * exp(sum); -} - -/* - * smooth bleu - * - * as in "An End-to-End Discriminative Approach - * to Machine Translation" - * (Liang et al. '06) - * - * NOTE: max is 0.9375 (with N=4) - */ -score_t -SmoothBleuScorer::Score(vector& hyp, vector& ref, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, ref, N_); - unsigned M = N_; - if (ref_len < N_) M = ref_len; - score_t sum = 0.; - vector i_bleu; - for (unsigned i = 0; i < M; i++) i_bleu.push_back(0.); - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) { - break; - } else { - score_t i_ng = log((score_t)counts.clipped_[i]/counts.sum_[i]); - for (unsigned j = i; j < M; j++) { - i_bleu[j] += (1/((score_t)j+1)) * i_ng; - } - } - sum += exp(i_bleu[i])/pow(2.0, (double)(N_-i)); - } - return brevity_penalty(hyp_len, ref_len) * sum; -} - -/* - * 'sum' bleu - * - * sum up Ngram precisions - */ -score_t -SumBleuScorer::Score(vector& hyp, vector& ref, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, ref, N_); - unsigned M = N_; - if (ref_len < N_) M = ref_len; - score_t sum = 0.; - unsigned j = 1; - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; - sum += ((score_t)counts.clipped_[i]/counts.sum_[i])/pow(2.0, (double) (N_-j+1)); - j++; - } - return brevity_penalty(hyp_len, ref_len) * sum; -} - -/* - * 'sum' (exp) bleu - * - * sum up exp(Ngram precisions) - */ -score_t -SumExpBleuScorer::Score(vector& hyp, vector& ref, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, ref, N_); - unsigned M = N_; - if (ref_len < N_) M = ref_len; - score_t sum = 0.; - unsigned j = 1; - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; - sum += exp(((score_t)counts.clipped_[i]/counts.sum_[i]))/pow(2.0, (double) (N_-j+1)); - j++; - } - return brevity_penalty(hyp_len, ref_len) * sum; -} - -/* - * 'sum' (whatever) bleu - * - * sum up exp(weight * log(Ngram precisions)) - */ -score_t -SumWhateverBleuScorer::Score(vector& hyp, vector& ref, - const unsigned /*rank*/, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (hyp_len == 0 || ref_len == 0) return 0.; - NgramCounts counts = make_ngram_counts(hyp, ref, N_); - unsigned M = N_; - vector v = w_; - if (ref_len < N_) { - M = ref_len; - for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); - } - score_t sum = 0.; - unsigned j = 1; - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; - sum += exp(v[i] * log(((score_t)counts.clipped_[i]/counts.sum_[i])))/pow(2.0, (double) (N_-j+1)); - j++; - } - return brevity_penalty(hyp_len, ref_len) * sum; -} - -/* - * approx. bleu - * - * as in "Online Large-Margin Training of Syntactic - * and Structural Translation Features" - * (Chiang et al. '08) - * - * NOTE: Needs some more code in dtrain.cc . - * No scaling by src len. - */ -score_t -ApproxBleuScorer::Score(vector& hyp, vector& ref, - const unsigned rank, const unsigned src_len) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (ref_len == 0) return 0.; - score_t score = 0.; - NgramCounts counts(N_); - if (hyp_len > 0) { - counts = make_ngram_counts(hyp, ref, N_); - NgramCounts tmp = glob_onebest_counts_ + counts; - score = Bleu(tmp, hyp_len, ref_len); - } - if (rank == 0) { // 'context of 1best translations' - glob_onebest_counts_ += counts; - glob_onebest_counts_ *= discount_; - glob_hyp_len_ = discount_ * (glob_hyp_len_ + hyp_len); - glob_ref_len_ = discount_ * (glob_ref_len_ + ref_len); - glob_src_len_ = discount_ * (glob_src_len_ + src_len); - } - return score; -} - -/* - * Linear (Corpus) Bleu - * - * as in "Lattice Minimum Bayes-Risk Decoding - * for Statistical Machine Translation" - * (Tromble et al. '08) - * - */ -score_t -LinearBleuScorer::Score(vector& hyp, vector& ref, - const unsigned rank, const unsigned /*src_len*/) -{ - unsigned hyp_len = hyp.size(), ref_len = ref.size(); - if (ref_len == 0) return 0.; - unsigned M = N_; - if (ref_len < N_) M = ref_len; - NgramCounts counts(M); - if (hyp_len > 0) - counts = make_ngram_counts(hyp, ref, M); - score_t ret = 0.; - for (unsigned i = 0; i < M; i++) { - if (counts.sum_[i] == 0 || onebest_counts_.sum_[i] == 0) break; - ret += counts.sum_[i]/onebest_counts_.sum_[i]; - } - ret = -(hyp_len/(score_t)onebest_len_) + (1./M) * ret; - if (rank == 0) { - onebest_len_ += hyp_len; - onebest_counts_ += counts; - } - return ret; -} - - -} // namespace - diff --git a/dtrain/score.h b/dtrain/score.h deleted file mode 100644 index f317c903..00000000 --- a/dtrain/score.h +++ /dev/null @@ -1,212 +0,0 @@ -#ifndef _DTRAIN_SCORE_H_ -#define _DTRAIN_SCORE_H_ - -#include "kbestget.h" - -using namespace std; - -namespace dtrain -{ - - -struct NgramCounts -{ - unsigned N_; - map clipped_; - map sum_; - - NgramCounts(const unsigned N) : N_(N) { Zero(); } - - inline void - operator+=(const NgramCounts& rhs) - { - if (rhs.N_ > N_) Resize(rhs.N_); - for (unsigned i = 0; i < N_; i++) { - this->clipped_[i] += rhs.clipped_.find(i)->second; - this->sum_[i] += rhs.sum_.find(i)->second; - } - } - - inline const NgramCounts - operator+(const NgramCounts &other) const - { - NgramCounts result = *this; - result += other; - return result; - } - - inline void - operator*=(const score_t rhs) - { - for (unsigned i = 0; i < N_; i++) { - this->clipped_[i] *= rhs; - this->sum_[i] *= rhs; - } - } - - inline void - Add(const unsigned count, const unsigned ref_count, const unsigned i) - { - assert(i < N_); - if (count > ref_count) { - clipped_[i] += ref_count; - } else { - clipped_[i] += count; - } - sum_[i] += count; - } - - inline void - Zero() - { - for (unsigned i = 0; i < N_; i++) { - clipped_[i] = 0.; - sum_[i] = 0.; - } - } - - inline void - One() - { - for (unsigned i = 0; i < N_; i++) { - clipped_[i] = 1.; - sum_[i] = 1.; - } - } - - inline void - Print() - { - for (unsigned i = 0; i < N_; i++) { - cout << i+1 << "grams (clipped):\t" << clipped_[i] << endl; - cout << i+1 << "grams:\t\t\t" << sum_[i] << endl; - } - } - - inline void Resize(unsigned N) - { - if (N == N_) return; - else if (N > N_) { - for (unsigned i = N_; i < N; i++) { - clipped_[i] = 0.; - sum_[i] = 0.; - } - } else { // N < N_ - for (unsigned i = N_-1; i > N-1; i--) { - clipped_.erase(i); - sum_.erase(i); - } - } - N_ = N; - } -}; - -typedef map, unsigned> Ngrams; - -inline Ngrams -make_ngrams(const vector& s, const unsigned N) -{ - Ngrams ngrams; - vector ng; - for (size_t i = 0; i < s.size(); i++) { - ng.clear(); - for (unsigned j = i; j < min(i+N, s.size()); j++) { - ng.push_back(s[j]); - ngrams[ng]++; - } - } - return ngrams; -} - -inline NgramCounts -make_ngram_counts(const vector& hyp, const vector& ref, const unsigned N) -{ - Ngrams hyp_ngrams = make_ngrams(hyp, N); - Ngrams ref_ngrams = make_ngrams(ref, N); - NgramCounts counts(N); - Ngrams::iterator it; - Ngrams::iterator ti; - for (it = hyp_ngrams.begin(); it != hyp_ngrams.end(); it++) { - ti = ref_ngrams.find(it->first); - if (ti != ref_ngrams.end()) { - counts.Add(it->second, ti->second, it->first.size() - 1); - } else { - counts.Add(it->second, 0, it->first.size() - 1); - } - } - return counts; -} - -struct BleuScorer : public LocalScorer -{ - score_t Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len); - score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); -}; - -struct StupidBleuScorer : public LocalScorer -{ - score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); -}; - -struct SmoothBleuScorer : public LocalScorer -{ - score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); -}; - -struct SumBleuScorer : public LocalScorer -{ - score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); -}; - -struct SumExpBleuScorer : public LocalScorer -{ - score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); -}; - -struct SumWhateverBleuScorer : public LocalScorer -{ - score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); -}; - -struct ApproxBleuScorer : public BleuScorer -{ - NgramCounts glob_onebest_counts_; - unsigned glob_hyp_len_, glob_ref_len_, glob_src_len_; - score_t discount_; - - ApproxBleuScorer(unsigned N, score_t d) : glob_onebest_counts_(NgramCounts(N)), discount_(d) - { - glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0; - } - - inline void Reset() { - glob_onebest_counts_.Zero(); - glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0.; - } - - score_t Score(vector& hyp, vector& ref, const unsigned rank, const unsigned src_len); -}; - -struct LinearBleuScorer : public BleuScorer -{ - unsigned onebest_len_; - NgramCounts onebest_counts_; - - LinearBleuScorer(unsigned N) : onebest_len_(1), onebest_counts_(N) - { - onebest_counts_.One(); - } - - score_t Score(vector& hyp, vector& ref, const unsigned rank, const unsigned /*src_len*/); - - inline void Reset() { - onebest_len_ = 1; - onebest_counts_.One(); - } -}; - - -} // namespace - -#endif - diff --git a/dtrain/test/example/README b/dtrain/test/example/README deleted file mode 100644 index 6937b11b..00000000 --- a/dtrain/test/example/README +++ /dev/null @@ -1,8 +0,0 @@ -Small example of input format for distributed training. -Call dtrain from cdec/dtrain/ with ./dtrain -c test/example/dtrain.ini . - -For this to work, undef 'DTRAIN_LOCAL' in dtrain.h -and recompile. - -Data is here: http://simianer.de/#dtrain - diff --git a/dtrain/test/example/cdec.ini b/dtrain/test/example/cdec.ini deleted file mode 100644 index d5955f0e..00000000 --- a/dtrain/test/example/cdec.ini +++ /dev/null @@ -1,25 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -scfg_max_span_limit=15 -intersection_strategy=cube_pruning -cubepruning_pop_limit=30 -feature_function=WordPenalty -feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz -# all currently working feature functions for translation: -# (with those features active that were used in the ACL paper) -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -feature_function=RuleIdentityFeatures -feature_function=RuleSourceBigramFeatures -feature_function=RuleTargetBigramFeatures -feature_function=RuleShape -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini deleted file mode 100644 index 72d50ca1..00000000 --- a/dtrain/test/example/dtrain.ini +++ /dev/null @@ -1,22 +0,0 @@ -input=test/example/nc-wmt11.1k.gz # use '-' for STDIN -output=- # a weights file (add .gz for gzip compression) or STDOUT '-' -select_weights=VOID # don't output weights -decoder_config=test/example/cdec.ini # config for cdec -# weights for these features will be printed on each iteration -print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough -tmp=/tmp -stop_after=10 # stop epoch after 10 inputs - -# interesting stuff -epochs=2 # run over input 2 times -k=100 # use 100best lists -N=4 # optimize (approx) BLEU4 -scorer=stupid_bleu # use 'stupid' BLEU+1 -learning_rate=1.0 # learning rate, don't care if gamma=0 (perceptron) -gamma=0 # use SVM reg -sample_from=kbest # use kbest lists (as opposed to forest) -filter=uniq # only unique entries in kbest (surface form) -pair_sampling=XYX -hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here -pair_threshold=0 # minimum distance in BLEU (this will still only use pairs with diff > 0) -loss_margin=0 diff --git a/dtrain/test/example/expected-output b/dtrain/test/example/expected-output deleted file mode 100644 index 05326763..00000000 --- a/dtrain/test/example/expected-output +++ /dev/null @@ -1,89 +0,0 @@ - cdec cfg 'test/example/cdec.ini' -Loading the LM will be faster if you build a binary file. -Reading test/example/nc-wmt11.en.srilm.gz -----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 -**************************************************************************************************** - Example feature: Shape_S00000_T00000 -Seeding random number sequence to 2912000813 - -dtrain -Parameters: - k 100 - N 4 - T 2 - scorer 'stupid_bleu' - sample from 'kbest' - filter 'uniq' - learning rate 1 - gamma 0 - loss margin 0 - pairs 'XYX' - hi lo 0.1 - pair threshold 0 - select weights 'VOID' - l1 reg 0 'none' - max pairs 4294967295 - cdec cfg 'test/example/cdec.ini' - input 'test/example/nc-wmt11.1k.gz' - output '-' - stop_after 10 -(a dot represents 10 inputs) -Iteration #1 of 2. - . 10 -Stopping after 10 input sentences. -WEIGHTS - Glue = -637 - WordPenalty = +1064 - LanguageModel = +1175.3 - LanguageModel_OOV = -1437 - PhraseModel_0 = +1935.6 - PhraseModel_1 = +2499.3 - PhraseModel_2 = +964.96 - PhraseModel_3 = +1410.8 - PhraseModel_4 = -5977.9 - PhraseModel_5 = +522 - PhraseModel_6 = +1089 - PassThrough = -1308 - --- - 1best avg score: 0.16963 (+0.16963) - 1best avg model score: 64485 (+64485) - avg # pairs: 1494.4 - avg # rank err: 702.6 - avg # margin viol: 0 - non0 feature count: 528 - avg list sz: 85.7 - avg f count: 102.75 -(time 0.083 min, 0.5 s/S) - -Iteration #2 of 2. - . 10 -WEIGHTS - Glue = -1196 - WordPenalty = +809.52 - LanguageModel = +3112.1 - LanguageModel_OOV = -1464 - PhraseModel_0 = +3895.5 - PhraseModel_1 = +4683.4 - PhraseModel_2 = +1092.8 - PhraseModel_3 = +1079.6 - PhraseModel_4 = -6827.7 - PhraseModel_5 = -888 - PhraseModel_6 = +142 - PassThrough = -1335 - --- - 1best avg score: 0.277 (+0.10736) - 1best avg model score: -3110.5 (-67595) - avg # pairs: 1144.2 - avg # rank err: 529.1 - avg # margin viol: 0 - non0 feature count: 859 - avg list sz: 74.9 - avg f count: 112.84 -(time 0.067 min, 0.4 s/S) - -Writing weights file to '-' ... -done - ---- -Best iteration: 2 [SCORE 'stupid_bleu'=0.277]. -This took 0.15 min. diff --git a/dtrain/test/parallelize/cdec.ini b/dtrain/test/parallelize/cdec.ini deleted file mode 100644 index 72e99dc5..00000000 --- a/dtrain/test/parallelize/cdec.ini +++ /dev/null @@ -1,22 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -intersection_strategy=cube_pruning -cubepruning_pop_limit=200 -scfg_max_span_limit=15 -feature_function=WordPenalty -feature_function=KLanguageModel /stor/dat/wmt12/en/news_only/m/wmt12.news.en.3.kenv5 -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -#feature_function=RuleIdentityFeatures -#feature_function=RuleNgramFeatures -#feature_function=RuleShape -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures diff --git a/dtrain/test/parallelize/dtrain.ini b/dtrain/test/parallelize/dtrain.ini deleted file mode 100644 index 03f9d240..00000000 --- a/dtrain/test/parallelize/dtrain.ini +++ /dev/null @@ -1,15 +0,0 @@ -k=100 -N=4 -learning_rate=0.0001 -gamma=0 -loss_margin=0 -epochs=1 -scorer=stupid_bleu -sample_from=kbest -filter=uniq -pair_sampling=XYX -hi_lo=0.1 -select_weights=last -print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough -tmp=/tmp -decoder_config=cdec.ini diff --git a/dtrain/test/parallelize/in b/dtrain/test/parallelize/in deleted file mode 100644 index a312809f..00000000 --- a/dtrain/test/parallelize/in +++ /dev/null @@ -1,10 +0,0 @@ -barack obama erhält als vierter us @-@ präsident den frieden nobelpreis -der amerikanische präsident barack obama kommt für 26 stunden nach oslo , norwegen , um hier als vierter us @-@ präsident in der geschichte den frieden nobelpreis entgegen zunehmen . -darüber hinaus erhält er das diplom sowie die medaille und einen scheck über 1,4 mio. dollar für seine außer gewöhnlichen bestrebungen um die intensivierung der welt diplomatie und zusammen arbeit unter den völkern . -der chef des weißen hauses kommt morgen zusammen mit seiner frau michelle in der nordwegischen metropole an und wird die ganze zeit beschäftigt sein . -zunächst stattet er dem nobel @-@ institut einen besuch ab , wo er überhaupt zum ersten mal mit den fünf ausschuss mitglieder zusammen trifft , die ihn im oktober aus 172 leuten und 33 organisationen gewählt haben . -das präsidenten paar hat danach ein treffen mit dem norwegischen könig harald v. und königin sonja eingeplant . -nachmittags erreicht dann der besuch seinen höhepunkt mit der zeremonie , bei der obama den prestige preis übernimmt . -diesen erhält er als der vierte us @-@ präsident , aber erst als der dritte , der den preis direkt im amt entgegen nimmt . -das weiße haus avisierte schon , dass obama bei der übernahme des preises über den afghanistan krieg sprechen wird . -der präsident will diesem thema nicht ausweichen , weil er weiß , dass er den preis als ein präsident übernimmt , der zur zeit krieg in zwei ländern führt . diff --git a/dtrain/test/parallelize/refs b/dtrain/test/parallelize/refs deleted file mode 100644 index 4d3128cb..00000000 --- a/dtrain/test/parallelize/refs +++ /dev/null @@ -1,10 +0,0 @@ -barack obama becomes the fourth american president to receive the nobel peace prize -the american president barack obama will fly into oslo , norway for 26 hours to receive the nobel peace prize , the fourth american president in history to do so . -he will receive a diploma , medal and cheque for 1.4 million dollars for his exceptional efforts to improve global diplomacy and encourage international cooperation , amongst other things . -the head of the white house will be flying into the norwegian city in the morning with his wife michelle and will have a busy schedule . -first , he will visit the nobel institute , where he will have his first meeting with the five committee members who selected him from 172 people and 33 organisations . -the presidential couple then has a meeting scheduled with king harald v and queen sonja of norway . -then , in the afternoon , the visit will culminate in a grand ceremony , at which obama will receive the prestigious award . -he will be the fourth american president to be awarded the prize , and only the third to have received it while actually in office . -the white house has stated that , when he accepts the prize , obama will speak about the war in afghanistan . -the president does not want to skirt around this topic , as he realises that he is accepting the prize as a president whose country is currently at war in two countries . diff --git a/dtrain/test/toy/cdec.ini b/dtrain/test/toy/cdec.ini deleted file mode 100644 index 98b02d44..00000000 --- a/dtrain/test/toy/cdec.ini +++ /dev/null @@ -1,2 +0,0 @@ -formalism=scfg -add_pass_through_rules=true diff --git a/dtrain/test/toy/dtrain.ini b/dtrain/test/toy/dtrain.ini deleted file mode 100644 index a091732f..00000000 --- a/dtrain/test/toy/dtrain.ini +++ /dev/null @@ -1,12 +0,0 @@ -decoder_config=test/toy/cdec.ini -input=test/toy/input -output=- -print_weights=logp shell_rule house_rule small_rule little_rule PassThrough -k=4 -N=4 -epochs=2 -scorer=bleu -sample_from=kbest -filter=uniq -pair_sampling=all -learning_rate=1 diff --git a/dtrain/test/toy/input b/dtrain/test/toy/input deleted file mode 100644 index 4d10a9ea..00000000 --- a/dtrain/test/toy/input +++ /dev/null @@ -1,2 +0,0 @@ -0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0 -1 ich fand ein kleines haus i found a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0 diff --git a/minrisk/Makefile.am b/minrisk/Makefile.am deleted file mode 100644 index a24f047c..00000000 --- a/minrisk/Makefile.am +++ /dev/null @@ -1,6 +0,0 @@ -bin_PROGRAMS = minrisk_optimize - -minrisk_optimize_SOURCES = minrisk_optimize.cc -minrisk_optimize_LDADD = $(top_srcdir)/training/libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/training/liblbfgs/liblbfgs.a -lz - -AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training diff --git a/minrisk/minrisk.pl b/minrisk/minrisk.pl deleted file mode 100755 index d05b9595..00000000 --- a/minrisk/minrisk.pl +++ /dev/null @@ -1,540 +0,0 @@ -#!/usr/bin/env perl -use strict; -my @ORIG_ARGV=@ARGV; -use Cwd qw(getcwd); -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; } - -# Skip local config (used for distributing jobs) if we're running in local-only mode -use LocalConfig; -use Getopt::Long; -use IPC::Open2; -use POSIX ":sys_wait_h"; -my $QSUB_CMD = qsub_args(mert_memory()); -my $default_jobs = env_default_jobs(); - -my $VEST_DIR="$SCRIPT_DIR/../dpmert"; -require "$VEST_DIR/libcall.pl"; - -# Default settings -my $srcFile; -my $refFiles; -my $bin_dir = $SCRIPT_DIR; -die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; -my $FAST_SCORE="$bin_dir/../mteval/fast_score"; -die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; -my $MAPINPUT = "$bin_dir/minrisk_generate_input.pl"; -my $MAPPER = "$bin_dir/minrisk_optimize"; -my $parallelize = "$VEST_DIR/parallelize.pl"; -my $libcall = "$VEST_DIR/libcall.pl"; -my $sentserver = "$VEST_DIR/sentserver"; -my $sentclient = "$VEST_DIR/sentclient"; -my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm"; - -my $SCORER = $FAST_SCORE; -die "Can't find $MAPPER" unless -x $MAPPER; -my $cdec = "$bin_dir/../decoder/cdec"; -die "Can't find decoder in $cdec" unless -x $cdec; -die "Can't find $parallelize" unless -x $parallelize; -die "Can't find $libcall" unless -e $libcall; -my $decoder = $cdec; -my $lines_per_mapper = 30; -my $iteration = 1; -my $best_weights; -my $psi = 1; -my $default_max_iter = 30; -my $max_iterations = $default_max_iter; -my $jobs = $default_jobs; # number of decode nodes -my $pmem = "4g"; -my $disable_clean = 0; -my %seen_weights; -my $help = 0; -my $epsilon = 0.0001; -my $dryrun = 0; -my $last_score = -10000000; -my $metric = "ibm_bleu"; -my $dir; -my $iniFile; -my $weights; -my $use_make = 1; # use make to parallelize -my $useqsub = 0; -my $initial_weights; -my $pass_suffix = ''; -my $cpbin=1; - -# regularization strength -my $tune_regularizer = 0; -my $reg = 500; -my $reg_previous = 5000; -my $dont_accum = 0; - -# Process command-line options -Getopt::Long::Configure("no_auto_abbrev"); -if (GetOptions( - "jobs=i" => \$jobs, - "dont-clean" => \$disable_clean, - "dont-accumulate" => \$dont_accum, - "pass-suffix=s" => \$pass_suffix, - "qsub" => \$useqsub, - "dry-run" => \$dryrun, - "epsilon=s" => \$epsilon, - "help" => \$help, - "weights=s" => \$initial_weights, - "reg=f" => \$reg, - "use-make=i" => \$use_make, - "max-iterations=i" => \$max_iterations, - "pmem=s" => \$pmem, - "cpbin!" => \$cpbin, - "ref-files=s" => \$refFiles, - "metric=s" => \$metric, - "source-file=s" => \$srcFile, - "workdir=s" => \$dir, -) == 0 || @ARGV!=1 || $help) { - print_help(); - exit; -} - -die "--tune-regularizer is no longer supported with --reg-previous and --reg. Please tune manually.\n" if $tune_regularizer; - -if ($useqsub) { - $use_make = 0; - die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); -} - -my @missing_args = (); -if (!defined $srcFile) { push @missing_args, "--source-file"; } -if (!defined $refFiles) { push @missing_args, "--ref-files"; } -if (!defined $initial_weights) { push @missing_args, "--weights"; } -die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args); - -if ($metric =~ /^(combi|ter)$/i) { - $lines_per_mapper = 5; -} - -($iniFile) = @ARGV; - - -sub write_config; -sub enseg; -sub print_help; - -my $nodelist; -my $host =check_output("hostname"); chomp $host; -my $bleu; -my $interval_count = 0; -my $logfile; -my $projected_score; - -# used in sorting scores -my $DIR_FLAG = '-r'; -if ($metric =~ /^ter$|^aer$/i) { - $DIR_FLAG = ''; -} - -my $refs_comma_sep = get_comma_sep_refs('r',$refFiles); - -unless ($dir){ - $dir = "minrisk"; -} -unless ($dir =~ /^\//){ # convert relative path to absolute path - my $basedir = check_output("pwd"); - chomp $basedir; - $dir = "$basedir/$dir"; -} - - -# Initializations and helper functions -srand; - -my @childpids = (); -my @cleanupcmds = (); - -sub cleanup { - print STDERR "Cleanup...\n"; - for my $pid (@childpids){ unchecked_call("kill $pid"); } - for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); } - exit 1; -}; -# Always call cleanup, no matter how we exit -*CORE::GLOBAL::exit = - sub{ cleanup(); }; -$SIG{INT} = "cleanup"; -$SIG{TERM} = "cleanup"; -$SIG{HUP} = "cleanup"; - -my $decoderBase = check_output("basename $decoder"); chomp $decoderBase; -my $newIniFile = "$dir/$decoderBase.ini"; -my $inputFileName = "$dir/input"; -my $user = $ENV{"USER"}; -# process ini file --e $iniFile || die "Error: could not open $iniFile for reading\n"; -open(INI, $iniFile); - -use File::Basename qw(basename); -#pass bindir, refs to vars holding bin -sub modbin { - local $_; - my $bindir=shift; - check_call("mkdir -p $bindir"); - -d $bindir || die "couldn't make bindir $bindir"; - for (@_) { - my $src=$$_; - $$_="$bindir/".basename($src); - check_call("cp -p $src $$_"); - } -} -sub dirsize { - opendir ISEMPTY,$_[0]; - return scalar(readdir(ISEMPTY))-1; -} -my @allweights; -if ($dryrun){ - write_config(*STDERR); - exit 0; -} else { - if (-e $dir && dirsize($dir)>1 && -e "$dir/hgs" ){ # allow preexisting logfile, binaries, but not dist-pro.pl outputs - die "ERROR: working dir $dir already exists\n\n"; - } else { - -e $dir || mkdir $dir; - mkdir "$dir/hgs"; - modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$parallelize,\$sentserver,\$sentclient,\$libcall) if $cpbin; - mkdir "$dir/scripts"; - my $cmdfile="$dir/rerun-pro.sh"; - open CMD,'>',$cmdfile; - print CMD "cd ",&getcwd,"\n"; -# print CMD &escaped_cmdline,"\n"; #buggy - last arg is quoted. - my $cline=&cmdline."\n"; - print CMD $cline; - close CMD; - print STDERR $cline; - chmod(0755,$cmdfile); - check_call("cp $initial_weights $dir/weights.0"); - die "Can't find weights.0" unless (-e "$dir/weights.0"); - } - write_config(*STDERR); -} - - -# Generate initial files and values -check_call("cp $iniFile $newIniFile"); -$iniFile = $newIniFile; - -my $newsrc = "$dir/dev.input"; -enseg($srcFile, $newsrc); -$srcFile = $newsrc; -my $devSize = 0; -open F, "<$srcFile" or die "Can't read $srcFile: $!"; -while() { $devSize++; } -close F; - -unless($best_weights){ $best_weights = $weights; } -unless($projected_score){ $projected_score = 0.0; } -$seen_weights{$weights} = 1; -my $kbest = "$dir/kbest"; -if ($dont_accum) { - $kbest = ''; -} else { - check_call("mkdir -p $kbest"); - $kbest = "--kbest_repository $kbest"; -} - -my $random_seed = int(time / 1000); -my $lastWeightsFile; -my $lastPScore = 0; -# main optimization loop -while (1){ - print STDERR "\n\nITERATION $iteration\n==========\n"; - - if ($iteration > $max_iterations){ - print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n"; - last; - } - # iteration-specific files - my $runFile="$dir/run.raw.$iteration"; - my $onebestFile="$dir/1best.$iteration"; - my $logdir="$dir/logs.$iteration"; - my $decoderLog="$logdir/decoder.sentserver.log.$iteration"; - my $scorerLog="$logdir/scorer.log.$iteration"; - check_call("mkdir -p $logdir"); - - - #decode - print STDERR "RUNNING DECODER AT "; - print STDERR unchecked_output("date"); - my $im1 = $iteration - 1; - my $weightsFile="$dir/weights.$im1"; - push @allweights, "-w $dir/weights.$im1"; - `rm -f $dir/hgs/*.gz`; - my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; - my $pcmd; - if ($use_make) { - $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --"; - } else { - $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --"; - } - my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; - print STDERR "COMMAND:\n$cmd\n"; - check_bash_call($cmd); - my $num_hgs; - my $num_topbest; - my $retries = 0; - while($retries < 5) { - $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); - $num_topbest = check_output("wc -l < $runFile"); - print STDERR "NUMBER OF HGs: $num_hgs\n"; - print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n"; - if($devSize == $num_hgs && $devSize == $num_topbest) { - last; - } else { - print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n"; - sleep(3); - } - $retries++; - } - die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); - my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric"); - chomp $dec_score; - print STDERR "DECODER SCORE: $dec_score\n"; - - # save space - check_call("gzip -f $runFile"); - check_call("gzip -f $decoderLog"); - - # run optimizer - print STDERR "RUNNING OPTIMIZER AT "; - print STDERR unchecked_output("date"); - print STDERR " - GENERATE TRAINING EXEMPLARS\n"; - my $mergeLog="$logdir/prune-merge.log.$iteration"; - - my $score = 0; - my $icc = 0; - my $inweights="$dir/weights.$im1"; - my $outweights="$dir/weights.$iteration"; - $cmd="$MAPINPUT $dir/hgs > $dir/agenda.$im1"; - print STDERR "COMMAND:\n$cmd\n"; - check_call($cmd); - $cmd="$MAPPER $refs_comma_sep -m $metric -i $dir/agenda.$im1 $kbest -w $inweights > $outweights"; - check_call($cmd); - $lastWeightsFile = $outweights; - $iteration++; - `rm hgs/*.gz`; - print STDERR "\n==========\n"; -} - -print STDERR "\nFINAL WEIGHTS: $lastWeightsFile\n(Use -w with the decoder)\n\n"; - -print STDOUT "$lastWeightsFile\n"; - -exit 0; - -sub get_lines { - my $fn = shift @_; - open FL, "<$fn" or die "Couldn't read $fn: $!"; - my $lc = 0; - while() { $lc++; } - return $lc; -} - -sub get_comma_sep_refs { - my ($r,$p) = @_; - my $o = check_output("echo $p"); - chomp $o; - my @files = split /\s+/, $o; - return "-$r " . join(" -$r ", @files); -} - -sub read_weights_file { - my ($file) = @_; - open F, "<$file" or die "Couldn't read $file: $!"; - my @r = (); - my $pm = -1; - while() { - next if /^#/; - next if /^\s*$/; - chomp; - if (/^(.+)\s+(.+)$/) { - my $m = $1; - my $w = $2; - die "Weights out of order: $m <= $pm" unless $m > $pm; - push @r, $w; - } else { - warn "Unexpected feature name in weight file: $_"; - } - } - close F; - return join ' ', @r; -} - -# subs -sub write_config { - my $fh = shift; - my $cleanup = "yes"; - if ($disable_clean) {$cleanup = "no";} - - print $fh "\n"; - print $fh "DECODER: $decoder\n"; - print $fh "INI FILE: $iniFile\n"; - print $fh "WORKING DIR: $dir\n"; - print $fh "SOURCE (DEV): $srcFile\n"; - print $fh "REFS (DEV): $refFiles\n"; - print $fh "EVAL METRIC: $metric\n"; - print $fh "MAX ITERATIONS: $max_iterations\n"; - print $fh "JOBS: $jobs\n"; - print $fh "HEAD NODE: $host\n"; - print $fh "PMEM (DECODING): $pmem\n"; - print $fh "CLEANUP: $cleanup\n"; -} - -sub update_weights_file { - my ($neww, $rfn, $rpts) = @_; - my @feats = @$rfn; - my @pts = @$rpts; - my $num_feats = scalar @feats; - my $num_pts = scalar @pts; - die "$num_feats (num_feats) != $num_pts (num_pts)" unless $num_feats == $num_pts; - open G, ">$neww" or die; - for (my $i = 0; $i < $num_feats; $i++) { - my $f = $feats[$i]; - my $lambda = $pts[$i]; - print G "$f $lambda\n"; - } - close G; -} - -sub enseg { - my $src = shift; - my $newsrc = shift; - open(SRC, $src); - open(NEWSRC, ">$newsrc"); - my $i=0; - while (my $line=){ - chomp $line; - if ($line =~ /^\s* tags, you must include a zero-based id attribute"; - } - } else { - print NEWSRC "$line\n"; - } - $i++; - } - close SRC; - close NEWSRC; - die "Empty dev set!" if ($i == 0); -} - -sub print_help { - - my $executable = check_output("basename $0"); chomp $executable; - print << "Help"; - -Usage: $executable [options] - - $executable [options] - Runs a complete PRO optimization using the ini file specified. - -Required: - - --ref-files - Dev set ref files. This option takes only a single string argument. - To use multiple files (including file globbing), this argument should - be quoted. - - --source-file - Dev set source file. - - --weights - Initial weights file (use empty file to start from 0) - -General options: - - --help - Print this message and exit. - - --dont-accumulate - Don't accumulate k-best lists from multiple iterations. - - --max-iterations - Maximum number of iterations to run. If not specified, defaults - to $default_max_iter. - - --metric - Metric to optimize. - Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi - - --pass-suffix - If the decoder is doing multi-pass decoding, the pass suffix "2", - "3", etc., is used to control what iteration of weights is set. - - --workdir - Directory for intermediate and output files. If not specified, the - name is derived from the ini filename. Assuming that the ini - filename begins with the decoder name and ends with ini, the default - name of the working directory is inferred from the middle part of - the filename. E.g. an ini file named decoder.foo.ini would have - a default working directory name foo. - -Regularization options: - - --reg - l2 regularization strength [default=500]. The greater this value, - the closer to zero the weights will be. - -Job control options: - - --jobs - Number of decoder processes to run in parallel. [default=$default_jobs] - - --qsub - Use qsub to run jobs in parallel (qsub must be configured in - environment/LocalEnvironment.pm) - - --pmem - Amount of physical memory requested for parallel decoding jobs - (used with qsub requests only) - -Help -} - -sub convert { - my ($str) = @_; - my @ps = split /;/, $str; - my %dict = (); - for my $p (@ps) { - my ($k, $v) = split /=/, $p; - $dict{$k} = $v; - } - return %dict; -} - - -sub cmdline { - return join ' ',($0,@ORIG_ARGV); -} - -#buggy: last arg gets quoted sometimes? -my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; -my $shell_escape_in_quote=qr{[\\"\$`!]}; - -sub escape_shell { - my ($arg)=@_; - return undef unless defined $arg; - if ($arg =~ /$is_shell_special/) { - $arg =~ s/($shell_escape_in_quote)/\\$1/g; - return "\"$arg\""; - } - return $arg; -} - -sub escaped_shell_args { - return map {local $_=$_;chomp;escape_shell($_)} @_; -} - -sub escaped_shell_args_str { - return join ' ',&escaped_shell_args(@_); -} - -sub escaped_cmdline { - return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); -} diff --git a/minrisk/minrisk_generate_input.pl b/minrisk/minrisk_generate_input.pl deleted file mode 100755 index b30fc4fd..00000000 --- a/minrisk/minrisk_generate_input.pl +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -die "Usage: $0 HG_DIR\n" unless scalar @ARGV == 1; -my $d = shift @ARGV; -die "Can't find directory $d" unless -d $d; - -opendir(DIR, $d) or die "Can't read $d: $!"; -my @hgs = grep { /\.gz$/ } readdir(DIR); -closedir DIR; - -for my $hg (@hgs) { - my $file = $hg; - my $id = $hg; - $id =~ s/(\.json)?\.gz//; - print "$d/$file $id\n"; -} - diff --git a/minrisk/minrisk_optimize.cc b/minrisk/minrisk_optimize.cc deleted file mode 100644 index da8b5260..00000000 --- a/minrisk/minrisk_optimize.cc +++ /dev/null @@ -1,197 +0,0 @@ -#include -#include -#include -#include - -#include -#include - -#include "liblbfgs/lbfgs++.h" -#include "filelib.h" -#include "stringlib.h" -#include "weights.h" -#include "hg_io.h" -#include "kbest.h" -#include "viterbi.h" -#include "ns.h" -#include "ns_docscorer.h" -#include "candidate_set.h" -#include "risk.h" -#include "entropy.h" - -using namespace std; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("reference,r",po::value >(), "[REQD] Reference translation (tokenized text)") - ("weights,w",po::value(), "[REQD] Weights files from current iterations") - ("input,i",po::value()->default_value("-"), "Input file to map (- is STDIN)") - ("evaluation_metric,m",po::value()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)") - ("temperature,T",po::value()->default_value(0.0), "Temperature parameter for objective (>0 increases the entropy)") - ("l1_strength,C",po::value()->default_value(0.0), "L1 regularization strength") - ("memory_buffers,M",po::value()->default_value(20), "Memory buffers used in LBFGS") - ("kbest_repository,R",po::value(), "Accumulate k-best lists from previous iterations (parameter is path to repository)") - ("kbest_size,k",po::value()->default_value(500u), "Top k-hypotheses to extract") - ("help,h", "Help"); - po::options_description dcmdline_options; - dcmdline_options.add(opts); - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - bool flag = false; - if (!conf->count("reference")) { - cerr << "Please specify one or more references using -r \n"; - flag = true; - } - if (!conf->count("weights")) { - cerr << "Please specify weights using -w \n"; - flag = true; - } - if (flag || conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -EvaluationMetric* metric = NULL; - -struct RiskObjective { - explicit RiskObjective(const vector& tr, const double temp) : training(tr), T(temp) {} - double operator()(const vector& x, double* g) const { - fill(g, g + x.size(), 0.0); - double obj = 0; - double h = 0; - for (unsigned i = 0; i < training.size(); ++i) { - training::CandidateSetRisk risk(training[i], *metric); - training::CandidateSetEntropy entropy(training[i]); - SparseVector tg, hg; - double r = risk(x, &tg); - double hh = entropy(x, &hg); - h += hh; - obj += r; - for (SparseVector::iterator it = tg.begin(); it != tg.end(); ++it) - g[it->first] += it->second; - if (T) { - for (SparseVector::iterator it = hg.begin(); it != hg.end(); ++it) - g[it->first] += T * it->second; - } - } - cerr << (1-(obj / training.size())) << " H=" << h << endl; - return obj - T * h; - } - const vector& training; - const double T; // temperature for entropy regularization -}; - -double LearnParameters(const vector& training, - const double temp, // > 0 increases the entropy, < 0 decreases the entropy - const double C1, - const unsigned memory_buffers, - vector* px) { - RiskObjective obj(training, temp); - LBFGS lbfgs(px, obj, memory_buffers, C1); - lbfgs.MinimizeFunction(); - return 0; -} - -#if 0 -struct FooLoss { - double operator()(const vector& x, double* g) const { - fill(g, g + x.size(), 0.0); - training::CandidateSet cs; - training::CandidateSetEntropy cse(cs); - cs.cs.resize(3); - cs.cs[0].fmap.set_value(FD::Convert("F1"), -1.0); - cs.cs[1].fmap.set_value(FD::Convert("F2"), 1.0); - cs.cs[2].fmap.set_value(FD::Convert("F1"), 2.0); - cs.cs[2].fmap.set_value(FD::Convert("F2"), 0.5); - SparseVector xx; - double h = cse(x, &xx); - cerr << cse(x, &xx) << endl; cerr << "G: " << xx << endl; - for (SparseVector::iterator i = xx.begin(); i != xx.end(); ++i) - g[i->first] += i->second; - return -h; - } -}; -#endif - -int main(int argc, char** argv) { -#if 0 - training::CandidateSet cs; - training::CandidateSetEntropy cse(cs); - cs.cs.resize(3); - cs.cs[0].fmap.set_value(FD::Convert("F1"), -1.0); - cs.cs[1].fmap.set_value(FD::Convert("F2"), 1.0); - cs.cs[2].fmap.set_value(FD::Convert("F1"), 2.0); - cs.cs[2].fmap.set_value(FD::Convert("F2"), 0.5); - FooLoss foo; - vector ww(FD::NumFeats()); ww[FD::Convert("F1")] = 1.0; - LBFGS lbfgs(&ww, foo, 100, 0.0); - lbfgs.MinimizeFunction(); - return 1; -#endif - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - const string evaluation_metric = conf["evaluation_metric"].as(); - - metric = EvaluationMetric::Instance(evaluation_metric); - DocumentScorer ds(metric, conf["reference"].as >()); - cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl; - - Hypergraph hg; - string last_file; - ReadFile in_read(conf["input"].as()); - string kbest_repo; - if (conf.count("kbest_repository")) { - kbest_repo = conf["kbest_repository"].as(); - MkDirP(kbest_repo); - } - istream &in=*in_read.stream(); - const unsigned kbest_size = conf["kbest_size"].as(); - vector weights; - const string weightsf = conf["weights"].as(); - Weights::InitFromFile(weightsf, &weights); - double t = 0; - for (unsigned i = 0; i < weights.size(); ++i) - t += weights[i] * weights[i]; - if (t > 0) { - for (unsigned i = 0; i < weights.size(); ++i) - weights[i] /= sqrt(t); - } - string line, file; - vector kis; - cerr << "Loading hypergraphs...\n"; - while(getline(in, line)) { - istringstream is(line); - int sent_id; - kis.resize(kis.size() + 1); - training::CandidateSet& curkbest = kis.back(); - string kbest_file; - if (kbest_repo.size()) { - ostringstream os; - os << kbest_repo << "/kbest." << sent_id << ".txt.gz"; - kbest_file = os.str(); - if (FileExists(kbest_file)) - curkbest.ReadFromFile(kbest_file); - } - is >> file >> sent_id; - ReadFile rf(file); - if (kis.size() % 5 == 0) { cerr << '.'; } - if (kis.size() % 200 == 0) { cerr << " [" << kis.size() << "]\n"; } - HypergraphIO::ReadFromJSON(rf.stream(), &hg); - hg.Reweight(weights); - curkbest.AddKBestCandidates(hg, kbest_size, ds[sent_id]); - if (kbest_file.size()) - curkbest.WriteToFile(kbest_file); - } - cerr << "\nHypergraphs loaded.\n"; - weights.resize(FD::NumFeats()); - - double c1 = conf["l1_strength"].as(); - double temp = conf["temperature"].as(); - unsigned m = conf["memory_buffers"].as(); - LearnParameters(kis, temp, c1, m, &weights); - Weights::WriteToFile("-", weights); - return 0; -} - diff --git a/mira/Makefile.am b/mira/Makefile.am deleted file mode 100644 index 3f8f17cd..00000000 --- a/mira/Makefile.am +++ /dev/null @@ -1,6 +0,0 @@ -bin_PROGRAMS = kbest_mira - -kbest_mira_SOURCES = kbest_mira.cc -kbest_mira_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz - -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/mira/kbest_mira.cc b/mira/kbest_mira.cc deleted file mode 100644 index 8b7993dd..00000000 --- a/mira/kbest_mira.cc +++ /dev/null @@ -1,309 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "hg_sampler.h" -#include "sentence_metadata.h" -#include "scorer.h" -#include "verbose.h" -#include "viterbi.h" -#include "hg.h" -#include "prob.h" -#include "kbest.h" -#include "ff_register.h" -#include "decoder.h" -#include "filelib.h" -#include "fdict.h" -#include "weights.h" -#include "sparse_vector.h" -#include "sampler.h" - -using namespace std; -namespace po = boost::program_options; - -bool invert_score; -std::tr1::shared_ptr rng; - -void RandomPermutation(int len, vector* p_ids) { - vector& ids = *p_ids; - ids.resize(len); - for (int i = 0; i < len; ++i) ids[i] = i; - for (int i = len; i > 0; --i) { - int j = rng->next() * i; - if (j == i) i--; - swap(ids[i-1], ids[j]); - } -} - -bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input_weights,w",po::value(),"Input feature weights file") - ("source,i",po::value(),"Source file for development set") - ("passes,p", po::value()->default_value(15), "Number of passes through the training data") - ("reference,r",po::value >(), "[REQD] Reference translation(s) (tokenized text file)") - ("mt_metric,m",po::value()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)") - ("max_step_size,C", po::value()->default_value(0.01), "regularization strength (C)") - ("mt_metric_scale,s", po::value()->default_value(1.0), "Amount to scale MT loss function by") - ("k_best_size,k", po::value()->default_value(250), "Size of hypothesis list to search for oracles") - ("sample_forest,f", "Instead of a k-best list, sample k hypotheses from the decoder's forest") - ("sample_forest_unit_weight_vector,x", "Before sampling (must use -f option), rescale the weight vector used so it has unit length; this may improve the quality of the samples") - ("random_seed,S", po::value(), "Random seed (if not specified, /dev/random will be used)") - ("decoder_config,c",po::value(),"Decoder configuration file"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || !conf->count("input_weights") || !conf->count("source") || !conf->count("decoder_config") || !conf->count("reference")) { - cerr << dcmdline_options << endl; - return false; - } - return true; -} - -static const double kMINUS_EPSILON = -1e-6; - -struct HypothesisInfo { - SparseVector features; - double mt_metric; -}; - -struct GoodBadOracle { - std::tr1::shared_ptr good; - std::tr1::shared_ptr bad; -}; - -struct TrainingObserver : public DecoderObserver { - TrainingObserver(const int k, const DocScorer& d, bool sf, vector* o) : ds(d), oracles(*o), kbest_size(k), sample_forest(sf) {} - const DocScorer& ds; - vector& oracles; - std::tr1::shared_ptr cur_best; - const int kbest_size; - const bool sample_forest; - - const HypothesisInfo& GetCurrentBestHypothesis() const { - return *cur_best; - } - - virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { - UpdateOracles(smeta.GetSentenceID(), *hg); - } - - std::tr1::shared_ptr MakeHypothesisInfo(const SparseVector& feats, const double score) { - std::tr1::shared_ptr h(new HypothesisInfo); - h->features = feats; - h->mt_metric = score; - return h; - } - - void UpdateOracles(int sent_id, const Hypergraph& forest) { - std::tr1::shared_ptr& cur_good = oracles[sent_id].good; - std::tr1::shared_ptr& cur_bad = oracles[sent_id].bad; - cur_bad.reset(); // TODO get rid of?? - - if (sample_forest) { - vector cur_prediction; - ViterbiESentence(forest, &cur_prediction); - float sentscore = ds[sent_id]->ScoreCandidate(cur_prediction)->ComputeScore(); - cur_best = MakeHypothesisInfo(ViterbiFeatures(forest), sentscore); - - vector samples; - HypergraphSampler::sample_hypotheses(forest, kbest_size, &*rng, &samples); - for (unsigned i = 0; i < samples.size(); ++i) { - sentscore = ds[sent_id]->ScoreCandidate(samples[i].words)->ComputeScore(); - if (invert_score) sentscore *= -1.0; - if (!cur_good || sentscore > cur_good->mt_metric) - cur_good = MakeHypothesisInfo(samples[i].fmap, sentscore); - if (!cur_bad || sentscore < cur_bad->mt_metric) - cur_bad = MakeHypothesisInfo(samples[i].fmap, sentscore); - } - } else { - KBest::KBestDerivations, ESentenceTraversal> kbest(forest, kbest_size); - for (int i = 0; i < kbest_size; ++i) { - const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = - kbest.LazyKthBest(forest.nodes_.size() - 1, i); - if (!d) break; - float sentscore = ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore(); - if (invert_score) sentscore *= -1.0; - // cerr << TD::GetString(d->yield) << " ||| " << d->score << " ||| " << sentscore << endl; - if (i == 0) - cur_best = MakeHypothesisInfo(d->feature_values, sentscore); - if (!cur_good || sentscore > cur_good->mt_metric) - cur_good = MakeHypothesisInfo(d->feature_values, sentscore); - if (!cur_bad || sentscore < cur_bad->mt_metric) - cur_bad = MakeHypothesisInfo(d->feature_values, sentscore); - } - //cerr << "GOOD: " << cur_good->mt_metric << endl; - //cerr << " CUR: " << cur_best->mt_metric << endl; - //cerr << " BAD: " << cur_bad->mt_metric << endl; - } - } -}; - -void ReadTrainingCorpus(const string& fname, vector* c) { - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - while(in) { - getline(in, line); - if (!in) break; - c->push_back(line); - } -} - -bool ApproxEqual(double a, double b) { - if (a == b) return true; - return (fabs(a-b)/fabs(b)) < 0.000001; -} - -int main(int argc, char** argv) { - register_feature_functions(); - SetSilent(true); // turn off verbose decoder output - - po::variables_map conf; - if (!InitCommandLine(argc, argv, &conf)) return 1; - - if (conf.count("random_seed")) - rng.reset(new MT19937(conf["random_seed"].as())); - else - rng.reset(new MT19937); - const bool sample_forest = conf.count("sample_forest") > 0; - const bool sample_forest_unit_weight_vector = conf.count("sample_forest_unit_weight_vector") > 0; - if (sample_forest_unit_weight_vector && !sample_forest) { - cerr << "Cannot --sample_forest_unit_weight_vector without --sample_forest" << endl; - return 1; - } - vector corpus; - ReadTrainingCorpus(conf["source"].as(), &corpus); - const string metric_name = conf["mt_metric"].as(); - ScoreType type = ScoreTypeFromString(metric_name); - if (type == TER) { - invert_score = true; - } else { - invert_score = false; - } - DocScorer ds(type, conf["reference"].as >(), ""); - cerr << "Loaded " << ds.size() << " references for scoring with " << metric_name << endl; - if (ds.size() != corpus.size()) { - cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n"; - return 1; - } - - ReadFile ini_rf(conf["decoder_config"].as()); - Decoder decoder(ini_rf.stream()); - - // load initial weights - vector& dense_weights = decoder.CurrentWeightVector(); - SparseVector lambdas; - Weights::InitFromFile(conf["input_weights"].as(), &dense_weights); - Weights::InitSparseVector(dense_weights, &lambdas); - - const double max_step_size = conf["max_step_size"].as(); - const double mt_metric_scale = conf["mt_metric_scale"].as(); - - assert(corpus.size() > 0); - vector oracles(corpus.size()); - - TrainingObserver observer(conf["k_best_size"].as(), ds, sample_forest, &oracles); - int cur_sent = 0; - int lcount = 0; - int normalizer = 0; - double tot_loss = 0; - int dots = 0; - int cur_pass = 0; - SparseVector tot; - tot += lambdas; // initial weights - normalizer++; // count for initial weights - int max_iteration = conf["passes"].as() * corpus.size(); - string msg = "# MIRA tuned weights"; - string msga = "# MIRA tuned weights AVERAGED"; - vector order; - RandomPermutation(corpus.size(), &order); - while (lcount <= max_iteration) { - lambdas.init_vector(&dense_weights); - if ((cur_sent * 40 / corpus.size()) > dots) { ++dots; cerr << '.'; } - if (corpus.size() == cur_sent) { - cerr << " [AVG METRIC LAST PASS=" << (tot_loss / corpus.size()) << "]\n"; - Weights::ShowLargestFeatures(dense_weights); - cur_sent = 0; - tot_loss = 0; - dots = 0; - ostringstream os; - os << "weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << ".gz"; - SparseVector x = tot; - x /= normalizer; - ostringstream sa; - sa << "weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "-avg.gz"; - x.init_vector(&dense_weights); - Weights::WriteToFile(os.str(), dense_weights, true, &msg); - ++cur_pass; - RandomPermutation(corpus.size(), &order); - } - if (cur_sent == 0) { - cerr << "PASS " << (lcount / corpus.size() + 1) << endl; - } - decoder.SetId(order[cur_sent]); - double sc = 1.0; - if (sample_forest_unit_weight_vector) { - sc = lambdas.l2norm(); - if (sc > 0) { - for (unsigned i = 0; i < dense_weights.size(); ++i) - dense_weights[i] /= sc; - } - } - decoder.Decode(corpus[order[cur_sent]], &observer); // update oracles - if (sc && sc != 1.0) { - for (unsigned i = 0; i < dense_weights.size(); ++i) - dense_weights[i] *= sc; - } - const HypothesisInfo& cur_hyp = observer.GetCurrentBestHypothesis(); - const HypothesisInfo& cur_good = *oracles[order[cur_sent]].good; - const HypothesisInfo& cur_bad = *oracles[order[cur_sent]].bad; - tot_loss += cur_hyp.mt_metric; - if (!ApproxEqual(cur_hyp.mt_metric, cur_good.mt_metric)) { - const double loss = cur_bad.features.dot(dense_weights) - cur_good.features.dot(dense_weights) + - mt_metric_scale * (cur_good.mt_metric - cur_bad.mt_metric); - //cerr << "LOSS: " << loss << endl; - if (loss > 0.0) { - SparseVector diff = cur_good.features; - diff -= cur_bad.features; - double step_size = loss / diff.l2norm_sq(); - //cerr << loss << " " << step_size << " " << diff << endl; - if (step_size > max_step_size) step_size = max_step_size; - lambdas += (cur_good.features * step_size); - lambdas -= (cur_bad.features * step_size); - //cerr << "L: " << lambdas << endl; - } - } - tot += lambdas; - ++normalizer; - ++lcount; - ++cur_sent; - } - cerr << endl; - Weights::WriteToFile("weights.mira-final.gz", dense_weights, true, &msg); - tot /= normalizer; - tot.init_vector(dense_weights); - msg = "# MIRA tuned weights (averaged vector)"; - Weights::WriteToFile("weights.mira-final-avg.gz", dense_weights, true, &msg); - cerr << "Optimization complete.\nAVERAGED WEIGHTS: weights.mira-final-avg.gz\n"; - return 0; -} - diff --git a/pro/Makefile.am b/pro/Makefile.am deleted file mode 100644 index 1e9d46b0..00000000 --- a/pro/Makefile.am +++ /dev/null @@ -1,11 +0,0 @@ -bin_PROGRAMS = \ - mr_pro_map \ - mr_pro_reduce - -mr_pro_map_SOURCES = mr_pro_map.cc -mr_pro_map_LDADD = $(top_srcdir)/training/libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz - -mr_pro_reduce_SOURCES = mr_pro_reduce.cc -mr_pro_reduce_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a -lz - -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training diff --git a/pro/README.shared-mem b/pro/README.shared-mem deleted file mode 100644 index 7728efc0..00000000 --- a/pro/README.shared-mem +++ /dev/null @@ -1,9 +0,0 @@ -If you want to run dist-vest.pl on a very large shared memory machine, do the -following: - - ./dist-vest.pl --use-make I --decode-nodes J --weights weights.init --source-file=dev.src --ref-files=dev.ref.* cdec.ini - -This will use I jobs for doing the line search and J jobs to run the decoder. Typically, since the -decoder must load grammars, language models, etc., J should be smaller than I, but this will depend -on the system you are running on and the complexity of the models used for decoding. - diff --git a/pro/mr_pro_generate_mapper_input.pl b/pro/mr_pro_generate_mapper_input.pl deleted file mode 100755 index b30fc4fd..00000000 --- a/pro/mr_pro_generate_mapper_input.pl +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -die "Usage: $0 HG_DIR\n" unless scalar @ARGV == 1; -my $d = shift @ARGV; -die "Can't find directory $d" unless -d $d; - -opendir(DIR, $d) or die "Can't read $d: $!"; -my @hgs = grep { /\.gz$/ } readdir(DIR); -closedir DIR; - -for my $hg (@hgs) { - my $file = $hg; - my $id = $hg; - $id =~ s/(\.json)?\.gz//; - print "$d/$file $id\n"; -} - diff --git a/pro/mr_pro_map.cc b/pro/mr_pro_map.cc deleted file mode 100644 index eef40b8a..00000000 --- a/pro/mr_pro_map.cc +++ /dev/null @@ -1,201 +0,0 @@ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "candidate_set.h" -#include "sampler.h" -#include "filelib.h" -#include "stringlib.h" -#include "weights.h" -#include "inside_outside.h" -#include "hg_io.h" -#include "ns.h" -#include "ns_docscorer.h" - -// This is Figure 4 (Algorithm Sampler) from Hopkins&May (2011) - -using namespace std; -namespace po = boost::program_options; - -boost::shared_ptr rng; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("reference,r",po::value >(), "[REQD] Reference translation (tokenized text)") - ("weights,w",po::value(), "[REQD] Weights files from current iterations") - ("kbest_repository,K",po::value()->default_value("./kbest"),"K-best list repository (directory)") - ("input,i",po::value()->default_value("-"), "Input file to map (- is STDIN)") - ("source,s",po::value()->default_value(""), "Source file (ignored, except for AER)") - ("evaluation_metric,m",po::value()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)") - ("kbest_size,k",po::value()->default_value(1500u), "Top k-hypotheses to extract") - ("candidate_pairs,G", po::value()->default_value(5000u), "Number of pairs to sample per hypothesis (Gamma)") - ("best_pairs,X", po::value()->default_value(50u), "Number of pairs, ranked by magnitude of objective delta, to retain (Xi)") - ("random_seed,S", po::value(), "Random seed (if not specified, /dev/random will be used)") - ("help,h", "Help"); - po::options_description dcmdline_options; - dcmdline_options.add(opts); - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - bool flag = false; - if (!conf->count("reference")) { - cerr << "Please specify one or more references using -r \n"; - flag = true; - } - if (!conf->count("weights")) { - cerr << "Please specify weights using -w \n"; - flag = true; - } - if (flag || conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -struct ThresholdAlpha { - explicit ThresholdAlpha(double t = 0.05) : threshold(t) {} - double operator()(double mag) const { - if (mag < threshold) return 0.0; else return 1.0; - } - const double threshold; -}; - -struct TrainingInstance { - TrainingInstance(const SparseVector& feats, bool positive, float diff) : x(feats), y(positive), gdiff(diff) {} - SparseVector x; -#undef DEBUGGING_PRO -#ifdef DEBUGGING_PRO - vector a; - vector b; -#endif - bool y; - float gdiff; -}; -#ifdef DEBUGGING_PRO -ostream& operator<<(ostream& os, const TrainingInstance& d) { - return os << d.gdiff << " y=" << d.y << "\tA:" << TD::GetString(d.a) << "\n\tB: " << TD::GetString(d.b) << "\n\tX: " << d.x; -} -#endif - -struct DiffOrder { - bool operator()(const TrainingInstance& a, const TrainingInstance& b) const { - return a.gdiff > b.gdiff; - } -}; - -void Sample(const unsigned gamma, - const unsigned xi, - const training::CandidateSet& J_i, - const EvaluationMetric* metric, - vector* pv) { - const bool invert_score = metric->IsErrorMetric(); - vector v1, v2; - float avg_diff = 0; - for (unsigned i = 0; i < gamma; ++i) { - const size_t a = rng->inclusive(0, J_i.size() - 1)(); - const size_t b = rng->inclusive(0, J_i.size() - 1)(); - if (a == b) continue; - float ga = metric->ComputeScore(J_i[a].eval_feats); - float gb = metric->ComputeScore(J_i[b].eval_feats); - bool positive = gb < ga; - if (invert_score) positive = !positive; - const float gdiff = fabs(ga - gb); - if (!gdiff) continue; - avg_diff += gdiff; - SparseVector xdiff = (J_i[a].fmap - J_i[b].fmap).erase_zeros(); - if (xdiff.empty()) { - cerr << "Empty diff:\n " << TD::GetString(J_i[a].ewords) << endl << "x=" << J_i[a].fmap << endl; - cerr << " " << TD::GetString(J_i[b].ewords) << endl << "x=" << J_i[b].fmap << endl; - continue; - } - v1.push_back(TrainingInstance(xdiff, positive, gdiff)); -#ifdef DEBUGGING_PRO - v1.back().a = J_i[a].hyp; - v1.back().b = J_i[b].hyp; - cerr << "N: " << v1.back() << endl; -#endif - } - avg_diff /= v1.size(); - - for (unsigned i = 0; i < v1.size(); ++i) { - double p = 1.0 / (1.0 + exp(-avg_diff - v1[i].gdiff)); - // cerr << "avg_diff=" << avg_diff << " gdiff=" << v1[i].gdiff << " p=" << p << endl; - if (rng->next() < p) v2.push_back(v1[i]); - } - vector::iterator mid = v2.begin() + xi; - if (xi > v2.size()) mid = v2.end(); - partial_sort(v2.begin(), mid, v2.end(), DiffOrder()); - copy(v2.begin(), mid, back_inserter(*pv)); -#ifdef DEBUGGING_PRO - if (v2.size() >= 5) { - for (int i =0; i < (mid - v2.begin()); ++i) { - cerr << v2[i] << endl; - } - cerr << pv->back() << endl; - } -#endif -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - if (conf.count("random_seed")) - rng.reset(new MT19937(conf["random_seed"].as())); - else - rng.reset(new MT19937); - const string evaluation_metric = conf["evaluation_metric"].as(); - - EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); - DocumentScorer ds(metric, conf["reference"].as >()); - cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl; - - Hypergraph hg; - string last_file; - ReadFile in_read(conf["input"].as()); - istream &in=*in_read.stream(); - const unsigned kbest_size = conf["kbest_size"].as(); - const unsigned gamma = conf["candidate_pairs"].as(); - const unsigned xi = conf["best_pairs"].as(); - string weightsf = conf["weights"].as(); - vector weights; - Weights::InitFromFile(weightsf, &weights); - string kbest_repo = conf["kbest_repository"].as(); - MkDirP(kbest_repo); - while(in) { - vector v; - string line; - getline(in, line); - if (line.empty()) continue; - istringstream is(line); - int sent_id; - string file; - // path-to-file (JSON) sent_id - is >> file >> sent_id; - ReadFile rf(file); - ostringstream os; - training::CandidateSet J_i; - os << kbest_repo << "/kbest." << sent_id << ".txt.gz"; - const string kbest_file = os.str(); - if (FileExists(kbest_file)) - J_i.ReadFromFile(kbest_file); - HypergraphIO::ReadFromJSON(rf.stream(), &hg); - hg.Reweight(weights); - J_i.AddKBestCandidates(hg, kbest_size, ds[sent_id]); - J_i.WriteToFile(kbest_file); - - Sample(gamma, xi, J_i, metric, &v); - for (unsigned i = 0; i < v.size(); ++i) { - const TrainingInstance& vi = v[i]; - cout << vi.y << "\t" << vi.x << endl; - cout << (!vi.y) << "\t" << (vi.x * -1.0) << endl; - } - } - return 0; -} - diff --git a/pro/mr_pro_reduce.cc b/pro/mr_pro_reduce.cc deleted file mode 100644 index 5ef9b470..00000000 --- a/pro/mr_pro_reduce.cc +++ /dev/null @@ -1,286 +0,0 @@ -#include -#include -#include -#include -#include - -#include -#include - -#include "filelib.h" -#include "weights.h" -#include "sparse_vector.h" -#include "optimize.h" -#include "liblbfgs/lbfgs++.h" - -using namespace std; -namespace po = boost::program_options; - -// since this is a ranking model, there should be equal numbers of -// positive and negative examples, so the bias should be 0 -static const double MAX_BIAS = 1e-10; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("weights,w", po::value(), "Weights from previous iteration (used as initialization and interpolation") - ("regularization_strength,C",po::value()->default_value(500.0), "l2 regularization strength") - ("l1",po::value()->default_value(0.0), "l1 regularization strength") - ("regularize_to_weights,y",po::value()->default_value(5000.0), "Differences in learned weights to previous weights are penalized with an l2 penalty with this strength; 0.0 = no effect") - ("memory_buffers,m",po::value()->default_value(100), "Number of memory buffers (LBFGS)") - ("min_reg,r",po::value()->default_value(0.01), "When tuning (-T) regularization strength, minimum regularization strenght") - ("max_reg,R",po::value()->default_value(1e6), "When tuning (-T) regularization strength, maximum regularization strenght") - ("testset,t",po::value(), "Optional held-out test set") - ("tune_regularizer,T", "Use the held out test set (-t) to tune the regularization strength") - ("interpolate_with_weights,p",po::value()->default_value(1.0), "[deprecated] Output weights are p*w + (1-p)*w_prev; 1.0 = no effect") - ("help,h", "Help"); - po::options_description dcmdline_options; - dcmdline_options.add(opts); - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -void ParseSparseVector(string& line, size_t cur, SparseVector* out) { - SparseVector& x = *out; - size_t last_start = cur; - size_t last_comma = string::npos; - while(cur <= line.size()) { - if (line[cur] == ' ' || cur == line.size()) { - if (!(cur > last_start && last_comma != string::npos && cur > last_comma)) { - cerr << "[ERROR] " << line << endl << " position = " << cur << endl; - exit(1); - } - const int fid = FD::Convert(line.substr(last_start, last_comma - last_start)); - if (cur < line.size()) line[cur] = 0; - const weight_t val = strtod(&line[last_comma + 1], NULL); - x.set_value(fid, val); - - last_comma = string::npos; - last_start = cur+1; - } else { - if (line[cur] == '=') - last_comma = cur; - } - ++cur; - } -} - -void ReadCorpus(istream* pin, vector > >* corpus) { - istream& in = *pin; - corpus->clear(); - bool flag = false; - int lc = 0; - string line; - SparseVector x; - while(getline(in, line)) { - ++lc; - if (lc % 1000 == 0) { cerr << '.'; flag = true; } - if (lc % 40000 == 0) { cerr << " [" << lc << "]\n"; flag = false; } - if (line.empty()) continue; - const size_t ks = line.find("\t"); - assert(string::npos != ks); - assert(ks == 1); - const bool y = line[0] == '1'; - x.clear(); - ParseSparseVector(line, ks + 1, &x); - corpus->push_back(make_pair(y, x)); - } - if (flag) cerr << endl; -} - -void GradAdd(const SparseVector& v, const double scale, weight_t* acc) { - for (SparseVector::const_iterator it = v.begin(); - it != v.end(); ++it) { - acc[it->first] += it->second * scale; - } -} - -double ApplyRegularizationTerms(const double C, - const double T, - const vector& weights, - const vector& prev_weights, - weight_t* g) { - double reg = 0; - for (size_t i = 0; i < weights.size(); ++i) { - const double prev_w_i = (i < prev_weights.size() ? prev_weights[i] : 0.0); - const double& w_i = weights[i]; - reg += C * w_i * w_i; - g[i] += 2 * C * w_i; - - const double diff_i = w_i - prev_w_i; - reg += T * diff_i * diff_i; - g[i] += 2 * T * diff_i; - } - return reg; -} - -double TrainingInference(const vector& x, - const vector > >& corpus, - weight_t* g = NULL) { - double cll = 0; - for (int i = 0; i < corpus.size(); ++i) { - const double dotprod = corpus[i].second.dot(x) + (x.size() ? x[0] : weight_t()); // x[0] is bias - double lp_false = dotprod; - double lp_true = -dotprod; - if (0 < lp_true) { - lp_true += log1p(exp(-lp_true)); - lp_false = log1p(exp(lp_false)); - } else { - lp_true = log1p(exp(lp_true)); - lp_false += log1p(exp(-lp_false)); - } - lp_true*=-1; - lp_false*=-1; - if (corpus[i].first) { // true label - cll -= lp_true; - if (g) { - // g -= corpus[i].second * exp(lp_false); - GradAdd(corpus[i].second, -exp(lp_false), g); - g[0] -= exp(lp_false); // bias - } - } else { // false label - cll -= lp_false; - if (g) { - // g += corpus[i].second * exp(lp_true); - GradAdd(corpus[i].second, exp(lp_true), g); - g[0] += exp(lp_true); // bias - } - } - } - return cll; -} - -struct ProLoss { - ProLoss(const vector > >& tr, - const vector > >& te, - const double c, - const double t, - const vector& px) : training(tr), testing(te), C(c), T(t), prev_x(px){} - double operator()(const vector& x, double* g) const { - fill(g, g + x.size(), 0.0); - double cll = TrainingInference(x, training, g); - tppl = 0; - if (testing.size()) - tppl = pow(2.0, TrainingInference(x, testing, g) / (log(2) * testing.size())); - double ppl = cll / log(2); - ppl /= training.size(); - ppl = pow(2.0, ppl); - double reg = ApplyRegularizationTerms(C, T, x, prev_x, g); - return cll + reg; - } - const vector > >& training, testing; - const double C, T; - const vector& prev_x; - mutable double tppl; -}; - -// return held-out log likelihood -double LearnParameters(const vector > >& training, - const vector > >& testing, - const double C, - const double C1, - const double T, - const unsigned memory_buffers, - const vector& prev_x, - vector* px) { - assert(px->size() == prev_x.size()); - ProLoss loss(training, testing, C, T, prev_x); - LBFGS lbfgs(px, loss, memory_buffers, C1); - lbfgs.MinimizeFunction(); - return loss.tppl; -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - string line; - vector > > training, testing; - const bool tune_regularizer = conf.count("tune_regularizer"); - if (tune_regularizer && !conf.count("testset")) { - cerr << "--tune_regularizer requires --testset to be set\n"; - return 1; - } - const double min_reg = conf["min_reg"].as(); - const double max_reg = conf["max_reg"].as(); - double C = conf["regularization_strength"].as(); // will be overridden if parameter is tuned - double C1 = conf["l1"].as(); // will be overridden if parameter is tuned - const double T = conf["regularize_to_weights"].as(); - assert(C >= 0.0); - assert(min_reg >= 0.0); - assert(max_reg >= 0.0); - assert(max_reg > min_reg); - const double psi = conf["interpolate_with_weights"].as(); - if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; return 1; } - ReadCorpus(&cin, &training); - if (conf.count("testset")) { - ReadFile rf(conf["testset"].as()); - ReadCorpus(rf.stream(), &testing); - } - cerr << "Number of features: " << FD::NumFeats() << endl; - - vector x, prev_x; // x[0] is bias - if (conf.count("weights")) { - Weights::InitFromFile(conf["weights"].as(), &x); - x.resize(FD::NumFeats()); - prev_x = x; - } else { - x.resize(FD::NumFeats()); - prev_x = x; - } - cerr << " Number of features: " << x.size() << endl; - cerr << "Number of training examples: " << training.size() << endl; - cerr << "Number of testing examples: " << testing.size() << endl; - double tppl = 0.0; - vector > sp; - vector smoothed; - if (tune_regularizer) { - C = min_reg; - const double steps = 18; - double sweep_factor = exp((log(max_reg) - log(min_reg)) / steps); - cerr << "SWEEP FACTOR: " << sweep_factor << endl; - while(C < max_reg) { - cerr << "C=" << C << "\tT=" <(), prev_x, &x); - sp.push_back(make_pair(C, tppl)); - C *= sweep_factor; - } - smoothed.resize(sp.size(), 0); - smoothed[0] = sp[0].second; - smoothed.back() = sp.back().second; - for (int i = 1; i < sp.size()-1; ++i) { - double prev = sp[i-1].second; - double next = sp[i+1].second; - double cur = sp[i].second; - smoothed[i] = (prev*0.2) + cur * 0.6 + (0.2*next); - } - double best_ppl = 9999999; - unsigned best_i = 0; - for (unsigned i = 0; i < sp.size(); ++i) { - if (smoothed[i] < best_ppl) { - best_ppl = smoothed[i]; - best_i = i; - } - } - C = sp[best_i].first; - } // tune regularizer - tppl = LearnParameters(training, testing, C, C1, T, conf["memory_buffers"].as(), prev_x, &x); - if (conf.count("weights")) { - for (int i = 1; i < x.size(); ++i) { - x[i] = (x[i] * psi) + prev_x[i] * (1.0 - psi); - } - } - cout.precision(15); - cout << "# C=" << C << "\theld out perplexity="; - if (tppl) { cout << tppl << endl; } else { cout << "N/A\n"; } - if (sp.size()) { - cout << "# Parameter sweep:\n"; - for (int i = 0; i < sp.size(); ++i) { - cout << "# " << sp[i].first << "\t" << sp[i].second << "\t" << smoothed[i] << endl; - } - } - Weights::WriteToFile("-", x); - return 0; -} diff --git a/pro/pro.pl b/pro/pro.pl deleted file mode 100755 index 891b7e4c..00000000 --- a/pro/pro.pl +++ /dev/null @@ -1,555 +0,0 @@ -#!/usr/bin/env perl -use strict; -use File::Basename qw(basename); -my @ORIG_ARGV=@ARGV; -use Cwd qw(getcwd); -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; } - -# Skip local config (used for distributing jobs) if we're running in local-only mode -use LocalConfig; -use Getopt::Long; -use IPC::Open2; -use POSIX ":sys_wait_h"; -my $QSUB_CMD = qsub_args(mert_memory()); -my $default_jobs = env_default_jobs(); - -my $VEST_DIR="$SCRIPT_DIR/../dpmert"; -require "$VEST_DIR/libcall.pl"; - -# Default settings -my $srcFile; -my $refFiles; -my $bin_dir = $SCRIPT_DIR; -die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; -my $FAST_SCORE="$bin_dir/../mteval/fast_score"; -die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; -my $MAPINPUT = "$bin_dir/mr_pro_generate_mapper_input.pl"; -my $MAPPER = "$bin_dir/mr_pro_map"; -my $REDUCER = "$bin_dir/mr_pro_reduce"; -my $parallelize = "$VEST_DIR/parallelize.pl"; -my $libcall = "$VEST_DIR/libcall.pl"; -my $sentserver = "$VEST_DIR/sentserver"; -my $sentclient = "$VEST_DIR/sentclient"; -my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm"; - -my $SCORER = $FAST_SCORE; -die "Can't find $MAPPER" unless -x $MAPPER; -my $cdec = "$bin_dir/../decoder/cdec"; -die "Can't find decoder in $cdec" unless -x $cdec; -die "Can't find $parallelize" unless -x $parallelize; -die "Can't find $libcall" unless -e $libcall; -my $decoder = $cdec; -my $lines_per_mapper = 30; -my $iteration = 1; -my $best_weights; -my $psi = 1; -my $default_max_iter = 30; -my $max_iterations = $default_max_iter; -my $jobs = $default_jobs; # number of decode nodes -my $pmem = "4g"; -my $disable_clean = 0; -my %seen_weights; -my $help = 0; -my $epsilon = 0.0001; -my $dryrun = 0; -my $last_score = -10000000; -my $metric = "ibm_bleu"; -my $dir; -my $iniFile; -my $weights; -my $use_make = 1; # use make to parallelize -my $useqsub = 0; -my $initial_weights; -my $pass_suffix = ''; -my $devset; - -# regularization strength -my $reg = 500; -my $reg_previous = 5000; - -# Process command-line options -if (GetOptions( - "config=s" => \$iniFile, - "weights=s" => \$initial_weights, - "devset=s" => \$devset, - "jobs=i" => \$jobs, - "metric=s" => \$metric, - "pass-suffix=s" => \$pass_suffix, - "qsub" => \$useqsub, - "help" => \$help, - "reg=f" => \$reg, - "reg-previous=f" => \$reg_previous, - "output-dir=s" => \$dir, -) == 0 || @ARGV!=0 || $help) { - print_help(); - exit; -} - -if ($useqsub) { - $use_make = 0; - die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); -} - -my @missing_args = (); -if (!defined $iniFile) { push @missing_args, "--config"; } -if (!defined $devset) { push @missing_args, "--devset"; } -if (!defined $initial_weights) { push @missing_args, "--weights"; } -die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args); - -if ($metric =~ /^(combi|ter)$/i) { - $lines_per_mapper = 5; -} - -my $host =check_output("hostname"); chomp $host; -my $bleu; -my $interval_count = 0; -my $logfile; -my $projected_score; - -# used in sorting scores -my $DIR_FLAG = '-r'; -if ($metric =~ /^ter$|^aer$/i) { - $DIR_FLAG = ''; -} - -unless ($dir){ - $dir = 'pro'; -} -unless ($dir =~ /^\//){ # convert relative path to absolute path - my $basedir = check_output("pwd"); - chomp $basedir; - $dir = "$basedir/$dir"; -} - -# Initializations and helper functions -srand; - -my @childpids = (); -my @cleanupcmds = (); - -sub cleanup { - print STDERR "Cleanup...\n"; - for my $pid (@childpids){ unchecked_call("kill $pid"); } - for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); } - exit 1; -}; -# Always call cleanup, no matter how we exit -*CORE::GLOBAL::exit = - sub{ cleanup(); }; -$SIG{INT} = "cleanup"; -$SIG{TERM} = "cleanup"; -$SIG{HUP} = "cleanup"; - -my $decoderBase = check_output("basename $decoder"); chomp $decoderBase; -my $newIniFile = "$dir/$decoderBase.ini"; -my $inputFileName = "$dir/input"; -my $user = $ENV{"USER"}; - - -# process ini file --e $iniFile || die "Error: could not open $iniFile for reading\n"; -open(INI, $iniFile); - -if (-e $dir) { - die "ERROR: working dir $dir already exists\n\n"; -} else { - mkdir "$dir" or die "Can't mkdir $dir: $!"; - mkdir "$dir/hgs" or die; - mkdir "$dir/scripts" or die; - print STDERR <) { $devSize++; } -close F; - -unless($best_weights){ $best_weights = $weights; } -unless($projected_score){ $projected_score = 0.0; } -$seen_weights{$weights} = 1; - -my $random_seed = int(time / 1000); -my $lastWeightsFile; -my $lastPScore = 0; -# main optimization loop -my @allweights; -while (1){ - print STDERR "\n\nITERATION $iteration\n==========\n"; - - if ($iteration > $max_iterations){ - print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n"; - last; - } - # iteration-specific files - my $runFile="$dir/run.raw.$iteration"; - my $onebestFile="$dir/1best.$iteration"; - my $logdir="$dir/logs.$iteration"; - my $decoderLog="$logdir/decoder.sentserver.log.$iteration"; - my $scorerLog="$logdir/scorer.log.$iteration"; - check_call("mkdir -p $logdir"); - - - #decode - print STDERR "RUNNING DECODER AT "; - print STDERR unchecked_output("date"); - my $im1 = $iteration - 1; - my $weightsFile="$dir/weights.$im1"; - push @allweights, "-w $dir/weights.$im1"; - `rm -f $dir/hgs/*.gz`; - my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; - my $pcmd; - if ($use_make) { - $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --"; - } else { - $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --"; - } - my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; - print STDERR "COMMAND:\n$cmd\n"; - check_bash_call($cmd); - my $num_hgs; - my $num_topbest; - my $retries = 0; - while($retries < 5) { - $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); - $num_topbest = check_output("wc -l < $runFile"); - print STDERR "NUMBER OF HGs: $num_hgs\n"; - print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n"; - if($devSize == $num_hgs && $devSize == $num_topbest) { - last; - } else { - print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n"; - sleep(3); - } - $retries++; - } - die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); - my $dec_score = check_output("cat $runFile | $SCORER -r $refs -m $metric"); - chomp $dec_score; - print STDERR "DECODER SCORE: $dec_score\n"; - - # save space - check_call("gzip -f $runFile"); - check_call("gzip -f $decoderLog"); - - # run optimizer - print STDERR "RUNNING OPTIMIZER AT "; - print STDERR unchecked_output("date"); - print STDERR " - GENERATE TRAINING EXEMPLARS\n"; - my $mergeLog="$logdir/prune-merge.log.$iteration"; - - my $score = 0; - my $icc = 0; - my $inweights="$dir/weights.$im1"; - $cmd="$MAPINPUT $dir/hgs > $dir/agenda.$im1"; - print STDERR "COMMAND:\n$cmd\n"; - check_call($cmd); - check_call("mkdir -p $dir/splag.$im1"); - $cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1 $dir/splag.$im1/mapinput."; - print STDERR "COMMAND:\n$cmd\n"; - check_call($cmd); - opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!"; - my @shards = grep { /^mapinput\./ } readdir(DIR); - closedir DIR; - die "No shards!" unless scalar @shards > 0; - my $joblist = ""; - my $nmappers = 0; - @cleanupcmds = (); - my %o2i = (); - my $first_shard = 1; - my $mkfile; # only used with makefiles - my $mkfilename; - if ($use_make) { - $mkfilename = "$dir/splag.$im1/domap.mk"; - open $mkfile, ">$mkfilename" or die "Couldn't write $mkfilename: $!"; - print $mkfile "all: $dir/splag.$im1/map.done\n\n"; - } - my @mkouts = (); # only used with makefiles - my @mapoutputs = (); - for my $shard (@shards) { - my $mapoutput = $shard; - my $client_name = $shard; - $client_name =~ s/mapinput.//; - $client_name = "pro.$client_name"; - $mapoutput =~ s/mapinput/mapoutput/; - push @mapoutputs, "$dir/splag.$im1/$mapoutput"; - $o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard"; - my $script = "$MAPPER -s $srcFile -m $metric -r $refs -w $inweights -K $dir/kbest < $dir/splag.$im1/$shard > $dir/splag.$im1/$mapoutput"; - if ($use_make) { - my $script_file = "$dir/scripts/map.$shard"; - open F, ">$script_file" or die "Can't write $script_file: $!"; - print F "#!/bin/bash\n"; - print F "$script\n"; - close F; - my $output = "$dir/splag.$im1/$mapoutput"; - push @mkouts, $output; - chmod(0755, $script_file) or die "Can't chmod $script_file: $!"; - if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } - print $mkfile "$output: $dir/splag.$im1/$shard\n\t$script_file\n\n"; - } else { - my $script_file = "$dir/scripts/map.$shard"; - open F, ">$script_file" or die "Can't write $script_file: $!"; - print F "$script\n"; - close F; - if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } - - $nmappers++; - my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; - my $jobid = check_output("$qcmd"); - chomp $jobid; - $jobid =~ s/^(\d+)(.*?)$/\1/g; - $jobid =~ s/^Your job (\d+) .*$/\1/; - push(@cleanupcmds, "qdel $jobid 2> /dev/null"); - print STDERR " $jobid"; - if ($joblist == "") { $joblist = $jobid; } - else {$joblist = $joblist . "\|" . $jobid; } - } - } - my @dev_outs = (); - my @devtest_outs = (); - @dev_outs = @mapoutputs; - if ($use_make) { - print $mkfile "$dir/splag.$im1/map.done: @mkouts\n\ttouch $dir/splag.$im1/map.done\n\n"; - close $mkfile; - my $mcmd = "make -j $jobs -f $mkfilename"; - print STDERR "\nExecuting: $mcmd\n"; - check_call($mcmd); - } else { - print STDERR "\nLaunched $nmappers mappers.\n"; - sleep 8; - print STDERR "Waiting for mappers to complete...\n"; - while ($nmappers > 0) { - sleep 5; - my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '"))); - $nmappers = scalar @livejobs; - } - print STDERR "All mappers complete.\n"; - } - my $tol = 0; - my $til = 0; - my $dev_test_file = "$dir/splag.$im1/devtest.gz"; - print STDERR "\nRUNNING CLASSIFIER (REDUCER)\n"; - print STDERR unchecked_output("date"); - $cmd="cat @dev_outs | $REDUCER -w $dir/weights.$im1 -C $reg -y $reg_previous --interpolate_with_weights $psi"; - $cmd .= " > $dir/weights.$iteration"; - print STDERR "COMMAND:\n$cmd\n"; - check_bash_call($cmd); - $lastWeightsFile = "$dir/weights.$iteration"; - $lastPScore = $score; - $iteration++; - print STDERR "\n==========\n"; -} - - -check_call("cp $lastWeightsFile $dir/weights.final"); -print STDERR "\nFINAL WEIGHTS: $dir/weights.final\n(Use -w with the decoder)\n\n"; -print STDOUT "$dir/weights.final\n"; - -exit 0; - -sub read_weights_file { - my ($file) = @_; - open F, "<$file" or die "Couldn't read $file: $!"; - my @r = (); - my $pm = -1; - while() { - next if /^#/; - next if /^\s*$/; - chomp; - if (/^(.+)\s+(.+)$/) { - my $m = $1; - my $w = $2; - die "Weights out of order: $m <= $pm" unless $m > $pm; - push @r, $w; - } else { - warn "Unexpected feature name in weight file: $_"; - } - } - close F; - return join ' ', @r; -} - -sub enseg { - my $src = shift; - my $newsrc = shift; - open(SRC, $src); - open(NEWSRC, ">$newsrc"); - my $i=0; - while (my $line=){ - chomp $line; - if ($line =~ /^\s* tags, you must include a zero-based id attribute"; - } - } else { - print NEWSRC "$line\n"; - } - $i++; - } - close SRC; - close NEWSRC; - die "Empty dev set!" if ($i == 0); -} - -sub print_help { - - my $executable = basename($0); chomp $executable; - print << "Help"; - -Usage: $executable [options] - - $executable [options] - Runs a complete PRO optimization using the ini file specified. - -Required: - - --config - Decoder configuration file. - - --devset - Dev set source and reference data. - - --weights - Initial weights file (use empty file to start from 0) - -General options: - - --help - Print this message and exit. - - --max-iterations - Maximum number of iterations to run. If not specified, defaults - to $default_max_iter. - - --metric - Metric to optimize. - Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi - - --pass-suffix - If the decoder is doing multi-pass decoding, the pass suffix "2", - "3", etc., is used to control what iteration of weights is set. - - --workdir - Directory for intermediate and output files. If not specified, the - name is derived from the ini filename. Assuming that the ini - filename begins with the decoder name and ends with ini, the default - name of the working directory is inferred from the middle part of - the filename. E.g. an ini file named decoder.foo.ini would have - a default working directory name foo. - -Regularization options: - - --reg - l2 regularization strength [default=500]. The greater this value, - the closer to zero the weights will be. - - --reg-previous - l2 penalty for moving away from the weights from the previous - iteration. [default=5000]. The greater this value, the closer - to the previous iteration's weights the next iteration's weights - will be. - -Job control options: - - --jobs - Number of decoder processes to run in parallel. [default=$default_jobs] - - --qsub - Use qsub to run jobs in parallel (qsub must be configured in - environment/LocalEnvironment.pm) - - --pmem - Amount of physical memory requested for parallel decoding jobs - (used with qsub requests only) - -Deprecated options: - - --interpolate-with-weights - [deprecated] At each iteration the resulting weights are - interpolated with the weights from the previous iteration, with - this factor. [default=1.0, i.e., no effect] - -Help -} - -sub convert { - my ($str) = @_; - my @ps = split /;/, $str; - my %dict = (); - for my $p (@ps) { - my ($k, $v) = split /=/, $p; - $dict{$k} = $v; - } - return %dict; -} - - -sub cmdline { - return join ' ',($0,@ORIG_ARGV); -} - -#buggy: last arg gets quoted sometimes? -my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; -my $shell_escape_in_quote=qr{[\\"\$`!]}; - -sub escape_shell { - my ($arg)=@_; - return undef unless defined $arg; - if ($arg =~ /$is_shell_special/) { - $arg =~ s/($shell_escape_in_quote)/\\$1/g; - return "\"$arg\""; - } - return $arg; -} - -sub escaped_shell_args { - return map {local $_=$_;chomp;escape_shell($_)} @_; -} - -sub escaped_shell_args_str { - return join ' ',&escaped_shell_args(@_); -} - -sub escaped_cmdline { - return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); -} - -sub split_devset { - my ($infile, $outsrc, $outref) = @_; - open F, "<$infile" or die "Can't read $infile: $!"; - open S, ">$outsrc" or die "Can't write $outsrc: $!"; - open R, ">$outref" or die "Can't write $outref: $!"; - while() { - chomp; - my ($src, @refs) = split /\s*\|\|\|\s*/; - die "Malformed devset line: $_\n" unless scalar @refs > 0; - print S "$src\n"; - print R join(' ||| ', @refs) . "\n"; - } - close R; - close S; - close F; -} - diff --git a/rampion/Makefile.am b/rampion/Makefile.am deleted file mode 100644 index f4dbb7cc..00000000 --- a/rampion/Makefile.am +++ /dev/null @@ -1,6 +0,0 @@ -bin_PROGRAMS = rampion_cccp - -rampion_cccp_SOURCES = rampion_cccp.cc -rampion_cccp_LDADD = $(top_srcdir)/training/libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz - -AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training diff --git a/rampion/rampion.pl b/rampion/rampion.pl deleted file mode 100755 index 55f7b3f1..00000000 --- a/rampion/rampion.pl +++ /dev/null @@ -1,540 +0,0 @@ -#!/usr/bin/env perl -use strict; -my @ORIG_ARGV=@ARGV; -use Cwd qw(getcwd); -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; } - -# Skip local config (used for distributing jobs) if we're running in local-only mode -use LocalConfig; -use Getopt::Long; -use IPC::Open2; -use POSIX ":sys_wait_h"; -my $QSUB_CMD = qsub_args(mert_memory()); -my $default_jobs = env_default_jobs(); - -my $VEST_DIR="$SCRIPT_DIR/../dpmert"; -require "$VEST_DIR/libcall.pl"; - -# Default settings -my $srcFile; -my $refFiles; -my $bin_dir = $SCRIPT_DIR; -die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; -my $FAST_SCORE="$bin_dir/../mteval/fast_score"; -die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; -my $MAPINPUT = "$bin_dir/rampion_generate_input.pl"; -my $MAPPER = "$bin_dir/rampion_cccp"; -my $parallelize = "$VEST_DIR/parallelize.pl"; -my $libcall = "$VEST_DIR/libcall.pl"; -my $sentserver = "$VEST_DIR/sentserver"; -my $sentclient = "$VEST_DIR/sentclient"; -my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm"; - -my $SCORER = $FAST_SCORE; -die "Can't find $MAPPER" unless -x $MAPPER; -my $cdec = "$bin_dir/../decoder/cdec"; -die "Can't find decoder in $cdec" unless -x $cdec; -die "Can't find $parallelize" unless -x $parallelize; -die "Can't find $libcall" unless -e $libcall; -my $decoder = $cdec; -my $lines_per_mapper = 30; -my $iteration = 1; -my $best_weights; -my $psi = 1; -my $default_max_iter = 30; -my $max_iterations = $default_max_iter; -my $jobs = $default_jobs; # number of decode nodes -my $pmem = "4g"; -my $disable_clean = 0; -my %seen_weights; -my $help = 0; -my $epsilon = 0.0001; -my $dryrun = 0; -my $last_score = -10000000; -my $metric = "ibm_bleu"; -my $dir; -my $iniFile; -my $weights; -my $use_make = 1; # use make to parallelize -my $useqsub = 0; -my $initial_weights; -my $pass_suffix = ''; -my $cpbin=1; - -# regularization strength -my $tune_regularizer = 0; -my $reg = 500; -my $reg_previous = 5000; -my $dont_accum = 0; - -# Process command-line options -Getopt::Long::Configure("no_auto_abbrev"); -if (GetOptions( - "jobs=i" => \$jobs, - "dont-clean" => \$disable_clean, - "dont-accumulate" => \$dont_accum, - "pass-suffix=s" => \$pass_suffix, - "qsub" => \$useqsub, - "dry-run" => \$dryrun, - "epsilon=s" => \$epsilon, - "help" => \$help, - "weights=s" => \$initial_weights, - "reg=f" => \$reg, - "use-make=i" => \$use_make, - "max-iterations=i" => \$max_iterations, - "pmem=s" => \$pmem, - "cpbin!" => \$cpbin, - "ref-files=s" => \$refFiles, - "metric=s" => \$metric, - "source-file=s" => \$srcFile, - "workdir=s" => \$dir, -) == 0 || @ARGV!=1 || $help) { - print_help(); - exit; -} - -die "--tune-regularizer is no longer supported with --reg-previous and --reg. Please tune manually.\n" if $tune_regularizer; - -if ($useqsub) { - $use_make = 0; - die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); -} - -my @missing_args = (); -if (!defined $srcFile) { push @missing_args, "--source-file"; } -if (!defined $refFiles) { push @missing_args, "--ref-files"; } -if (!defined $initial_weights) { push @missing_args, "--weights"; } -die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args); - -if ($metric =~ /^(combi|ter)$/i) { - $lines_per_mapper = 5; -} - -($iniFile) = @ARGV; - - -sub write_config; -sub enseg; -sub print_help; - -my $nodelist; -my $host =check_output("hostname"); chomp $host; -my $bleu; -my $interval_count = 0; -my $logfile; -my $projected_score; - -# used in sorting scores -my $DIR_FLAG = '-r'; -if ($metric =~ /^ter$|^aer$/i) { - $DIR_FLAG = ''; -} - -my $refs_comma_sep = get_comma_sep_refs('r',$refFiles); - -unless ($dir){ - $dir = "rampion"; -} -unless ($dir =~ /^\//){ # convert relative path to absolute path - my $basedir = check_output("pwd"); - chomp $basedir; - $dir = "$basedir/$dir"; -} - - -# Initializations and helper functions -srand; - -my @childpids = (); -my @cleanupcmds = (); - -sub cleanup { - print STDERR "Cleanup...\n"; - for my $pid (@childpids){ unchecked_call("kill $pid"); } - for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); } - exit 1; -}; -# Always call cleanup, no matter how we exit -*CORE::GLOBAL::exit = - sub{ cleanup(); }; -$SIG{INT} = "cleanup"; -$SIG{TERM} = "cleanup"; -$SIG{HUP} = "cleanup"; - -my $decoderBase = check_output("basename $decoder"); chomp $decoderBase; -my $newIniFile = "$dir/$decoderBase.ini"; -my $inputFileName = "$dir/input"; -my $user = $ENV{"USER"}; -# process ini file --e $iniFile || die "Error: could not open $iniFile for reading\n"; -open(INI, $iniFile); - -use File::Basename qw(basename); -#pass bindir, refs to vars holding bin -sub modbin { - local $_; - my $bindir=shift; - check_call("mkdir -p $bindir"); - -d $bindir || die "couldn't make bindir $bindir"; - for (@_) { - my $src=$$_; - $$_="$bindir/".basename($src); - check_call("cp -p $src $$_"); - } -} -sub dirsize { - opendir ISEMPTY,$_[0]; - return scalar(readdir(ISEMPTY))-1; -} -my @allweights; -if ($dryrun){ - write_config(*STDERR); - exit 0; -} else { - if (-e $dir && dirsize($dir)>1 && -e "$dir/hgs" ){ # allow preexisting logfile, binaries, but not dist-pro.pl outputs - die "ERROR: working dir $dir already exists\n\n"; - } else { - -e $dir || mkdir $dir; - mkdir "$dir/hgs"; - modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$parallelize,\$sentserver,\$sentclient,\$libcall) if $cpbin; - mkdir "$dir/scripts"; - my $cmdfile="$dir/rerun-pro.sh"; - open CMD,'>',$cmdfile; - print CMD "cd ",&getcwd,"\n"; -# print CMD &escaped_cmdline,"\n"; #buggy - last arg is quoted. - my $cline=&cmdline."\n"; - print CMD $cline; - close CMD; - print STDERR $cline; - chmod(0755,$cmdfile); - check_call("cp $initial_weights $dir/weights.0"); - die "Can't find weights.0" unless (-e "$dir/weights.0"); - } - write_config(*STDERR); -} - - -# Generate initial files and values -check_call("cp $iniFile $newIniFile"); -$iniFile = $newIniFile; - -my $newsrc = "$dir/dev.input"; -enseg($srcFile, $newsrc); -$srcFile = $newsrc; -my $devSize = 0; -open F, "<$srcFile" or die "Can't read $srcFile: $!"; -while() { $devSize++; } -close F; - -unless($best_weights){ $best_weights = $weights; } -unless($projected_score){ $projected_score = 0.0; } -$seen_weights{$weights} = 1; -my $kbest = "$dir/kbest"; -if ($dont_accum) { - $kbest = ''; -} else { - check_call("mkdir -p $kbest"); - $kbest = "--kbest_repository $kbest"; -} - -my $random_seed = int(time / 1000); -my $lastWeightsFile; -my $lastPScore = 0; -# main optimization loop -while (1){ - print STDERR "\n\nITERATION $iteration\n==========\n"; - - if ($iteration > $max_iterations){ - print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n"; - last; - } - # iteration-specific files - my $runFile="$dir/run.raw.$iteration"; - my $onebestFile="$dir/1best.$iteration"; - my $logdir="$dir/logs.$iteration"; - my $decoderLog="$logdir/decoder.sentserver.log.$iteration"; - my $scorerLog="$logdir/scorer.log.$iteration"; - check_call("mkdir -p $logdir"); - - - #decode - print STDERR "RUNNING DECODER AT "; - print STDERR unchecked_output("date"); - my $im1 = $iteration - 1; - my $weightsFile="$dir/weights.$im1"; - push @allweights, "-w $dir/weights.$im1"; - `rm -f $dir/hgs/*.gz`; - my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; - my $pcmd; - if ($use_make) { - $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --"; - } else { - $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --"; - } - my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; - print STDERR "COMMAND:\n$cmd\n"; - check_bash_call($cmd); - my $num_hgs; - my $num_topbest; - my $retries = 0; - while($retries < 5) { - $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); - $num_topbest = check_output("wc -l < $runFile"); - print STDERR "NUMBER OF HGs: $num_hgs\n"; - print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n"; - if($devSize == $num_hgs && $devSize == $num_topbest) { - last; - } else { - print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n"; - sleep(3); - } - $retries++; - } - die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); - my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric"); - chomp $dec_score; - print STDERR "DECODER SCORE: $dec_score\n"; - - # save space - check_call("gzip -f $runFile"); - check_call("gzip -f $decoderLog"); - - # run optimizer - print STDERR "RUNNING OPTIMIZER AT "; - print STDERR unchecked_output("date"); - print STDERR " - GENERATE TRAINING EXEMPLARS\n"; - my $mergeLog="$logdir/prune-merge.log.$iteration"; - - my $score = 0; - my $icc = 0; - my $inweights="$dir/weights.$im1"; - my $outweights="$dir/weights.$iteration"; - $cmd="$MAPINPUT $dir/hgs > $dir/agenda.$im1"; - print STDERR "COMMAND:\n$cmd\n"; - check_call($cmd); - $cmd="$MAPPER $refs_comma_sep -m $metric -i $dir/agenda.$im1 $kbest -w $inweights > $outweights"; - check_call($cmd); - $lastWeightsFile = $outweights; - $iteration++; - `rm hgs/*.gz`; - print STDERR "\n==========\n"; -} - -print STDERR "\nFINAL WEIGHTS: $lastWeightsFile\n(Use -w with the decoder)\n\n"; - -print STDOUT "$lastWeightsFile\n"; - -exit 0; - -sub get_lines { - my $fn = shift @_; - open FL, "<$fn" or die "Couldn't read $fn: $!"; - my $lc = 0; - while() { $lc++; } - return $lc; -} - -sub get_comma_sep_refs { - my ($r,$p) = @_; - my $o = check_output("echo $p"); - chomp $o; - my @files = split /\s+/, $o; - return "-$r " . join(" -$r ", @files); -} - -sub read_weights_file { - my ($file) = @_; - open F, "<$file" or die "Couldn't read $file: $!"; - my @r = (); - my $pm = -1; - while() { - next if /^#/; - next if /^\s*$/; - chomp; - if (/^(.+)\s+(.+)$/) { - my $m = $1; - my $w = $2; - die "Weights out of order: $m <= $pm" unless $m > $pm; - push @r, $w; - } else { - warn "Unexpected feature name in weight file: $_"; - } - } - close F; - return join ' ', @r; -} - -# subs -sub write_config { - my $fh = shift; - my $cleanup = "yes"; - if ($disable_clean) {$cleanup = "no";} - - print $fh "\n"; - print $fh "DECODER: $decoder\n"; - print $fh "INI FILE: $iniFile\n"; - print $fh "WORKING DIR: $dir\n"; - print $fh "SOURCE (DEV): $srcFile\n"; - print $fh "REFS (DEV): $refFiles\n"; - print $fh "EVAL METRIC: $metric\n"; - print $fh "MAX ITERATIONS: $max_iterations\n"; - print $fh "JOBS: $jobs\n"; - print $fh "HEAD NODE: $host\n"; - print $fh "PMEM (DECODING): $pmem\n"; - print $fh "CLEANUP: $cleanup\n"; -} - -sub update_weights_file { - my ($neww, $rfn, $rpts) = @_; - my @feats = @$rfn; - my @pts = @$rpts; - my $num_feats = scalar @feats; - my $num_pts = scalar @pts; - die "$num_feats (num_feats) != $num_pts (num_pts)" unless $num_feats == $num_pts; - open G, ">$neww" or die; - for (my $i = 0; $i < $num_feats; $i++) { - my $f = $feats[$i]; - my $lambda = $pts[$i]; - print G "$f $lambda\n"; - } - close G; -} - -sub enseg { - my $src = shift; - my $newsrc = shift; - open(SRC, $src); - open(NEWSRC, ">$newsrc"); - my $i=0; - while (my $line=){ - chomp $line; - if ($line =~ /^\s* tags, you must include a zero-based id attribute"; - } - } else { - print NEWSRC "$line\n"; - } - $i++; - } - close SRC; - close NEWSRC; - die "Empty dev set!" if ($i == 0); -} - -sub print_help { - - my $executable = check_output("basename $0"); chomp $executable; - print << "Help"; - -Usage: $executable [options] - - $executable [options] - Runs a complete PRO optimization using the ini file specified. - -Required: - - --ref-files - Dev set ref files. This option takes only a single string argument. - To use multiple files (including file globbing), this argument should - be quoted. - - --source-file - Dev set source file. - - --weights - Initial weights file (use empty file to start from 0) - -General options: - - --help - Print this message and exit. - - --dont-accumulate - Don't accumulate k-best lists from multiple iterations. - - --max-iterations - Maximum number of iterations to run. If not specified, defaults - to $default_max_iter. - - --metric - Metric to optimize. - Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi - - --pass-suffix - If the decoder is doing multi-pass decoding, the pass suffix "2", - "3", etc., is used to control what iteration of weights is set. - - --workdir - Directory for intermediate and output files. If not specified, the - name is derived from the ini filename. Assuming that the ini - filename begins with the decoder name and ends with ini, the default - name of the working directory is inferred from the middle part of - the filename. E.g. an ini file named decoder.foo.ini would have - a default working directory name foo. - -Regularization options: - - --reg - l2 regularization strength [default=500]. The greater this value, - the closer to zero the weights will be. - -Job control options: - - --jobs - Number of decoder processes to run in parallel. [default=$default_jobs] - - --qsub - Use qsub to run jobs in parallel (qsub must be configured in - environment/LocalEnvironment.pm) - - --pmem - Amount of physical memory requested for parallel decoding jobs - (used with qsub requests only) - -Help -} - -sub convert { - my ($str) = @_; - my @ps = split /;/, $str; - my %dict = (); - for my $p (@ps) { - my ($k, $v) = split /=/, $p; - $dict{$k} = $v; - } - return %dict; -} - - -sub cmdline { - return join ' ',($0,@ORIG_ARGV); -} - -#buggy: last arg gets quoted sometimes? -my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; -my $shell_escape_in_quote=qr{[\\"\$`!]}; - -sub escape_shell { - my ($arg)=@_; - return undef unless defined $arg; - if ($arg =~ /$is_shell_special/) { - $arg =~ s/($shell_escape_in_quote)/\\$1/g; - return "\"$arg\""; - } - return $arg; -} - -sub escaped_shell_args { - return map {local $_=$_;chomp;escape_shell($_)} @_; -} - -sub escaped_shell_args_str { - return join ' ',&escaped_shell_args(@_); -} - -sub escaped_cmdline { - return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); -} diff --git a/rampion/rampion_cccp.cc b/rampion/rampion_cccp.cc deleted file mode 100644 index 1e36dc51..00000000 --- a/rampion/rampion_cccp.cc +++ /dev/null @@ -1,168 +0,0 @@ -#include -#include -#include -#include - -#include -#include - -#include "filelib.h" -#include "stringlib.h" -#include "weights.h" -#include "hg_io.h" -#include "kbest.h" -#include "viterbi.h" -#include "ns.h" -#include "ns_docscorer.h" -#include "candidate_set.h" - -using namespace std; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("reference,r",po::value >(), "[REQD] Reference translation (tokenized text)") - ("weights,w",po::value(), "[REQD] Weights files from current iterations") - ("input,i",po::value()->default_value("-"), "Input file to map (- is STDIN)") - ("evaluation_metric,m",po::value()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)") - ("kbest_repository,R",po::value(), "Accumulate k-best lists from previous iterations (parameter is path to repository)") - ("kbest_size,k",po::value()->default_value(500u), "Top k-hypotheses to extract") - ("cccp_iterations,I", po::value()->default_value(10u), "CCCP iterations (T')") - ("ssd_iterations,J", po::value()->default_value(5u), "Stochastic subgradient iterations (T'')") - ("eta", po::value()->default_value(1e-4), "Step size") - ("regularization_strength,C", po::value()->default_value(1.0), "L2 regularization strength") - ("alpha,a", po::value()->default_value(10.0), "Cost scale (alpha); alpha * [1-metric(y,y')]") - ("help,h", "Help"); - po::options_description dcmdline_options; - dcmdline_options.add(opts); - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - bool flag = false; - if (!conf->count("reference")) { - cerr << "Please specify one or more references using -r \n"; - flag = true; - } - if (!conf->count("weights")) { - cerr << "Please specify weights using -w \n"; - flag = true; - } - if (flag || conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -struct GainFunction { - explicit GainFunction(const EvaluationMetric* m) : metric(m) {} - float operator()(const SufficientStats& eval_feats) const { - float g = metric->ComputeScore(eval_feats); - if (!metric->IsErrorMetric()) g = 1 - g; - return g; - } - const EvaluationMetric* metric; -}; - -template -void CostAugmentedSearch(const GainFunc& gain, - const training::CandidateSet& cs, - const SparseVector& w, - double alpha, - SparseVector* fmap) { - unsigned best_i = 0; - double best = -numeric_limits::infinity(); - for (unsigned i = 0; i < cs.size(); ++i) { - double s = cs[i].fmap.dot(w) + alpha * gain(cs[i].eval_feats); - if (s > best) { - best = s; - best_i = i; - } - } - *fmap = cs[best_i].fmap; -} - - - -// runs lines 4--15 of rampion algorithm -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - const string evaluation_metric = conf["evaluation_metric"].as(); - - EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); - DocumentScorer ds(metric, conf["reference"].as >()); - cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl; - double goodsign = -1; - double badsign = -goodsign; - - Hypergraph hg; - string last_file; - ReadFile in_read(conf["input"].as()); - string kbest_repo; - if (conf.count("kbest_repository")) { - kbest_repo = conf["kbest_repository"].as(); - MkDirP(kbest_repo); - } - istream &in=*in_read.stream(); - const unsigned kbest_size = conf["kbest_size"].as(); - const unsigned tp = conf["cccp_iterations"].as(); - const unsigned tpp = conf["ssd_iterations"].as(); - const double eta = conf["eta"].as(); - const double reg = conf["regularization_strength"].as(); - const double alpha = conf["alpha"].as(); - SparseVector weights; - { - vector vweights; - const string weightsf = conf["weights"].as(); - Weights::InitFromFile(weightsf, &vweights); - Weights::InitSparseVector(vweights, &weights); - } - string line, file; - vector kis; - cerr << "Loading hypergraphs...\n"; - while(getline(in, line)) { - istringstream is(line); - int sent_id; - kis.resize(kis.size() + 1); - training::CandidateSet& curkbest = kis.back(); - string kbest_file; - if (kbest_repo.size()) { - ostringstream os; - os << kbest_repo << "/kbest." << sent_id << ".txt.gz"; - kbest_file = os.str(); - if (FileExists(kbest_file)) - curkbest.ReadFromFile(kbest_file); - } - is >> file >> sent_id; - ReadFile rf(file); - if (kis.size() % 5 == 0) { cerr << '.'; } - if (kis.size() % 200 == 0) { cerr << " [" << kis.size() << "]\n"; } - HypergraphIO::ReadFromJSON(rf.stream(), &hg); - hg.Reweight(weights); - curkbest.AddKBestCandidates(hg, kbest_size, ds[sent_id]); - if (kbest_file.size()) - curkbest.WriteToFile(kbest_file); - } - cerr << "\nHypergraphs loaded.\n"; - - vector > goals(kis.size()); // f(x_i,y+,h+) - SparseVector fear; // f(x,y-,h-) - const GainFunction gain(metric); - for (unsigned iterp = 1; iterp <= tp; ++iterp) { - cerr << "CCCP Iteration " << iterp << endl; - for (unsigned i = 0; i < goals.size(); ++i) - CostAugmentedSearch(gain, kis[i], weights, goodsign * alpha, &goals[i]); - for (unsigned iterpp = 1; iterpp <= tpp; ++iterpp) { - cerr << " SSD Iteration " << iterpp << endl; - for (unsigned i = 0; i < goals.size(); ++i) { - CostAugmentedSearch(gain, kis[i], weights, badsign * alpha, &fear); - weights -= weights * (eta * reg / goals.size()); - weights += (goals[i] - fear) * eta; - } - } - } - vector w; - weights.init_vector(&w); - Weights::WriteToFile("-", w); - return 0; -} - diff --git a/rampion/rampion_generate_input.pl b/rampion/rampion_generate_input.pl deleted file mode 100755 index b30fc4fd..00000000 --- a/rampion/rampion_generate_input.pl +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -die "Usage: $0 HG_DIR\n" unless scalar @ARGV == 1; -my $d = shift @ARGV; -die "Can't find directory $d" unless -d $d; - -opendir(DIR, $d) or die "Can't read $d: $!"; -my @hgs = grep { /\.gz$/ } readdir(DIR); -closedir DIR; - -for my $hg (@hgs) { - my $file = $hg; - my $id = $hg; - $id =~ s/(\.json)?\.gz//; - print "$d/$file $id\n"; -} - diff --git a/training/Makefile.am b/training/Makefile.am index f9c25391..e95e045f 100644 --- a/training/Makefile.am +++ b/training/Makefile.am @@ -1,91 +1,11 @@ -bin_PROGRAMS = \ - fast_align \ - lbl_model \ - test_ngram \ - mr_em_map_adapter \ - mr_em_adapted_reduce \ - mr_reduce_to_weights \ - mr_optimize_reduce \ - grammar_convert \ - plftools \ - collapse_weights \ - mpi_extract_reachable \ - mpi_extract_features \ - mpi_online_optimize \ - mpi_flex_optimize \ - mpi_batch_optimize \ - mpi_compute_cllh \ - augment_grammar +SUBDIRS = \ + liblbfgs \ + utils \ + crf \ + minrisk \ + dpmert \ + pro \ + dtrain \ + mira \ + rampion -noinst_PROGRAMS = \ - lbfgs_test \ - optimize_test - -TESTS = lbfgs_test optimize_test - -noinst_LIBRARIES = libtraining.a -libtraining_a_SOURCES = \ - candidate_set.cc \ - entropy.cc \ - optimize.cc \ - online_optimizer.cc \ - risk.cc - -mpi_online_optimize_SOURCES = mpi_online_optimize.cc -mpi_online_optimize_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz - -mpi_flex_optimize_SOURCES = mpi_flex_optimize.cc -mpi_flex_optimize_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz - -mpi_extract_reachable_SOURCES = mpi_extract_reachable.cc -mpi_extract_reachable_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz - -mpi_extract_features_SOURCES = mpi_extract_features.cc -mpi_extract_features_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz - -mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc cllh_observer.cc -mpi_batch_optimize_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz - -mpi_compute_cllh_SOURCES = mpi_compute_cllh.cc cllh_observer.cc -mpi_compute_cllh_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz - -augment_grammar_SOURCES = augment_grammar.cc -augment_grammar_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz - -test_ngram_SOURCES = test_ngram.cc -test_ngram_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz - -fast_align_SOURCES = fast_align.cc ttables.cc -fast_align_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz - -lbl_model_SOURCES = lbl_model.cc -lbl_model_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz - -grammar_convert_SOURCES = grammar_convert.cc -grammar_convert_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz - -optimize_test_SOURCES = optimize_test.cc -optimize_test_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz - -collapse_weights_SOURCES = collapse_weights.cc -collapse_weights_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz - -lbfgs_test_SOURCES = lbfgs_test.cc -lbfgs_test_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz - -mr_optimize_reduce_SOURCES = mr_optimize_reduce.cc -mr_optimize_reduce_LDADD = libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz - -mr_em_map_adapter_SOURCES = mr_em_map_adapter.cc -mr_em_map_adapter_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz - -mr_reduce_to_weights_SOURCES = mr_reduce_to_weights.cc -mr_reduce_to_weights_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz - -mr_em_adapted_reduce_SOURCES = mr_em_adapted_reduce.cc -mr_em_adapted_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz - -plftools_SOURCES = plftools.cc -plftools_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/utils/libutils.a -lz - -AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/decoder -I$(top_srcdir)/utils -I$(top_srcdir)/mteval -I../klm diff --git a/training/add-model1-features-to-scfg.pl b/training/add-model1-features-to-scfg.pl deleted file mode 100755 index a0074317..00000000 --- a/training/add-model1-features-to-scfg.pl +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/perl -w - -# [X] ||| so [X,1] die [X,2] der ||| as [X,1] existing [X,2] the ||| 2.47712135315 2.53182387352 5.07100057602 ||| 0-0 2-2 4-4 -# [X] ||| so [X,1] die [X,2] der ||| this [X,1] the [X,2] of ||| 2.47712135315 3.19828724861 2.38270020485 ||| 0-0 2-2 4-4 -# [X] ||| so [X,1] die [X,2] der ||| as [X,1] the [X,2] the ||| 2.47712135315 2.53182387352 1.48463630676 ||| 0-0 2-2 4-4 -# [X] ||| so [X,1] die [X,2] der ||| is [X,1] the [X,2] of the ||| 2.47712135315 3.45197868347 2.64251494408 ||| 0-0 2-2 4-4 4-5 - -die "Usage: $0 model1.f-e model1.e-f < grammar.scfg\n (use trianing/model1 to extract the model files)\n" unless scalar @ARGV == 2; - -my $fm1 = shift @ARGV; -die unless $fm1; -my $frm1 = shift @ARGV; -die unless $frm1; -open M1,"<$fm1" or die; -open RM1,"<$frm1" or die; -print STDERR "Loading Model 1 probs from $fm1...\n"; -my %m1; -while() { - chomp; - my ($f, $e, $lp) = split /\s+/; - $m1{$e}->{$f} = exp($lp); -} -close M1; - -print STDERR "Loading Inverse Model 1 probs from $frm1...\n"; -my %rm1; -while() { - chomp; - my ($e, $f, $lp) = split /\s+/; - $rm1{$f}->{$e} = exp($lp); -} -close RM1; - -my @label = qw( EGivenF LexFGivenE LexEGivenF ); -while(<>) { - chomp; - my ($l, $f, $e, $sscores, $al) = split / \|\|\| /; - my @scores = split /\s+/, $sscores; - unless ($sscores =~ /=/) { - for (my $i=0; $i<3; $i++) { $scores[$i] = "$label[$i]=$scores[$i]"; } - } - push @scores, "RuleCount=1"; - my @fs = split /\s+/, $f; - my @es = split /\s+/, $e; - my $flen = scalar @fs; - my $elen = scalar @es; - my $pgen = 0; - my $nongen = 0; - for (my $i =0; $i < $flen; $i++) { - my $ftot = 0; - next if ($fs[$i] =~ /\[X/); - my $cr = $rm1{$fs[$i]}; - for (my $j=0; $j <= $elen; $j++) { - my $ej = ''; - if ($j < $elen) { $ej = $es[$j]; } - my $p = $cr->{$ej}; - if (defined $p) { $ftot += $p; } - } - if ($ftot == 0) { $nongen = 1; last; } - $pgen += log($ftot) - log($elen); - } - my $bad = 0; - my $good = 0; - unless ($nongen) { push @scores, "RGood=1"; $good++; } else { push @scores, "RBad=1"; $bad++; } - - $nongen = 0; - $pgen = 0; - for (my $i =0; $i < $elen; $i++) { - my $etot = 0; - next if ($es[$i] =~ /\[X/); - my $cr = $m1{$es[$i]}; -# print STDERR "$es[$i]\n"; - for (my $j=0; $j <= $flen; $j++) { - my $fj = ''; - if ($j < $flen) { $fj = $fs[$j]; } - my $p = $cr->{$fj}; -# print STDERR " $fs[$j] : $p\n"; - if (defined $p) { $etot += $p; } - } - if ($etot == 0) { $nongen = 1; last; } - $pgen += log($etot) - log($flen); - } - unless ($nongen) { - push @scores, "FGood=1"; - if ($good) { push @scores, "BothGood=1"; } else { push @scores, "SusDel=1"; } - } else { - push @scores, "FBad=1"; - if ($bad) { push @scores, "BothBad=1"; } else { push @scores, "SusHall=1"; } - } - print "$l ||| $f ||| $e ||| @scores"; - if (defined $al) { print " ||| $al\n"; } else { print "\n"; } -} - diff --git a/training/candidate_set.cc b/training/candidate_set.cc deleted file mode 100644 index 087efec3..00000000 --- a/training/candidate_set.cc +++ /dev/null @@ -1,169 +0,0 @@ -#include "candidate_set.h" - -#include - -#include - -#include "verbose.h" -#include "ns.h" -#include "filelib.h" -#include "wordid.h" -#include "tdict.h" -#include "hg.h" -#include "kbest.h" -#include "viterbi.h" - -using namespace std; - -namespace training { - -struct ApproxVectorHasher { - static const size_t MASK = 0xFFFFFFFFull; - union UType { - double f; // leave as double - size_t i; - }; - static inline double round(const double x) { - UType t; - t.f = x; - size_t r = t.i & MASK; - if ((r << 1) > MASK) - t.i += MASK - r + 1; - else - t.i &= (1ull - MASK); - return t.f; - } - size_t operator()(const SparseVector& x) const { - size_t h = 0x573915839; - for (SparseVector::const_iterator it = x.begin(); it != x.end(); ++it) { - UType t; - t.f = it->second; - if (t.f) { - size_t z = (t.i >> 32); - boost::hash_combine(h, it->first); - boost::hash_combine(h, z); - } - } - return h; - } -}; - -struct ApproxVectorEquals { - bool operator()(const SparseVector& a, const SparseVector& b) const { - SparseVector::const_iterator bit = b.begin(); - for (SparseVector::const_iterator ait = a.begin(); ait != a.end(); ++ait) { - if (bit == b.end() || - ait->first != bit->first || - ApproxVectorHasher::round(ait->second) != ApproxVectorHasher::round(bit->second)) - return false; - ++bit; - } - if (bit != b.end()) return false; - return true; - } -}; - -struct CandidateCompare { - bool operator()(const Candidate& a, const Candidate& b) const { - ApproxVectorEquals eq; - return (a.ewords == b.ewords && eq(a.fmap,b.fmap)); - } -}; - -struct CandidateHasher { - size_t operator()(const Candidate& x) const { - boost::hash > hhasher; - ApproxVectorHasher vhasher; - size_t ha = hhasher(x.ewords); - boost::hash_combine(ha, vhasher(x.fmap)); - return ha; - } -}; - -static void ParseSparseVector(string& line, size_t cur, SparseVector* out) { - SparseVector& x = *out; - size_t last_start = cur; - size_t last_comma = string::npos; - while(cur <= line.size()) { - if (line[cur] == ' ' || cur == line.size()) { - if (!(cur > last_start && last_comma != string::npos && cur > last_comma)) { - cerr << "[ERROR] " << line << endl << " position = " << cur << endl; - exit(1); - } - const int fid = FD::Convert(line.substr(last_start, last_comma - last_start)); - if (cur < line.size()) line[cur] = 0; - const double val = strtod(&line[last_comma + 1], NULL); - x.set_value(fid, val); - - last_comma = string::npos; - last_start = cur+1; - } else { - if (line[cur] == '=') - last_comma = cur; - } - ++cur; - } -} - -void CandidateSet::WriteToFile(const string& file) const { - WriteFile wf(file); - ostream& out = *wf.stream(); - out.precision(10); - string ss; - for (unsigned i = 0; i < cs.size(); ++i) { - out << TD::GetString(cs[i].ewords) << endl; - out << cs[i].fmap << endl; - cs[i].eval_feats.Encode(&ss); - out << ss << endl; - } -} - -void CandidateSet::ReadFromFile(const string& file) { - if(!SILENT) cerr << "Reading candidates from " << file << endl; - ReadFile rf(file); - istream& in = *rf.stream(); - string cand; - string feats; - string ss; - while(getline(in, cand)) { - getline(in, feats); - getline(in, ss); - assert(in); - cs.push_back(Candidate()); - TD::ConvertSentence(cand, &cs.back().ewords); - ParseSparseVector(feats, 0, &cs.back().fmap); - cs.back().eval_feats = SufficientStats(ss); - } - if(!SILENT) cerr << " read " << cs.size() << " candidates\n"; -} - -void CandidateSet::Dedup() { - if(!SILENT) cerr << "Dedup in=" << cs.size(); - tr1::unordered_set u; - while(cs.size() > 0) { - u.insert(cs.back()); - cs.pop_back(); - } - tr1::unordered_set::iterator it = u.begin(); - while (it != u.end()) { - cs.push_back(*it); - it = u.erase(it); - } - if(!SILENT) cerr << " out=" << cs.size() << endl; -} - -void CandidateSet::AddKBestCandidates(const Hypergraph& hg, size_t kbest_size, const SegmentEvaluator* scorer) { - KBest::KBestDerivations, ESentenceTraversal> kbest(hg, kbest_size); - - for (unsigned i = 0; i < kbest_size; ++i) { - const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = - kbest.LazyKthBest(hg.nodes_.size() - 1, i); - if (!d) break; - cs.push_back(Candidate(d->yield, d->feature_values)); - if (scorer) - scorer->Evaluate(d->yield, &cs.back().eval_feats); - } - Dedup(); -} - -} diff --git a/training/candidate_set.h b/training/candidate_set.h deleted file mode 100644 index 9d326ed0..00000000 --- a/training/candidate_set.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef _CANDIDATE_SET_H_ -#define _CANDIDATE_SET_H_ - -#include -#include - -#include "ns.h" -#include "wordid.h" -#include "sparse_vector.h" - -class Hypergraph; - -namespace training { - -struct Candidate { - Candidate() {} - Candidate(const std::vector& e, const SparseVector& fm) : - ewords(e), - fmap(fm) {} - Candidate(const std::vector& e, - const SparseVector& fm, - const SegmentEvaluator& se) : - ewords(e), - fmap(fm) { - se.Evaluate(ewords, &eval_feats); - } - - void swap(Candidate& other) { - eval_feats.swap(other.eval_feats); - ewords.swap(other.ewords); - fmap.swap(other.fmap); - } - - std::vector ewords; - SparseVector fmap; - SufficientStats eval_feats; -}; - -// represents some kind of collection of translation candidates, e.g. -// aggregated k-best lists, sample lists, etc. -class CandidateSet { - public: - CandidateSet() {} - inline size_t size() const { return cs.size(); } - const Candidate& operator[](size_t i) const { return cs[i]; } - - void ReadFromFile(const std::string& file); - void WriteToFile(const std::string& file) const; - void AddKBestCandidates(const Hypergraph& hg, size_t kbest_size, const SegmentEvaluator* scorer = NULL); - // TODO add code to do unique k-best - // TODO add code to draw k samples - - private: - void Dedup(); - std::vector cs; -}; - -} - -#endif diff --git a/training/cllh_observer.cc b/training/cllh_observer.cc deleted file mode 100644 index 4ec2fa65..00000000 --- a/training/cllh_observer.cc +++ /dev/null @@ -1,52 +0,0 @@ -#include "cllh_observer.h" - -#include -#include - -#include "inside_outside.h" -#include "hg.h" -#include "sentence_metadata.h" - -using namespace std; - -static const double kMINUS_EPSILON = -1e-6; - -ConditionalLikelihoodObserver::~ConditionalLikelihoodObserver() {} - -void ConditionalLikelihoodObserver::NotifyDecodingStart(const SentenceMetadata&) { - cur_obj = 0; - state = 1; -} - -void ConditionalLikelihoodObserver::NotifyTranslationForest(const SentenceMetadata&, Hypergraph* hg) { - assert(state == 1); - state = 2; - SparseVector cur_model_exp; - const prob_t z = InsideOutside, - EdgeFeaturesAndProbWeightFunction>(*hg, &cur_model_exp); - cur_obj = log(z); -} - -void ConditionalLikelihoodObserver::NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) { - assert(state == 2); - state = 3; - SparseVector ref_exp; - const prob_t ref_z = InsideOutside, - EdgeFeaturesAndProbWeightFunction>(*hg, &ref_exp); - - double log_ref_z = log(ref_z); - - // rounding errors means that <0 is too strict - if ((cur_obj - log_ref_z) < kMINUS_EPSILON) { - cerr << "DIFF. ERR! log_model_z < log_ref_z: " << cur_obj << " " << log_ref_z << endl; - exit(1); - } - assert(!std::isnan(log_ref_z)); - acc_obj += (cur_obj - log_ref_z); - trg_words += smeta.GetReference().size(); -} - diff --git a/training/cllh_observer.h b/training/cllh_observer.h deleted file mode 100644 index 0de47331..00000000 --- a/training/cllh_observer.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef _CLLH_OBSERVER_H_ -#define _CLLH_OBSERVER_H_ - -#include "decoder.h" - -struct ConditionalLikelihoodObserver : public DecoderObserver { - - ConditionalLikelihoodObserver() : trg_words(), acc_obj(), cur_obj() {} - ~ConditionalLikelihoodObserver(); - - void Reset() { - acc_obj = 0; - trg_words = 0; - } - - virtual void NotifyDecodingStart(const SentenceMetadata&); - virtual void NotifyTranslationForest(const SentenceMetadata&, Hypergraph* hg); - virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg); - - unsigned trg_words; - double acc_obj; - double cur_obj; - int state; -}; - -#endif diff --git a/training/collapse_weights.cc b/training/collapse_weights.cc deleted file mode 100644 index c03eb031..00000000 --- a/training/collapse_weights.cc +++ /dev/null @@ -1,110 +0,0 @@ -char const* NOTES = - "ZF_and_E means unnormalized scaled features.\n" - "For grammars with one nonterminal: F_and_E is joint,\n" - "F_given_E and E_given_F are conditional.\n" - "TODO: group rules by root nonterminal and then normalize.\n"; - - -#include -#include -#include - -#include -#include -#include - -#include "prob.h" -#include "filelib.h" -#include "trule.h" -#include "weights.h" - -namespace po = boost::program_options; -using namespace std; - -typedef std::tr1::unordered_map, prob_t, boost::hash > > MarginalMap; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("grammar,g", po::value(), "Grammar file") - ("weights,w", po::value(), "Weights file") - ("unnormalized,u", "Always include ZF_and_E unnormalized score (default: only if sum was >1)") - ; - po::options_description clo("Command line options"); - clo.add_options() - ("config,c", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - const string cfg = (*conf)["config"].as(); - cerr << "Configuration file: " << cfg << endl; - ifstream config(cfg.c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || !conf->count("grammar") || !conf->count("weights")) { - cerr << dcmdline_options << endl; - cerr << NOTES << endl; - exit(1); - } -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - const string wfile = conf["weights"].as(); - const string gfile = conf["grammar"].as(); - vector w; - Weights::InitFromFile(wfile, &w); - MarginalMap e_tots; - MarginalMap f_tots; - prob_t tot; - { - ReadFile rf(gfile); - assert(*rf.stream()); - istream& in = *rf.stream(); - cerr << "Computing marginals...\n"; - int lc = 0; - while(in) { - string line; - getline(in, line); - ++lc; - if (line.empty()) continue; - TRule tr(line, true); - if (tr.GetFeatureValues().empty()) - cerr << "Line " << lc << ": empty features - may introduce bias\n"; - prob_t prob; - prob.logeq(tr.GetFeatureValues().dot(w)); - e_tots[tr.e_] += prob; - f_tots[tr.f_] += prob; - tot += prob; - } - } - bool normalized = (fabs(log(tot)) < 0.001); - cerr << "Total: " << tot << (normalized ? " [normalized]" : " [scaled]") << endl; - ReadFile rf(gfile); - istream&in = *rf.stream(); - while(in) { - string line; - getline(in, line); - if (line.empty()) continue; - TRule tr(line, true); - const double lp = tr.GetFeatureValues().dot(w); - if (std::isinf(lp)) { continue; } - tr.scores_.clear(); - - cout << tr.AsString() << " ||| F_and_E=" << lp - log(tot); - if (!normalized || conf.count("unnormalized")) { - cout << ";ZF_and_E=" << lp; - } - cout << ";F_given_E=" << lp - log(e_tots[tr.e_]) - << ";E_given_F=" << lp - log(f_tots[tr.f_]) << endl; - } - return 0; -} - diff --git a/training/crf/Makefile.am b/training/crf/Makefile.am new file mode 100644 index 00000000..d203df25 --- /dev/null +++ b/training/crf/Makefile.am @@ -0,0 +1,27 @@ +bin_PROGRAMS = \ + mpi_batch_optimize \ + mpi_compute_cllh \ + mpi_extract_features \ + mpi_extract_reachable \ + mpi_flex_optimize \ + mpi_online_optimize + +mpi_online_optimize_SOURCES = mpi_online_optimize.cc +mpi_online_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz + +mpi_flex_optimize_SOURCES = mpi_flex_optimize.cc +mpi_flex_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz + +mpi_extract_reachable_SOURCES = mpi_extract_reachable.cc +mpi_extract_reachable_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz + +mpi_extract_features_SOURCES = mpi_extract_features.cc +mpi_extract_features_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz + +mpi_batch_optimize_SOURCES = mpi_batch_optimize.cc cllh_observer.cc +mpi_batch_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz + +mpi_compute_cllh_SOURCES = mpi_compute_cllh.cc cllh_observer.cc +mpi_compute_cllh_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz + +AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir)/training -I$(top_srcdir)/training/utils -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/crf/cllh_observer.cc b/training/crf/cllh_observer.cc new file mode 100644 index 00000000..4ec2fa65 --- /dev/null +++ b/training/crf/cllh_observer.cc @@ -0,0 +1,52 @@ +#include "cllh_observer.h" + +#include +#include + +#include "inside_outside.h" +#include "hg.h" +#include "sentence_metadata.h" + +using namespace std; + +static const double kMINUS_EPSILON = -1e-6; + +ConditionalLikelihoodObserver::~ConditionalLikelihoodObserver() {} + +void ConditionalLikelihoodObserver::NotifyDecodingStart(const SentenceMetadata&) { + cur_obj = 0; + state = 1; +} + +void ConditionalLikelihoodObserver::NotifyTranslationForest(const SentenceMetadata&, Hypergraph* hg) { + assert(state == 1); + state = 2; + SparseVector cur_model_exp; + const prob_t z = InsideOutside, + EdgeFeaturesAndProbWeightFunction>(*hg, &cur_model_exp); + cur_obj = log(z); +} + +void ConditionalLikelihoodObserver::NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) { + assert(state == 2); + state = 3; + SparseVector ref_exp; + const prob_t ref_z = InsideOutside, + EdgeFeaturesAndProbWeightFunction>(*hg, &ref_exp); + + double log_ref_z = log(ref_z); + + // rounding errors means that <0 is too strict + if ((cur_obj - log_ref_z) < kMINUS_EPSILON) { + cerr << "DIFF. ERR! log_model_z < log_ref_z: " << cur_obj << " " << log_ref_z << endl; + exit(1); + } + assert(!std::isnan(log_ref_z)); + acc_obj += (cur_obj - log_ref_z); + trg_words += smeta.GetReference().size(); +} + diff --git a/training/crf/cllh_observer.h b/training/crf/cllh_observer.h new file mode 100644 index 00000000..0de47331 --- /dev/null +++ b/training/crf/cllh_observer.h @@ -0,0 +1,26 @@ +#ifndef _CLLH_OBSERVER_H_ +#define _CLLH_OBSERVER_H_ + +#include "decoder.h" + +struct ConditionalLikelihoodObserver : public DecoderObserver { + + ConditionalLikelihoodObserver() : trg_words(), acc_obj(), cur_obj() {} + ~ConditionalLikelihoodObserver(); + + void Reset() { + acc_obj = 0; + trg_words = 0; + } + + virtual void NotifyDecodingStart(const SentenceMetadata&); + virtual void NotifyTranslationForest(const SentenceMetadata&, Hypergraph* hg); + virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg); + + unsigned trg_words; + double acc_obj; + double cur_obj; + int state; +}; + +#endif diff --git a/training/crf/mpi_batch_optimize.cc b/training/crf/mpi_batch_optimize.cc new file mode 100644 index 00000000..2eff07e4 --- /dev/null +++ b/training/crf/mpi_batch_optimize.cc @@ -0,0 +1,372 @@ +#include +#include +#include +#include +#include + +#include "config.h" +#ifdef HAVE_MPI +#include +#include +namespace mpi = boost::mpi; +#endif + +#include +#include +#include + +#include "sentence_metadata.h" +#include "cllh_observer.h" +#include "verbose.h" +#include "hg.h" +#include "prob.h" +#include "inside_outside.h" +#include "ff_register.h" +#include "decoder.h" +#include "filelib.h" +#include "stringlib.h" +#include "optimize.h" +#include "fdict.h" +#include "weights.h" +#include "sparse_vector.h" + +using namespace std; +namespace po = boost::program_options; + +bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("input_weights,w",po::value(),"Input feature weights file") + ("training_data,t",po::value(),"Training data") + ("test_data,T",po::value(),"(optional) test data") + ("decoder_config,c",po::value(),"Decoder configuration file") + ("output_weights,o",po::value()->default_value("-"),"Output feature weights file") + ("optimization_method,m", po::value()->default_value("lbfgs"), "Optimization method (sgd, lbfgs, rprop)") + ("correction_buffers,M", po::value()->default_value(10), "Number of gradients for LBFGS to maintain in memory") + ("gaussian_prior,p","Use a Gaussian prior on the weights") + ("sigma_squared", po::value()->default_value(1.0), "Sigma squared term for spherical Gaussian prior") + ("means,u", po::value(), "(optional) file containing the means for Gaussian prior"); + po::options_description clo("Command line options"); + clo.add_options() + ("config", po::value(), "Configuration file") + ("help,h", "Print this help message and exit"); + po::options_description dconfig_options, dcmdline_options; + dconfig_options.add(opts); + dcmdline_options.add(opts).add(clo); + + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (conf->count("config")) { + ifstream config((*conf)["config"].as().c_str()); + po::store(po::parse_config_file(config, dconfig_options), *conf); + } + po::notify(*conf); + + if (conf->count("help") || !conf->count("input_weights") || !(conf->count("training_data")) || !conf->count("decoder_config")) { + cerr << dcmdline_options << endl; + return false; + } + return true; +} + +void ReadTrainingCorpus(const string& fname, int rank, int size, vector* c) { + ReadFile rf(fname); + istream& in = *rf.stream(); + string line; + int lc = 0; + while(in) { + getline(in, line); + if (!in) break; + if (lc % size == rank) c->push_back(line); + ++lc; + } +} + +static const double kMINUS_EPSILON = -1e-6; + +struct TrainingObserver : public DecoderObserver { + void Reset() { + acc_grad.clear(); + acc_obj = 0; + total_complete = 0; + trg_words = 0; + } + + void SetLocalGradientAndObjective(vector* g, double* o) const { + *o = acc_obj; + for (SparseVector::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it) + (*g)[it->first] = it->second.as_float(); + } + + virtual void NotifyDecodingStart(const SentenceMetadata& smeta) { + cur_model_exp.clear(); + cur_obj = 0; + state = 1; + } + + // compute model expectations, denominator of objective + virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { + assert(state == 1); + state = 2; + const prob_t z = InsideOutside, + EdgeFeaturesAndProbWeightFunction>(*hg, &cur_model_exp); + cur_obj = log(z); + cur_model_exp /= z; + } + + // compute "empirical" expectations, numerator of objective + virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) { + assert(state == 2); + state = 3; + SparseVector ref_exp; + const prob_t ref_z = InsideOutside, + EdgeFeaturesAndProbWeightFunction>(*hg, &ref_exp); + ref_exp /= ref_z; + + double log_ref_z; +#if 0 + if (crf_uniform_empirical) { + log_ref_z = ref_exp.dot(feature_weights); + } else { + log_ref_z = log(ref_z); + } +#else + log_ref_z = log(ref_z); +#endif + + // rounding errors means that <0 is too strict + if ((cur_obj - log_ref_z) < kMINUS_EPSILON) { + cerr << "DIFF. ERR! log_model_z < log_ref_z: " << cur_obj << " " << log_ref_z << endl; + exit(1); + } + assert(!std::isnan(log_ref_z)); + ref_exp -= cur_model_exp; + acc_grad -= ref_exp; + acc_obj += (cur_obj - log_ref_z); + trg_words += smeta.GetReference().size(); + } + + virtual void NotifyDecodingComplete(const SentenceMetadata& smeta) { + if (state == 3) { + ++total_complete; + } else { + } + } + + int total_complete; + SparseVector cur_model_exp; + SparseVector acc_grad; + double acc_obj; + double cur_obj; + unsigned trg_words; + int state; +}; + +void ReadConfig(const string& ini, vector* out) { + ReadFile rf(ini); + istream& in = *rf.stream(); + while(in) { + string line; + getline(in, line); + if (!in) continue; + out->push_back(line); + } +} + +void StoreConfig(const vector& cfg, istringstream* o) { + ostringstream os; + for (int i = 0; i < cfg.size(); ++i) { os << cfg[i] << endl; } + o->str(os.str()); +} + +template +struct VectorPlus : public binary_function, vector, vector > { + vector operator()(const vector& a, const vector& b) const { + assert(a.size() == b.size()); + vector v(a.size()); + transform(a.begin(), a.end(), b.begin(), v.begin(), plus()); + return v; + } +}; + +int main(int argc, char** argv) { +#ifdef HAVE_MPI + mpi::environment env(argc, argv); + mpi::communicator world; + const int size = world.size(); + const int rank = world.rank(); +#else + const int size = 1; + const int rank = 0; +#endif + SetSilent(true); // turn off verbose decoder output + register_feature_functions(); + + po::variables_map conf; + if (!InitCommandLine(argc, argv, &conf)) return 1; + + // load cdec.ini and set up decoder + vector cdec_ini; + ReadConfig(conf["decoder_config"].as(), &cdec_ini); + istringstream ini; + StoreConfig(cdec_ini, &ini); + if (rank == 0) cerr << "Loading grammar...\n"; + Decoder* decoder = new Decoder(&ini); + if (decoder->GetConf()["input"].as() != "-") { + cerr << "cdec.ini must not set an input file\n"; + return 1; + } + if (rank == 0) cerr << "Done loading grammar!\n"; + + // load initial weights + if (rank == 0) { cerr << "Loading weights...\n"; } + vector& lambdas = decoder->CurrentWeightVector(); + Weights::InitFromFile(conf["input_weights"].as(), &lambdas); + if (rank == 0) { cerr << "Done loading weights.\n"; } + + // freeze feature set (should be optional?) + const bool freeze_feature_set = true; + if (freeze_feature_set) FD::Freeze(); + + const int num_feats = FD::NumFeats(); + if (rank == 0) cerr << "Number of features: " << num_feats << endl; + lambdas.resize(num_feats); + + const bool gaussian_prior = conf.count("gaussian_prior"); + vector means(num_feats, 0); + if (conf.count("means")) { + if (!gaussian_prior) { + cerr << "Don't use --means without --gaussian_prior!\n"; + exit(1); + } + Weights::InitFromFile(conf["means"].as(), &means); + } + boost::shared_ptr o; + if (rank == 0) { + const string omethod = conf["optimization_method"].as(); + if (omethod == "rprop") + o.reset(new RPropOptimizer(num_feats)); // TODO add configuration + else + o.reset(new LBFGSOptimizer(num_feats, conf["correction_buffers"].as())); + cerr << "Optimizer: " << o->Name() << endl; + } + double objective = 0; + vector gradient(num_feats, 0.0); + vector rcv_grad; + rcv_grad.clear(); + bool converged = false; + + vector corpus, test_corpus; + ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus); + assert(corpus.size() > 0); + if (conf.count("test_data")) + ReadTrainingCorpus(conf["test_data"].as(), rank, size, &test_corpus); + + TrainingObserver observer; + ConditionalLikelihoodObserver cllh_observer; + while (!converged) { + observer.Reset(); + cllh_observer.Reset(); +#ifdef HAVE_MPI + mpi::timer timer; + world.barrier(); +#endif + if (rank == 0) { + cerr << "Starting decoding... (~" << corpus.size() << " sentences / proc)\n"; + cerr << " Testset size: " << test_corpus.size() << " sentences / proc)\n"; + } + for (int i = 0; i < corpus.size(); ++i) + decoder->Decode(corpus[i], &observer); + cerr << " process " << rank << '/' << size << " done\n"; + fill(gradient.begin(), gradient.end(), 0); + observer.SetLocalGradientAndObjective(&gradient, &objective); + + unsigned total_words = 0; +#ifdef HAVE_MPI + double to = 0; + rcv_grad.resize(num_feats, 0.0); + mpi::reduce(world, &gradient[0], gradient.size(), &rcv_grad[0], plus(), 0); + swap(gradient, rcv_grad); + rcv_grad.clear(); + + reduce(world, observer.trg_words, total_words, std::plus(), 0); + mpi::reduce(world, objective, to, plus(), 0); + objective = to; +#else + total_words = observer.trg_words; +#endif + if (rank == 0) + cerr << "TRAINING CORPUS: ln p(f|e)=" << objective << "\t log_2 p(f|e) = " << (objective/log(2)) << "\t cond. entropy = " << (objective/log(2) / total_words) << "\t ppl = " << pow(2, (objective/log(2) / total_words)) << endl; + + for (int i = 0; i < test_corpus.size(); ++i) + decoder->Decode(test_corpus[i], &cllh_observer); + + double test_objective = 0; + unsigned test_total_words = 0; +#ifdef HAVE_MPI + reduce(world, cllh_observer.acc_obj, test_objective, std::plus(), 0); + reduce(world, cllh_observer.trg_words, test_total_words, std::plus(), 0); +#else + test_objective = cllh_observer.acc_obj; + test_total_words = cllh_observer.trg_words; +#endif + + if (rank == 0) { // run optimizer only on rank=0 node + if (test_corpus.size()) + cerr << " TEST CORPUS: ln p(f|e)=" << test_objective << "\t log_2 p(f|e) = " << (test_objective/log(2)) << "\t cond. entropy = " << (test_objective/log(2) / test_total_words) << "\t ppl = " << pow(2, (test_objective/log(2) / test_total_words)) << endl; + if (gaussian_prior) { + const double sigsq = conf["sigma_squared"].as(); + double norm = 0; + for (int k = 1; k < lambdas.size(); ++k) { + const double& lambda_k = lambdas[k]; + if (lambda_k) { + const double param = (lambda_k - means[k]); + norm += param * param; + gradient[k] += param / sigsq; + } + } + const double reg = norm / (2.0 * sigsq); + cerr << "REGULARIZATION TERM: " << reg << endl; + objective += reg; + } + cerr << "EVALUATION #" << o->EvaluationCount() << " OBJECTIVE: " << objective << endl; + double gnorm = 0; + for (int i = 0; i < gradient.size(); ++i) + gnorm += gradient[i] * gradient[i]; + cerr << " GNORM=" << sqrt(gnorm) << endl; + vector old = lambdas; + int c = 0; + while (old == lambdas) { + ++c; + if (c > 1) { cerr << "Same lambdas, repeating optimization\n"; } + o->Optimize(objective, gradient, &lambdas); + assert(c < 5); + } + old.clear(); + Weights::SanityCheck(lambdas); + Weights::ShowLargestFeatures(lambdas); + + converged = o->HasConverged(); + if (converged) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; } + + string fname = "weights.cur.gz"; + if (converged) { fname = "weights.final.gz"; } + ostringstream vv; + vv << "Objective = " << objective << " (eval count=" << o->EvaluationCount() << ")"; + const string svv = vv.str(); + Weights::WriteToFile(fname, lambdas, true, &svv); + } // rank == 0 + int cint = converged; +#ifdef HAVE_MPI + mpi::broadcast(world, &lambdas[0], lambdas.size(), 0); + mpi::broadcast(world, cint, 0); + if (rank == 0) { cerr << " ELAPSED TIME THIS ITERATION=" << timer.elapsed() << endl; } +#endif + converged = cint; + } + return 0; +} + diff --git a/training/crf/mpi_compute_cllh.cc b/training/crf/mpi_compute_cllh.cc new file mode 100644 index 00000000..066389d0 --- /dev/null +++ b/training/crf/mpi_compute_cllh.cc @@ -0,0 +1,134 @@ +#include +#include +#include +#include + +#include "config.h" +#ifdef HAVE_MPI +#include +#endif +#include +#include + +#include "cllh_observer.h" +#include "sentence_metadata.h" +#include "verbose.h" +#include "hg.h" +#include "prob.h" +#include "inside_outside.h" +#include "ff_register.h" +#include "decoder.h" +#include "filelib.h" +#include "weights.h" + +using namespace std; +namespace po = boost::program_options; + +bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("weights,w",po::value(),"Input feature weights file") + ("training_data,t",po::value(),"Training data corpus") + ("decoder_config,c",po::value(),"Decoder configuration file"); + po::options_description clo("Command line options"); + clo.add_options() + ("config", po::value(), "Configuration file") + ("help,h", "Print this help message and exit"); + po::options_description dconfig_options, dcmdline_options; + dconfig_options.add(opts); + dcmdline_options.add(opts).add(clo); + + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (conf->count("config")) { + ifstream config((*conf)["config"].as().c_str()); + po::store(po::parse_config_file(config, dconfig_options), *conf); + } + po::notify(*conf); + + if (conf->count("help") || !conf->count("training_data") || !conf->count("decoder_config")) { + cerr << dcmdline_options << endl; + return false; + } + return true; +} + +void ReadInstances(const string& fname, int rank, int size, vector* c) { + assert(fname != "-"); + ReadFile rf(fname); + istream& in = *rf.stream(); + string line; + int lc = 0; + while(in) { + getline(in, line); + if (!in) break; + if (lc % size == rank) c->push_back(line); + ++lc; + } +} + +static const double kMINUS_EPSILON = -1e-6; + +#ifdef HAVE_MPI +namespace mpi = boost::mpi; +#endif + +int main(int argc, char** argv) { +#ifdef HAVE_MPI + mpi::environment env(argc, argv); + mpi::communicator world; + const int size = world.size(); + const int rank = world.rank(); +#else + const int size = 1; + const int rank = 0; +#endif + if (size > 1) SetSilent(true); // turn off verbose decoder output + register_feature_functions(); + + po::variables_map conf; + if (!InitCommandLine(argc, argv, &conf)) + return false; + + // load cdec.ini and set up decoder + ReadFile ini_rf(conf["decoder_config"].as()); + Decoder decoder(ini_rf.stream()); + if (decoder.GetConf()["input"].as() != "-") { + cerr << "cdec.ini must not set an input file\n"; + abort(); + } + + // load weights + vector& weights = decoder.CurrentWeightVector(); + if (conf.count("weights")) + Weights::InitFromFile(conf["weights"].as(), &weights); + + vector corpus; + ReadInstances(conf["training_data"].as(), rank, size, &corpus); + assert(corpus.size() > 0); + + if (rank == 0) + cerr << "Each processor is decoding ~" << corpus.size() << " training examples...\n"; + + ConditionalLikelihoodObserver observer; + for (int i = 0; i < corpus.size(); ++i) + decoder.Decode(corpus[i], &observer); + + double objective = 0; + unsigned total_words = 0; +#ifdef HAVE_MPI + reduce(world, observer.acc_obj, objective, std::plus(), 0); + reduce(world, observer.trg_words, total_words, std::plus(), 0); +#else + objective = observer.acc_obj; +#endif + + if (rank == 0) { + cout << "CONDITIONAL LOG_e LIKELIHOOD: " << objective << endl; + cout << "CONDITIONAL LOG_2 LIKELIHOOD: " << (objective/log(2)) << endl; + cout << " CONDITIONAL ENTROPY: " << (objective/log(2) / total_words) << endl; + cout << " PERPLEXITY: " << pow(2, (objective/log(2) / total_words)) << endl; + } + + return 0; +} + diff --git a/training/crf/mpi_extract_features.cc b/training/crf/mpi_extract_features.cc new file mode 100644 index 00000000..6750aa15 --- /dev/null +++ b/training/crf/mpi_extract_features.cc @@ -0,0 +1,151 @@ +#include +#include +#include +#include + +#include "config.h" +#ifdef HAVE_MPI +#include +#endif +#include +#include + +#include "ff_register.h" +#include "verbose.h" +#include "filelib.h" +#include "fdict.h" +#include "decoder.h" +#include "weights.h" + +using namespace std; +namespace po = boost::program_options; + +bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("training_data,t",po::value(),"Training data corpus") + ("decoder_config,c",po::value(),"Decoder configuration file") + ("weights,w", po::value(), "(Optional) weights file; weights may affect what features are encountered in pruning configurations") + ("output_prefix,o",po::value()->default_value("features"),"Output path prefix"); + po::options_description clo("Command line options"); + clo.add_options() + ("config", po::value(), "Configuration file") + ("help,h", "Print this help message and exit"); + po::options_description dconfig_options, dcmdline_options; + dconfig_options.add(opts); + dcmdline_options.add(opts).add(clo); + + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (conf->count("config")) { + ifstream config((*conf)["config"].as().c_str()); + po::store(po::parse_config_file(config, dconfig_options), *conf); + } + po::notify(*conf); + + if (conf->count("help") || !conf->count("training_data") || !conf->count("decoder_config")) { + cerr << "Decode an input set (optionally in parallel using MPI) and write\nout the feature strings encountered.\n"; + cerr << dcmdline_options << endl; + return false; + } + return true; +} + +void ReadTrainingCorpus(const string& fname, int rank, int size, vector* c) { + ReadFile rf(fname); + istream& in = *rf.stream(); + string line; + int lc = 0; + while(in) { + getline(in, line); + if (!in) break; + if (lc % size == rank) c->push_back(line); + ++lc; + } +} + +static const double kMINUS_EPSILON = -1e-6; + +struct TrainingObserver : public DecoderObserver { + + virtual void NotifyDecodingStart(const SentenceMetadata&) { + } + + // compute model expectations, denominator of objective + virtual void NotifyTranslationForest(const SentenceMetadata&, Hypergraph* hg) { + } + + // compute "empirical" expectations, numerator of objective + virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) { + } +}; + +#ifdef HAVE_MPI +namespace mpi = boost::mpi; +#endif + +int main(int argc, char** argv) { +#ifdef HAVE_MPI + mpi::environment env(argc, argv); + mpi::communicator world; + const int size = world.size(); + const int rank = world.rank(); +#else + const int size = 1; + const int rank = 0; +#endif + if (size > 1) SetSilent(true); // turn off verbose decoder output + register_feature_functions(); + + po::variables_map conf; + if (!InitCommandLine(argc, argv, &conf)) + return false; + + // load cdec.ini and set up decoder + ReadFile ini_rf(conf["decoder_config"].as()); + Decoder decoder(ini_rf.stream()); + if (decoder.GetConf()["input"].as() != "-") { + cerr << "cdec.ini must not set an input file\n"; + abort(); + } + + if (FD::UsingPerfectHashFunction()) { + cerr << "Your configuration file has enabled a cmph hash function. Please disable.\n"; + return 1; + } + + // load optional weights + if (conf.count("weights")) + Weights::InitFromFile(conf["weights"].as(), &decoder.CurrentWeightVector()); + + vector corpus; + ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus); + assert(corpus.size() > 0); + + TrainingObserver observer; + + if (rank == 0) + cerr << "Each processor is decoding ~" << corpus.size() << " training examples...\n"; + + for (int i = 0; i < corpus.size(); ++i) + decoder.Decode(corpus[i], &observer); + + { + ostringstream os; + os << conf["output_prefix"].as() << '.' << rank << "_of_" << size; + WriteFile wf(os.str()); + ostream& out = *wf.stream(); + const unsigned num_feats = FD::NumFeats(); + for (unsigned i = 1; i < num_feats; ++i) { + out << FD::Convert(i) << endl; + } + cerr << "Wrote " << os.str() << endl; + } + +#ifdef HAVE_MPI + world.barrier(); +#else +#endif + + return 0; +} + diff --git a/training/crf/mpi_extract_reachable.cc b/training/crf/mpi_extract_reachable.cc new file mode 100644 index 00000000..2a7c2b9d --- /dev/null +++ b/training/crf/mpi_extract_reachable.cc @@ -0,0 +1,163 @@ +#include +#include +#include +#include + +#include "config.h" +#ifdef HAVE_MPI +#include +#endif +#include +#include + +#include "ff_register.h" +#include "verbose.h" +#include "filelib.h" +#include "fdict.h" +#include "decoder.h" +#include "weights.h" + +using namespace std; +namespace po = boost::program_options; + +bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("training_data,t",po::value(),"Training data corpus") + ("decoder_config,c",po::value(),"Decoder configuration file") + ("weights,w", po::value(), "(Optional) weights file; weights may affect what features are encountered in pruning configurations") + ("output_prefix,o",po::value()->default_value("reachable"),"Output path prefix"); + po::options_description clo("Command line options"); + clo.add_options() + ("config", po::value(), "Configuration file") + ("help,h", "Print this help message and exit"); + po::options_description dconfig_options, dcmdline_options; + dconfig_options.add(opts); + dcmdline_options.add(opts).add(clo); + + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (conf->count("config")) { + ifstream config((*conf)["config"].as().c_str()); + po::store(po::parse_config_file(config, dconfig_options), *conf); + } + po::notify(*conf); + + if (conf->count("help") || !conf->count("training_data") || !conf->count("decoder_config")) { + cerr << "Decode an input set (optionally in parallel using MPI) and write\nout the inputs that produce reachable parallel parses.\n"; + cerr << dcmdline_options << endl; + return false; + } + return true; +} + +void ReadTrainingCorpus(const string& fname, int rank, int size, vector* c) { + ReadFile rf(fname); + istream& in = *rf.stream(); + string line; + int lc = 0; + while(in) { + getline(in, line); + if (!in) break; + if (lc % size == rank) c->push_back(line); + ++lc; + } +} + +static const double kMINUS_EPSILON = -1e-6; + +struct ReachabilityObserver : public DecoderObserver { + + virtual void NotifyDecodingStart(const SentenceMetadata&) { + reachable = false; + } + + // compute model expectations, denominator of objective + virtual void NotifyTranslationForest(const SentenceMetadata&, Hypergraph* hg) { + } + + // compute "empirical" expectations, numerator of objective + virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) { + reachable = true; + } + + bool reachable; +}; + +#ifdef HAVE_MPI +namespace mpi = boost::mpi; +#endif + +int main(int argc, char** argv) { +#ifdef HAVE_MPI + mpi::environment env(argc, argv); + mpi::communicator world; + const int size = world.size(); + const int rank = world.rank(); +#else + const int size = 1; + const int rank = 0; +#endif + if (size > 1) SetSilent(true); // turn off verbose decoder output + register_feature_functions(); + + po::variables_map conf; + if (!InitCommandLine(argc, argv, &conf)) + return false; + + // load cdec.ini and set up decoder + ReadFile ini_rf(conf["decoder_config"].as()); + Decoder decoder(ini_rf.stream()); + if (decoder.GetConf()["input"].as() != "-") { + cerr << "cdec.ini must not set an input file\n"; + abort(); + } + + if (FD::UsingPerfectHashFunction()) { + cerr << "Your configuration file has enabled a cmph hash function. Please disable.\n"; + return 1; + } + + // load optional weights + if (conf.count("weights")) + Weights::InitFromFile(conf["weights"].as(), &decoder.CurrentWeightVector()); + + vector corpus; + ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus); + assert(corpus.size() > 0); + + + if (rank == 0) + cerr << "Each processor is decoding ~" << corpus.size() << " training examples...\n"; + + size_t num_reached = 0; + { + ostringstream os; + os << conf["output_prefix"].as() << '.' << rank << "_of_" << size; + WriteFile wf(os.str()); + ostream& out = *wf.stream(); + ReachabilityObserver observer; + for (int i = 0; i < corpus.size(); ++i) { + decoder.Decode(corpus[i], &observer); + if (observer.reachable) { + out << corpus[i] << endl; + ++num_reached; + } + corpus[i].clear(); + } + cerr << "Shard " << rank << '/' << size << " finished, wrote " + << num_reached << " instances to " << os.str() << endl; + } + + size_t total = 0; +#ifdef HAVE_MPI + reduce(world, num_reached, total, std::plus(), 0); +#else + total = num_reached; +#endif + if (rank == 0) { + cerr << "-----------------------------------------\n"; + cerr << "TOTAL = " << total << " instances\n"; + } + return 0; +} + diff --git a/training/crf/mpi_flex_optimize.cc b/training/crf/mpi_flex_optimize.cc new file mode 100644 index 00000000..b52decdc --- /dev/null +++ b/training/crf/mpi_flex_optimize.cc @@ -0,0 +1,386 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "stringlib.h" +#include "verbose.h" +#include "hg.h" +#include "prob.h" +#include "inside_outside.h" +#include "ff_register.h" +#include "decoder.h" +#include "filelib.h" +#include "optimize.h" +#include "fdict.h" +#include "weights.h" +#include "sparse_vector.h" +#include "sampler.h" + +#ifdef HAVE_MPI +#include +#include +namespace mpi = boost::mpi; +#endif + +using namespace std; +namespace po = boost::program_options; + +bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("cdec_config,c",po::value(),"Decoder configuration file") + ("weights,w",po::value(),"Initial feature weights") + ("training_data,d",po::value(),"Training data") + ("minibatch_size_per_proc,s", po::value()->default_value(6), "Number of training instances evaluated per processor in each minibatch") + ("minibatch_iterations,i", po::value()->default_value(10), "Number of optimization iterations per minibatch") + ("iterations,I", po::value()->default_value(50), "Number of passes through the training data before termination") + ("regularization_strength,C", po::value()->default_value(0.2), "Regularization strength") + ("time_series_strength,T", po::value()->default_value(0.0), "Time series regularization strength") + ("random_seed,S", po::value(), "Random seed (if not specified, /dev/random will be used)") + ("lbfgs_memory_buffers,M", po::value()->default_value(10), "Number of memory buffers for LBFGS history"); + po::options_description clo("Command line options"); + clo.add_options() + ("config", po::value(), "Configuration file") + ("help,h", "Print this help message and exit"); + po::options_description dconfig_options, dcmdline_options; + dconfig_options.add(opts); + dcmdline_options.add(opts).add(clo); + + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (conf->count("config")) { + ifstream config((*conf)["config"].as().c_str()); + po::store(po::parse_config_file(config, dconfig_options), *conf); + } + po::notify(*conf); + + if (conf->count("help") || !conf->count("training_data") || !conf->count("cdec_config")) { + cerr << "LBFGS minibatch online optimizer (MPI support " +#if HAVE_MPI + << "enabled" +#else + << "not enabled" +#endif + << ")\n" << dcmdline_options << endl; + return false; + } + return true; +} + +void ReadTrainingCorpus(const string& fname, int rank, int size, vector* c, vector* order) { + ReadFile rf(fname); + istream& in = *rf.stream(); + string line; + int id = 0; + while(in) { + getline(in, line); + if (!in) break; + if (id % size == rank) { + c->push_back(line); + order->push_back(id); + } + ++id; + } +} + +static const double kMINUS_EPSILON = -1e-6; + +struct CopyHGsObserver : public DecoderObserver { + Hypergraph* hg_; + Hypergraph* gold_hg_; + + // this can free up some memory + void RemoveRules(Hypergraph* h) { + for (unsigned i = 0; i < h->edges_.size(); ++i) + h->edges_[i].rule_.reset(); + } + + void SetCurrentHypergraphs(Hypergraph* h, Hypergraph* gold_h) { + hg_ = h; + gold_hg_ = gold_h; + } + + virtual void NotifyDecodingStart(const SentenceMetadata&) { + state = 1; + } + + // compute model expectations, denominator of objective + virtual void NotifyTranslationForest(const SentenceMetadata&, Hypergraph* hg) { + *hg_ = *hg; + RemoveRules(hg_); + assert(state == 1); + state = 2; + } + + // compute "empirical" expectations, numerator of objective + virtual void NotifyAlignmentForest(const SentenceMetadata&, Hypergraph* hg) { + assert(state == 2); + state = 3; + *gold_hg_ = *hg; + RemoveRules(gold_hg_); + } + + virtual void NotifyDecodingComplete(const SentenceMetadata&) { + if (state == 3) { + } else { + hg_->clear(); + gold_hg_->clear(); + } + } + + int state; +}; + +void ReadConfig(const string& ini, istringstream* out) { + ReadFile rf(ini); + istream& in = *rf.stream(); + ostringstream os; + while(in) { + string line; + getline(in, line); + if (!in) continue; + os << line << endl; + } + out->str(os.str()); +} + +#ifdef HAVE_MPI +namespace boost { namespace mpi { + template<> + struct is_commutative >, SparseVector > + : mpl::true_ { }; +} } // end namespace boost::mpi +#endif + +void AddGrad(const SparseVector x, double s, SparseVector* acc) { + for (SparseVector::const_iterator it = x.begin(); it != x.end(); ++it) + acc->add_value(it->first, it->second.as_float() * s); +} + +double PNorm(const vector& v, const double p) { + double acc = 0; + for (int i = 0; i < v.size(); ++i) + acc += pow(v[i], p); + return pow(acc, 1.0 / p); +} + +void VV(ostream&os, const vector& v) { + for (int i = 1; i < v.size(); ++i) + if (v[i]) os << FD::Convert(i) << "=" << v[i] << " "; +} + +double ApplyRegularizationTerms(const double C, + const double T, + const vector& weights, + const vector& prev_weights, + double* g) { + double reg = 0; + for (size_t i = 0; i < weights.size(); ++i) { + const double prev_w_i = (i < prev_weights.size() ? prev_weights[i] : 0.0); + const double& w_i = weights[i]; + reg += C * w_i * w_i; + g[i] += 2 * C * w_i; + + reg += T * (w_i - prev_w_i) * (w_i - prev_w_i); + g[i] += 2 * T * (w_i - prev_w_i); + } + return reg; +} + +int main(int argc, char** argv) { +#ifdef HAVE_MPI + mpi::environment env(argc, argv); + mpi::communicator world; + const int size = world.size(); + const int rank = world.rank(); +#else + const int size = 1; + const int rank = 0; +#endif + if (size > 1) SetSilent(true); // turn off verbose decoder output + register_feature_functions(); + MT19937* rng = NULL; + + po::variables_map conf; + if (!InitCommandLine(argc, argv, &conf)) + return 1; + + boost::shared_ptr o; + const unsigned lbfgs_memory_buffers = conf["lbfgs_memory_buffers"].as(); + const unsigned size_per_proc = conf["minibatch_size_per_proc"].as(); + const unsigned minibatch_iterations = conf["minibatch_iterations"].as(); + const double regularization_strength = conf["regularization_strength"].as(); + const double time_series_strength = conf["time_series_strength"].as(); + const bool use_time_series_reg = time_series_strength > 0.0; + const unsigned max_iteration = conf["iterations"].as(); + + vector corpus; + vector ids; + ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus, &ids); + assert(corpus.size() > 0); + + if (size_per_proc > corpus.size()) { + cerr << "Minibatch size (per processor) must be smaller or equal to the local corpus size!\n"; + return 1; + } + + // initialize decoder (loads hash functions if necessary) + istringstream ins; + ReadConfig(conf["cdec_config"].as(), &ins); + Decoder decoder(&ins); + + // load initial weights + vector prev_weights; + if (conf.count("weights")) + Weights::InitFromFile(conf["weights"].as(), &prev_weights); + + if (conf.count("random_seed")) + rng = new MT19937(conf["random_seed"].as()); + else + rng = new MT19937; + + size_t total_corpus_size = 0; +#ifdef HAVE_MPI + reduce(world, corpus.size(), total_corpus_size, std::plus(), 0); +#else + total_corpus_size = corpus.size(); +#endif + + if (rank == 0) + cerr << "Total corpus size: " << total_corpus_size << endl; + + CopyHGsObserver observer; + + int write_weights_every_ith = 100; // TODO configure + int titer = -1; + + vector& cur_weights = decoder.CurrentWeightVector(); + if (use_time_series_reg) { + cur_weights = prev_weights; + } else { + cur_weights.swap(prev_weights); + prev_weights.clear(); + } + + int iter = -1; + bool converged = false; + vector gg; + while (!converged) { +#ifdef HAVE_MPI + mpi::timer timer; +#endif + ++iter; ++titer; + if (rank == 0) { + converged = (iter == max_iteration); + string fname = "weights.cur.gz"; + if (iter % write_weights_every_ith == 0) { + ostringstream o; o << "weights.epoch_" << iter << ".gz"; + fname = o.str(); + } + if (converged) { fname = "weights.final.gz"; } + ostringstream vv; + vv << "total iter=" << titer << " (of current config iter=" << iter << ") minibatch=" << size_per_proc << " sentences/proc x " << size << " procs. num_feats=" << FD::NumFeats() << " passes_thru_data=" << (titer * size_per_proc / static_cast(corpus.size())); + const string svv = vv.str(); + Weights::WriteToFile(fname, cur_weights, true, &svv); + } + + vector hgs(size_per_proc); + vector gold_hgs(size_per_proc); + for (int i = 0; i < size_per_proc; ++i) { + int ei = corpus.size() * rng->next(); + int id = ids[ei]; + observer.SetCurrentHypergraphs(&hgs[i], &gold_hgs[i]); + decoder.SetId(id); + decoder.Decode(corpus[ei], &observer); + } + + SparseVector local_grad, g; + double local_obj = 0; + o.reset(); + for (unsigned mi = 0; mi < minibatch_iterations; ++mi) { + local_grad.clear(); + g.clear(); + local_obj = 0; + + for (unsigned i = 0; i < size_per_proc; ++i) { + Hypergraph& hg = hgs[i]; + Hypergraph& hg_gold = gold_hgs[i]; + if (hg.edges_.size() < 2) continue; + + hg.Reweight(cur_weights); + hg_gold.Reweight(cur_weights); + SparseVector model_exp, gold_exp; + const prob_t z = InsideOutside, + EdgeFeaturesAndProbWeightFunction>(hg, &model_exp); + local_obj += log(z); + model_exp /= z; + AddGrad(model_exp, 1.0, &local_grad); + model_exp.clear(); + + const prob_t goldz = InsideOutside, + EdgeFeaturesAndProbWeightFunction>(hg_gold, &gold_exp); + local_obj -= log(goldz); + + if (log(z) - log(goldz) < kMINUS_EPSILON) { + cerr << "DIFF. ERR! log_model_z < log_gold_z: " << log(z) << " " << log(goldz) << endl; + return 1; + } + + gold_exp /= goldz; + AddGrad(gold_exp, -1.0, &local_grad); + } + + double obj = 0; +#ifdef HAVE_MPI + reduce(world, local_obj, obj, std::plus(), 0); + reduce(world, local_grad, g, std::plus >(), 0); +#else + obj = local_obj; + g.swap(local_grad); +#endif + local_grad.clear(); + if (rank == 0) { + // g /= (size_per_proc * size); + if (!o) + o.reset(new LBFGSOptimizer(FD::NumFeats(), lbfgs_memory_buffers)); + gg.clear(); + gg.resize(FD::NumFeats()); + if (gg.size() != cur_weights.size()) { cur_weights.resize(gg.size()); } + for (SparseVector::iterator it = g.begin(); it != g.end(); ++it) + if (it->first) { gg[it->first] = it->second; } + g.clear(); + double r = ApplyRegularizationTerms(regularization_strength, + time_series_strength, // * (iter == 0 ? 0.0 : 1.0), + cur_weights, + prev_weights, + &gg[0]); + obj += r; + if (mi == 0 || mi == (minibatch_iterations - 1)) { + if (!mi) cerr << iter << ' '; else cerr << ' '; + cerr << "OBJ=" << obj << " (REG=" << r << ")" << " |g|=" << PNorm(gg, 2) << " |w|=" << PNorm(cur_weights, 2); + if (mi > 0) cerr << endl << flush; else cerr << ' '; + } else { cerr << '.' << flush; } + // cerr << "w = "; VV(cerr, cur_weights); cerr << endl; + // cerr << "g = "; VV(cerr, gg); cerr << endl; + o->Optimize(obj, gg, &cur_weights); + } +#ifdef HAVE_MPI + broadcast(world, cur_weights, 0); + broadcast(world, converged, 0); + world.barrier(); +#endif + } + prev_weights = cur_weights; + } + return 0; +} diff --git a/training/crf/mpi_online_optimize.cc b/training/crf/mpi_online_optimize.cc new file mode 100644 index 00000000..d6968848 --- /dev/null +++ b/training/crf/mpi_online_optimize.cc @@ -0,0 +1,374 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "stringlib.h" +#include "verbose.h" +#include "hg.h" +#include "prob.h" +#include "inside_outside.h" +#include "ff_register.h" +#include "decoder.h" +#include "filelib.h" +#include "online_optimizer.h" +#include "fdict.h" +#include "weights.h" +#include "sparse_vector.h" +#include "sampler.h" + +#ifdef HAVE_MPI +#include +#include +namespace mpi = boost::mpi; +#endif + +using namespace std; +namespace po = boost::program_options; + +bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("input_weights,w",po::value(),"Input feature weights file") + ("frozen_features,z",po::value(), "List of features not to optimize") + ("training_data,t",po::value(),"Training data corpus") + ("training_agenda,a",po::value(), "Text file listing a series of configuration files and the number of iterations to train using each configuration successively") + ("minibatch_size_per_proc,s", po::value()->default_value(5), "Number of training instances evaluated per processor in each minibatch") + ("optimization_method,m", po::value()->default_value("sgd"), "Optimization method (sgd)") + ("random_seed,S", po::value(), "Random seed (if not specified, /dev/random will be used)") + ("eta_0,e", po::value()->default_value(0.2), "Initial learning rate for SGD (eta_0)") + ("L1,1","Use L1 regularization") + ("regularization_strength,C", po::value()->default_value(1.0), "Regularization strength (C)"); + po::options_description clo("Command line options"); + clo.add_options() + ("config", po::value(), "Configuration file") + ("help,h", "Print this help message and exit"); + po::options_description dconfig_options, dcmdline_options; + dconfig_options.add(opts); + dcmdline_options.add(opts).add(clo); + + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (conf->count("config")) { + ifstream config((*conf)["config"].as().c_str()); + po::store(po::parse_config_file(config, dconfig_options), *conf); + } + po::notify(*conf); + + if (conf->count("help") || !conf->count("training_data") || !conf->count("training_agenda")) { + cerr << dcmdline_options << endl; + return false; + } + return true; +} + +void ReadTrainingCorpus(const string& fname, int rank, int size, vector* c, vector* order) { + ReadFile rf(fname); + istream& in = *rf.stream(); + string line; + int id = 0; + while(in) { + getline(in, line); + if (!in) break; + if (id % size == rank) { + c->push_back(line); + order->push_back(id); + } + ++id; + } +} + +static const double kMINUS_EPSILON = -1e-6; + +struct TrainingObserver : public DecoderObserver { + void Reset() { + acc_grad.clear(); + acc_obj = 0; + total_complete = 0; + } + + void SetLocalGradientAndObjective(vector* g, double* o) const { + *o = acc_obj; + for (SparseVector::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it) + (*g)[it->first] = it->second.as_float(); + } + + virtual void NotifyDecodingStart(const SentenceMetadata& smeta) { + cur_model_exp.clear(); + cur_obj = 0; + state = 1; + } + + // compute model expectations, denominator of objective + virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { + assert(state == 1); + state = 2; + const prob_t z = InsideOutside, + EdgeFeaturesAndProbWeightFunction>(*hg, &cur_model_exp); + cur_obj = log(z); + cur_model_exp /= z; + } + + // compute "empirical" expectations, numerator of objective + virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) { + assert(state == 2); + state = 3; + SparseVector ref_exp; + const prob_t ref_z = InsideOutside, + EdgeFeaturesAndProbWeightFunction>(*hg, &ref_exp); + ref_exp /= ref_z; + + double log_ref_z; +#if 0 + if (crf_uniform_empirical) { + log_ref_z = ref_exp.dot(feature_weights); + } else { + log_ref_z = log(ref_z); + } +#else + log_ref_z = log(ref_z); +#endif + + // rounding errors means that <0 is too strict + if ((cur_obj - log_ref_z) < kMINUS_EPSILON) { + cerr << "DIFF. ERR! log_model_z < log_ref_z: " << cur_obj << " " << log_ref_z << endl; + exit(1); + } + assert(!std::isnan(log_ref_z)); + ref_exp -= cur_model_exp; + acc_grad += ref_exp; + acc_obj += (cur_obj - log_ref_z); + } + + virtual void NotifyDecodingComplete(const SentenceMetadata& smeta) { + if (state == 3) { + ++total_complete; + } else { + } + } + + void GetGradient(SparseVector* g) const { + g->clear(); + for (SparseVector::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it) + g->set_value(it->first, it->second.as_float()); + } + + int total_complete; + SparseVector cur_model_exp; + SparseVector acc_grad; + double acc_obj; + double cur_obj; + int state; +}; + +#ifdef HAVE_MPI +namespace boost { namespace mpi { + template<> + struct is_commutative >, SparseVector > + : mpl::true_ { }; +} } // end namespace boost::mpi +#endif + +bool LoadAgenda(const string& file, vector >* a) { + ReadFile rf(file); + istream& in = *rf.stream(); + string line; + while(in) { + getline(in, line); + if (!in) break; + if (line.empty()) continue; + if (line[0] == '#') continue; + int sc = 0; + if (line.size() < 3) return false; + for (int i = 0; i < line.size(); ++i) { if (line[i] == ' ') ++sc; } + if (sc != 1) { cerr << "Too many spaces in line: " << line << endl; return false; } + size_t d = line.find(" "); + pair x; + x.first = line.substr(0,d); + x.second = atoi(line.substr(d+1).c_str()); + a->push_back(x); + if (!FileExists(x.first)) { + cerr << "Can't find file " << x.first << endl; + return false; + } + } + return true; +} + +int main(int argc, char** argv) { + cerr << "THIS SOFTWARE IS DEPRECATED YOU SHOULD USE mpi_flex_optimize\n"; +#ifdef HAVE_MPI + mpi::environment env(argc, argv); + mpi::communicator world; + const int size = world.size(); + const int rank = world.rank(); +#else + const int size = 1; + const int rank = 0; +#endif + if (size > 1) SetSilent(true); // turn off verbose decoder output + register_feature_functions(); + std::tr1::shared_ptr rng; + + po::variables_map conf; + if (!InitCommandLine(argc, argv, &conf)) + return 1; + + vector > agenda; + if (!LoadAgenda(conf["training_agenda"].as(), &agenda)) + return 1; + if (rank == 0) + cerr << "Loaded agenda defining " << agenda.size() << " training epochs\n"; + + assert(agenda.size() > 0); + + if (1) { // hack to load the feature hash functions -- TODO this should not be in cdec.ini + const string& cur_config = agenda[0].first; + const unsigned max_iteration = agenda[0].second; + ReadFile ini_rf(cur_config); + Decoder decoder(ini_rf.stream()); + } + + // load initial weights + vector init_weights; + if (conf.count("input_weights")) + Weights::InitFromFile(conf["input_weights"].as(), &init_weights); + + vector frozen_fids; + if (conf.count("frozen_features")) { + ReadFile rf(conf["frozen_features"].as()); + istream& in = *rf.stream(); + string line; + while(in) { + getline(in, line); + if (line.empty()) continue; + if (line[0] == ' ' || line[line.size() - 1] == ' ') { line = Trim(line); } + frozen_fids.push_back(FD::Convert(line)); + } + if (rank == 0) cerr << "Freezing " << frozen_fids.size() << " features.\n"; + } + + vector corpus; + vector ids; + ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus, &ids); + assert(corpus.size() > 0); + + std::tr1::shared_ptr o; + std::tr1::shared_ptr lr; + + const unsigned size_per_proc = conf["minibatch_size_per_proc"].as(); + if (size_per_proc > corpus.size()) { + cerr << "Minibatch size must be smaller than corpus size!\n"; + return 1; + } + + size_t total_corpus_size = 0; +#ifdef HAVE_MPI + reduce(world, corpus.size(), total_corpus_size, std::plus(), 0); +#else + total_corpus_size = corpus.size(); +#endif + + if (rank == 0) { + cerr << "Total corpus size: " << total_corpus_size << endl; + const unsigned batch_size = size_per_proc * size; + // TODO config + lr.reset(new ExponentialDecayLearningRate(batch_size, conf["eta_0"].as())); + + const string omethod = conf["optimization_method"].as(); + if (omethod == "sgd") { + const double C = conf["regularization_strength"].as(); + o.reset(new CumulativeL1OnlineOptimizer(lr, total_corpus_size, C, frozen_fids)); + } else { + assert(!"fail"); + } + } + if (conf.count("random_seed")) + rng.reset(new MT19937(conf["random_seed"].as())); + else + rng.reset(new MT19937); + + SparseVector x; + Weights::InitSparseVector(init_weights, &x); + TrainingObserver observer; + + int write_weights_every_ith = 100; // TODO configure + int titer = -1; + + for (int ai = 0; ai < agenda.size(); ++ai) { + const string& cur_config = agenda[ai].first; + const unsigned max_iteration = agenda[ai].second; + if (rank == 0) + cerr << "STARTING TRAINING EPOCH " << (ai+1) << ". CONFIG=" << cur_config << endl; + // load cdec.ini and set up decoder + ReadFile ini_rf(cur_config); + Decoder decoder(ini_rf.stream()); + vector& lambdas = decoder.CurrentWeightVector(); + if (ai == 0) { lambdas.swap(init_weights); init_weights.clear(); } + + if (rank == 0) + o->ResetEpoch(); // resets the learning rate-- TODO is this good? + + int iter = -1; + bool converged = false; + while (!converged) { +#ifdef HAVE_MPI + mpi::timer timer; +#endif + x.init_vector(&lambdas); + ++iter; ++titer; + observer.Reset(); + if (rank == 0) { + converged = (iter == max_iteration); + Weights::SanityCheck(lambdas); + static int cc = 0; ++cc; if (cc > 1) { Weights::ShowLargestFeatures(lambdas); } + string fname = "weights.cur.gz"; + if (iter % write_weights_every_ith == 0) { + ostringstream o; o << "weights.epoch_" << (ai+1) << '.' << iter << ".gz"; + fname = o.str(); + } + if (converged && ((ai+1)==agenda.size())) { fname = "weights.final.gz"; } + ostringstream vv; + vv << "total iter=" << titer << " (of current config iter=" << iter << ") minibatch=" << size_per_proc << " sentences/proc x " << size << " procs. num_feats=" << x.size() << '/' << FD::NumFeats() << " passes_thru_data=" << (titer * size_per_proc / static_cast(corpus.size())) << " eta=" << lr->eta(titer); + const string svv = vv.str(); + cerr << svv << endl; + Weights::WriteToFile(fname, lambdas, true, &svv); + } + + for (int i = 0; i < size_per_proc; ++i) { + int ei = corpus.size() * rng->next(); + int id = ids[ei]; + decoder.SetId(id); + decoder.Decode(corpus[ei], &observer); + } + SparseVector local_grad, g; + observer.GetGradient(&local_grad); +#ifdef HAVE_MPI + reduce(world, local_grad, g, std::plus >(), 0); +#else + g.swap(local_grad); +#endif + local_grad.clear(); + if (rank == 0) { + g /= (size_per_proc * size); + o->UpdateWeights(g, FD::NumFeats(), &x); + } +#ifdef HAVE_MPI + broadcast(world, x, 0); + broadcast(world, converged, 0); + world.barrier(); + if (rank == 0) { cerr << " ELAPSED TIME THIS ITERATION=" << timer.elapsed() << endl; } +#endif + } + } + return 0; +} diff --git a/training/dep-reorder/conll2reordering-forest.pl b/training/dep-reorder/conll2reordering-forest.pl deleted file mode 100755 index 3cd226be..00000000 --- a/training/dep-reorder/conll2reordering-forest.pl +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $script_dir; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; } -my $FIRST_CONV = "$script_dir/scripts/conll2simplecfg.pl"; -my $CDEC = "$script_dir/../../decoder/cdec"; - -our $tfile1 = "grammar1.$$"; -our $tfile2 = "text.$$"; - -die "Usage: $0 parses.conll\n" unless scalar @ARGV == 1; -open C, "<$ARGV[0]" or die "Can't read $ARGV[0]: $!"; - -END { unlink $tfile1; unlink "$tfile1.cfg"; unlink $tfile2; } - -my $first = 1; -open T, ">$tfile1" or die "Can't write $tfile1: $!"; -my $lc = 0; -my $flag = 0; -my @words = (); -while() { - print T; - chomp; - if (/^$/) { - if ($first) { $first = undef; } else { if ($flag) { print "\n"; $flag = 0; } } - $first = undef; - close T; - open SO, ">$tfile2" or die "Can't write $tfile2: $!"; - print SO "@words\n"; - close SO; - @words=(); - `$FIRST_CONV < $tfile1 > $tfile1.cfg`; - if ($? != 0) { - die "Error code: $?"; - } - my $cfg = `$CDEC -n -S 10000 -f scfg -g $tfile1.cfg -i $tfile2 --show_cfg_search_space 2>/dev/null`; - if ($? != 0) { - die "Error code: $?"; - } - my @rules = split /\n/, $cfg; - shift @rules; # get rid of output - for my $rule (@rules) { - my ($lhs, $f, $e, $feats) = split / \|\|\| /, $rule; - $f =~ s/,\d\]/\]/g; - $feats = 'TOP=1' unless $feats; - if ($lhs =~ /\[Goal_\d+\]/) { $lhs = '[S]'; } - print "$lhs ||| $f ||| $feats\n"; - if ($e eq '[1] [2]') { - my ($a, $b) = split /\s+/, $f; - $feats =~ s/=1$//; - my ($x, $y) = split /_/, $feats; - print "$lhs ||| $b $a ||| ${y}_$x=1\n"; - } - $flag = 1; - } - open T, ">$tfile1" or die "Can't write $tfile1: $!"; - $lc = -1; - } else { - my ($ind, $word, @dmmy) = split /\s+/; - push @words, $word; - } - $lc++; -} -close T; - diff --git a/training/dep-reorder/george.conll b/training/dep-reorder/george.conll deleted file mode 100644 index 7eebb360..00000000 --- a/training/dep-reorder/george.conll +++ /dev/null @@ -1,4 +0,0 @@ -1 George _ GEORGE _ _ 2 X _ _ -2 hates _ HATES _ _ 0 X _ _ -3 broccoli _ BROC _ _ 2 X _ _ - diff --git a/training/dep-reorder/scripts/conll2simplecfg.pl b/training/dep-reorder/scripts/conll2simplecfg.pl deleted file mode 100755 index b101347a..00000000 --- a/training/dep-reorder/scripts/conll2simplecfg.pl +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -# 1 在 _ 10 _ _ 4 X _ _ -# 2 门厅 _ 3 _ _ 1 X _ _ -# 3 下面 _ 23 _ _ 4 X _ _ -# 4 。 _ 45 _ _ 0 X _ _ - -my @ldeps; -my @rdeps; -@ldeps=(); for (my $i =0; $i <1000; $i++) { push @ldeps, []; } -@rdeps=(); for (my $i =0; $i <1000; $i++) { push @rdeps, []; } -my $rootcat = 0; -my @cats = ('S'); -my $len = 0; -my @noposcats = ('S'); -while(<>) { - chomp; - if (/^\s*$/) { - write_cfg($len); - $len = 0; - @cats=('S'); - @noposcats = ('S'); - @ldeps=(); for (my $i =0; $i <1000; $i++) { push @ldeps, []; } - @rdeps=(); for (my $i =0; $i <1000; $i++) { push @rdeps, []; } - next; - } - $len++; - my ($pos, $word, $d1, $xcat, $d2, $d3, $headpos, $deptype) = split /\s+/; - my $cat = "C$xcat"; - my $catpos = $cat . "_$pos"; - push @cats, $catpos; - push @noposcats, $cat; - print "[$catpos] ||| $word ||| $word ||| Word=1\n"; - if ($headpos == 0) { $rootcat = $pos; } - if ($pos < $headpos) { - push @{$ldeps[$headpos]}, $pos; - } else { - push @{$rdeps[$headpos]}, $pos; - } -} - -sub write_cfg { - my $len = shift; - for (my $i = 1; $i <= $len; $i++) { - my @lds = @{$ldeps[$i]}; - for my $ld (@lds) { - print "[$cats[$i]] ||| [$cats[$ld],1] [$cats[$i],2] ||| [1] [2] ||| $noposcats[$ld]_$noposcats[$i]=1\n"; - } - my @rds = @{$rdeps[$i]}; - for my $rd (@rds) { - print "[$cats[$i]] ||| [$cats[$i],1] [$cats[$rd],2] ||| [1] [2] ||| $noposcats[$i]_$noposcats[$rd]=1\n"; - } - } - print "[S] ||| [$cats[$rootcat],1] ||| [1] ||| TOP=1\n"; -} - diff --git a/training/dpmert/Makefile.am b/training/dpmert/Makefile.am new file mode 100644 index 00000000..ff318bef --- /dev/null +++ b/training/dpmert/Makefile.am @@ -0,0 +1,25 @@ +bin_PROGRAMS = \ + mr_dpmert_map \ + mr_dpmert_reduce \ + mr_dpmert_generate_mapper_input + +noinst_PROGRAMS = \ + lo_test +TESTS = lo_test + +mr_dpmert_generate_mapper_input_SOURCES = mr_dpmert_generate_mapper_input.cc line_optimizer.cc +mr_dpmert_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz + +# nbest2hg_SOURCES = nbest2hg.cc +# nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst -lz + +mr_dpmert_map_SOURCES = mert_geometry.cc ces.cc error_surface.cc mr_dpmert_map.cc line_optimizer.cc +mr_dpmert_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz + +mr_dpmert_reduce_SOURCES = error_surface.cc ces.cc mr_dpmert_reduce.cc line_optimizer.cc mert_geometry.cc +mr_dpmert_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz + +lo_test_SOURCES = lo_test.cc ces.cc mert_geometry.cc error_surface.cc line_optimizer.cc +lo_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz + +AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/dpmert/ces.cc b/training/dpmert/ces.cc new file mode 100644 index 00000000..157b2d17 --- /dev/null +++ b/training/dpmert/ces.cc @@ -0,0 +1,90 @@ +#include "ces.h" + +#include +#include +#include + +// TODO, if AER is to be optimized again, we will need this +// #include "aligner.h" +#include "lattice.h" +#include "mert_geometry.h" +#include "error_surface.h" +#include "ns.h" + +using namespace std; + +const bool minimize_segments = true; // if adjacent segments have equal scores, merge them + +void ComputeErrorSurface(const SegmentEvaluator& ss, + const ConvexHull& ve, + ErrorSurface* env, + const EvaluationMetric* metric, + const Hypergraph& hg) { + vector prev_trans; + const vector >& ienv = ve.GetSortedSegs(); + env->resize(ienv.size()); + SufficientStats prev_score; // defaults to 0 + int j = 0; + for (unsigned i = 0; i < ienv.size(); ++i) { + const MERTPoint& seg = *ienv[i]; + vector trans; +#if 0 + if (type == AER) { + vector edges(hg.edges_.size(), false); + seg.CollectEdgesUsed(&edges); // get the set of edges in the viterbi + // alignment + ostringstream os; + const string* psrc = ss.GetSource(); + if (psrc == NULL) { + cerr << "AER scoring in VEST requires source, but it is missing!\n"; + abort(); + } + size_t pos = psrc->rfind(" ||| "); + if (pos == string::npos) { + cerr << "Malformed source for AER: expected |||\nINPUT: " << *psrc << endl; + abort(); + } + Lattice src; + Lattice ref; + LatticeTools::ConvertTextOrPLF(psrc->substr(0, pos), &src); + LatticeTools::ConvertTextOrPLF(psrc->substr(pos + 5), &ref); + AlignerTools::WriteAlignment(src, ref, hg, &os, true, 0, &edges); + string tstr = os.str(); + TD::ConvertSentence(tstr.substr(tstr.rfind(" ||| ") + 5), &trans); + } else { +#endif + seg.ConstructTranslation(&trans); + //} + //cerr << "Scoring: " << TD::GetString(trans) << endl; + if (trans == prev_trans) { + if (!minimize_segments) { + ErrorSegment& out = (*env)[j]; + out.delta.fields.clear(); + out.x = seg.x; + ++j; + } + //cerr << "Identical translation, skipping scoring\n"; + } else { + SufficientStats score; + ss.Evaluate(trans, &score); + // cerr << "score= " << score->ComputeScore() << "\n"; + //string x1; score.Encode(&x1); cerr << "STATS: " << x1 << endl; + const SufficientStats delta = score - prev_score; + //string x2; delta.Encode(&x2); cerr << "DELTA: " << x2 << endl; + //string xx; delta.Encode(&xx); cerr << xx << endl; + prev_trans.swap(trans); + prev_score = score; + if ((!minimize_segments) || (!delta.IsAdditiveIdentity())) { + ErrorSegment& out = (*env)[j]; + out.delta = delta; + out.x = seg.x; + ++j; + } + } + } + // cerr << " In segments: " << ienv.size() << endl; + // cerr << "Out segments: " << j << endl; + assert(j > 0); + env->resize(j); +} + diff --git a/training/dpmert/ces.h b/training/dpmert/ces.h new file mode 100644 index 00000000..e4fa2080 --- /dev/null +++ b/training/dpmert/ces.h @@ -0,0 +1,16 @@ +#ifndef _CES_H_ +#define _CES_H_ + +class ConvexHull; +class Hypergraph; +class SegmentEvaluator; +class ErrorSurface; +class EvaluationMetric; + +void ComputeErrorSurface(const SegmentEvaluator& ss, + const ConvexHull& convex_hull, + ErrorSurface* es, + const EvaluationMetric* metric, + const Hypergraph& hg); + +#endif diff --git a/training/dpmert/divide_refs.py b/training/dpmert/divide_refs.py new file mode 100755 index 00000000..b478f918 --- /dev/null +++ b/training/dpmert/divide_refs.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python +import sys + +(numRefs, outPrefix) = sys.argv[1:] +numRefs = int(numRefs) + +outs = [open(outPrefix+str(i), "w") for i in range(numRefs)] + +i = 0 +for line in sys.stdin: + outs[i].write(line) + i = (i + 1) % numRefs + +for out in outs: + out.close() diff --git a/training/dpmert/dpmert.pl b/training/dpmert/dpmert.pl new file mode 100755 index 00000000..559420f5 --- /dev/null +++ b/training/dpmert/dpmert.pl @@ -0,0 +1,618 @@ +#!/usr/bin/env perl +use strict; +my @ORIG_ARGV=@ARGV; +use Cwd qw(getcwd); +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment", "$SCRIPT_DIR/../utils"; } + +# Skip local config (used for distributing jobs) if we're running in local-only mode +use LocalConfig; +use Getopt::Long; +use File::Basename qw(basename); +require "libcall.pl"; + +my $QSUB_CMD = qsub_args(mert_memory()); + +# Default settings +my $srcFile; # deprecated +my $refFiles; # deprecated +my $default_jobs = env_default_jobs(); +my $bin_dir = $SCRIPT_DIR; +my $util_dir = "$SCRIPT_DIR/../utils"; +die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; +my $FAST_SCORE="$bin_dir/../../mteval/fast_score"; +die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; +my $MAPINPUT = "$bin_dir/mr_dpmert_generate_mapper_input"; +my $MAPPER = "$bin_dir/mr_dpmert_map"; +my $REDUCER = "$bin_dir/mr_dpmert_reduce"; +my $parallelize = "$util_dir/parallelize.pl"; +my $libcall = "$util_dir/libcall.pl"; +my $sentserver = "$util_dir/sentserver"; +my $sentclient = "$util_dir/sentclient"; +my $LocalConfig = "$SCRIPT_DIR/../../environment/LocalConfig.pm"; + +my $SCORER = $FAST_SCORE; +die "Can't find $MAPPER" unless -x $MAPPER; +my $cdec = "$bin_dir/../../decoder/cdec"; +die "Can't find decoder in $cdec" unless -x $cdec; +die "Can't find $parallelize" unless -x $parallelize; +die "Can't find $libcall" unless -e $libcall; +my $decoder = $cdec; +my $lines_per_mapper = 200; +my $rand_directions = 15; +my $iteration = 1; +my $best_weights; +my $max_iterations = 15; +my $optimization_iters = 6; +my $jobs = $default_jobs; # number of decode nodes +my $pmem = "9g"; +my $disable_clean = 0; +my %seen_weights; +my $help = 0; +my $epsilon = 0.0001; +my $last_score = -10000000; +my $metric = "ibm_bleu"; +my $dir; +my $iniFile; +my $weights; +my $initialWeights; +my $bleu_weight=1; +my $use_make = 1; # use make to parallelize line search +my $useqsub; +my $pass_suffix = ''; +my $devset; +# Process command-line options +if (GetOptions( + "config=s" => \$iniFile, + "weights=s" => \$initialWeights, + "devset=s" => \$devset, + "jobs=i" => \$jobs, + "pass-suffix=s" => \$pass_suffix, + "help" => \$help, + "qsub" => \$useqsub, + "iterations=i" => \$max_iterations, + "pmem=s" => \$pmem, + "random-directions=i" => \$rand_directions, + "metric=s" => \$metric, + "source-file=s" => \$srcFile, + "output-dir=s" => \$dir, +) == 0 || @ARGV!=0 || $help) { + print_help(); + exit; +} + +if ($useqsub) { + $use_make = 0; + die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); +} + +my @missing_args = (); +if (defined $srcFile || defined $refFiles) { + die <) { $devSize++; } +close F; + +unless($best_weights){ $best_weights = $weights; } +unless($projected_score){ $projected_score = 0.0; } +$seen_weights{$weights} = 1; + +my $random_seed = int(time / 1000); +my $lastWeightsFile; +my $lastPScore = 0; +# main optimization loop +while (1){ + print STDERR "\n\nITERATION $iteration\n==========\n"; + + if ($iteration > $max_iterations){ + print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n"; + last; + } + # iteration-specific files + my $runFile="$dir/run.raw.$iteration"; + my $onebestFile="$dir/1best.$iteration"; + my $logdir="$dir/logs.$iteration"; + my $decoderLog="$logdir/decoder.sentserver.log.$iteration"; + my $scorerLog="$logdir/scorer.log.$iteration"; + check_call("mkdir -p $logdir"); + + + #decode + print STDERR "RUNNING DECODER AT "; + print STDERR unchecked_output("date"); + my $im1 = $iteration - 1; + my $weightsFile="$dir/weights.$im1"; + my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; + my $pcmd; + if ($use_make) { + $pcmd = "cat $srcFile | $parallelize --workdir $dir --use-fork -p $pmem -e $logdir -j $jobs --"; + } else { + $pcmd = "cat $srcFile | $parallelize --workdir $dir -p $pmem -e $logdir -j $jobs --"; + } + my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; + print STDERR "COMMAND:\n$cmd\n"; + check_bash_call($cmd); + my $num_hgs; + my $num_topbest; + my $retries = 0; + while($retries < 5) { + $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); + $num_topbest = check_output("wc -l < $runFile"); + print STDERR "NUMBER OF HGs: $num_hgs\n"; + print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n"; + if($devSize == $num_hgs && $devSize == $num_topbest) { + last; + } else { + print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n"; + sleep(3); + } + $retries++; + } + die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); + my $dec_score = check_output("cat $runFile | $SCORER $refs -m $metric"); + chomp $dec_score; + print STDERR "DECODER SCORE: $dec_score\n"; + + # save space + check_call("gzip -f $runFile"); + check_call("gzip -f $decoderLog"); + + # run optimizer + print STDERR "RUNNING OPTIMIZER AT "; + print STDERR unchecked_output("date"); + my $mergeLog="$logdir/prune-merge.log.$iteration"; + + my $score = 0; + my $icc = 0; + my $inweights="$dir/weights.$im1"; + for (my $opt_iter=1; $opt_iter<$optimization_iters; $opt_iter++) { + print STDERR "\nGENERATE OPTIMIZATION STRATEGY (OPT-ITERATION $opt_iter/$optimization_iters)\n"; + print STDERR unchecked_output("date"); + $icc++; + $cmd="$MAPINPUT -w $inweights -r $dir/hgs -s $devSize -d $rand_directions > $dir/agenda.$im1-$opt_iter"; + print STDERR "COMMAND:\n$cmd\n"; + check_call($cmd); + check_call("mkdir -p $dir/splag.$im1"); + $cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1-$opt_iter $dir/splag.$im1/mapinput."; + print STDERR "COMMAND:\n$cmd\n"; + check_call($cmd); + opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!"; + my @shards = grep { /^mapinput\./ } readdir(DIR); + closedir DIR; + die "No shards!" unless scalar @shards > 0; + my $joblist = ""; + my $nmappers = 0; + my @mapoutputs = (); + @cleanupcmds = (); + my %o2i = (); + my $first_shard = 1; + my $mkfile; # only used with makefiles + my $mkfilename; + if ($use_make) { + $mkfilename = "$dir/splag.$im1/domap.mk"; + open $mkfile, ">$mkfilename" or die "Couldn't write $mkfilename: $!"; + print $mkfile "all: $dir/splag.$im1/map.done\n\n"; + } + my @mkouts = (); # only used with makefiles + for my $shard (@shards) { + my $mapoutput = $shard; + my $client_name = $shard; + $client_name =~ s/mapinput.//; + $client_name = "dpmert.$client_name"; + $mapoutput =~ s/mapinput/mapoutput/; + push @mapoutputs, "$dir/splag.$im1/$mapoutput"; + $o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard"; + my $script = "$MAPPER -s $srcFile -m $metric $refs < $dir/splag.$im1/$shard | sort -t \$'\\t' -k 1 > $dir/splag.$im1/$mapoutput"; + if ($use_make) { + my $script_file = "$dir/scripts/map.$shard"; + open F, ">$script_file" or die "Can't write $script_file: $!"; + print F "#!/bin/bash\n"; + print F "$script\n"; + close F; + my $output = "$dir/splag.$im1/$mapoutput"; + push @mkouts, $output; + chmod(0755, $script_file) or die "Can't chmod $script_file: $!"; + if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } + print $mkfile "$output: $dir/splag.$im1/$shard\n\t$script_file\n\n"; + } else { + my $script_file = "$dir/scripts/map.$shard"; + open F, ">$script_file" or die "Can't write $script_file: $!"; + print F "$script\n"; + close F; + if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } + + $nmappers++; + my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; + my $jobid = check_output("$qcmd"); + chomp $jobid; + $jobid =~ s/^(\d+)(.*?)$/\1/g; + $jobid =~ s/^Your job (\d+) .*$/\1/; + push(@cleanupcmds, "qdel $jobid 2> /dev/null"); + print STDERR " $jobid"; + if ($joblist == "") { $joblist = $jobid; } + else {$joblist = $joblist . "\|" . $jobid; } + } + } + if ($use_make) { + print $mkfile "$dir/splag.$im1/map.done: @mkouts\n\ttouch $dir/splag.$im1/map.done\n\n"; + close $mkfile; + my $mcmd = "make -j $jobs -f $mkfilename"; + print STDERR "\nExecuting: $mcmd\n"; + check_call($mcmd); + } else { + print STDERR "\nLaunched $nmappers mappers.\n"; + sleep 8; + print STDERR "Waiting for mappers to complete...\n"; + while ($nmappers > 0) { + sleep 5; + my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '"))); + $nmappers = scalar @livejobs; + } + print STDERR "All mappers complete.\n"; + } + my $tol = 0; + my $til = 0; + for my $mo (@mapoutputs) { + my $olines = get_lines($mo); + my $ilines = get_lines($o2i{$mo}); + $tol += $olines; + $til += $ilines; + die "$mo: output lines ($olines) doesn't match input lines ($ilines)" unless $olines==$ilines; + } + print STDERR "Results for $tol/$til lines\n"; + print STDERR "\nSORTING AND RUNNING VEST REDUCER\n"; + print STDERR unchecked_output("date"); + $cmd="sort -t \$'\\t' -k 1 @mapoutputs | $REDUCER -m $metric > $dir/redoutput.$im1"; + print STDERR "COMMAND:\n$cmd\n"; + check_bash_call($cmd); + $cmd="sort -nk3 $DIR_FLAG '-t|' $dir/redoutput.$im1 | head -1"; + # sort returns failure even when it doesn't fail for some reason + my $best=unchecked_output("$cmd"); chomp $best; + print STDERR "$best\n"; + my ($oa, $x, $xscore) = split /\|/, $best; + $score = $xscore; + print STDERR "PROJECTED SCORE: $score\n"; + if (abs($x) < $epsilon) { + print STDERR "\nOPTIMIZER: no score improvement: abs($x) < $epsilon\n"; + last; + } + my $psd = $score - $last_score; + $last_score = $score; + if (abs($psd) < $epsilon) { + print STDERR "\nOPTIMIZER: no score improvement: abs($psd) < $epsilon\n"; + last; + } + my ($origin, $axis) = split /\s+/, $oa; + + my %ori = convert($origin); + my %axi = convert($axis); + + my $finalFile="$dir/weights.$im1-$opt_iter"; + open W, ">$finalFile" or die "Can't write: $finalFile: $!"; + my $norm = 0; + for my $k (sort keys %ori) { + my $dd = $ori{$k} + $axi{$k} * $x; + $norm += $dd * $dd; + } + $norm = sqrt($norm); + $norm = 1; + for my $k (sort keys %ori) { + my $v = ($ori{$k} + $axi{$k} * $x) / $norm; + print W "$k $v\n"; + } + check_call("rm $dir/splag.$im1/*"); + $inweights = $finalFile; + } + $lastWeightsFile = "$dir/weights.$iteration"; + check_call("cp $inweights $lastWeightsFile"); + if ($icc < 2) { + print STDERR "\nREACHED STOPPING CRITERION: score change too little\n"; + last; + } + $lastPScore = $score; + $iteration++; + print STDERR "\n==========\n"; +} + +check_call("cp $lastWeightsFile $dir/weights.final"); +print STDERR "\nFINAL WEIGHTS: $dir/weights.final\n(Use -w with the decoder)\n\n"; +print STDOUT "$dir/weights.final\n"; +exit 0; + + +sub get_lines { + my $fn = shift @_; + open FL, "<$fn" or die "Couldn't read $fn: $!"; + my $lc = 0; + while() { $lc++; } + return $lc; +} + +sub read_weights_file { + my ($file) = @_; + open F, "<$file" or die "Couldn't read $file: $!"; + my @r = (); + my $pm = -1; + while() { + next if /^#/; + next if /^\s*$/; + chomp; + if (/^(.+)\s+(.+)$/) { + my $m = $1; + my $w = $2; + die "Weights out of order: $m <= $pm" unless $m > $pm; + push @r, $w; + } else { + warn "Unexpected feature name in weight file: $_"; + } + } + close F; + return join ' ', @r; +} + +sub update_weights_file { + my ($neww, $rfn, $rpts) = @_; + my @feats = @$rfn; + my @pts = @$rpts; + my $num_feats = scalar @feats; + my $num_pts = scalar @pts; + die "$num_feats (num_feats) != $num_pts (num_pts)" unless $num_feats == $num_pts; + open G, ">$neww" or die; + for (my $i = 0; $i < $num_feats; $i++) { + my $f = $feats[$i]; + my $lambda = $pts[$i]; + print G "$f $lambda\n"; + } + close G; +} + +sub enseg { + my $src = shift; + my $newsrc = shift; + open(SRC, $src); + open(NEWSRC, ">$newsrc"); + my $i=0; + while (my $line=){ + chomp $line; + if ($line =~ /^\s* tags, you must include a zero-based id attribute"; + } + } else { + print NEWSRC "$line\n"; + } + $i++; + } + close SRC; + close NEWSRC; +} + +sub print_help { + + my $executable = basename($0); chomp $executable; + print << "Help"; + +Usage: $executable [options] + + $executable [options] + Runs a complete MERT optimization. Required options are --weights, + --devset, and --config. + +Options: + + --config [-c ] + The decoder configuration file. + + --devset [-d ] + The source *and* references for the development set. + + --weights [-w ] + A file specifying initial feature weights. The format is + FeatureName_1 value1 + FeatureName_2 value2 + **All and only the weights listed in will be optimized!** + + --metric + Metric to optimize. + Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi + + --iterations + Maximum number of iterations to run. If not specified, defaults + to 10. + + --pass-suffix + If the decoder is doing multi-pass decoding, the pass suffix "2", + "3", etc., is used to control what iteration of weights is set. + + --rand-directions + MERT will attempt to optimize along all of the principle directions, + set this parameter to explore other directions. Defaults to 5. + + --output-dir + Directory for intermediate and output files. + + --help + Print this message and exit. + +Job control options: + + --jobs + Number of decoder processes to run in parallel. [default=$default_jobs] + + --qsub + Use qsub to run jobs in parallel (qsub must be configured in + environment/LocalEnvironment.pm) + + --pmem + Amount of physical memory requested for parallel decoding jobs + (used with qsub requests only) + +Help +} + +sub convert { + my ($str) = @_; + my @ps = split /;/, $str; + my %dict = (); + for my $p (@ps) { + my ($k, $v) = split /=/, $p; + $dict{$k} = $v; + } + return %dict; +} + + + +sub cmdline { + return join ' ',($0,@ORIG_ARGV); +} + +#buggy: last arg gets quoted sometimes? +my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; +my $shell_escape_in_quote=qr{[\\"\$`!]}; + +sub escape_shell { + my ($arg)=@_; + return undef unless defined $arg; + if ($arg =~ /$is_shell_special/) { + $arg =~ s/($shell_escape_in_quote)/\\$1/g; + return "\"$arg\""; + } + return $arg; +} + +sub escaped_shell_args { + return map {local $_=$_;chomp;escape_shell($_)} @_; +} + +sub escaped_shell_args_str { + return join ' ',&escaped_shell_args(@_); +} + +sub escaped_cmdline { + return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); +} + +sub split_devset { + my ($infile, $outsrc, $outref) = @_; + open F, "<$infile" or die "Can't read $infile: $!"; + open S, ">$outsrc" or die "Can't write $outsrc: $!"; + open R, ">$outref" or die "Can't write $outref: $!"; + while() { + chomp; + my ($src, @refs) = split /\s*\|\|\|\s*/; + die "Malformed devset line: $_\n" unless scalar @refs > 0; + print S "$src\n"; + print R join(' ||| ', @refs) . "\n"; + } + close R; + close S; + close F; +} + diff --git a/training/dpmert/error_surface.cc b/training/dpmert/error_surface.cc new file mode 100644 index 00000000..515b67f8 --- /dev/null +++ b/training/dpmert/error_surface.cc @@ -0,0 +1,42 @@ +#include "error_surface.h" + +#include +#include + +using namespace std; + +ErrorSurface::~ErrorSurface() {} + +void ErrorSurface::Serialize(std::string* out) const { + const int segments = this->size(); + ostringstream os(ios::binary); + os.write((const char*)&segments,sizeof(segments)); + for (int i = 0; i < segments; ++i) { + const ErrorSegment& cur = (*this)[i]; + string senc; + cur.delta.Encode(&senc); + assert(senc.size() < 1024); + unsigned char len = senc.size(); + os.write((const char*)&cur.x, sizeof(cur.x)); + os.write((const char*)&len, sizeof(len)); + os.write((const char*)&senc[0], len); + } + *out = os.str(); +} + +void ErrorSurface::Deserialize(const std::string& in) { + istringstream is(in, ios::binary); + int segments; + is.read((char*)&segments, sizeof(segments)); + this->resize(segments); + for (int i = 0; i < segments; ++i) { + ErrorSegment& cur = (*this)[i]; + unsigned char len; + is.read((char*)&cur.x, sizeof(cur.x)); + is.read((char*)&len, sizeof(len)); + string senc(len, '\0'); assert(senc.size() == len); + is.read((char*)&senc[0], len); + cur.delta = SufficientStats(senc); + } +} + diff --git a/training/dpmert/error_surface.h b/training/dpmert/error_surface.h new file mode 100644 index 00000000..bb65847b --- /dev/null +++ b/training/dpmert/error_surface.h @@ -0,0 +1,24 @@ +#ifndef _ERROR_SURFACE_H_ +#define _ERROR_SURFACE_H_ + +#include +#include + +#include "ns.h" + +class Score; + +struct ErrorSegment { + double x; + SufficientStats delta; + ErrorSegment() : x(0), delta() {} +}; + +class ErrorSurface : public std::vector { + public: + ~ErrorSurface(); + void Serialize(std::string* out) const; + void Deserialize(const std::string& in); +}; + +#endif diff --git a/training/dpmert/line_mediator.pl b/training/dpmert/line_mediator.pl new file mode 100755 index 00000000..bc2bb24c --- /dev/null +++ b/training/dpmert/line_mediator.pl @@ -0,0 +1,116 @@ +#!/usr/bin/perl -w +#hooks up two processes, 2nd of which has one line of output per line of input, expected by the first, which starts off the communication + +# if you don't know how to fork/exec in a C program, this could be helpful under limited cirmustances (would be ok to liaise with sentserver) + +#WARNING: because it waits for the result from command 2 after sending every line, and especially if command 1 does the same, using sentserver as command 2 won't actually buy you any real parallelism. + +use strict; +use IPC::Open2; +use POSIX qw(pipe dup2 STDIN_FILENO STDOUT_FILENO); + +my $quiet=!$ENV{DEBUG}; +$quiet=1 if $ENV{QUIET}; +sub info { + local $,=' '; + print STDERR @_ unless $quiet; +} + +my $mode='CROSS'; +my $ser='DIRECT'; +$mode='PIPE' if $ENV{PIPE}; +$mode='SNAKE' if $ENV{SNAKE}; +$mode='CROSS' if $ENV{CROSS}; +$ser='SERIAL' if $ENV{SERIAL}; +$ser='DIRECT' if $ENV{DIRECT}; +$ser='SERIAL' if $mode eq 'SNAKE'; +info("mode: $mode\n"); +info("connection: $ser\n"); + + +my @c1; +if (scalar @ARGV) { + do { + push @c1,shift + } while scalar @ARGV && $c1[$#c1] ne '--'; +} +pop @c1; +my @c2=@ARGV; +@ARGV=(); +(scalar @c1 && scalar @c2) || die qq{ +usage: $0 cmd1 args -- cmd2 args +all options are environment variables. +DEBUG=1 env var enables debugging output. +CROSS=1 hooks up two processes, 2nd of which has one line of output per line of input, expected by the first, which starts off the communication. crosses stdin/stderr of cmd1 and cmd2 line by line (both must flush on newline and output. cmd1 initiates the conversation (sends the first line). default: attempts to cross stdin/stdout of c1 and c2 directly (via two unidirectional posix pipes created before fork). +SERIAL=1: (no parallelism possible) but lines exchanged are logged if DEBUG. +if SNAKE then stdin -> c1 -> c2 -> c1 -> stdout. +if PIPE then stdin -> c1 -> c2 -> stdout (same as shell c1|c2, but with SERIAL you can see the intermediate in real time; you could do similar with c1 | tee /dev/fd/2 |c2. +DIRECT=1 (default) will override SERIAL=1. +CROSS=1 (default) will override SNAKE or PIPE. +}; + +info("1 cmd:",@c1,"\n"); +info("2 cmd:",@c2,"\n"); + +sub lineto { + select $_[0]; + $|=1; + shift; + print @_; +} + +if ($ser eq 'SERIAL') { + my ($R1,$W1,$R2,$W2); + my $c1p=open2($R1,$W1,@c1); # Open2 R W backward from Open3. + my $c2p=open2($R2,$W2,@c2); + if ($mode eq 'CROSS') { + while(<$R1>) { + info("1:",$_); + lineto($W2,$_); + last unless defined ($_=<$R2>); + info("1|2:",$_); + lineto($W1,$_); + } + } else { + my $snake=$mode eq 'SNAKE'; + while() { + info("IN:",$_); + lineto($W1,$_); + last unless defined ($_=<$R1>); + info("IN|1:",$_); + lineto($W2,$_); + last unless defined ($_=<$R2>); + info("IN|1|2:",$_); + if ($snake) { + lineto($W1,$_); + last unless defined ($_=<$R1>); + info("IN|1|2|1:",$_); + } + lineto(*STDOUT,$_); + } + } +} else { + info("DIRECT mode\n"); + my @rw1=POSIX::pipe(); + my @rw2=POSIX::pipe(); + my $pid=undef; + $SIG{CHLD} = sub { wait }; + while (not defined ($pid=fork())) { + sleep 1; + } + my $pipe = $mode eq 'PIPE'; + unless ($pipe) { + POSIX::close(STDOUT_FILENO); + POSIX::close(STDIN_FILENO); + } + if ($pid) { + POSIX::dup2($rw1[1],STDOUT_FILENO); + POSIX::dup2($rw2[0],STDIN_FILENO) unless $pipe; + exec @c1; + } else { + POSIX::dup2($rw2[1],STDOUT_FILENO) unless $pipe; + POSIX::dup2($rw1[0],STDIN_FILENO); + exec @c2; + } + while (wait()!=-1) {} +} diff --git a/training/dpmert/line_optimizer.cc b/training/dpmert/line_optimizer.cc new file mode 100644 index 00000000..9cf33502 --- /dev/null +++ b/training/dpmert/line_optimizer.cc @@ -0,0 +1,114 @@ +#include "line_optimizer.h" + +#include +#include + +#include "sparse_vector.h" +#include "ns.h" + +using namespace std; + +typedef ErrorSurface::const_iterator ErrorIter; + +// sort by increasing x-ints +struct IntervalComp { + bool operator() (const ErrorIter& a, const ErrorIter& b) const { + return a->x < b->x; + } +}; + +double LineOptimizer::LineOptimize( + const EvaluationMetric* metric, + const vector& surfaces, + const LineOptimizer::ScoreType type, + float* best_score, + const double epsilon) { + // cerr << "MIN=" << MINIMIZE_SCORE << " MAX=" << MAXIMIZE_SCORE << " MINE=" << type << endl; + vector all_ints; + for (vector::const_iterator i = surfaces.begin(); + i != surfaces.end(); ++i) { + const ErrorSurface& surface = *i; + for (ErrorIter j = surface.begin(); j != surface.end(); ++j) + all_ints.push_back(j); + } + sort(all_ints.begin(), all_ints.end(), IntervalComp()); + double last_boundary = all_ints.front()->x; + SufficientStats acc; + float& cur_best_score = *best_score; + cur_best_score = (type == MAXIMIZE_SCORE ? + -numeric_limits::max() : numeric_limits::max()); + bool left_edge = true; + double pos = numeric_limits::quiet_NaN(); + for (vector::iterator i = all_ints.begin(); + i != all_ints.end(); ++i) { + const ErrorSegment& seg = **i; + if (seg.x - last_boundary > epsilon) { + float sco = metric->ComputeScore(acc); + if ((type == MAXIMIZE_SCORE && sco > cur_best_score) || + (type == MINIMIZE_SCORE && sco < cur_best_score) ) { + cur_best_score = sco; + if (left_edge) { + pos = seg.x - 0.1; + left_edge = false; + } else { + pos = last_boundary + (seg.x - last_boundary) / 2; + } + //cerr << "NEW BEST: " << pos << " (score=" << cur_best_score << ")\n"; + } + // string xx = metric->DetailedScore(acc); cerr << "---- " << xx; +#undef SHOW_ERROR_SURFACES +#ifdef SHOW_ERROR_SURFACES + cerr << "x=" << seg.x << "\ts=" << sco << "\n"; +#endif + last_boundary = seg.x; + } + // cerr << "x-boundary=" << seg.x << "\n"; + //string x2; acc.Encode(&x2); cerr << " ACC: " << x2 << endl; + //string x1; seg.delta.Encode(&x1); cerr << " DELTA: " << x1 << endl; + acc += seg.delta; + } + float sco = metric->ComputeScore(acc); + if ((type == MAXIMIZE_SCORE && sco > cur_best_score) || + (type == MINIMIZE_SCORE && sco < cur_best_score) ) { + cur_best_score = sco; + if (left_edge) { + pos = 0; + } else { + pos = last_boundary + 1000.0; + } + } + return pos; +} + +void LineOptimizer::RandomUnitVector(const vector& features_to_optimize, + SparseVector* axis, + RandomNumberGenerator* rng) { + axis->clear(); + for (int i = 0; i < features_to_optimize.size(); ++i) + axis->set_value(features_to_optimize[i], rng->NextNormal(0.0,1.0)); + (*axis) /= axis->l2norm(); +} + +void LineOptimizer::CreateOptimizationDirections( + const vector& features_to_optimize, + int additional_random_directions, + RandomNumberGenerator* rng, + vector >* dirs + , bool include_orthogonal + ) { + dirs->clear(); + typedef SparseVector Dir; + vector &out=*dirs; + int i=0; + if (include_orthogonal) + for (;i + +#include "sparse_vector.h" +#include "error_surface.h" +#include "sampler.h" + +class EvaluationMetric; +class Weights; + +struct LineOptimizer { + + // use MINIMIZE_SCORE for things like TER, WER + // MAXIMIZE_SCORE for things like BLEU + enum ScoreType { MAXIMIZE_SCORE, MINIMIZE_SCORE }; + + // merge all the error surfaces together into a global + // error surface and find (the middle of) the best segment + static double LineOptimize( + const EvaluationMetric* metric, + const std::vector& envs, + const LineOptimizer::ScoreType type, + float* best_score, + const double epsilon = 1.0/65536.0); + + // return a random vector of length 1 where all dimensions + // not listed in dimensions will be 0. + static void RandomUnitVector(const std::vector& dimensions, + SparseVector* axis, + RandomNumberGenerator* rng); + + // generate a list of directions to optimize; the list will + // contain the orthogonal vectors corresponding to the dimensions in + // primary and then additional_random_directions directions in those + // dimensions as well. All vectors will be length 1. + static void CreateOptimizationDirections( + const std::vector& primary, + int additional_random_directions, + RandomNumberGenerator* rng, + std::vector >* dirs + , bool include_primary=true + ); + +}; + +#endif diff --git a/training/dpmert/lo_test.cc b/training/dpmert/lo_test.cc new file mode 100644 index 00000000..95a08d3d --- /dev/null +++ b/training/dpmert/lo_test.cc @@ -0,0 +1,229 @@ +#define BOOST_TEST_MODULE LineOptimizerTest +#include +#include + +#include +#include +#include + +#include + +#include "ns.h" +#include "ns_docscorer.h" +#include "ces.h" +#include "fdict.h" +#include "hg.h" +#include "kbest.h" +#include "hg_io.h" +#include "filelib.h" +#include "inside_outside.h" +#include "viterbi.h" +#include "mert_geometry.h" +#include "line_optimizer.h" + +using namespace std; + +const char* ref11 = "australia reopens embassy in manila"; +const char* ref12 = "( afp , manila , january 2 ) australia reopened its embassy in the philippines today , which was shut down about seven weeks ago due to what was described as a specific threat of a terrorist attack ."; +const char* ref21 = "australia reopened manila embassy"; +const char* ref22 = "( agence france-presse , manila , 2nd ) - australia reopened its embassy in the philippines today . the embassy was closed seven weeks ago after what was described as a specific threat of a terrorist attack ."; +const char* ref31 = "australia to reopen embassy in manila"; +const char* ref32 = "( afp report from manila , january 2 ) australia reopened its embassy in the philippines today . seven weeks ago , the embassy was shut down due to so - called confirmed terrorist attack threats ."; +const char* ref41 = "australia to re - open its embassy to manila"; +const char* ref42 = "( afp , manila , thursday ) australia reopens its embassy to manila , which was closed for the so - called \" clear \" threat of terrorist attack 7 weeks ago ."; + +BOOST_AUTO_TEST_CASE( TestCheckNaN) { + double x = 0; + double y = 0; + double z = x / y; + BOOST_CHECK_EQUAL(true, std::isnan(z)); +} + +BOOST_AUTO_TEST_CASE(TestConvexHull) { + boost::shared_ptr a1(new MERTPoint(-1, 0)); + boost::shared_ptr b1(new MERTPoint(1, 0)); + boost::shared_ptr a2(new MERTPoint(-1, 1)); + boost::shared_ptr b2(new MERTPoint(1, -1)); + vector > sa; sa.push_back(a1); sa.push_back(b1); + vector > sb; sb.push_back(a2); sb.push_back(b2); + ConvexHull a(sa); + cerr << a << endl; + ConvexHull b(sb); + ConvexHull c = a; + c *= b; + cerr << a << " (*) " << b << " = " << c << endl; + BOOST_CHECK_EQUAL(3, c.size()); +} + +BOOST_AUTO_TEST_CASE(TestConvexHullInside) { + const string json = "{\"rules\":[1,\"[X] ||| a\",2,\"[X] ||| A [1]\",3,\"[X] ||| c\",4,\"[X] ||| C [1]\",5,\"[X] ||| [1] B [2]\",6,\"[X] ||| [1] b [2]\",7,\"[X] ||| X [1]\",8,\"[X] ||| Z [1]\"],\"features\":[\"f1\",\"f2\",\"Feature_1\",\"Feature_0\",\"Model_0\",\"Model_1\",\"Model_2\",\"Model_3\",\"Model_4\",\"Model_5\",\"Model_6\",\"Model_7\"],\"edges\":[{\"tail\":[],\"feats\":[],\"rule\":1}],\"node\":{\"in_edges\":[0]},\"edges\":[{\"tail\":[0],\"feats\":[0,-0.8,1,-0.1],\"rule\":2}],\"node\":{\"in_edges\":[1]},\"edges\":[{\"tail\":[],\"feats\":[1,-1],\"rule\":3}],\"node\":{\"in_edges\":[2]},\"edges\":[{\"tail\":[2],\"feats\":[0,-0.2,1,-0.1],\"rule\":4}],\"node\":{\"in_edges\":[3]},\"edges\":[{\"tail\":[1,3],\"feats\":[0,-1.2,1,-0.2],\"rule\":5},{\"tail\":[1,3],\"feats\":[0,-0.5,1,-1.3],\"rule\":6}],\"node\":{\"in_edges\":[4,5]},\"edges\":[{\"tail\":[4],\"feats\":[0,-0.5,1,-0.8],\"rule\":7},{\"tail\":[4],\"feats\":[0,-0.7,1,-0.9],\"rule\":8}],\"node\":{\"in_edges\":[6,7]}}"; + Hypergraph hg; + istringstream instr(json); + HypergraphIO::ReadFromJSON(&instr, &hg); + SparseVector wts; + wts.set_value(FD::Convert("f1"), 0.4); + wts.set_value(FD::Convert("f2"), 1.0); + hg.Reweight(wts); + vector, prob_t> > list; + std::vector > features; + KBest::KBestDerivations, ESentenceTraversal> kbest(hg, 10); + for (int i = 0; i < 10; ++i) { + const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = + kbest.LazyKthBest(hg.nodes_.size() - 1, i); + if (!d) break; + cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl; + } + SparseVector dir; dir.set_value(FD::Convert("f1"), 1.0); + ConvexHullWeightFunction wf(wts, dir); + ConvexHull env = Inside(hg, NULL, wf); + cerr << env << endl; + const vector >& segs = env.GetSortedSegs(); + dir *= segs[1]->x; + wts += dir; + hg.Reweight(wts); + KBest::KBestDerivations, ESentenceTraversal> kbest2(hg, 10); + for (int i = 0; i < 10; ++i) { + const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = + kbest2.LazyKthBest(hg.nodes_.size() - 1, i); + if (!d) break; + cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl; + } + for (unsigned i = 0; i < segs.size(); ++i) { + cerr << "seg=" << i << endl; + vector trans; + segs[i]->ConstructTranslation(&trans); + cerr << TD::GetString(trans) << endl; + } +} + +BOOST_AUTO_TEST_CASE( TestS1) { + int fPhraseModel_0 = FD::Convert("PhraseModel_0"); + int fPhraseModel_1 = FD::Convert("PhraseModel_1"); + int fPhraseModel_2 = FD::Convert("PhraseModel_2"); + int fLanguageModel = FD::Convert("LanguageModel"); + int fWordPenalty = FD::Convert("WordPenalty"); + int fPassThrough = FD::Convert("PassThrough"); + SparseVector wts; + wts.set_value(fWordPenalty, 4.25); + wts.set_value(fLanguageModel, -1.1165); + wts.set_value(fPhraseModel_0, -0.96); + wts.set_value(fPhraseModel_1, -0.65); + wts.set_value(fPhraseModel_2, -0.77); + wts.set_value(fPassThrough, -10.0); + + vector to_optimize; + to_optimize.push_back(fWordPenalty); + to_optimize.push_back(fLanguageModel); + to_optimize.push_back(fPhraseModel_0); + to_optimize.push_back(fPhraseModel_1); + to_optimize.push_back(fPhraseModel_2); + + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + + Hypergraph hg; + ReadFile rf(path + "/0.json.gz"); + HypergraphIO::ReadFromJSON(rf.stream(), &hg); + hg.Reweight(wts); + + Hypergraph hg2; + ReadFile rf2(path + "/1.json.gz"); + HypergraphIO::ReadFromJSON(rf2.stream(), &hg2); + hg2.Reweight(wts); + + vector > refs1(4); + TD::ConvertSentence(ref11, &refs1[0]); + TD::ConvertSentence(ref21, &refs1[1]); + TD::ConvertSentence(ref31, &refs1[2]); + TD::ConvertSentence(ref41, &refs1[3]); + vector > refs2(4); + TD::ConvertSentence(ref12, &refs2[0]); + TD::ConvertSentence(ref22, &refs2[1]); + TD::ConvertSentence(ref32, &refs2[2]); + TD::ConvertSentence(ref42, &refs2[3]); + vector envs(2); + + RandomNumberGenerator rng; + + vector > axes; // directions to search + LineOptimizer::CreateOptimizationDirections( + to_optimize, + 10, + &rng, + &axes); + assert(axes.size() == 10 + to_optimize.size()); + for (unsigned i = 0; i < axes.size(); ++i) + cerr << axes[i] << endl; + const SparseVector& axis = axes[0]; + + cerr << "Computing Viterbi envelope using inside algorithm...\n"; + cerr << "axis: " << axis << endl; + clock_t t_start=clock(); + ConvexHullWeightFunction wf(wts, axis); // wts = starting point, axis = search direction + envs[0] = Inside(hg, NULL, wf); + envs[1] = Inside(hg2, NULL, wf); + + vector es(2); + EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU"); + boost::shared_ptr scorer1 = metric->CreateSegmentEvaluator(refs1); + boost::shared_ptr scorer2 = metric->CreateSegmentEvaluator(refs2); + ComputeErrorSurface(*scorer1, envs[0], &es[0], metric, hg); + ComputeErrorSurface(*scorer2, envs[1], &es[1], metric, hg2); + cerr << envs[0].size() << " " << envs[1].size() << endl; + cerr << es[0].size() << " " << es[1].size() << endl; + envs.clear(); + clock_t t_env=clock(); + float score; + double m = LineOptimizer::LineOptimize(metric,es, LineOptimizer::MAXIMIZE_SCORE, &score); + clock_t t_opt=clock(); + cerr << "line optimizer returned: " << m << " (SCORE=" << score << ")\n"; + BOOST_CHECK_CLOSE(0.48719698, score, 1e-5); + SparseVector res = axis; + res *= m; + res += wts; + cerr << "res: " << res << endl; + cerr << "ENVELOPE PROCESSING=" << (static_cast(t_env - t_start) / 1000.0) << endl; + cerr << " LINE OPTIMIZATION=" << (static_cast(t_opt - t_env) / 1000.0) << endl; + hg.Reweight(res); + hg2.Reweight(res); + vector t1,t2; + ViterbiESentence(hg, &t1); + ViterbiESentence(hg2, &t2); + cerr << TD::GetString(t1) << endl; + cerr << TD::GetString(t2) << endl; +} + +BOOST_AUTO_TEST_CASE(TestZeroOrigin) { + const string json = "{\"rules\":[1,\"[X7] ||| blA ||| without ||| LHSProb=3.92173 LexE2F=2.90799 LexF2E=1.85003 GenerativeProb=10.5381 RulePenalty=1 XFE=2.77259 XEF=0.441833 LabelledEF=2.63906 LabelledFE=4.96981 LogRuleCount=0.693147\",2,\"[X7] ||| blA ||| except ||| LHSProb=4.92173 LexE2F=3.90799 LexF2E=1.85003 GenerativeProb=11.5381 RulePenalty=1 XFE=2.77259 XEF=1.44183 LabelledEF=2.63906 LabelledFE=4.96981 LogRuleCount=1.69315\",3,\"[S] ||| [X7,1] ||| [1] ||| GlueTop=1\",4,\"[X28] ||| EnwAn ||| title ||| LHSProb=3.96802 LexE2F=2.22462 LexF2E=1.83258 GenerativeProb=10.0863 RulePenalty=1 XFE=0 XEF=1.20397 LabelledEF=1.20397 LabelledFE=-1.98341e-08 LogRuleCount=1.09861\",5,\"[X0] ||| EnwAn ||| funny ||| LHSProb=3.98479 LexE2F=1.79176 LexF2E=3.21888 GenerativeProb=11.1681 RulePenalty=1 XFE=0 XEF=2.30259 LabelledEF=2.30259 LabelledFE=0 LogRuleCount=0 SingletonRule=1\",6,\"[X8] ||| [X7,1] EnwAn ||| entitled [1] ||| LHSProb=3.82533 LexE2F=3.21888 LexF2E=2.52573 GenerativeProb=11.3276 RulePenalty=1 XFE=1.20397 XEF=1.20397 LabelledEF=2.30259 LabelledFE=2.30259 LogRuleCount=0 SingletonRule=1\",7,\"[S] ||| [S,1] [X28,2] ||| [1] [2] ||| Glue=1\",8,\"[S] ||| [S,1] [X0,2] ||| [1] [2] ||| Glue=1\",9,\"[S] ||| [X8,1] ||| [1] ||| GlueTop=1\",10,\"[Goal] ||| [S,1] ||| [1]\"],\"features\":[\"PassThrough\",\"Glue\",\"GlueTop\",\"LanguageModel\",\"WordPenalty\",\"LHSProb\",\"LexE2F\",\"LexF2E\",\"GenerativeProb\",\"RulePenalty\",\"XFE\",\"XEF\",\"LabelledEF\",\"LabelledFE\",\"LogRuleCount\",\"SingletonRule\"],\"edges\":[{\"tail\":[],\"spans\":[0,1,-1,-1],\"feats\":[5,3.92173,6,2.90799,7,1.85003,8,10.5381,9,1,10,2.77259,11,0.441833,12,2.63906,13,4.96981,14,0.693147],\"rule\":1},{\"tail\":[],\"spans\":[0,1,-1,-1],\"feats\":[5,4.92173,6,3.90799,7,1.85003,8,11.5381,9,1,10,2.77259,11,1.44183,12,2.63906,13,4.96981,14,1.69315],\"rule\":2}],\"node\":{\"in_edges\":[0,1],\"cat\":\"X7\"},\"edges\":[{\"tail\":[0],\"spans\":[0,1,-1,-1],\"feats\":[2,1],\"rule\":3}],\"node\":{\"in_edges\":[2],\"cat\":\"S\"},\"edges\":[{\"tail\":[],\"spans\":[1,2,-1,-1],\"feats\":[5,3.96802,6,2.22462,7,1.83258,8,10.0863,9,1,11,1.20397,12,1.20397,13,-1.98341e-08,14,1.09861],\"rule\":4}],\"node\":{\"in_edges\":[3],\"cat\":\"X28\"},\"edges\":[{\"tail\":[],\"spans\":[1,2,-1,-1],\"feats\":[5,3.98479,6,1.79176,7,3.21888,8,11.1681,9,1,11,2.30259,12,2.30259,15,1],\"rule\":5}],\"node\":{\"in_edges\":[4],\"cat\":\"X0\"},\"edges\":[{\"tail\":[0],\"spans\":[0,2,-1,-1],\"feats\":[5,3.82533,6,3.21888,7,2.52573,8,11.3276,9,1,10,1.20397,11,1.20397,12,2.30259,13,2.30259,15,1],\"rule\":6}],\"node\":{\"in_edges\":[5],\"cat\":\"X8\"},\"edges\":[{\"tail\":[1,2],\"spans\":[0,2,-1,-1],\"feats\":[1,1],\"rule\":7},{\"tail\":[1,3],\"spans\":[0,2,-1,-1],\"feats\":[1,1],\"rule\":8},{\"tail\":[4],\"spans\":[0,2,-1,-1],\"feats\":[2,1],\"rule\":9}],\"node\":{\"in_edges\":[6,7,8],\"cat\":\"S\"},\"edges\":[{\"tail\":[5],\"spans\":[0,2,-1,-1],\"feats\":[],\"rule\":10}],\"node\":{\"in_edges\":[9],\"cat\":\"Goal\"}}"; + Hypergraph hg; + istringstream instr(json); + HypergraphIO::ReadFromJSON(&instr, &hg); + SparseVector wts; + wts.set_value(FD::Convert("PassThrough"), -0.929201533002898); + hg.Reweight(wts); + + vector, prob_t> > list; + std::vector > features; + KBest::KBestDerivations, ESentenceTraversal> kbest(hg, 10); + for (int i = 0; i < 10; ++i) { + const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = + kbest.LazyKthBest(hg.nodes_.size() - 1, i); + if (!d) break; + cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl; + } + + SparseVector axis; axis.set_value(FD::Convert("Glue"),1.0); + ConvexHullWeightFunction wf(wts, axis); // wts = starting point, axis = search direction + vector envs(1); + envs[0] = Inside(hg, NULL, wf); + + vector > mr(4); + TD::ConvertSentence("untitled", &mr[0]); + TD::ConvertSentence("with no title", &mr[1]); + TD::ConvertSentence("without a title", &mr[2]); + TD::ConvertSentence("without title", &mr[3]); + EvaluationMetric* metric = EvaluationMetric::Instance("IBM_BLEU"); + boost::shared_ptr scorer1 = metric->CreateSegmentEvaluator(mr); + vector es(1); + ComputeErrorSurface(*scorer1, envs[0], &es[0], metric, hg); +} + diff --git a/training/dpmert/mert_geometry.cc b/training/dpmert/mert_geometry.cc new file mode 100644 index 00000000..d6973658 --- /dev/null +++ b/training/dpmert/mert_geometry.cc @@ -0,0 +1,185 @@ +#include "mert_geometry.h" + +#include +#include + +using namespace std; + +ConvexHull::ConvexHull(int i) { + if (i == 0) { + // do nothing - <> + } else if (i == 1) { + points.push_back(boost::shared_ptr(new MERTPoint(0, 0, 0, boost::shared_ptr(), boost::shared_ptr()))); + assert(this->IsMultiplicativeIdentity()); + } else { + cerr << "Only can create ConvexHull semiring 0 and 1 with this constructor!\n"; + abort(); + } +} + +const ConvexHull ConvexHullWeightFunction::operator()(const Hypergraph::Edge& e) const { + const double m = direction.dot(e.feature_values_); + const double b = origin.dot(e.feature_values_); + MERTPoint* point = new MERTPoint(m, b, e); + return ConvexHull(1, point); +} + +ostream& operator<<(ostream& os, const ConvexHull& env) { + os << '<'; + const vector >& points = env.GetSortedSegs(); + for (int i = 0; i < points.size(); ++i) + os << (i==0 ? "" : "|") << "x=" << points[i]->x << ",b=" << points[i]->b << ",m=" << points[i]->m << ",p1=" << points[i]->p1 << ",p2=" << points[i]->p2; + return os << '>'; +} + +#define ORIGINAL_MERT_IMPLEMENTATION 1 +#ifdef ORIGINAL_MERT_IMPLEMENTATION + +struct SlopeCompare { + bool operator() (const boost::shared_ptr& a, const boost::shared_ptr& b) const { + return a->m < b->m; + } +}; + +const ConvexHull& ConvexHull::operator+=(const ConvexHull& other) { + if (!other.is_sorted) other.Sort(); + if (points.empty()) { + points = other.points; + return *this; + } + is_sorted = false; + int j = points.size(); + points.resize(points.size() + other.points.size()); + for (int i = 0; i < other.points.size(); ++i) + points[j++] = other.points[i]; + assert(j == points.size()); + return *this; +} + +void ConvexHull::Sort() const { + sort(points.begin(), points.end(), SlopeCompare()); + const int k = points.size(); + int j = 0; + for (int i = 0; i < k; ++i) { + MERTPoint l = *points[i]; + l.x = kMinusInfinity; + // cerr << "m=" << l.m << endl; + if (0 < j) { + if (points[j-1]->m == l.m) { // lines are parallel + if (l.b <= points[j-1]->b) continue; + --j; + } + while(0 < j) { + l.x = (l.b - points[j-1]->b) / (points[j-1]->m - l.m); + if (points[j-1]->x < l.x) break; + --j; + } + if (0 == j) l.x = kMinusInfinity; + } + *points[j++] = l; + } + points.resize(j); + is_sorted = true; +} + +const ConvexHull& ConvexHull::operator*=(const ConvexHull& other) { + if (other.IsMultiplicativeIdentity()) { return *this; } + if (this->IsMultiplicativeIdentity()) { (*this) = other; return *this; } + + if (!is_sorted) Sort(); + if (!other.is_sorted) other.Sort(); + + if (this->IsEdgeEnvelope()) { +// if (other.size() > 1) +// cerr << *this << " (TIMES) " << other << endl; + boost::shared_ptr edge_parent = points[0]; + const double& edge_b = edge_parent->b; + const double& edge_m = edge_parent->m; + points.clear(); + for (int i = 0; i < other.points.size(); ++i) { + const MERTPoint& p = *other.points[i]; + const double m = p.m + edge_m; + const double b = p.b + edge_b; + const double& x = p.x; // x's don't change with * + points.push_back(boost::shared_ptr(new MERTPoint(x, m, b, edge_parent, other.points[i]))); + assert(points.back()->p1->edge); + } +// if (other.size() > 1) +// cerr << " = " << *this << endl; + } else { + vector > new_points; + int this_i = 0; + int other_i = 0; + const int this_size = points.size(); + const int other_size = other.points.size(); + double cur_x = kMinusInfinity; // moves from left to right across the + // real numbers, stopping for all inter- + // sections + double this_next_val = (1 < this_size ? points[1]->x : kPlusInfinity); + double other_next_val = (1 < other_size ? other.points[1]->x : kPlusInfinity); + while (this_i < this_size && other_i < other_size) { + const MERTPoint& this_point = *points[this_i]; + const MERTPoint& other_point= *other.points[other_i]; + const double m = this_point.m + other_point.m; + const double b = this_point.b + other_point.b; + + new_points.push_back(boost::shared_ptr(new MERTPoint(cur_x, m, b, points[this_i], other.points[other_i]))); + int comp = 0; + if (this_next_val < other_next_val) comp = -1; else + if (this_next_val > other_next_val) comp = 1; + if (0 == comp) { // the next values are equal, advance both indices + ++this_i; + ++other_i; + cur_x = this_next_val; // could be other_next_val (they're equal!) + this_next_val = (this_i+1 < this_size ? points[this_i+1]->x : kPlusInfinity); + other_next_val = (other_i+1 < other_size ? other.points[other_i+1]->x : kPlusInfinity); + } else { // advance the i with the lower x, update cur_x + if (-1 == comp) { + ++this_i; + cur_x = this_next_val; + this_next_val = (this_i+1 < this_size ? points[this_i+1]->x : kPlusInfinity); + } else { + ++other_i; + cur_x = other_next_val; + other_next_val = (other_i+1 < other_size ? other.points[other_i+1]->x : kPlusInfinity); + } + } + } + points.swap(new_points); + } + //cerr << "Multiply: result=" << (*this) << endl; + return *this; +} + +// recursively construct translation +void MERTPoint::ConstructTranslation(vector* trans) const { + const MERTPoint* cur = this; + vector > ant_trans; + while(!cur->edge) { + ant_trans.resize(ant_trans.size() + 1); + cur->p2->ConstructTranslation(&ant_trans.back()); + cur = cur->p1.get(); + } + size_t ant_size = ant_trans.size(); + vector*> pants(ant_size); + assert(ant_size == cur->edge->tail_nodes_.size()); + --ant_size; + for (int i = 0; i < pants.size(); ++i) pants[ant_size - i] = &ant_trans[i]; + cur->edge->rule_->ESubstitute(pants, trans); +} + +void MERTPoint::CollectEdgesUsed(std::vector* edges_used) const { + if (edge) { + assert(edge->id_ < edges_used->size()); + (*edges_used)[edge->id_] = true; + } + if (p1) p1->CollectEdgesUsed(edges_used); + if (p2) p2->CollectEdgesUsed(edges_used); +} + +#else + +// THIS IS THE NEW FASTER IMPLEMENTATION OF THE MERT SEMIRING OPERATIONS + +#endif + diff --git a/training/dpmert/mert_geometry.h b/training/dpmert/mert_geometry.h new file mode 100644 index 00000000..a8b6959e --- /dev/null +++ b/training/dpmert/mert_geometry.h @@ -0,0 +1,81 @@ +#ifndef _MERT_GEOMETRY_H_ +#define _MERT_GEOMETRY_H_ + +#include +#include +#include + +#include "hg.h" +#include "sparse_vector.h" + +static const double kMinusInfinity = -std::numeric_limits::infinity(); +static const double kPlusInfinity = std::numeric_limits::infinity(); + +struct MERTPoint { + MERTPoint() : x(), m(), b(), edge() {} + MERTPoint(double _m, double _b) : + x(kMinusInfinity), m(_m), b(_b), edge() {} + MERTPoint(double _x, double _m, double _b, const boost::shared_ptr& p1_, const boost::shared_ptr& p2_) : + x(_x), m(_m), b(_b), p1(p1_), p2(p2_), edge() {} + MERTPoint(double _m, double _b, const Hypergraph::Edge& edge) : + x(kMinusInfinity), m(_m), b(_b), edge(&edge) {} + + double x; // x intersection with previous segment in env, or -inf if none + double m; // this line's slope + double b; // intercept with y-axis + + // we keep a pointer to the "parents" of this segment so we can reconstruct + // the Viterbi translation corresponding to this segment + boost::shared_ptr p1; + boost::shared_ptr p2; + + // only MERTPoints created from an edge using the ConvexHullWeightFunction + // have rules + // TRulePtr rule; + const Hypergraph::Edge* edge; + + // recursively recover the Viterbi translation that will result from setting + // the weights to origin + axis * x, where x is any value from this->x up + // until the next largest x in the containing ConvexHull + void ConstructTranslation(std::vector* trans) const; + void CollectEdgesUsed(std::vector* edges_used) const; +}; + +// this is the semiring value type, +// it defines constructors for 0, 1, and the operations + and * +struct ConvexHull { + // create semiring zero + ConvexHull() : is_sorted(true) {} // zero + // for debugging: + ConvexHull(const std::vector >& s) : points(s) { Sort(); } + // create semiring 1 or 0 + explicit ConvexHull(int i); + ConvexHull(int n, MERTPoint* point) : is_sorted(true), points(n, boost::shared_ptr(point)) {} + const ConvexHull& operator+=(const ConvexHull& other); + const ConvexHull& operator*=(const ConvexHull& other); + bool IsMultiplicativeIdentity() const { + return size() == 1 && (points[0]->b == 0.0 && points[0]->m == 0.0) && (!points[0]->edge) && (!points[0]->p1) && (!points[0]->p2); } + const std::vector >& GetSortedSegs() const { + if (!is_sorted) Sort(); + return points; + } + size_t size() const { return points.size(); } + + private: + bool IsEdgeEnvelope() const { + return points.size() == 1 && points[0]->edge; } + void Sort() const; + mutable bool is_sorted; + mutable std::vector > points; +}; +std::ostream& operator<<(std::ostream& os, const ConvexHull& env); + +struct ConvexHullWeightFunction { + ConvexHullWeightFunction(const SparseVector& ori, + const SparseVector& dir) : origin(ori), direction(dir) {} + const ConvexHull operator()(const Hypergraph::Edge& e) const; + const SparseVector origin; + const SparseVector direction; +}; + +#endif diff --git a/training/dpmert/mr_dpmert_generate_mapper_input.cc b/training/dpmert/mr_dpmert_generate_mapper_input.cc new file mode 100644 index 00000000..199cd23a --- /dev/null +++ b/training/dpmert/mr_dpmert_generate_mapper_input.cc @@ -0,0 +1,81 @@ +#include +#include + +#include +#include + +#include "filelib.h" +#include "weights.h" +#include "line_optimizer.h" + +using namespace std; +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("dev_set_size,s",po::value(),"[REQD] Development set size (# of parallel sentences)") + ("forest_repository,r",po::value(),"[REQD] Path to forest repository") + ("weights,w",po::value(),"[REQD] Current feature weights file") + ("optimize_feature,o",po::value >(), "Feature to optimize (if none specified, all weights listed in the weights file will be optimized)") + ("random_directions,d",po::value()->default_value(20),"Number of random directions to run the line optimizer in") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = false; + if (conf->count("dev_set_size") == 0) { + cerr << "Please specify the size of the development set using -d N\n"; + flag = true; + } + if (conf->count("weights") == 0) { + cerr << "Please specify the starting-point weights using -w \n"; + flag = true; + } + if (conf->count("forest_repository") == 0) { + cerr << "Please specify the forest repository location using -r \n"; + flag = true; + } + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +int main(int argc, char** argv) { + RandomNumberGenerator rng; + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + vector features; + SparseVector origin; + vector w; + Weights::InitFromFile(conf["weights"].as(), &w, &features); + Weights::InitSparseVector(w, &origin); + const string forest_repository = conf["forest_repository"].as(); + if (!DirectoryExists(forest_repository)) { + cerr << "Forest repository directory " << forest_repository << " not found!\n"; + return 1; + } + if (conf.count("optimize_feature") > 0) + features=conf["optimize_feature"].as >(); + vector > directions; + vector fids(features.size()); + for (unsigned i = 0; i < features.size(); ++i) + fids[i] = FD::Convert(features[i]); + LineOptimizer::CreateOptimizationDirections( + fids, + conf["random_directions"].as(), + &rng, + &directions); + unsigned dev_set_size = conf["dev_set_size"].as(); + for (unsigned i = 0; i < dev_set_size; ++i) { + for (unsigned j = 0; j < directions.size(); ++j) { + cout << forest_repository << '/' << i << ".json.gz " << i << ' '; + print(cout, origin, "=", ";"); + cout << ' '; + print(cout, directions[j], "=", ";"); + cout << endl; + } + } + return 0; +} diff --git a/training/dpmert/mr_dpmert_map.cc b/training/dpmert/mr_dpmert_map.cc new file mode 100644 index 00000000..d1efcf96 --- /dev/null +++ b/training/dpmert/mr_dpmert_map.cc @@ -0,0 +1,112 @@ +#include +#include +#include +#include + +#include +#include + +#include "ns.h" +#include "ns_docscorer.h" +#include "ces.h" +#include "filelib.h" +#include "stringlib.h" +#include "sparse_vector.h" +#include "mert_geometry.h" +#include "inside_outside.h" +#include "error_surface.h" +#include "b64tools.h" +#include "hg_io.h" + +using namespace std; +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("reference,r",po::value >(), "[REQD] Reference translation (tokenized text)") + ("source,s",po::value(), "Source file (ignored, except for AER)") + ("evaluation_metric,m",po::value()->default_value("ibm_bleu"), "Evaluation metric being optimized") + ("input,i",po::value()->default_value("-"), "Input file to map (- is STDIN)") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = false; + if (!conf->count("reference")) { + cerr << "Please specify one or more references using -r \n"; + flag = true; + } + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +bool ReadSparseVectorString(const string& s, SparseVector* v) { +#if 0 + // this should work, but untested. + std::istringstream i(s); + i>>*v; +#else + vector fields; + Tokenize(s, ';', &fields); + if (fields.empty()) return false; + for (unsigned i = 0; i < fields.size(); ++i) { + vector pair(2); + Tokenize(fields[i], '=', &pair); + if (pair.size() != 2) { + cerr << "Error parsing vector string: " << fields[i] << endl; + return false; + } + v->set_value(FD::Convert(pair[0]), atof(pair[1].c_str())); + } + return true; +#endif +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + const string evaluation_metric = conf["evaluation_metric"].as(); + EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); + DocumentScorer ds(metric, conf["reference"].as >()); + cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl; + Hypergraph hg; + string last_file; + ReadFile in_read(conf["input"].as()); + istream &in=*in_read.stream(); + while(in) { + string line; + getline(in, line); + if (line.empty()) continue; + istringstream is(line); + int sent_id; + string file, s_origin, s_direction; + // path-to-file (JSON) sent_ed starting-point search-direction + is >> file >> sent_id >> s_origin >> s_direction; + SparseVector origin; + ReadSparseVectorString(s_origin, &origin); + SparseVector direction; + ReadSparseVectorString(s_direction, &direction); + // cerr << "File: " << file << "\nDir: " << direction << "\n X: " << origin << endl; + if (last_file != file) { + last_file = file; + ReadFile rf(file); + HypergraphIO::ReadFromJSON(rf.stream(), &hg); + } + const ConvexHullWeightFunction wf(origin, direction); + const ConvexHull hull = Inside(hg, NULL, wf); + + ErrorSurface es; + ComputeErrorSurface(*ds[sent_id], hull, &es, metric, hg); + //cerr << "Viterbi envelope has " << ve.size() << " segments\n"; + // cerr << "Error surface has " << es.size() << " segments\n"; + string val; + es.Serialize(&val); + cout << 'M' << ' ' << s_origin << ' ' << s_direction << '\t'; + B64::b64encode(val.c_str(), val.size(), &cout); + cout << endl << flush; + } + return 0; +} diff --git a/training/dpmert/mr_dpmert_reduce.cc b/training/dpmert/mr_dpmert_reduce.cc new file mode 100644 index 00000000..31512a03 --- /dev/null +++ b/training/dpmert/mr_dpmert_reduce.cc @@ -0,0 +1,77 @@ +#include +#include +#include +#include + +#include +#include + +#include "sparse_vector.h" +#include "error_surface.h" +#include "line_optimizer.h" +#include "b64tools.h" +#include "stringlib.h" + +using namespace std; +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("evaluation_metric,m",po::value(), "Evaluation metric (IBM_BLEU, etc.)") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = conf->count("evaluation_metric") == 0; + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + const string evaluation_metric = conf["evaluation_metric"].as(); + EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); + LineOptimizer::ScoreType opt_type = LineOptimizer::MAXIMIZE_SCORE; + if (metric->IsErrorMetric()) + opt_type = LineOptimizer::MINIMIZE_SCORE; + + vector esv; + string last_key, line, key, val; + while(getline(cin, line)) { + size_t ks = line.find("\t"); + assert(string::npos != ks); + assert(ks > 2); + key = line.substr(2, ks - 2); + val = line.substr(ks + 1); + if (key != last_key) { + if (!last_key.empty()) { + float score; + double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score); + cout << last_key << "|" << x << "|" << score << endl; + } + last_key.swap(key); + esv.clear(); + } + if (val.size() % 4 != 0) { + cerr << "B64 encoding error 1! Skipping.\n"; + continue; + } + string encoded(val.size() / 4 * 3, '\0'); + if (!B64::b64decode(reinterpret_cast(&val[0]), val.size(), &encoded[0], encoded.size())) { + cerr << "B64 encoding error 2! Skipping.\n"; + continue; + } + esv.push_back(ErrorSurface()); + esv.back().Deserialize(encoded); + } + if (!esv.empty()) { + float score; + double x = LineOptimizer::LineOptimize(metric, esv, opt_type, &score); + cout << last_key << "|" << x << "|" << score << endl; + } + return 0; +} diff --git a/training/dpmert/test_aer/README b/training/dpmert/test_aer/README new file mode 100644 index 00000000..819b2e32 --- /dev/null +++ b/training/dpmert/test_aer/README @@ -0,0 +1,8 @@ +To run the test: + +../dist-vest.pl --local --metric aer cdec.ini --source-file corpus.src --ref-files=ref.0 --weights weights + +This will optimize the parameters of the tiny lexical translation model +so as to minimize the AER of the Viterbi alignment on the development +set in corpus.src according to the reference alignments in ref.0. + diff --git a/training/dpmert/test_aer/cdec.ini b/training/dpmert/test_aer/cdec.ini new file mode 100644 index 00000000..08187848 --- /dev/null +++ b/training/dpmert/test_aer/cdec.ini @@ -0,0 +1,3 @@ +formalism=lextrans +grammar=grammar +aligner=true diff --git a/training/dpmert/test_aer/corpus.src b/training/dpmert/test_aer/corpus.src new file mode 100644 index 00000000..31b23971 --- /dev/null +++ b/training/dpmert/test_aer/corpus.src @@ -0,0 +1,3 @@ +el gato negro ||| the black cat +el gato ||| the cat +el libro ||| the book diff --git a/training/dpmert/test_aer/grammar b/training/dpmert/test_aer/grammar new file mode 100644 index 00000000..9d857824 --- /dev/null +++ b/training/dpmert/test_aer/grammar @@ -0,0 +1,12 @@ +el ||| cat ||| F1=1 +el ||| the ||| F2=1 +el ||| black ||| F3=1 +el ||| book ||| F11=1 +gato ||| cat ||| F4=1 NN=1 +gato ||| black ||| F5=1 +gato ||| the ||| F6=1 +negro ||| the ||| F7=1 +negro ||| cat ||| F8=1 +negro ||| black ||| F9=1 +libro ||| the ||| F10=1 +libro ||| book ||| F12=1 NN=1 diff --git a/training/dpmert/test_aer/ref.0 b/training/dpmert/test_aer/ref.0 new file mode 100644 index 00000000..734a9c5b --- /dev/null +++ b/training/dpmert/test_aer/ref.0 @@ -0,0 +1,3 @@ +0-0 1-2 2-1 +0-0 1-1 +0-0 1-1 diff --git a/training/dpmert/test_aer/weights b/training/dpmert/test_aer/weights new file mode 100644 index 00000000..afc9282e --- /dev/null +++ b/training/dpmert/test_aer/weights @@ -0,0 +1,13 @@ +F1 0.1 +F2 -.5980815 +F3 0.24235 +F4 0.625 +F5 0.4514 +F6 0.112316 +F7 -0.123415 +F8 -0.25390285 +F9 -0.23852 +F10 0.646 +F11 0.413141 +F12 0.343216 +NN -0.1215 diff --git a/training/dpmert/test_data/0.json.gz b/training/dpmert/test_data/0.json.gz new file mode 100644 index 00000000..30f8dd77 Binary files /dev/null and b/training/dpmert/test_data/0.json.gz differ diff --git a/training/dpmert/test_data/1.json.gz b/training/dpmert/test_data/1.json.gz new file mode 100644 index 00000000..c82cc179 Binary files /dev/null and b/training/dpmert/test_data/1.json.gz differ diff --git a/training/dpmert/test_data/c2e.txt.0 b/training/dpmert/test_data/c2e.txt.0 new file mode 100644 index 00000000..12c4abe9 --- /dev/null +++ b/training/dpmert/test_data/c2e.txt.0 @@ -0,0 +1,2 @@ +australia reopens embassy in manila +( afp , manila , january 2 ) australia reopened its embassy in the philippines today , which was shut down about seven weeks ago due to what was described as a specific threat of a terrorist attack . diff --git a/training/dpmert/test_data/c2e.txt.1 b/training/dpmert/test_data/c2e.txt.1 new file mode 100644 index 00000000..4ac12df1 --- /dev/null +++ b/training/dpmert/test_data/c2e.txt.1 @@ -0,0 +1,2 @@ +australia reopened manila embassy +( agence france-presse , manila , 2nd ) - australia reopened its embassy in the philippines today . the embassy was closed seven weeks ago after what was described as a specific threat of a terrorist attack . diff --git a/training/dpmert/test_data/c2e.txt.2 b/training/dpmert/test_data/c2e.txt.2 new file mode 100644 index 00000000..2f67b72f --- /dev/null +++ b/training/dpmert/test_data/c2e.txt.2 @@ -0,0 +1,2 @@ +australia to reopen embassy in manila +( afp report from manila , january 2 ) australia reopened its embassy in the philippines today . seven weeks ago , the embassy was shut down due to so-called confirmed terrorist attack threats . diff --git a/training/dpmert/test_data/c2e.txt.3 b/training/dpmert/test_data/c2e.txt.3 new file mode 100644 index 00000000..5483cef6 --- /dev/null +++ b/training/dpmert/test_data/c2e.txt.3 @@ -0,0 +1,2 @@ +australia to re - open its embassy to manila +( afp , manila , thursday ) australia reopens its embassy to manila , which was closed for the so-called " clear " threat of terrorist attack 7 weeks ago . diff --git a/training/dpmert/test_data/re.txt.0 b/training/dpmert/test_data/re.txt.0 new file mode 100644 index 00000000..86eff087 --- /dev/null +++ b/training/dpmert/test_data/re.txt.0 @@ -0,0 +1,5 @@ +erdogan states turkey to reject any pressures to urge it to recognize cyprus +ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara will reject any pressure by the european union to urge it to recognize cyprus . this comes two weeks before the summit of european union state and government heads who will decide whether or nor membership negotiations with ankara should be opened . +erdogan told " ntv " television station that " the european union cannot address us by imposing new conditions on us with regard to cyprus . +we will discuss this dossier in the course of membership negotiations . " +he added " let me be clear , i cannot sidestep turkey , this is something we cannot accept . " diff --git a/training/dpmert/test_data/re.txt.1 b/training/dpmert/test_data/re.txt.1 new file mode 100644 index 00000000..2140f198 --- /dev/null +++ b/training/dpmert/test_data/re.txt.1 @@ -0,0 +1,5 @@ +erdogan confirms turkey will resist any pressure to recognize cyprus +ankara 12 - 1 ( afp ) - the turkish head of government , recep tayyip erdogan , announced today ( wednesday ) that ankara would resist any pressure the european union might exercise in order to force it into recognizing cyprus . this comes two weeks before a summit of european union heads of state and government , who will decide whether or not to open membership negotiations with ankara . +erdogan said to the ntv television channel : " the european union cannot engage with us through imposing new conditions on us with regard to cyprus . +we shall discuss this issue in the course of the membership negotiations . " +he added : " let me be clear - i cannot confine turkey . this is something we do not accept . " diff --git a/training/dpmert/test_data/re.txt.2 b/training/dpmert/test_data/re.txt.2 new file mode 100644 index 00000000..94e46286 --- /dev/null +++ b/training/dpmert/test_data/re.txt.2 @@ -0,0 +1,5 @@ +erdogan confirms that turkey will reject any pressures to encourage it to recognize cyprus +ankara , 12 / 1 ( afp ) - the turkish prime minister recep tayyip erdogan declared today , wednesday , that ankara will reject any pressures that the european union may apply on it to encourage to recognize cyprus . this comes two weeks before a summit of the heads of countries and governments of the european union , who will decide on whether or not to start negotiations on joining with ankara . +erdogan told the ntv television station that " it is not possible for the european union to talk to us by imposing new conditions on us regarding cyprus . +we shall discuss this dossier during the negotiations on joining . " +and he added , " let me be clear . turkey's arm should not be twisted ; this is something we cannot accept . " diff --git a/training/dpmert/test_data/re.txt.3 b/training/dpmert/test_data/re.txt.3 new file mode 100644 index 00000000..f87c3308 --- /dev/null +++ b/training/dpmert/test_data/re.txt.3 @@ -0,0 +1,5 @@ +erdogan stresses that turkey will reject all pressures to force it to recognize cyprus +ankara 12 - 1 ( afp ) - turkish prime minister recep tayyip erdogan announced today , wednesday , that ankara would refuse all pressures applied on it by the european union to force it to recognize cyprus . that came two weeks before the summit of the presidents and prime ministers of the european union , who would decide on whether to open negotiations on joining with ankara or not . +erdogan said to " ntv " tv station that the " european union can not communicate with us by imposing on us new conditions related to cyprus . +we will discuss this file during the negotiations on joining . " +he added , " let me be clear . turkey's arm should not be twisted . this is unacceptable to us . " diff --git a/training/dtrain/Makefile.am b/training/dtrain/Makefile.am new file mode 100644 index 00000000..5b48e756 --- /dev/null +++ b/training/dtrain/Makefile.am @@ -0,0 +1,7 @@ +bin_PROGRAMS = dtrain + +dtrain_SOURCES = dtrain.cc score.cc +dtrain_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz + +AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval + diff --git a/training/dtrain/README.md b/training/dtrain/README.md new file mode 100644 index 00000000..7edabbf1 --- /dev/null +++ b/training/dtrain/README.md @@ -0,0 +1,48 @@ +This is a simple (and parallelizable) tuning method for cdec +which is able to train the weights of very many (sparse) features. +It was used here: + "Joint Feature Selection in Distributed Stochastic + Learning for Large-Scale Discriminative Training in + SMT" +(Simianer, Riezler, Dyer; ACL 2012) + + +Building +-------- +Builds when building cdec, see ../BUILDING . +To build only parts needed for dtrain do +``` + autoreconf -ifv + ./configure [--disable-gtest] + cd dtrain/; make +``` + +Running +------- +To run this on a dev set locally: +``` + #define DTRAIN_LOCAL +``` +otherwise remove that line or undef, then recompile. You need a single +grammar file or input annotated with per-sentence grammars (psg) as you +would use with cdec. Additionally you need to give dtrain a file with +references (--refs) when running locally. + +The input for use with hadoop streaming looks like this: +``` + \t\t\t +``` +To convert a psg to this format you need to replace all "\n" +by "\t". Make sure there are no tabs in your data. + +For an example of local usage (with the 'distributed' format) +the see test/example/ . This expects dtrain to be built without +DTRAIN_LOCAL. + +Legal +----- +Copyright (c) 2012 by Patrick Simianer + +See the file ../LICENSE.txt for the licensing terms that this software is +released under. + diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc new file mode 100644 index 00000000..18286668 --- /dev/null +++ b/training/dtrain/dtrain.cc @@ -0,0 +1,657 @@ +#include "dtrain.h" + + +bool +dtrain_init(int argc, char** argv, po::variables_map* cfg) +{ + po::options_description ini("Configuration File Options"); + ini.add_options() + ("input", po::value()->default_value("-"), "input file") + ("output", po::value()->default_value("-"), "output weights file, '-' for STDOUT") + ("input_weights", po::value(), "input weights file (e.g. from previous iteration)") + ("decoder_config", po::value(), "configuration file for cdec") + ("print_weights", po::value(), "weights to print on each iteration") + ("stop_after", po::value()->default_value(0), "stop after X input sentences") + ("tmp", po::value()->default_value("/tmp"), "temp dir to use") + ("keep", po::value()->zero_tokens(), "keep weights files for each iteration") + ("hstreaming", po::value(), "run in hadoop streaming mode, arg is a task id") + ("epochs", po::value()->default_value(10), "# of iterations T (per shard)") + ("k", po::value()->default_value(100), "how many translations to sample") + ("sample_from", po::value()->default_value("kbest"), "where to sample translations from: 'kbest', 'forest'") + ("filter", po::value()->default_value("uniq"), "filter kbest list: 'not', 'uniq'") + ("pair_sampling", po::value()->default_value("XYX"), "how to sample pairs: 'all', 'XYX' or 'PRO'") + ("hi_lo", po::value()->default_value(0.1), "hi and lo (X) for XYX (default 0.1), <= 0.5") + ("pair_threshold", po::value()->default_value(0.), "bleu [0,1] threshold to filter pairs") + ("N", po::value()->default_value(4), "N for Ngrams (BLEU)") + ("scorer", po::value()->default_value("stupid_bleu"), "scoring: bleu, stupid_, smooth_, approx_, lc_") + ("learning_rate", po::value()->default_value(1.0), "learning rate") + ("gamma", po::value()->default_value(0.), "gamma for SVM (0 for perceptron)") + ("select_weights", po::value()->default_value("last"), "output best, last, avg weights ('VOID' to throw away)") + ("rescale", po::value()->zero_tokens(), "rescale weight vector after each input") + ("l1_reg", po::value()->default_value("none"), "apply l1 regularization as in 'Tsuroka et al' (2010)") + ("l1_reg_strength", po::value(), "l1 regularization strength") + ("fselect", po::value()->default_value(-1), "select top x percent (or by threshold) of features after each epoch NOT IMPLEMENTED") // TODO + ("approx_bleu_d", po::value()->default_value(0.9), "discount for approx. BLEU") + ("scale_bleu_diff", po::value()->zero_tokens(), "learning rate <- bleu diff of a misranked pair") + ("loss_margin", po::value()->default_value(0.), "update if no error in pref pair but model scores this near") + ("max_pairs", po::value()->default_value(std::numeric_limits::max()), "max. # of pairs per Sent.") +#ifdef DTRAIN_LOCAL + ("refs,r", po::value(), "references in local mode") +#endif + ("noup", po::value()->zero_tokens(), "do not update weights"); + po::options_description cl("Command Line Options"); + cl.add_options() + ("config,c", po::value(), "dtrain config file") + ("quiet,q", po::value()->zero_tokens(), "be quiet") + ("verbose,v", po::value()->zero_tokens(), "be verbose"); + cl.add(ini); + po::store(parse_command_line(argc, argv, cl), *cfg); + if (cfg->count("config")) { + ifstream ini_f((*cfg)["config"].as().c_str()); + po::store(po::parse_config_file(ini_f, ini), *cfg); + } + po::notify(*cfg); + if (!cfg->count("decoder_config")) { + cerr << cl << endl; + return false; + } + if (cfg->count("hstreaming") && (*cfg)["output"].as() != "-") { + cerr << "When using 'hstreaming' the 'output' param should be '-'." << endl; + return false; + } +#ifdef DTRAIN_LOCAL + if ((*cfg)["input"].as() == "-") { + cerr << "Can't use stdin as input with this binary. Recompile without DTRAIN_LOCAL" << endl; + return false; + } +#endif + if ((*cfg)["sample_from"].as() != "kbest" + && (*cfg)["sample_from"].as() != "forest") { + cerr << "Wrong 'sample_from' param: '" << (*cfg)["sample_from"].as() << "', use 'kbest' or 'forest'." << endl; + return false; + } + if ((*cfg)["sample_from"].as() == "kbest" && (*cfg)["filter"].as() != "uniq" && + (*cfg)["filter"].as() != "not") { + cerr << "Wrong 'filter' param: '" << (*cfg)["filter"].as() << "', use 'uniq' or 'not'." << endl; + return false; + } + if ((*cfg)["pair_sampling"].as() != "all" && (*cfg)["pair_sampling"].as() != "XYX" && + (*cfg)["pair_sampling"].as() != "PRO") { + cerr << "Wrong 'pair_sampling' param: '" << (*cfg)["pair_sampling"].as() << "'." << endl; + return false; + } + if(cfg->count("hi_lo") && (*cfg)["pair_sampling"].as() != "XYX") { + cerr << "Warning: hi_lo only works with pair_sampling XYX." << endl; + } + if((*cfg)["hi_lo"].as() > 0.5 || (*cfg)["hi_lo"].as() < 0.01) { + cerr << "hi_lo must lie in [0.01, 0.5]" << endl; + return false; + } + if ((*cfg)["pair_threshold"].as() < 0) { + cerr << "The threshold must be >= 0!" << endl; + return false; + } + if ((*cfg)["select_weights"].as() != "last" && (*cfg)["select_weights"].as() != "best" && + (*cfg)["select_weights"].as() != "avg" && (*cfg)["select_weights"].as() != "VOID") { + cerr << "Wrong 'select_weights' param: '" << (*cfg)["select_weights"].as() << "', use 'last' or 'best'." << endl; + return false; + } + return true; +} + +int +main(int argc, char** argv) +{ + // handle most parameters + po::variables_map cfg; + if (!dtrain_init(argc, argv, &cfg)) exit(1); // something is wrong + bool quiet = false; + if (cfg.count("quiet")) quiet = true; + bool verbose = false; + if (cfg.count("verbose")) verbose = true; + bool noup = false; + if (cfg.count("noup")) noup = true; + bool hstreaming = false; + string task_id; + if (cfg.count("hstreaming")) { + hstreaming = true; + quiet = true; + task_id = cfg["hstreaming"].as(); + cerr.precision(17); + } + bool rescale = false; + if (cfg.count("rescale")) rescale = true; + HSReporter rep(task_id); + bool keep = false; + if (cfg.count("keep")) keep = true; + + const unsigned k = cfg["k"].as(); + const unsigned N = cfg["N"].as(); + const unsigned T = cfg["epochs"].as(); + const unsigned stop_after = cfg["stop_after"].as(); + const string filter_type = cfg["filter"].as(); + const string sample_from = cfg["sample_from"].as(); + const string pair_sampling = cfg["pair_sampling"].as(); + const score_t pair_threshold = cfg["pair_threshold"].as(); + const string select_weights = cfg["select_weights"].as(); + const float hi_lo = cfg["hi_lo"].as(); + const score_t approx_bleu_d = cfg["approx_bleu_d"].as(); + const unsigned max_pairs = cfg["max_pairs"].as(); + weight_t loss_margin = cfg["loss_margin"].as(); + if (loss_margin > 9998.) loss_margin = std::numeric_limits::max(); + bool scale_bleu_diff = false; + if (cfg.count("scale_bleu_diff")) scale_bleu_diff = true; + bool average = false; + if (select_weights == "avg") + average = true; + vector print_weights; + if (cfg.count("print_weights")) + boost::split(print_weights, cfg["print_weights"].as(), boost::is_any_of(" ")); + + // setup decoder + register_feature_functions(); + SetSilent(true); + ReadFile ini_rf(cfg["decoder_config"].as()); + if (!quiet) + cerr << setw(25) << "cdec cfg " << "'" << cfg["decoder_config"].as() << "'" << endl; + Decoder decoder(ini_rf.stream()); + + // scoring metric/scorer + string scorer_str = cfg["scorer"].as(); + LocalScorer* scorer; + if (scorer_str == "bleu") { + scorer = dynamic_cast(new BleuScorer); + } else if (scorer_str == "stupid_bleu") { + scorer = dynamic_cast(new StupidBleuScorer); + } else if (scorer_str == "smooth_bleu") { + scorer = dynamic_cast(new SmoothBleuScorer); + } else if (scorer_str == "sum_bleu") { + scorer = dynamic_cast(new SumBleuScorer); + } else if (scorer_str == "sumexp_bleu") { + scorer = dynamic_cast(new SumExpBleuScorer); + } else if (scorer_str == "sumwhatever_bleu") { + scorer = dynamic_cast(new SumWhateverBleuScorer); + } else if (scorer_str == "approx_bleu") { + scorer = dynamic_cast(new ApproxBleuScorer(N, approx_bleu_d)); + } else if (scorer_str == "lc_bleu") { + scorer = dynamic_cast(new LinearBleuScorer(N)); + } else { + cerr << "Don't know scoring metric: '" << scorer_str << "', exiting." << endl; + exit(1); + } + vector bleu_weights; + scorer->Init(N, bleu_weights); + + // setup decoder observer + MT19937 rng; // random number generator, only for forest sampling + HypSampler* observer; + if (sample_from == "kbest") + observer = dynamic_cast(new KBestGetter(k, filter_type)); + else + observer = dynamic_cast(new KSampler(k, &rng)); + observer->SetScorer(scorer); + + // init weights + vector& dense_weights = decoder.CurrentWeightVector(); + SparseVector lambdas, cumulative_penalties, w_average; + if (cfg.count("input_weights")) Weights::InitFromFile(cfg["input_weights"].as(), &dense_weights); + Weights::InitSparseVector(dense_weights, &lambdas); + + // meta params for perceptron, SVM + weight_t eta = cfg["learning_rate"].as(); + weight_t gamma = cfg["gamma"].as(); + + // l1 regularization + bool l1naive = false; + bool l1clip = false; + bool l1cumul = false; + weight_t l1_reg = 0; + if (cfg["l1_reg"].as() != "none") { + string s = cfg["l1_reg"].as(); + if (s == "naive") l1naive = true; + else if (s == "clip") l1clip = true; + else if (s == "cumul") l1cumul = true; + l1_reg = cfg["l1_reg_strength"].as(); + } + + // output + string output_fn = cfg["output"].as(); + // input + string input_fn = cfg["input"].as(); + ReadFile input(input_fn); + // buffer input for t > 0 + vector src_str_buf; // source strings (decoder takes only strings) + vector > ref_ids_buf; // references as WordID vecs + // where temp files go + string tmp_path = cfg["tmp"].as(); +#ifdef DTRAIN_LOCAL + string refs_fn = cfg["refs"].as(); + ReadFile refs(refs_fn); +#else + string grammar_buf_fn = gettmpf(tmp_path, "dtrain-grammars"); + ogzstream grammar_buf_out; + grammar_buf_out.open(grammar_buf_fn.c_str()); +#endif + + unsigned in_sz = std::numeric_limits::max(); // input index, input size + vector > all_scores; + score_t max_score = 0.; + unsigned best_it = 0; + float overall_time = 0.; + + // output cfg + if (!quiet) { + cerr << _p5; + cerr << endl << "dtrain" << endl << "Parameters:" << endl; + cerr << setw(25) << "k " << k << endl; + cerr << setw(25) << "N " << N << endl; + cerr << setw(25) << "T " << T << endl; + cerr << setw(25) << "scorer '" << scorer_str << "'" << endl; + if (scorer_str == "approx_bleu") + cerr << setw(25) << "approx. B discount " << approx_bleu_d << endl; + cerr << setw(25) << "sample from " << "'" << sample_from << "'" << endl; + if (sample_from == "kbest") + cerr << setw(25) << "filter " << "'" << filter_type << "'" << endl; + if (!scale_bleu_diff) cerr << setw(25) << "learning rate " << eta << endl; + else cerr << setw(25) << "learning rate " << "bleu diff" << endl; + cerr << setw(25) << "gamma " << gamma << endl; + cerr << setw(25) << "loss margin " << loss_margin << endl; + cerr << setw(25) << "pairs " << "'" << pair_sampling << "'" << endl; + if (pair_sampling == "XYX") + cerr << setw(25) << "hi lo " << hi_lo << endl; + cerr << setw(25) << "pair threshold " << pair_threshold << endl; + cerr << setw(25) << "select weights " << "'" << select_weights << "'" << endl; + if (cfg.count("l1_reg")) + cerr << setw(25) << "l1 reg " << l1_reg << " '" << cfg["l1_reg"].as() << "'" << endl; + if (rescale) + cerr << setw(25) << "rescale " << rescale << endl; + cerr << setw(25) << "max pairs " << max_pairs << endl; + cerr << setw(25) << "cdec cfg " << "'" << cfg["decoder_config"].as() << "'" << endl; + cerr << setw(25) << "input " << "'" << input_fn << "'" << endl; +#ifdef DTRAIN_LOCAL + cerr << setw(25) << "refs " << "'" << refs_fn << "'" << endl; +#endif + cerr << setw(25) << "output " << "'" << output_fn << "'" << endl; + if (cfg.count("input_weights")) + cerr << setw(25) << "weights in " << "'" << cfg["input_weights"].as() << "'" << endl; + if (stop_after > 0) + cerr << setw(25) << "stop_after " << stop_after << endl; + if (!verbose) cerr << "(a dot represents " << DTRAIN_DOTS << " inputs)" << endl; + } + + + for (unsigned t = 0; t < T; t++) // T epochs + { + + if (hstreaming) cerr << "reporter:status:Iteration #" << t+1 << " of " << T << endl; + + time_t start, end; + time(&start); +#ifndef DTRAIN_LOCAL + igzstream grammar_buf_in; + if (t > 0) grammar_buf_in.open(grammar_buf_fn.c_str()); +#endif + score_t score_sum = 0.; + score_t model_sum(0); + unsigned ii = 0, rank_errors = 0, margin_violations = 0, npairs = 0, f_count = 0, list_sz = 0; + if (!quiet) cerr << "Iteration #" << t+1 << " of " << T << "." << endl; + + while(true) + { + + string in; + bool next = false, stop = false; // next iteration or premature stop + if (t == 0) { + if(!getline(*input, in)) next = true; + } else { + if (ii == in_sz) next = true; // stop if we reach the end of our input + } + // stop after X sentences (but still go on for those) + if (stop_after > 0 && stop_after == ii && !next) stop = true; + + // produce some pretty output + if (!quiet && !verbose) { + if (ii == 0) cerr << " "; + if ((ii+1) % (DTRAIN_DOTS) == 0) { + cerr << "."; + cerr.flush(); + } + if ((ii+1) % (20*DTRAIN_DOTS) == 0) { + cerr << " " << ii+1 << endl; + if (!next && !stop) cerr << " "; + } + if (stop) { + if (ii % (20*DTRAIN_DOTS) != 0) cerr << " " << ii << endl; + cerr << "Stopping after " << stop_after << " input sentences." << endl; + } else { + if (next) { + if (ii % (20*DTRAIN_DOTS) != 0) cerr << " " << ii << endl; + } + } + } + + // next iteration + if (next || stop) break; + + // weights + lambdas.init_vector(&dense_weights); + + // getting input + vector ref_ids; // reference as vector +#ifndef DTRAIN_LOCAL + vector in_split; // input: sid\tsrc\tref\tpsg + if (t == 0) { + // handling input + split_in(in, in_split); + if (hstreaming && ii == 0) cerr << "reporter:counter:" << task_id << ",First ID," << in_split[0] << endl; + // getting reference + vector ref_tok; + boost::split(ref_tok, in_split[2], boost::is_any_of(" ")); + register_and_convert(ref_tok, ref_ids); + ref_ids_buf.push_back(ref_ids); + // process and set grammar + bool broken_grammar = true; // ignore broken grammars + for (string::iterator it = in.begin(); it != in.end(); it++) { + if (!isspace(*it)) { + broken_grammar = false; + break; + } + } + if (broken_grammar) { + cerr << "Broken grammar for " << ii+1 << "! Ignoring this input." << endl; + continue; + } + boost::replace_all(in, "\t", "\n"); + in += "\n"; + grammar_buf_out << in << DTRAIN_GRAMMAR_DELIM << " " << in_split[0] << endl; + decoder.AddSupplementalGrammarFromString(in); + src_str_buf.push_back(in_split[1]); + // decode + observer->SetRef(ref_ids); + decoder.Decode(in_split[1], observer); + } else { + // get buffered grammar + string grammar_str; + while (true) { + string rule; + getline(grammar_buf_in, rule); + if (boost::starts_with(rule, DTRAIN_GRAMMAR_DELIM)) break; + grammar_str += rule + "\n"; + } + decoder.AddSupplementalGrammarFromString(grammar_str); + // decode + observer->SetRef(ref_ids_buf[ii]); + decoder.Decode(src_str_buf[ii], observer); + } +#else + if (t == 0) { + string r_; + getline(*refs, r_); + vector ref_tok; + boost::split(ref_tok, r_, boost::is_any_of(" ")); + register_and_convert(ref_tok, ref_ids); + ref_ids_buf.push_back(ref_ids); + src_str_buf.push_back(in); + } else { + ref_ids = ref_ids_buf[ii]; + } + observer->SetRef(ref_ids); + if (t == 0) + decoder.Decode(in, observer); + else + decoder.Decode(src_str_buf[ii], observer); +#endif + + // get (scored) samples + vector* samples = observer->GetSamples(); + + if (verbose) { + cerr << "--- ref for " << ii << ": "; + if (t > 0) printWordIDVec(ref_ids_buf[ii]); + else printWordIDVec(ref_ids); + cerr << endl; + for (unsigned u = 0; u < samples->size(); u++) { + cerr << _p2 << _np << "[" << u << ". '"; + printWordIDVec((*samples)[u].w); + cerr << "'" << endl; + cerr << "SCORE=" << (*samples)[u].score << ",model="<< (*samples)[u].model << endl; + cerr << "F{" << (*samples)[u].f << "} ]" << endl << endl; + } + } + + score_sum += (*samples)[0].score; // stats for 1best + model_sum += (*samples)[0].model; + + f_count += observer->get_f_count(); + list_sz += observer->get_sz(); + + // weight updates + if (!noup) { + // get pairs + vector > pairs; + if (pair_sampling == "all") + all_pairs(samples, pairs, pair_threshold, max_pairs); + if (pair_sampling == "XYX") + partXYX(samples, pairs, pair_threshold, max_pairs, hi_lo); + if (pair_sampling == "PRO") + PROsampling(samples, pairs, pair_threshold, max_pairs); + npairs += pairs.size(); + + for (vector >::iterator it = pairs.begin(); + it != pairs.end(); it++) { +#ifdef DTRAIN_FASTER_PERCEPTRON + bool rank_error = true; // pair sampling already did this for us + rank_errors++; + score_t margin = std::numeric_limits::max(); +#else + bool rank_error = it->first.model <= it->second.model; + if (rank_error) rank_errors++; + score_t margin = fabs(fabs(it->first.model) - fabs(it->second.model)); + if (!rank_error && margin < loss_margin) margin_violations++; +#endif + if (scale_bleu_diff) eta = it->first.score - it->second.score; + if (rank_error || margin < loss_margin) { + SparseVector diff_vec = it->first.f - it->second.f; + lambdas.plus_eq_v_times_s(diff_vec, eta); + if (gamma) + lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta*(1./npairs)); + } + } + + // l1 regularization + if (l1naive) { + for (unsigned d = 0; d < lambdas.size(); d++) { + weight_t v = lambdas.get(d); + lambdas.set_value(d, v - sign(v) * l1_reg); + } + } else if (l1clip) { + for (unsigned d = 0; d < lambdas.size(); d++) { + if (lambdas.nonzero(d)) { + weight_t v = lambdas.get(d); + if (v > 0) { + lambdas.set_value(d, max(0., v - l1_reg)); + } else { + lambdas.set_value(d, min(0., v + l1_reg)); + } + } + } + } else if (l1cumul) { + weight_t acc_penalty = (ii+1) * l1_reg; // ii is the index of the current input + for (unsigned d = 0; d < lambdas.size(); d++) { + if (lambdas.nonzero(d)) { + weight_t v = lambdas.get(d); + weight_t penalty = 0; + if (v > 0) { + penalty = max(0., v-(acc_penalty + cumulative_penalties.get(d))); + } else { + penalty = min(0., v+(acc_penalty - cumulative_penalties.get(d))); + } + lambdas.set_value(d, penalty); + cumulative_penalties.set_value(d, cumulative_penalties.get(d)+penalty); + } + } + } + + } + + if (rescale) lambdas /= lambdas.l2norm(); + + ++ii; + + if (hstreaming) { + rep.update_counter("Seen #"+boost::lexical_cast(t+1), 1u); + rep.update_counter("Seen", 1u); + } + + } // input loop + + if (average) w_average += lambdas; + + if (scorer_str == "approx_bleu" || scorer_str == "lc_bleu") scorer->Reset(); + + if (t == 0) { + in_sz = ii; // remember size of input (# lines) + if (hstreaming) { + rep.update_counter("|Input|", ii); + rep.update_gcounter("|Input|", ii); + rep.update_gcounter("Shards", 1u); + } + } + +#ifndef DTRAIN_LOCAL + if (t == 0) { + grammar_buf_out.close(); + } else { + grammar_buf_in.close(); + } +#endif + + // print some stats + score_t score_avg = score_sum/(score_t)in_sz; + score_t model_avg = model_sum/(score_t)in_sz; + score_t score_diff, model_diff; + if (t > 0) { + score_diff = score_avg - all_scores[t-1].first; + model_diff = model_avg - all_scores[t-1].second; + } else { + score_diff = score_avg; + model_diff = model_avg; + } + + unsigned nonz = 0; + if (!quiet || hstreaming) nonz = (unsigned)lambdas.num_nonzero(); + + if (!quiet) { + cerr << _p5 << _p << "WEIGHTS" << endl; + for (vector::iterator it = print_weights.begin(); it != print_weights.end(); it++) { + cerr << setw(18) << *it << " = " << lambdas.get(FD::Convert(*it)) << endl; + } + cerr << " ---" << endl; + cerr << _np << " 1best avg score: " << score_avg; + cerr << _p << " (" << score_diff << ")" << endl; + cerr << _np << " 1best avg model score: " << model_avg; + cerr << _p << " (" << model_diff << ")" << endl; + cerr << " avg # pairs: "; + cerr << _np << npairs/(float)in_sz << endl; + cerr << " avg # rank err: "; + cerr << rank_errors/(float)in_sz << endl; +#ifndef DTRAIN_FASTER_PERCEPTRON + cerr << " avg # margin viol: "; + cerr << margin_violations/(float)in_sz << endl; +#endif + cerr << " non0 feature count: " << nonz << endl; + cerr << " avg list sz: " << list_sz/(float)in_sz << endl; + cerr << " avg f count: " << f_count/(float)list_sz << endl; + } + + if (hstreaming) { + rep.update_counter("Score 1best avg #"+boost::lexical_cast(t+1), (unsigned)(score_avg*DTRAIN_SCALE)); + rep.update_counter("Model 1best avg #"+boost::lexical_cast(t+1), (unsigned)(model_avg*DTRAIN_SCALE)); + rep.update_counter("Pairs avg #"+boost::lexical_cast(t+1), (unsigned)((npairs/(weight_t)in_sz)*DTRAIN_SCALE)); + rep.update_counter("Rank errors avg #"+boost::lexical_cast(t+1), (unsigned)((rank_errors/(weight_t)in_sz)*DTRAIN_SCALE)); + rep.update_counter("Margin violations avg #"+boost::lexical_cast(t+1), (unsigned)((margin_violations/(weight_t)in_sz)*DTRAIN_SCALE)); + rep.update_counter("Non zero feature count #"+boost::lexical_cast(t+1), nonz); + rep.update_gcounter("Non zero feature count #"+boost::lexical_cast(t+1), nonz); + } + + pair remember; + remember.first = score_avg; + remember.second = model_avg; + all_scores.push_back(remember); + if (score_avg > max_score) { + max_score = score_avg; + best_it = t; + } + time (&end); + float time_diff = difftime(end, start); + overall_time += time_diff; + if (!quiet) { + cerr << _p2 << _np << "(time " << time_diff/60. << " min, "; + cerr << time_diff/in_sz << " s/S)" << endl; + } + if (t+1 != T && !quiet) cerr << endl; + + if (noup) break; + + // write weights to file + if (select_weights == "best" || keep) { + lambdas.init_vector(&dense_weights); + string w_fn = "weights." + boost::lexical_cast(t) + ".gz"; + Weights::WriteToFile(w_fn, dense_weights, true); + } + + } // outer loop + + if (average) w_average /= (weight_t)T; + +#ifndef DTRAIN_LOCAL + unlink(grammar_buf_fn.c_str()); +#endif + + if (!noup) { + if (!quiet) cerr << endl << "Writing weights file to '" << output_fn << "' ..." << endl; + if (select_weights == "last" || average) { // last, average + WriteFile of(output_fn); // works with '-' + ostream& o = *of.stream(); + o.precision(17); + o << _np; + if (average) { + for (SparseVector::iterator it = w_average.begin(); it != w_average.end(); ++it) { + if (it->second == 0) continue; + o << FD::Convert(it->first) << '\t' << it->second << endl; + } + } else { + for (SparseVector::iterator it = lambdas.begin(); it != lambdas.end(); ++it) { + if (it->second == 0) continue; + o << FD::Convert(it->first) << '\t' << it->second << endl; + } + } + } else if (select_weights == "VOID") { // do nothing with the weights + } else { // best + if (output_fn != "-") { + CopyFile("weights."+boost::lexical_cast(best_it)+".gz", output_fn); + } else { + ReadFile bestw("weights."+boost::lexical_cast(best_it)+".gz"); + string o; + cout.precision(17); + cout << _np; + while(getline(*bestw, o)) cout << o << endl; + } + if (!keep) { + for (unsigned i = 0; i < T; i++) { + string s = "weights." + boost::lexical_cast(i) + ".gz"; + unlink(s.c_str()); + } + } + } + if (output_fn == "-" && hstreaming) cout << "__SHARD_COUNT__\t1" << endl; + if (!quiet) cerr << "done" << endl; + } + + if (!quiet) { + cerr << _p5 << _np << endl << "---" << endl << "Best iteration: "; + cerr << best_it+1 << " [SCORE '" << scorer_str << "'=" << max_score << "]." << endl; + cerr << "This took " << overall_time/60. << " min." << endl; + } +} + diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h new file mode 100644 index 00000000..4b6f415c --- /dev/null +++ b/training/dtrain/dtrain.h @@ -0,0 +1,97 @@ +#ifndef _DTRAIN_H_ +#define _DTRAIN_H_ + +#undef DTRAIN_FASTER_PERCEPTRON // only look at misranked pairs + // DO NOT USE WITH SVM! +//#define DTRAIN_LOCAL +#define DTRAIN_DOTS 10 // after how many inputs to display a '.' +#define DTRAIN_GRAMMAR_DELIM "########EOS########" +#define DTRAIN_SCALE 100000 + + +#include +#include +#include + +#include +#include + +#include "ksampler.h" +#include "pairsampling.h" + +#include "filelib.h" + + +using namespace std; +using namespace dtrain; +namespace po = boost::program_options; + +inline void register_and_convert(const vector& strs, vector& ids) +{ + vector::const_iterator it; + for (it = strs.begin(); it < strs.end(); it++) + ids.push_back(TD::Convert(*it)); +} + +inline string gettmpf(const string path, const string infix) +{ + char fn[path.size() + infix.size() + 8]; + strcpy(fn, path.c_str()); + strcat(fn, "/"); + strcat(fn, infix.c_str()); + strcat(fn, "-XXXXXX"); + if (!mkstemp(fn)) { + cerr << "Cannot make temp file in" << path << " , exiting." << endl; + exit(1); + } + return string(fn); +} + +inline void split_in(string& s, vector& parts) +{ + unsigned f = 0; + for(unsigned i = 0; i < 3; i++) { + unsigned e = f; + f = s.find("\t", f+1); + if (e != 0) parts.push_back(s.substr(e+1, f-e-1)); + else parts.push_back(s.substr(0, f)); + } + s.erase(0, f+1); +} + +struct HSReporter +{ + string task_id_; + + HSReporter(string task_id) : task_id_(task_id) {} + + inline void update_counter(string name, unsigned amount) { + cerr << "reporter:counter:" << task_id_ << "," << name << "," << amount << endl; + } + inline void update_gcounter(string name, unsigned amount) { + cerr << "reporter:counter:Global," << name << "," << amount << endl; + } +}; + +inline ostream& _np(ostream& out) { return out << resetiosflags(ios::showpos); } +inline ostream& _p(ostream& out) { return out << setiosflags(ios::showpos); } +inline ostream& _p2(ostream& out) { return out << setprecision(2); } +inline ostream& _p5(ostream& out) { return out << setprecision(5); } + +inline void printWordIDVec(vector& v) +{ + for (unsigned i = 0; i < v.size(); i++) { + cerr << TD::Convert(v[i]); + if (i < v.size()-1) cerr << " "; + } +} + +template +inline T sign(T z) +{ + if (z == 0) return 0; + return z < 0 ? -1 : +1; +} + +#endif + diff --git a/training/dtrain/hstreaming/avg.rb b/training/dtrain/hstreaming/avg.rb new file mode 100755 index 00000000..2599c732 --- /dev/null +++ b/training/dtrain/hstreaming/avg.rb @@ -0,0 +1,32 @@ +#!/usr/bin/env ruby +# first arg may be an int of custom shard count + +shard_count_key = "__SHARD_COUNT__" + +STDIN.set_encoding 'utf-8' +STDOUT.set_encoding 'utf-8' + +w = {} +c = {} +w.default = 0 +c.default = 0 +while line = STDIN.gets + key, val = line.split /\s/ + w[key] += val.to_f + c[key] += 1 +end + +if ARGV.size == 0 + shard_count = w["__SHARD_COUNT__"] +else + shard_count = ARGV[0].to_f +end +w.each_key { |k| + if k == shard_count_key + next + else + puts "#{k}\t#{w[k]/shard_count}" + #puts "# #{c[k]}" + end +} + diff --git a/training/dtrain/hstreaming/cdec.ini b/training/dtrain/hstreaming/cdec.ini new file mode 100644 index 00000000..d4f5cecd --- /dev/null +++ b/training/dtrain/hstreaming/cdec.ini @@ -0,0 +1,22 @@ +formalism=scfg +add_pass_through_rules=true +scfg_max_span_limit=15 +intersection_strategy=cube_pruning +cubepruning_pop_limit=30 +feature_function=WordPenalty +feature_function=KLanguageModel nc-wmt11.en.srilm.gz +#feature_function=ArityPenalty +#feature_function=CMR2008ReorderingFeatures +#feature_function=Dwarf +#feature_function=InputIndicator +#feature_function=LexNullJump +#feature_function=NewJump +#feature_function=NgramFeatures +#feature_function=NonLatinCount +#feature_function=OutputIndicator +#feature_function=RuleIdentityFeatures +#feature_function=RuleNgramFeatures +#feature_function=RuleShape +#feature_function=SourceSpanSizeFeatures +#feature_function=SourceWordPenalty +#feature_function=SpanFeatures diff --git a/training/dtrain/hstreaming/dtrain.ini b/training/dtrain/hstreaming/dtrain.ini new file mode 100644 index 00000000..a2c219a1 --- /dev/null +++ b/training/dtrain/hstreaming/dtrain.ini @@ -0,0 +1,15 @@ +input=- +output=- +decoder_config=cdec.ini +tmp=/var/hadoop/mapred/local/ +epochs=1 +k=100 +N=4 +learning_rate=0.0001 +gamma=0 +scorer=stupid_bleu +sample_from=kbest +filter=uniq +pair_sampling=XYX +pair_threshold=0 +select_weights=last diff --git a/training/dtrain/hstreaming/dtrain.sh b/training/dtrain/hstreaming/dtrain.sh new file mode 100755 index 00000000..877ff94c --- /dev/null +++ b/training/dtrain/hstreaming/dtrain.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# script to run dtrain with a task id + +pushd . &>/dev/null +cd .. +ID=$(basename $(pwd)) # attempt_... +popd &>/dev/null +./dtrain -c dtrain.ini --hstreaming $ID + diff --git a/training/dtrain/hstreaming/hadoop-streaming-job.sh b/training/dtrain/hstreaming/hadoop-streaming-job.sh new file mode 100755 index 00000000..92419956 --- /dev/null +++ b/training/dtrain/hstreaming/hadoop-streaming-job.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +EXP=a_simple_test + +# change these vars to fit your hadoop installation +HADOOP_HOME=/usr/lib/hadoop-0.20 +JAR=contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar +HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR" + + IN=input_on_hdfs +OUT=output_weights_on_hdfs + +# you can -reducer to NONE if you want to +# do feature selection/averaging locally (e.g. to +# keep weights of all epochs) +$HSTREAMING \ + -mapper "dtrain.sh" \ + -reducer "ruby lplp.rb l2 select_k 100000" \ + -input $IN \ + -output $OUT \ + -file dtrain.sh \ + -file lplp.rb \ + -file ../dtrain \ + -file dtrain.ini \ + -file cdec.ini \ + -file ../test/example/nc-wmt11.en.srilm.gz \ + -jobconf mapred.reduce.tasks=30 \ + -jobconf mapred.max.map.failures.percent=0 \ + -jobconf mapred.job.name="dtrain $EXP" + diff --git a/training/dtrain/hstreaming/lplp.rb b/training/dtrain/hstreaming/lplp.rb new file mode 100755 index 00000000..f0cd58c5 --- /dev/null +++ b/training/dtrain/hstreaming/lplp.rb @@ -0,0 +1,131 @@ +# lplp.rb + +# norms +def l0(feature_column, n) + if feature_column.size >= n then return 1 else return 0 end +end + +def l1(feature_column, n=-1) + return feature_column.map { |i| i.abs }.reduce { |sum,i| sum+i } +end + +def l2(feature_column, n=-1) + return Math.sqrt feature_column.map { |i| i.abs2 }.reduce { |sum,i| sum+i } +end + +def linfty(feature_column, n=-1) + return feature_column.map { |i| i.abs }.max +end + +# stats +def median(feature_column, n) + return feature_column.concat(0.step(n-feature_column.size-1).map{|i|0}).sort[feature_column.size/2] +end + +def mean(feature_column, n) + return feature_column.reduce { |sum, i| sum+i } / n +end + +# selection +def select_k(weights, norm_fun, n, k=10000) + weights.sort{|a,b| norm_fun.call(b[1], n) <=> norm_fun.call(a[1], n)}.each { |p| + puts "#{p[0]}\t#{mean(p[1], n)}" + k -= 1 + if k == 0 then break end + } +end + +def cut(weights, norm_fun, n, epsilon=0.0001) + weights.each { |k,v| + if norm_fun.call(v, n).abs >= epsilon + puts "#{k}\t#{mean(v, n)}" + end + } +end + +# test +def _test() + puts + w = {} + w["a"] = [1, 2, 3] + w["b"] = [1, 2] + w["c"] = [66] + w["d"] = [10, 20, 30] + n = 3 + puts w.to_s + puts + puts "select_k" + puts "l0 expect ad" + select_k(w, method(:l0), n, 2) + puts "l1 expect cd" + select_k(w, method(:l1), n, 2) + puts "l2 expect c" + select_k(w, method(:l2), n, 1) + puts + puts "cut" + puts "l1 expect cd" + cut(w, method(:l1), n, 7) + puts + puts "median" + a = [1,2,3,4,5] + puts a.to_s + puts median(a, 5) + puts + puts "#{median(a, 7)} <- that's because we add missing 0s:" + puts a.concat(0.step(7-a.size-1).map{|i|0}).to_s + puts + puts "mean expect bc" + w.clear + w["a"] = [2] + w["b"] = [2.1] + w["c"] = [2.2] + cut(w, method(:mean), 1, 2.05) + exit +end +#_test() + +# actually do something +def usage() + puts "lplp.rb [n] < " + puts " l0...: norms for selection" + puts "select_k: only output top k (according to the norm of their column vector) features" + puts " cut: output features with weight >= threshold" + puts " n: if we do not have a shard count use this number for averaging" + exit +end + +if ARGV.size < 3 then usage end +norm_fun = method(ARGV[0].to_sym) +type = ARGV[1] +x = ARGV[2].to_f + +shard_count_key = "__SHARD_COUNT__" + +STDIN.set_encoding 'utf-8' +STDOUT.set_encoding 'utf-8' + +w = {} +shard_count = 0 +while line = STDIN.gets + key, val = line.split /\s+/ + if key == shard_count_key + shard_count += 1 + next + end + if w.has_key? key + w[key].push val.to_f + else + w[key] = [val.to_f] + end +end + +if ARGV.size == 4 then shard_count = ARGV[3].to_f end + +if type == 'cut' + cut(w, norm_fun, shard_count, x) +elsif type == 'select_k' + select_k(w, norm_fun, shard_count, x) +else + puts "oh oh" +end + diff --git a/training/dtrain/hstreaming/red-test b/training/dtrain/hstreaming/red-test new file mode 100644 index 00000000..2623d697 --- /dev/null +++ b/training/dtrain/hstreaming/red-test @@ -0,0 +1,9 @@ +a 1 +b 2 +c 3.5 +a 1 +b 2 +c 3.5 +d 1 +e 2 +__SHARD_COUNT__ 2 diff --git a/training/dtrain/kbestget.h b/training/dtrain/kbestget.h new file mode 100644 index 00000000..dd8882e1 --- /dev/null +++ b/training/dtrain/kbestget.h @@ -0,0 +1,152 @@ +#ifndef _DTRAIN_KBESTGET_H_ +#define _DTRAIN_KBESTGET_H_ + +#include "kbest.h" // cdec +#include "sentence_metadata.h" + +#include "verbose.h" +#include "viterbi.h" +#include "ff_register.h" +#include "decoder.h" +#include "weights.h" +#include "logval.h" + +using namespace std; + +namespace dtrain +{ + + +typedef double score_t; + +struct ScoredHyp +{ + vector w; + SparseVector f; + score_t model; + score_t score; + unsigned rank; +}; + +struct LocalScorer +{ + unsigned N_; + vector w_; + + virtual score_t + Score(vector& hyp, vector& ref, const unsigned rank, const unsigned src_len)=0; + + void Reset() {} // only for approx bleu + + inline void + Init(unsigned N, vector weights) + { + assert(N > 0); + N_ = N; + if (weights.empty()) for (unsigned i = 0; i < N_; i++) w_.push_back(1./N_); + else w_ = weights; + } + + inline score_t + brevity_penalty(const unsigned hyp_len, const unsigned ref_len) + { + if (hyp_len > ref_len) return 1; + return exp(1 - (score_t)ref_len/hyp_len); + } +}; + +struct HypSampler : public DecoderObserver +{ + LocalScorer* scorer_; + vector* ref_; + unsigned f_count_, sz_; + virtual vector* GetSamples()=0; + inline void SetScorer(LocalScorer* scorer) { scorer_ = scorer; } + inline void SetRef(vector& ref) { ref_ = &ref; } + inline unsigned get_f_count() { return f_count_; } + inline unsigned get_sz() { return sz_; } +}; +//////////////////////////////////////////////////////////////////////////////// + + + + +struct KBestGetter : public HypSampler +{ + const unsigned k_; + const string filter_type_; + vector s_; + unsigned src_len_; + + KBestGetter(const unsigned k, const string filter_type) : + k_(k), filter_type_(filter_type) {} + + virtual void + NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) + { + src_len_ = smeta.GetSourceLength(); + KBestScored(*hg); + } + + vector* GetSamples() { return &s_; } + + void + KBestScored(const Hypergraph& forest) + { + if (filter_type_ == "uniq") { + KBestUnique(forest); + } else if (filter_type_ == "not") { + KBestNoFilter(forest); + } + } + + void + KBestUnique(const Hypergraph& forest) + { + s_.clear(); sz_ = f_count_ = 0; + KBest::KBestDerivations, ESentenceTraversal, + KBest::FilterUnique, prob_t, EdgeProb> kbest(forest, k_); + for (unsigned i = 0; i < k_; ++i) { + const KBest::KBestDerivations, ESentenceTraversal, KBest::FilterUnique, + prob_t, EdgeProb>::Derivation* d = + kbest.LazyKthBest(forest.nodes_.size() - 1, i); + if (!d) break; + ScoredHyp h; + h.w = d->yield; + h.f = d->feature_values; + h.model = log(d->score); + h.rank = i; + h.score = scorer_->Score(h.w, *ref_, i, src_len_); + s_.push_back(h); + sz_++; + f_count_ += h.f.size(); + } + } + + void + KBestNoFilter(const Hypergraph& forest) + { + s_.clear(); sz_ = f_count_ = 0; + KBest::KBestDerivations, ESentenceTraversal> kbest(forest, k_); + for (unsigned i = 0; i < k_; ++i) { + const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = + kbest.LazyKthBest(forest.nodes_.size() - 1, i); + if (!d) break; + ScoredHyp h; + h.w = d->yield; + h.f = d->feature_values; + h.model = log(d->score); + h.rank = i; + h.score = scorer_->Score(h.w, *ref_, i, src_len_); + s_.push_back(h); + sz_++; + f_count_ += h.f.size(); + } + } +}; + + +} // namespace + +#endif + diff --git a/training/dtrain/ksampler.h b/training/dtrain/ksampler.h new file mode 100644 index 00000000..bc2f56cd --- /dev/null +++ b/training/dtrain/ksampler.h @@ -0,0 +1,61 @@ +#ifndef _DTRAIN_KSAMPLER_H_ +#define _DTRAIN_KSAMPLER_H_ + +#include "hg_sampler.h" // cdec +#include "kbestget.h" +#include "score.h" + +namespace dtrain +{ + +bool +cmp_hyp_by_model_d(ScoredHyp a, ScoredHyp b) +{ + return a.model > b.model; +} + +struct KSampler : public HypSampler +{ + const unsigned k_; + vector s_; + MT19937* prng_; + score_t (*scorer)(NgramCounts&, const unsigned, const unsigned, unsigned, vector); + unsigned src_len_; + + explicit KSampler(const unsigned k, MT19937* prng) : + k_(k), prng_(prng) {} + + virtual void + NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) + { + src_len_ = smeta.GetSourceLength(); + ScoredSamples(*hg); + } + + vector* GetSamples() { return &s_; } + + void ScoredSamples(const Hypergraph& forest) { + s_.clear(); sz_ = f_count_ = 0; + std::vector samples; + HypergraphSampler::sample_hypotheses(forest, k_, prng_, &samples); + for (unsigned i = 0; i < k_; ++i) { + ScoredHyp h; + h.w = samples[i].words; + h.f = samples[i].fmap; + h.model = log(samples[i].model_score); + h.rank = i; + h.score = scorer_->Score(h.w, *ref_, i, src_len_); + s_.push_back(h); + sz_++; + f_count_ += h.f.size(); + } + sort(s_.begin(), s_.end(), cmp_hyp_by_model_d); + for (unsigned i = 0; i < s_.size(); i++) s_[i].rank = i; + } +}; + + +} // namespace + +#endif + diff --git a/training/dtrain/pairsampling.h b/training/dtrain/pairsampling.h new file mode 100644 index 00000000..84be1efb --- /dev/null +++ b/training/dtrain/pairsampling.h @@ -0,0 +1,149 @@ +#ifndef _DTRAIN_PAIRSAMPLING_H_ +#define _DTRAIN_PAIRSAMPLING_H_ + +namespace dtrain +{ + + +bool +accept_pair(score_t a, score_t b, score_t threshold) +{ + if (fabs(a - b) < threshold) return false; + return true; +} + +bool +cmp_hyp_by_score_d(ScoredHyp a, ScoredHyp b) +{ + return a.score > b.score; +} + +inline void +all_pairs(vector* s, vector >& training, score_t threshold, unsigned max, float _unused=1) +{ + sort(s->begin(), s->end(), cmp_hyp_by_score_d); + unsigned sz = s->size(); + bool b = false; + unsigned count = 0; + for (unsigned i = 0; i < sz-1; i++) { + for (unsigned j = i+1; j < sz; j++) { + if (threshold > 0) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) + training.push_back(make_pair((*s)[i], (*s)[j])); + } else { + if ((*s)[i].score != (*s)[j].score) + training.push_back(make_pair((*s)[i], (*s)[j])); + } + if (++count == max) { + b = true; + break; + } + } + if (b) break; + } +} + +/* + * multipartite ranking + * sort (descending) by bleu + * compare top X to middle Y and low X + * cmp middle Y to low X + */ + +inline void +partXYX(vector* s, vector >& training, score_t threshold, unsigned max, float hi_lo) +{ + unsigned sz = s->size(); + if (sz < 2) return; + sort(s->begin(), s->end(), cmp_hyp_by_score_d); + unsigned sep = round(sz*hi_lo); + unsigned sep_hi = sep; + if (sz > 4) while (sep_hi < sz && (*s)[sep_hi-1].score == (*s)[sep_hi].score) ++sep_hi; + else sep_hi = 1; + bool b = false; + unsigned count = 0; + for (unsigned i = 0; i < sep_hi; i++) { + for (unsigned j = sep_hi; j < sz; j++) { +#ifdef DTRAIN_FASTER_PERCEPTRON + if ((*s)[i].model <= (*s)[j].model) { +#endif + if (threshold > 0) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) + training.push_back(make_pair((*s)[i], (*s)[j])); + } else { + if ((*s)[i].score != (*s)[j].score) + training.push_back(make_pair((*s)[i], (*s)[j])); + } + if (++count == max) { + b = true; + break; + } +#ifdef DTRAIN_FASTER_PERCEPTRON + } +#endif + } + if (b) break; + } + unsigned sep_lo = sz-sep; + while (sep_lo > 0 && (*s)[sep_lo-1].score == (*s)[sep_lo].score) --sep_lo; + for (unsigned i = sep_hi; i < sz-sep_lo; i++) { + for (unsigned j = sz-sep_lo; j < sz; j++) { +#ifdef DTRAIN_FASTER_PERCEPTRON + if ((*s)[i].model <= (*s)[j].model) { +#endif + if (threshold > 0) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) + training.push_back(make_pair((*s)[i], (*s)[j])); + } else { + if ((*s)[i].score != (*s)[j].score) + training.push_back(make_pair((*s)[i], (*s)[j])); + } + if (++count == max) return; +#ifdef DTRAIN_FASTER_PERCEPTRON + } +#endif + } + } +} + +/* + * pair sampling as in + * 'Tuning as Ranking' (Hopkins & May, 2011) + * count = 5000 + * threshold = 5% BLEU (0.05 for param 3) + * cut = top 50 + */ +bool +_PRO_cmp_pair_by_diff_d(pair a, pair b) +{ + return (fabs(a.first.score - a.second.score)) > (fabs(b.first.score - b.second.score)); +} +inline void +PROsampling(vector* s, vector >& training, score_t threshold, unsigned max, float _unused=1) +{ + unsigned max_count = 5000, count = 0, sz = s->size(); + bool b = false; + for (unsigned i = 0; i < sz-1; i++) { + for (unsigned j = i+1; j < sz; j++) { + if (accept_pair((*s)[i].score, (*s)[j].score, threshold)) { + training.push_back(make_pair((*s)[i], (*s)[j])); + if (++count == max_count) { + b = true; + break; + } + } + } + if (b) break; + } + if (training.size() > 50) { + sort(training.begin(), training.end(), _PRO_cmp_pair_by_diff_d); + training.erase(training.begin()+50, training.end()); + } + return; +} + + +} // namespace + +#endif + diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb new file mode 100755 index 00000000..1d277ff6 --- /dev/null +++ b/training/dtrain/parallelize.rb @@ -0,0 +1,79 @@ +#!/usr/bin/env ruby + + +if ARGV.size != 5 + STDERR.write "Usage: " + STDERR.write "ruby parallelize.rb <#shards> \n" + exit +end + +dtrain_bin = '/home/pks/bin/dtrain_local' +ruby = '/usr/bin/ruby' +lplp_rb = '/home/pks/mt/cdec-dtrain/dtrain/hstreaming/lplp.rb' +lplp_args = 'l2 select_k 100000' +gzip = '/bin/gzip' + +num_shards = ARGV[0].to_i +input = ARGV[1] +refs = ARGV[2] +epochs = ARGV[3].to_i +ini = ARGV[4] + + +`mkdir work` + +def make_shards(input, refs, num_shards) + lc = `wc -l #{input}`.split.first.to_i + shard_sz = lc / num_shards + leftover = lc % num_shards + in_f = File.new input, 'r' + refs_f = File.new refs, 'r' + shard_in_files = [] + shard_refs_files = [] + 0.upto(num_shards-1) { |shard| + shard_in = File.new "work/shard.#{shard}.in", 'w+' + shard_refs = File.new "work/shard.#{shard}.refs", 'w+' + 0.upto(shard_sz-1) { |i| + shard_in.write in_f.gets + shard_refs.write refs_f.gets + } + shard_in_files << shard_in + shard_refs_files << shard_refs + } + while leftover > 0 + shard_in_files[-1].write in_f.gets + shard_refs_files[-1].write refs_f.gets + leftover -= 1 + end + (shard_in_files + shard_refs_files).each do |f| f.close end + in_f.close + refs_f.close +end + +make_shards input, refs, num_shards + +0.upto(epochs-1) { |epoch| + pids = [] + input_weights = '' + if epoch > 0 then input_weights = "--input_weights work/weights.#{epoch-1}" end + weights_files = [] + 0.upto(num_shards-1) { |shard| + pids << Kernel.fork { + `#{dtrain_bin} -c #{ini}\ + --input work/shard.#{shard}.in\ + --refs work/shard.#{shard}.refs #{input_weights}\ + --output work/weights.#{shard}.#{epoch}\ + &> work/out.#{shard}.#{epoch}` + } + weights_files << "work/weights.#{shard}.#{epoch}" + } + pids.each { |pid| Process.wait(pid) } + cat = File.new('work/weights_cat', 'w+') + weights_files.each { |f| cat.write File.new(f, 'r').read } + cat.close + `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat &> work/weights.#{epoch}` +} + +`rm work/weights_cat` +`#{gzip} work/*` + diff --git a/training/dtrain/parallelize/test/cdec.ini b/training/dtrain/parallelize/test/cdec.ini new file mode 100644 index 00000000..72e99dc5 --- /dev/null +++ b/training/dtrain/parallelize/test/cdec.ini @@ -0,0 +1,22 @@ +formalism=scfg +add_pass_through_rules=true +intersection_strategy=cube_pruning +cubepruning_pop_limit=200 +scfg_max_span_limit=15 +feature_function=WordPenalty +feature_function=KLanguageModel /stor/dat/wmt12/en/news_only/m/wmt12.news.en.3.kenv5 +#feature_function=ArityPenalty +#feature_function=CMR2008ReorderingFeatures +#feature_function=Dwarf +#feature_function=InputIndicator +#feature_function=LexNullJump +#feature_function=NewJump +#feature_function=NgramFeatures +#feature_function=NonLatinCount +#feature_function=OutputIndicator +#feature_function=RuleIdentityFeatures +#feature_function=RuleNgramFeatures +#feature_function=RuleShape +#feature_function=SourceSpanSizeFeatures +#feature_function=SourceWordPenalty +#feature_function=SpanFeatures diff --git a/training/dtrain/parallelize/test/dtrain.ini b/training/dtrain/parallelize/test/dtrain.ini new file mode 100644 index 00000000..03f9d240 --- /dev/null +++ b/training/dtrain/parallelize/test/dtrain.ini @@ -0,0 +1,15 @@ +k=100 +N=4 +learning_rate=0.0001 +gamma=0 +loss_margin=0 +epochs=1 +scorer=stupid_bleu +sample_from=kbest +filter=uniq +pair_sampling=XYX +hi_lo=0.1 +select_weights=last +print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +tmp=/tmp +decoder_config=cdec.ini diff --git a/training/dtrain/parallelize/test/in b/training/dtrain/parallelize/test/in new file mode 100644 index 00000000..a312809f --- /dev/null +++ b/training/dtrain/parallelize/test/in @@ -0,0 +1,10 @@ +barack obama erhält als vierter us @-@ präsident den frieden nobelpreis +der amerikanische präsident barack obama kommt für 26 stunden nach oslo , norwegen , um hier als vierter us @-@ präsident in der geschichte den frieden nobelpreis entgegen zunehmen . +darüber hinaus erhält er das diplom sowie die medaille und einen scheck über 1,4 mio. dollar für seine außer gewöhnlichen bestrebungen um die intensivierung der welt diplomatie und zusammen arbeit unter den völkern . +der chef des weißen hauses kommt morgen zusammen mit seiner frau michelle in der nordwegischen metropole an und wird die ganze zeit beschäftigt sein . +zunächst stattet er dem nobel @-@ institut einen besuch ab , wo er überhaupt zum ersten mal mit den fünf ausschuss mitglieder zusammen trifft , die ihn im oktober aus 172 leuten und 33 organisationen gewählt haben . +das präsidenten paar hat danach ein treffen mit dem norwegischen könig harald v. und königin sonja eingeplant . +nachmittags erreicht dann der besuch seinen höhepunkt mit der zeremonie , bei der obama den prestige preis übernimmt . +diesen erhält er als der vierte us @-@ präsident , aber erst als der dritte , der den preis direkt im amt entgegen nimmt . +das weiße haus avisierte schon , dass obama bei der übernahme des preises über den afghanistan krieg sprechen wird . +der präsident will diesem thema nicht ausweichen , weil er weiß , dass er den preis als ein präsident übernimmt , der zur zeit krieg in zwei ländern führt . diff --git a/training/dtrain/parallelize/test/refs b/training/dtrain/parallelize/test/refs new file mode 100644 index 00000000..4d3128cb --- /dev/null +++ b/training/dtrain/parallelize/test/refs @@ -0,0 +1,10 @@ +barack obama becomes the fourth american president to receive the nobel peace prize +the american president barack obama will fly into oslo , norway for 26 hours to receive the nobel peace prize , the fourth american president in history to do so . +he will receive a diploma , medal and cheque for 1.4 million dollars for his exceptional efforts to improve global diplomacy and encourage international cooperation , amongst other things . +the head of the white house will be flying into the norwegian city in the morning with his wife michelle and will have a busy schedule . +first , he will visit the nobel institute , where he will have his first meeting with the five committee members who selected him from 172 people and 33 organisations . +the presidential couple then has a meeting scheduled with king harald v and queen sonja of norway . +then , in the afternoon , the visit will culminate in a grand ceremony , at which obama will receive the prestigious award . +he will be the fourth american president to be awarded the prize , and only the third to have received it while actually in office . +the white house has stated that , when he accepts the prize , obama will speak about the war in afghanistan . +the president does not want to skirt around this topic , as he realises that he is accepting the prize as a president whose country is currently at war in two countries . diff --git a/training/dtrain/score.cc b/training/dtrain/score.cc new file mode 100644 index 00000000..34fc86a9 --- /dev/null +++ b/training/dtrain/score.cc @@ -0,0 +1,254 @@ +#include "score.h" + +namespace dtrain +{ + + +/* + * bleu + * + * as in "BLEU: a Method for Automatic Evaluation + * of Machine Translation" + * (Papineni et al. '02) + * + * NOTE: 0 if for one n \in {1..N} count is 0 + */ +score_t +BleuScorer::Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len) +{ + if (hyp_len == 0 || ref_len == 0) return 0.; + unsigned M = N_; + vector v = w_; + if (ref_len < N_) { + M = ref_len; + for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); + } + score_t sum = 0; + for (unsigned i = 0; i < M; i++) { + if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) return 0.; + sum += v[i] * log((score_t)counts.clipped_[i]/counts.sum_[i]); + } + return brevity_penalty(hyp_len, ref_len) * exp(sum); +} + +score_t +BleuScorer::Score(vector& hyp, vector& ref, + const unsigned /*rank*/, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = make_ngram_counts(hyp, ref, N_); + return Bleu(counts, hyp_len, ref_len); +} + +/* + * 'stupid' bleu + * + * as in "ORANGE: a Method for Evaluating + * Automatic Evaluation Metrics + * for Machine Translation" + * (Lin & Och '04) + * + * NOTE: 0 iff no 1gram match + */ +score_t +StupidBleuScorer::Score(vector& hyp, vector& ref, + const unsigned /*rank*/, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = make_ngram_counts(hyp, ref, N_); + unsigned M = N_; + vector v = w_; + if (ref_len < N_) { + M = ref_len; + for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); + } + score_t sum = 0, add = 0; + for (unsigned i = 0; i < M; i++) { + if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.; + if (i == 1) add = 1; + sum += v[i] * log(((score_t)counts.clipped_[i] + add)/((counts.sum_[i] + add))); + } + return brevity_penalty(hyp_len, ref_len) * exp(sum); +} + +/* + * smooth bleu + * + * as in "An End-to-End Discriminative Approach + * to Machine Translation" + * (Liang et al. '06) + * + * NOTE: max is 0.9375 (with N=4) + */ +score_t +SmoothBleuScorer::Score(vector& hyp, vector& ref, + const unsigned /*rank*/, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = make_ngram_counts(hyp, ref, N_); + unsigned M = N_; + if (ref_len < N_) M = ref_len; + score_t sum = 0.; + vector i_bleu; + for (unsigned i = 0; i < M; i++) i_bleu.push_back(0.); + for (unsigned i = 0; i < M; i++) { + if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) { + break; + } else { + score_t i_ng = log((score_t)counts.clipped_[i]/counts.sum_[i]); + for (unsigned j = i; j < M; j++) { + i_bleu[j] += (1/((score_t)j+1)) * i_ng; + } + } + sum += exp(i_bleu[i])/pow(2.0, (double)(N_-i)); + } + return brevity_penalty(hyp_len, ref_len) * sum; +} + +/* + * 'sum' bleu + * + * sum up Ngram precisions + */ +score_t +SumBleuScorer::Score(vector& hyp, vector& ref, + const unsigned /*rank*/, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = make_ngram_counts(hyp, ref, N_); + unsigned M = N_; + if (ref_len < N_) M = ref_len; + score_t sum = 0.; + unsigned j = 1; + for (unsigned i = 0; i < M; i++) { + if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; + sum += ((score_t)counts.clipped_[i]/counts.sum_[i])/pow(2.0, (double) (N_-j+1)); + j++; + } + return brevity_penalty(hyp_len, ref_len) * sum; +} + +/* + * 'sum' (exp) bleu + * + * sum up exp(Ngram precisions) + */ +score_t +SumExpBleuScorer::Score(vector& hyp, vector& ref, + const unsigned /*rank*/, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = make_ngram_counts(hyp, ref, N_); + unsigned M = N_; + if (ref_len < N_) M = ref_len; + score_t sum = 0.; + unsigned j = 1; + for (unsigned i = 0; i < M; i++) { + if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; + sum += exp(((score_t)counts.clipped_[i]/counts.sum_[i]))/pow(2.0, (double) (N_-j+1)); + j++; + } + return brevity_penalty(hyp_len, ref_len) * sum; +} + +/* + * 'sum' (whatever) bleu + * + * sum up exp(weight * log(Ngram precisions)) + */ +score_t +SumWhateverBleuScorer::Score(vector& hyp, vector& ref, + const unsigned /*rank*/, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (hyp_len == 0 || ref_len == 0) return 0.; + NgramCounts counts = make_ngram_counts(hyp, ref, N_); + unsigned M = N_; + vector v = w_; + if (ref_len < N_) { + M = ref_len; + for (unsigned i = 0; i < M; i++) v[i] = 1/((score_t)M); + } + score_t sum = 0.; + unsigned j = 1; + for (unsigned i = 0; i < M; i++) { + if (counts.sum_[i] == 0 || counts.clipped_[i] == 0) break; + sum += exp(v[i] * log(((score_t)counts.clipped_[i]/counts.sum_[i])))/pow(2.0, (double) (N_-j+1)); + j++; + } + return brevity_penalty(hyp_len, ref_len) * sum; +} + +/* + * approx. bleu + * + * as in "Online Large-Margin Training of Syntactic + * and Structural Translation Features" + * (Chiang et al. '08) + * + * NOTE: Needs some more code in dtrain.cc . + * No scaling by src len. + */ +score_t +ApproxBleuScorer::Score(vector& hyp, vector& ref, + const unsigned rank, const unsigned src_len) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (ref_len == 0) return 0.; + score_t score = 0.; + NgramCounts counts(N_); + if (hyp_len > 0) { + counts = make_ngram_counts(hyp, ref, N_); + NgramCounts tmp = glob_onebest_counts_ + counts; + score = Bleu(tmp, hyp_len, ref_len); + } + if (rank == 0) { // 'context of 1best translations' + glob_onebest_counts_ += counts; + glob_onebest_counts_ *= discount_; + glob_hyp_len_ = discount_ * (glob_hyp_len_ + hyp_len); + glob_ref_len_ = discount_ * (glob_ref_len_ + ref_len); + glob_src_len_ = discount_ * (glob_src_len_ + src_len); + } + return score; +} + +/* + * Linear (Corpus) Bleu + * + * as in "Lattice Minimum Bayes-Risk Decoding + * for Statistical Machine Translation" + * (Tromble et al. '08) + * + */ +score_t +LinearBleuScorer::Score(vector& hyp, vector& ref, + const unsigned rank, const unsigned /*src_len*/) +{ + unsigned hyp_len = hyp.size(), ref_len = ref.size(); + if (ref_len == 0) return 0.; + unsigned M = N_; + if (ref_len < N_) M = ref_len; + NgramCounts counts(M); + if (hyp_len > 0) + counts = make_ngram_counts(hyp, ref, M); + score_t ret = 0.; + for (unsigned i = 0; i < M; i++) { + if (counts.sum_[i] == 0 || onebest_counts_.sum_[i] == 0) break; + ret += counts.sum_[i]/onebest_counts_.sum_[i]; + } + ret = -(hyp_len/(score_t)onebest_len_) + (1./M) * ret; + if (rank == 0) { + onebest_len_ += hyp_len; + onebest_counts_ += counts; + } + return ret; +} + + +} // namespace + diff --git a/training/dtrain/score.h b/training/dtrain/score.h new file mode 100644 index 00000000..f317c903 --- /dev/null +++ b/training/dtrain/score.h @@ -0,0 +1,212 @@ +#ifndef _DTRAIN_SCORE_H_ +#define _DTRAIN_SCORE_H_ + +#include "kbestget.h" + +using namespace std; + +namespace dtrain +{ + + +struct NgramCounts +{ + unsigned N_; + map clipped_; + map sum_; + + NgramCounts(const unsigned N) : N_(N) { Zero(); } + + inline void + operator+=(const NgramCounts& rhs) + { + if (rhs.N_ > N_) Resize(rhs.N_); + for (unsigned i = 0; i < N_; i++) { + this->clipped_[i] += rhs.clipped_.find(i)->second; + this->sum_[i] += rhs.sum_.find(i)->second; + } + } + + inline const NgramCounts + operator+(const NgramCounts &other) const + { + NgramCounts result = *this; + result += other; + return result; + } + + inline void + operator*=(const score_t rhs) + { + for (unsigned i = 0; i < N_; i++) { + this->clipped_[i] *= rhs; + this->sum_[i] *= rhs; + } + } + + inline void + Add(const unsigned count, const unsigned ref_count, const unsigned i) + { + assert(i < N_); + if (count > ref_count) { + clipped_[i] += ref_count; + } else { + clipped_[i] += count; + } + sum_[i] += count; + } + + inline void + Zero() + { + for (unsigned i = 0; i < N_; i++) { + clipped_[i] = 0.; + sum_[i] = 0.; + } + } + + inline void + One() + { + for (unsigned i = 0; i < N_; i++) { + clipped_[i] = 1.; + sum_[i] = 1.; + } + } + + inline void + Print() + { + for (unsigned i = 0; i < N_; i++) { + cout << i+1 << "grams (clipped):\t" << clipped_[i] << endl; + cout << i+1 << "grams:\t\t\t" << sum_[i] << endl; + } + } + + inline void Resize(unsigned N) + { + if (N == N_) return; + else if (N > N_) { + for (unsigned i = N_; i < N; i++) { + clipped_[i] = 0.; + sum_[i] = 0.; + } + } else { // N < N_ + for (unsigned i = N_-1; i > N-1; i--) { + clipped_.erase(i); + sum_.erase(i); + } + } + N_ = N; + } +}; + +typedef map, unsigned> Ngrams; + +inline Ngrams +make_ngrams(const vector& s, const unsigned N) +{ + Ngrams ngrams; + vector ng; + for (size_t i = 0; i < s.size(); i++) { + ng.clear(); + for (unsigned j = i; j < min(i+N, s.size()); j++) { + ng.push_back(s[j]); + ngrams[ng]++; + } + } + return ngrams; +} + +inline NgramCounts +make_ngram_counts(const vector& hyp, const vector& ref, const unsigned N) +{ + Ngrams hyp_ngrams = make_ngrams(hyp, N); + Ngrams ref_ngrams = make_ngrams(ref, N); + NgramCounts counts(N); + Ngrams::iterator it; + Ngrams::iterator ti; + for (it = hyp_ngrams.begin(); it != hyp_ngrams.end(); it++) { + ti = ref_ngrams.find(it->first); + if (ti != ref_ngrams.end()) { + counts.Add(it->second, ti->second, it->first.size() - 1); + } else { + counts.Add(it->second, 0, it->first.size() - 1); + } + } + return counts; +} + +struct BleuScorer : public LocalScorer +{ + score_t Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len); + score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); +}; + +struct StupidBleuScorer : public LocalScorer +{ + score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); +}; + +struct SmoothBleuScorer : public LocalScorer +{ + score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); +}; + +struct SumBleuScorer : public LocalScorer +{ + score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); +}; + +struct SumExpBleuScorer : public LocalScorer +{ + score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); +}; + +struct SumWhateverBleuScorer : public LocalScorer +{ + score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); +}; + +struct ApproxBleuScorer : public BleuScorer +{ + NgramCounts glob_onebest_counts_; + unsigned glob_hyp_len_, glob_ref_len_, glob_src_len_; + score_t discount_; + + ApproxBleuScorer(unsigned N, score_t d) : glob_onebest_counts_(NgramCounts(N)), discount_(d) + { + glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0; + } + + inline void Reset() { + glob_onebest_counts_.Zero(); + glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0.; + } + + score_t Score(vector& hyp, vector& ref, const unsigned rank, const unsigned src_len); +}; + +struct LinearBleuScorer : public BleuScorer +{ + unsigned onebest_len_; + NgramCounts onebest_counts_; + + LinearBleuScorer(unsigned N) : onebest_len_(1), onebest_counts_(N) + { + onebest_counts_.One(); + } + + score_t Score(vector& hyp, vector& ref, const unsigned rank, const unsigned /*src_len*/); + + inline void Reset() { + onebest_len_ = 1; + onebest_counts_.One(); + } +}; + + +} // namespace + +#endif + diff --git a/training/dtrain/test/example/README b/training/dtrain/test/example/README new file mode 100644 index 00000000..6937b11b --- /dev/null +++ b/training/dtrain/test/example/README @@ -0,0 +1,8 @@ +Small example of input format for distributed training. +Call dtrain from cdec/dtrain/ with ./dtrain -c test/example/dtrain.ini . + +For this to work, undef 'DTRAIN_LOCAL' in dtrain.h +and recompile. + +Data is here: http://simianer.de/#dtrain + diff --git a/training/dtrain/test/example/cdec.ini b/training/dtrain/test/example/cdec.ini new file mode 100644 index 00000000..d5955f0e --- /dev/null +++ b/training/dtrain/test/example/cdec.ini @@ -0,0 +1,25 @@ +formalism=scfg +add_pass_through_rules=true +scfg_max_span_limit=15 +intersection_strategy=cube_pruning +cubepruning_pop_limit=30 +feature_function=WordPenalty +feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz +# all currently working feature functions for translation: +# (with those features active that were used in the ACL paper) +#feature_function=ArityPenalty +#feature_function=CMR2008ReorderingFeatures +#feature_function=Dwarf +#feature_function=InputIndicator +#feature_function=LexNullJump +#feature_function=NewJump +#feature_function=NgramFeatures +#feature_function=NonLatinCount +#feature_function=OutputIndicator +feature_function=RuleIdentityFeatures +feature_function=RuleSourceBigramFeatures +feature_function=RuleTargetBigramFeatures +feature_function=RuleShape +#feature_function=SourceSpanSizeFeatures +#feature_function=SourceWordPenalty +#feature_function=SpanFeatures diff --git a/training/dtrain/test/example/dtrain.ini b/training/dtrain/test/example/dtrain.ini new file mode 100644 index 00000000..72d50ca1 --- /dev/null +++ b/training/dtrain/test/example/dtrain.ini @@ -0,0 +1,22 @@ +input=test/example/nc-wmt11.1k.gz # use '-' for STDIN +output=- # a weights file (add .gz for gzip compression) or STDOUT '-' +select_weights=VOID # don't output weights +decoder_config=test/example/cdec.ini # config for cdec +# weights for these features will be printed on each iteration +print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +tmp=/tmp +stop_after=10 # stop epoch after 10 inputs + +# interesting stuff +epochs=2 # run over input 2 times +k=100 # use 100best lists +N=4 # optimize (approx) BLEU4 +scorer=stupid_bleu # use 'stupid' BLEU+1 +learning_rate=1.0 # learning rate, don't care if gamma=0 (perceptron) +gamma=0 # use SVM reg +sample_from=kbest # use kbest lists (as opposed to forest) +filter=uniq # only unique entries in kbest (surface form) +pair_sampling=XYX +hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here +pair_threshold=0 # minimum distance in BLEU (this will still only use pairs with diff > 0) +loss_margin=0 diff --git a/training/dtrain/test/example/expected-output b/training/dtrain/test/example/expected-output new file mode 100644 index 00000000..05326763 --- /dev/null +++ b/training/dtrain/test/example/expected-output @@ -0,0 +1,89 @@ + cdec cfg 'test/example/cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading test/example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** + Example feature: Shape_S00000_T00000 +Seeding random number sequence to 2912000813 + +dtrain +Parameters: + k 100 + N 4 + T 2 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 1 + gamma 0 + loss margin 0 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'VOID' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'test/example/cdec.ini' + input 'test/example/nc-wmt11.1k.gz' + output '-' + stop_after 10 +(a dot represents 10 inputs) +Iteration #1 of 2. + . 10 +Stopping after 10 input sentences. +WEIGHTS + Glue = -637 + WordPenalty = +1064 + LanguageModel = +1175.3 + LanguageModel_OOV = -1437 + PhraseModel_0 = +1935.6 + PhraseModel_1 = +2499.3 + PhraseModel_2 = +964.96 + PhraseModel_3 = +1410.8 + PhraseModel_4 = -5977.9 + PhraseModel_5 = +522 + PhraseModel_6 = +1089 + PassThrough = -1308 + --- + 1best avg score: 0.16963 (+0.16963) + 1best avg model score: 64485 (+64485) + avg # pairs: 1494.4 + avg # rank err: 702.6 + avg # margin viol: 0 + non0 feature count: 528 + avg list sz: 85.7 + avg f count: 102.75 +(time 0.083 min, 0.5 s/S) + +Iteration #2 of 2. + . 10 +WEIGHTS + Glue = -1196 + WordPenalty = +809.52 + LanguageModel = +3112.1 + LanguageModel_OOV = -1464 + PhraseModel_0 = +3895.5 + PhraseModel_1 = +4683.4 + PhraseModel_2 = +1092.8 + PhraseModel_3 = +1079.6 + PhraseModel_4 = -6827.7 + PhraseModel_5 = -888 + PhraseModel_6 = +142 + PassThrough = -1335 + --- + 1best avg score: 0.277 (+0.10736) + 1best avg model score: -3110.5 (-67595) + avg # pairs: 1144.2 + avg # rank err: 529.1 + avg # margin viol: 0 + non0 feature count: 859 + avg list sz: 74.9 + avg f count: 112.84 +(time 0.067 min, 0.4 s/S) + +Writing weights file to '-' ... +done + +--- +Best iteration: 2 [SCORE 'stupid_bleu'=0.277]. +This took 0.15 min. diff --git a/training/dtrain/test/parallelize/cdec.ini b/training/dtrain/test/parallelize/cdec.ini new file mode 100644 index 00000000..72e99dc5 --- /dev/null +++ b/training/dtrain/test/parallelize/cdec.ini @@ -0,0 +1,22 @@ +formalism=scfg +add_pass_through_rules=true +intersection_strategy=cube_pruning +cubepruning_pop_limit=200 +scfg_max_span_limit=15 +feature_function=WordPenalty +feature_function=KLanguageModel /stor/dat/wmt12/en/news_only/m/wmt12.news.en.3.kenv5 +#feature_function=ArityPenalty +#feature_function=CMR2008ReorderingFeatures +#feature_function=Dwarf +#feature_function=InputIndicator +#feature_function=LexNullJump +#feature_function=NewJump +#feature_function=NgramFeatures +#feature_function=NonLatinCount +#feature_function=OutputIndicator +#feature_function=RuleIdentityFeatures +#feature_function=RuleNgramFeatures +#feature_function=RuleShape +#feature_function=SourceSpanSizeFeatures +#feature_function=SourceWordPenalty +#feature_function=SpanFeatures diff --git a/training/dtrain/test/parallelize/dtrain.ini b/training/dtrain/test/parallelize/dtrain.ini new file mode 100644 index 00000000..03f9d240 --- /dev/null +++ b/training/dtrain/test/parallelize/dtrain.ini @@ -0,0 +1,15 @@ +k=100 +N=4 +learning_rate=0.0001 +gamma=0 +loss_margin=0 +epochs=1 +scorer=stupid_bleu +sample_from=kbest +filter=uniq +pair_sampling=XYX +hi_lo=0.1 +select_weights=last +print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +tmp=/tmp +decoder_config=cdec.ini diff --git a/training/dtrain/test/parallelize/in b/training/dtrain/test/parallelize/in new file mode 100644 index 00000000..a312809f --- /dev/null +++ b/training/dtrain/test/parallelize/in @@ -0,0 +1,10 @@ +barack obama erhält als vierter us @-@ präsident den frieden nobelpreis +der amerikanische präsident barack obama kommt für 26 stunden nach oslo , norwegen , um hier als vierter us @-@ präsident in der geschichte den frieden nobelpreis entgegen zunehmen . +darüber hinaus erhält er das diplom sowie die medaille und einen scheck über 1,4 mio. dollar für seine außer gewöhnlichen bestrebungen um die intensivierung der welt diplomatie und zusammen arbeit unter den völkern . +der chef des weißen hauses kommt morgen zusammen mit seiner frau michelle in der nordwegischen metropole an und wird die ganze zeit beschäftigt sein . +zunächst stattet er dem nobel @-@ institut einen besuch ab , wo er überhaupt zum ersten mal mit den fünf ausschuss mitglieder zusammen trifft , die ihn im oktober aus 172 leuten und 33 organisationen gewählt haben . +das präsidenten paar hat danach ein treffen mit dem norwegischen könig harald v. und königin sonja eingeplant . +nachmittags erreicht dann der besuch seinen höhepunkt mit der zeremonie , bei der obama den prestige preis übernimmt . +diesen erhält er als der vierte us @-@ präsident , aber erst als der dritte , der den preis direkt im amt entgegen nimmt . +das weiße haus avisierte schon , dass obama bei der übernahme des preises über den afghanistan krieg sprechen wird . +der präsident will diesem thema nicht ausweichen , weil er weiß , dass er den preis als ein präsident übernimmt , der zur zeit krieg in zwei ländern führt . diff --git a/training/dtrain/test/parallelize/refs b/training/dtrain/test/parallelize/refs new file mode 100644 index 00000000..4d3128cb --- /dev/null +++ b/training/dtrain/test/parallelize/refs @@ -0,0 +1,10 @@ +barack obama becomes the fourth american president to receive the nobel peace prize +the american president barack obama will fly into oslo , norway for 26 hours to receive the nobel peace prize , the fourth american president in history to do so . +he will receive a diploma , medal and cheque for 1.4 million dollars for his exceptional efforts to improve global diplomacy and encourage international cooperation , amongst other things . +the head of the white house will be flying into the norwegian city in the morning with his wife michelle and will have a busy schedule . +first , he will visit the nobel institute , where he will have his first meeting with the five committee members who selected him from 172 people and 33 organisations . +the presidential couple then has a meeting scheduled with king harald v and queen sonja of norway . +then , in the afternoon , the visit will culminate in a grand ceremony , at which obama will receive the prestigious award . +he will be the fourth american president to be awarded the prize , and only the third to have received it while actually in office . +the white house has stated that , when he accepts the prize , obama will speak about the war in afghanistan . +the president does not want to skirt around this topic , as he realises that he is accepting the prize as a president whose country is currently at war in two countries . diff --git a/training/dtrain/test/toy/cdec.ini b/training/dtrain/test/toy/cdec.ini new file mode 100644 index 00000000..98b02d44 --- /dev/null +++ b/training/dtrain/test/toy/cdec.ini @@ -0,0 +1,2 @@ +formalism=scfg +add_pass_through_rules=true diff --git a/training/dtrain/test/toy/dtrain.ini b/training/dtrain/test/toy/dtrain.ini new file mode 100644 index 00000000..a091732f --- /dev/null +++ b/training/dtrain/test/toy/dtrain.ini @@ -0,0 +1,12 @@ +decoder_config=test/toy/cdec.ini +input=test/toy/input +output=- +print_weights=logp shell_rule house_rule small_rule little_rule PassThrough +k=4 +N=4 +epochs=2 +scorer=bleu +sample_from=kbest +filter=uniq +pair_sampling=all +learning_rate=1 diff --git a/training/dtrain/test/toy/input b/training/dtrain/test/toy/input new file mode 100644 index 00000000..4d10a9ea --- /dev/null +++ b/training/dtrain/test/toy/input @@ -0,0 +1,2 @@ +0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0 +1 ich fand ein kleines haus i found a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0 diff --git a/training/entropy.cc b/training/entropy.cc deleted file mode 100644 index 4fdbe2be..00000000 --- a/training/entropy.cc +++ /dev/null @@ -1,41 +0,0 @@ -#include "entropy.h" - -#include "prob.h" -#include "candidate_set.h" - -using namespace std; - -namespace training { - -// see Mann and McCallum "Efficient Computation of Entropy Gradient ..." for -// a mostly clear derivation of: -// g = E[ F(x,y) * log p(y|x) ] + H(y | x) * E[ F(x,y) ] -double CandidateSetEntropy::operator()(const vector& params, - SparseVector* g) const { - prob_t z; - vector dps(cands_.size()); - for (unsigned i = 0; i < cands_.size(); ++i) { - dps[i] = cands_[i].fmap.dot(params); - const prob_t u(dps[i], init_lnx()); - z += u; - } - const double log_z = log(z); - - SparseVector exp_feats; - double entropy = 0; - for (unsigned i = 0; i < cands_.size(); ++i) { - const double log_prob = cands_[i].fmap.dot(params) - log_z; - const double prob = exp(log_prob); - const double e_logprob = prob * log_prob; - entropy -= e_logprob; - if (g) { - (*g) += cands_[i].fmap * e_logprob; - exp_feats += cands_[i].fmap * prob; - } - } - if (g) (*g) += exp_feats * entropy; - return entropy; -} - -} - diff --git a/training/entropy.h b/training/entropy.h deleted file mode 100644 index 796589ca..00000000 --- a/training/entropy.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _CSENTROPY_H_ -#define _CSENTROPY_H_ - -#include -#include "sparse_vector.h" - -namespace training { - class CandidateSet; - - class CandidateSetEntropy { - public: - explicit CandidateSetEntropy(const CandidateSet& cs) : cands_(cs) {} - // compute the entropy (expected log likelihood) of a CandidateSet - // (optional) the gradient of the entropy with respect to params - double operator()(const std::vector& params, - SparseVector* g = NULL) const; - private: - const CandidateSet& cands_; - }; -}; - -#endif diff --git a/training/fast_align.cc b/training/fast_align.cc deleted file mode 100644 index 7492d26f..00000000 --- a/training/fast_align.cc +++ /dev/null @@ -1,281 +0,0 @@ -#include -#include - -#include -#include - -#include "m.h" -#include "corpus_tools.h" -#include "stringlib.h" -#include "filelib.h" -#include "ttables.h" -#include "tdict.h" - -namespace po = boost::program_options; -using namespace std; - -bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input,i",po::value(),"Parallel corpus input file") - ("reverse,r","Reverse estimation (swap source and target during training)") - ("iterations,I",po::value()->default_value(5),"Number of iterations of EM training") - //("bidir,b", "Run bidirectional alignment") - ("favor_diagonal,d", "Use a static alignment distribution that assigns higher probabilities to alignments near the diagonal") - ("prob_align_null", po::value()->default_value(0.08), "When --favor_diagonal is set, what's the probability of a null alignment?") - ("diagonal_tension,T", po::value()->default_value(4.0), "How sharp or flat around the diagonal is the alignment distribution (<1 = flat >1 = sharp)") - ("variational_bayes,v","Infer VB estimate of parameters under a symmetric Dirichlet prior") - ("alpha,a", po::value()->default_value(0.01), "Hyperparameter for optional Dirichlet prior") - ("no_null_word,N","Do not generate from a null token") - ("output_parameters,p", "Write model parameters instead of alignments") - ("beam_threshold,t",po::value()->default_value(-4),"When writing parameters, log_10 of beam threshold for writing parameter (-10000 to include everything, 0 max parameter only)") - ("hide_training_alignments,H", "Hide training alignments (only useful if you want to use -x option and just compute testset statistics)") - ("testset,x", po::value(), "After training completes, compute the log likelihood of this set of sentence pairs under the learned model") - ("no_add_viterbi,V","When writing model parameters, do not add Viterbi alignment points (may generate a grammar where some training sentence pairs are unreachable)"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || conf->count("input") == 0) { - cerr << "Usage " << argv[0] << " [OPTIONS] -i corpus.fr-en\n"; - cerr << dcmdline_options << endl; - return false; - } - return true; -} - -int main(int argc, char** argv) { - po::variables_map conf; - if (!InitCommandLine(argc, argv, &conf)) return 1; - const string fname = conf["input"].as(); - const bool reverse = conf.count("reverse") > 0; - const int ITERATIONS = conf["iterations"].as(); - const double BEAM_THRESHOLD = pow(10.0, conf["beam_threshold"].as()); - const bool use_null = (conf.count("no_null_word") == 0); - const WordID kNULL = TD::Convert(""); - const bool add_viterbi = (conf.count("no_add_viterbi") == 0); - const bool variational_bayes = (conf.count("variational_bayes") > 0); - const bool write_alignments = (conf.count("output_parameters") == 0); - const double diagonal_tension = conf["diagonal_tension"].as(); - const double prob_align_null = conf["prob_align_null"].as(); - const bool hide_training_alignments = (conf.count("hide_training_alignments") > 0); - string testset; - if (conf.count("testset")) testset = conf["testset"].as(); - const double prob_align_not_null = 1.0 - prob_align_null; - const double alpha = conf["alpha"].as(); - const bool favor_diagonal = conf.count("favor_diagonal"); - if (variational_bayes && alpha <= 0.0) { - cerr << "--alpha must be > 0\n"; - return 1; - } - - TTable s2t, t2s; - TTable::Word2Word2Double s2t_viterbi; - double tot_len_ratio = 0; - double mean_srclen_multiplier = 0; - vector unnormed_a_i; - for (int iter = 0; iter < ITERATIONS; ++iter) { - const bool final_iteration = (iter == (ITERATIONS - 1)); - cerr << "ITERATION " << (iter + 1) << (final_iteration ? " (FINAL)" : "") << endl; - ReadFile rf(fname); - istream& in = *rf.stream(); - double likelihood = 0; - double denom = 0.0; - int lc = 0; - bool flag = false; - string line; - string ssrc, strg; - vector src, trg; - while(true) { - getline(in, line); - if (!in) break; - ++lc; - if (lc % 1000 == 0) { cerr << '.'; flag = true; } - if (lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; } - src.clear(); trg.clear(); - CorpusTools::ReadLine(line, &src, &trg); - if (reverse) swap(src, trg); - if (src.size() == 0 || trg.size() == 0) { - cerr << "Error: " << lc << "\n" << line << endl; - return 1; - } - if (src.size() > unnormed_a_i.size()) - unnormed_a_i.resize(src.size()); - if (iter == 0) - tot_len_ratio += static_cast(trg.size()) / static_cast(src.size()); - denom += trg.size(); - vector probs(src.size() + 1); - bool first_al = true; // used for write_alignments - for (int j = 0; j < trg.size(); ++j) { - const WordID& f_j = trg[j]; - double sum = 0; - const double j_over_ts = double(j) / trg.size(); - double prob_a_i = 1.0 / (src.size() + use_null); // uniform (model 1) - if (use_null) { - if (favor_diagonal) prob_a_i = prob_align_null; - probs[0] = s2t.prob(kNULL, f_j) * prob_a_i; - sum += probs[0]; - } - double az = 0; - if (favor_diagonal) { - for (int ta = 0; ta < src.size(); ++ta) { - unnormed_a_i[ta] = exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension); - az += unnormed_a_i[ta]; - } - az /= prob_align_not_null; - } - for (int i = 1; i <= src.size(); ++i) { - if (favor_diagonal) - prob_a_i = unnormed_a_i[i-1] / az; - probs[i] = s2t.prob(src[i-1], f_j) * prob_a_i; - sum += probs[i]; - } - if (final_iteration) { - if (add_viterbi || write_alignments) { - WordID max_i = 0; - double max_p = -1; - int max_index = -1; - if (use_null) { - max_i = kNULL; - max_index = 0; - max_p = probs[0]; - } - for (int i = 1; i <= src.size(); ++i) { - if (probs[i] > max_p) { - max_index = i; - max_p = probs[i]; - max_i = src[i-1]; - } - } - if (!hide_training_alignments && write_alignments) { - if (max_index > 0) { - if (first_al) first_al = false; else cout << ' '; - if (reverse) - cout << j << '-' << (max_index - 1); - else - cout << (max_index - 1) << '-' << j; - } - } - s2t_viterbi[max_i][f_j] = 1.0; - } - } else { - if (use_null) - s2t.Increment(kNULL, f_j, probs[0] / sum); - for (int i = 1; i <= src.size(); ++i) - s2t.Increment(src[i-1], f_j, probs[i] / sum); - } - likelihood += log(sum); - } - if (write_alignments && final_iteration && !hide_training_alignments) cout << endl; - } - - // log(e) = 1.0 - double base2_likelihood = likelihood / log(2); - - if (flag) { cerr << endl; } - if (iter == 0) { - mean_srclen_multiplier = tot_len_ratio / lc; - cerr << "expected target length = source length * " << mean_srclen_multiplier << endl; - } - cerr << " log_e likelihood: " << likelihood << endl; - cerr << " log_2 likelihood: " << base2_likelihood << endl; - cerr << " cross entropy: " << (-base2_likelihood / denom) << endl; - cerr << " perplexity: " << pow(2.0, -base2_likelihood / denom) << endl; - if (!final_iteration) { - if (variational_bayes) - s2t.NormalizeVB(alpha); - else - s2t.Normalize(); - } - } - if (testset.size()) { - ReadFile rf(testset); - istream& in = *rf.stream(); - int lc = 0; - double tlp = 0; - string line; - while (getline(in, line)) { - ++lc; - vector src, trg; - CorpusTools::ReadLine(line, &src, &trg); - cout << TD::GetString(src) << " ||| " << TD::GetString(trg) << " |||"; - if (reverse) swap(src, trg); - double log_prob = Md::log_poisson(trg.size(), 0.05 + src.size() * mean_srclen_multiplier); - if (src.size() > unnormed_a_i.size()) - unnormed_a_i.resize(src.size()); - - // compute likelihood - for (int j = 0; j < trg.size(); ++j) { - const WordID& f_j = trg[j]; - double sum = 0; - int a_j = 0; - double max_pat = 0; - const double j_over_ts = double(j) / trg.size(); - double prob_a_i = 1.0 / (src.size() + use_null); // uniform (model 1) - if (use_null) { - if (favor_diagonal) prob_a_i = prob_align_null; - max_pat = s2t.prob(kNULL, f_j) * prob_a_i; - sum += max_pat; - } - double az = 0; - if (favor_diagonal) { - for (int ta = 0; ta < src.size(); ++ta) { - unnormed_a_i[ta] = exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension); - az += unnormed_a_i[ta]; - } - az /= prob_align_not_null; - } - for (int i = 1; i <= src.size(); ++i) { - if (favor_diagonal) - prob_a_i = unnormed_a_i[i-1] / az; - double pat = s2t.prob(src[i-1], f_j) * prob_a_i; - if (pat > max_pat) { max_pat = pat; a_j = i; } - sum += pat; - } - log_prob += log(sum); - if (write_alignments) { - if (a_j > 0) { - cout << ' '; - if (reverse) - cout << j << '-' << (a_j - 1); - else - cout << (a_j - 1) << '-' << j; - } - } - } - tlp += log_prob; - cout << " ||| " << log_prob << endl << flush; - } // loop over test set sentences - cerr << "TOTAL LOG PROB " << tlp << endl; - } - - if (write_alignments) return 0; - - for (TTable::Word2Word2Double::iterator ei = s2t.ttable.begin(); ei != s2t.ttable.end(); ++ei) { - const TTable::Word2Double& cpd = ei->second; - const TTable::Word2Double& vit = s2t_viterbi[ei->first]; - const string& esym = TD::Convert(ei->first); - double max_p = -1; - for (TTable::Word2Double::const_iterator fi = cpd.begin(); fi != cpd.end(); ++fi) - if (fi->second > max_p) max_p = fi->second; - const double threshold = max_p * BEAM_THRESHOLD; - for (TTable::Word2Double::const_iterator fi = cpd.begin(); fi != cpd.end(); ++fi) { - if (fi->second > threshold || (vit.find(fi->first) != vit.end())) { - cout << esym << ' ' << TD::Convert(fi->first) << ' ' << log(fi->second) << endl; - } - } - } - return 0; -} - diff --git a/training/feature_expectations.cc b/training/feature_expectations.cc deleted file mode 100644 index f1a85495..00000000 --- a/training/feature_expectations.cc +++ /dev/null @@ -1,232 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "verbose.h" -#include "hg.h" -#include "prob.h" -#include "inside_outside.h" -#include "ff_register.h" -#include "decoder.h" -#include "filelib.h" -#include "online_optimizer.h" -#include "fdict.h" -#include "weights.h" -#include "sparse_vector.h" -#include "sampler.h" - -#ifdef HAVE_MPI -#include -#include -namespace mpi = boost::mpi; -#endif - -using namespace std; -namespace po = boost::program_options; - -struct FComp { - const vector& w_; - FComp(const vector& w) : w_(w) {} - bool operator()(int a, int b) const { - return fabs(w_[a]) > fabs(w_[b]); - } -}; - -void ShowFeatures(const vector& w) { - vector fnums(w.size()); - for (int i = 0; i < w.size(); ++i) - fnums[i] = i; - sort(fnums.begin(), fnums.end(), FComp(w)); - for (vector::iterator i = fnums.begin(); i != fnums.end(); ++i) { - if (w[*i]) cout << FD::Convert(*i) << ' ' << w[*i] << endl; - } -} - -void ReadConfig(const string& ini, vector* out) { - ReadFile rf(ini); - istream& in = *rf.stream(); - while(in) { - string line; - getline(in, line); - if (!in) continue; - out->push_back(line); - } -} - -void StoreConfig(const vector& cfg, istringstream* o) { - ostringstream os; - for (int i = 0; i < cfg.size(); ++i) { os << cfg[i] << endl; } - o->str(os.str()); -} - -bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input,i",po::value(),"Corpus of source language sentences") - ("weights,w",po::value(),"Input feature weights file") - ("decoder_config,c",po::value(), "cdec.ini file"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || !conf->count("input") || !conf->count("decoder_config")) { - cerr << dcmdline_options << endl; - return false; - } - return true; -} - -void ReadTrainingCorpus(const string& fname, int rank, int size, vector* c, vector* order) { - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - int id = 0; - while(in) { - getline(in, line); - if (!in) break; - if (id % size == rank) { - c->push_back(line); - order->push_back(id); - } - ++id; - } -} - -static const double kMINUS_EPSILON = -1e-6; - -struct TrainingObserver : public DecoderObserver { - void Reset() { - acc_exp.clear(); - total_complete = 0; - } - - virtual void NotifyDecodingStart(const SentenceMetadata& smeta) { - cur_model_exp.clear(); - state = 1; - } - - // compute model expectations, denominator of objective - virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { - assert(state == 1); - state = 2; - const prob_t z = InsideOutside, - EdgeFeaturesAndProbWeightFunction>(*hg, &cur_model_exp); - cur_model_exp /= z; - acc_exp += cur_model_exp; - } - - virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) { - cerr << "IGNORING ALIGNMENT FOREST!\n"; - } - - virtual void NotifyDecodingComplete(const SentenceMetadata& smeta) { - if (state == 2) { - ++total_complete; - } - } - - void GetExpectations(SparseVector* g) const { - g->clear(); - for (SparseVector::const_iterator it = acc_exp.begin(); it != acc_exp.end(); ++it) - g->set_value(it->first, it->second); - } - - int total_complete; - SparseVector cur_model_exp; - SparseVector acc_exp; - int state; -}; - -#ifdef HAVE_MPI -namespace boost { namespace mpi { - template<> - struct is_commutative >, SparseVector > - : mpl::true_ { }; -} } // end namespace boost::mpi -#endif - -int main(int argc, char** argv) { -#ifdef HAVE_MPI - mpi::environment env(argc, argv); - mpi::communicator world; - const int size = world.size(); - const int rank = world.rank(); -#else - const int size = 1; - const int rank = 0; -#endif - if (size > 1) SetSilent(true); // turn off verbose decoder output - register_feature_functions(); - - po::variables_map conf; - if (!InitCommandLine(argc, argv, &conf)) - return 1; - - // load initial weights - Weights weights; - if (conf.count("weights")) - weights.InitFromFile(conf["weights"].as()); - - vector corpus; - vector ids; - ReadTrainingCorpus(conf["input"].as(), rank, size, &corpus, &ids); - assert(corpus.size() > 0); - - vector cdec_ini; - ReadConfig(conf["decoder_config"].as(), &cdec_ini); - istringstream ini; - StoreConfig(cdec_ini, &ini); - Decoder decoder(&ini); - if (decoder.GetConf()["input"].as() != "-") { - cerr << "cdec.ini must not set an input file\n"; - return 1; - } - - SparseVector x; - weights.InitSparseVector(&x); - TrainingObserver observer; - - weights.InitFromVector(x); - vector lambdas; - weights.InitVector(&lambdas); - decoder.SetWeights(lambdas); - observer.Reset(); - for (unsigned i = 0; i < corpus.size(); ++i) { - int id = ids[i]; - decoder.SetId(id); - decoder.Decode(corpus[i], &observer); - } - SparseVector local_exps, exps; - observer.GetExpectations(&local_exps); -#ifdef HAVE_MPI - reduce(world, local_exps, exps, std::plus >(), 0); -#else - exps.swap(local_exps); -#endif - - weights.InitFromVector(exps); - weights.InitVector(&lambdas); - ShowFeatures(lambdas); - - return 0; -} diff --git a/training/grammar_convert.cc b/training/grammar_convert.cc deleted file mode 100644 index 607a7cb9..00000000 --- a/training/grammar_convert.cc +++ /dev/null @@ -1,348 +0,0 @@ -/* - this program modifies cfg hypergraphs (forests) and extracts kbests? - what are: json, split ? - */ -#include -#include -#include - -#include -#include - -#include "inside_outside.h" -#include "tdict.h" -#include "filelib.h" -#include "hg.h" -#include "hg_io.h" -#include "kbest.h" -#include "viterbi.h" -#include "weights.h" - -namespace po = boost::program_options; -using namespace std; - -WordID kSTART; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input,i", po::value()->default_value("-"), "Input file") - ("format,f", po::value()->default_value("cfg"), "Input format. Values: cfg, json, split") - ("output,o", po::value()->default_value("json"), "Output command. Values: json, 1best") - ("reorder,r", "Add Yamada & Knight (2002) reorderings") - ("weights,w", po::value(), "Feature weights for k-best derivations [optional]") - ("collapse_weights,C", "Collapse order features into a single feature whose value is all of the locally applying feature weights") - ("k_derivations,k", po::value(), "Show k derivations and their features") - ("max_reorder,m", po::value()->default_value(999), "Move a constituent at most this far") - ("help,h", "Print this help message and exit"); - po::options_description clo("Command line options"); - po::options_description dcmdline_options; - dcmdline_options.add(opts); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - po::notify(*conf); - - if (conf->count("help") || conf->count("input") == 0) { - cerr << "\nUsage: grammar_convert [-options]\n\nConverts a grammar file (in Hiero format) into JSON hypergraph.\n"; - cerr << dcmdline_options << endl; - exit(1); - } -} - -int GetOrCreateNode(const WordID& lhs, map* lhs2node, Hypergraph* hg) { - int& node_id = (*lhs2node)[lhs]; - if (!node_id) - node_id = hg->AddNode(lhs)->id_ + 1; - return node_id - 1; -} - -void FilterAndCheckCorrectness(int goal, Hypergraph* hg) { - if (goal < 0) { - cerr << "Error! [S] not found in grammar!\n"; - exit(1); - } - if (hg->nodes_[goal].in_edges_.size() != 1) { - cerr << "Error! [S] has more than one rewrite!\n"; - exit(1); - } - int old_size = hg->nodes_.size(); - hg->TopologicallySortNodesAndEdges(goal); - if (hg->nodes_.size() != old_size) { - cerr << "Warning! During sorting " << (old_size - hg->nodes_.size()) << " disappeared!\n"; - } - vector inside; // inside score at each node - double p = Inside(*hg, &inside); - if (!p) { - cerr << "Warning! Grammar defines the empty language!\n"; - hg->clear(); - return; - } - vector prune(hg->edges_.size(), false); - int bad_edges = 0; - for (unsigned i = 0; i < hg->edges_.size(); ++i) { - Hypergraph::Edge& edge = hg->edges_[i]; - bool bad = false; - for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { - if (!inside[edge.tail_nodes_[j]]) { - bad = true; - ++bad_edges; - } - } - prune[i] = bad; - } - cerr << "Removing " << bad_edges << " bad edges from the grammar.\n"; - for (unsigned i = 0; i < hg->edges_.size(); ++i) { - if (prune[i]) - cerr << " " << hg->edges_[i].rule_->AsString() << endl; - } - hg->PruneEdges(prune); -} - -void CreateEdge(const TRulePtr& r, const Hypergraph::TailNodeVector& tail, Hypergraph::Node* head_node, Hypergraph* hg) { - Hypergraph::Edge* new_edge = hg->AddEdge(r, tail); - hg->ConnectEdgeToHeadNode(new_edge, head_node); - new_edge->feature_values_ = r->scores_; -} - -// from a category label like "NP_2", return "NP" -string PureCategory(WordID cat) { - assert(cat < 0); - string c = TD::Convert(cat*-1); - size_t p = c.find("_"); - if (p == string::npos) return c; - return c.substr(0, p); -}; - -string ConstituentOrderFeature(const TRule& rule, const vector& pi) { - const static string kTERM_VAR = "x"; - const vector& f = rule.f(); - map used; - vector terms(f.size()); - for (int i = 0; i < f.size(); ++i) { - const string term = (f[i] < 0 ? PureCategory(f[i]) : kTERM_VAR); - int& count = used[term]; - if (!count) { - terms[i] = term; - } else { - ostringstream os; - os << term << count; - terms[i] = os.str(); - } - ++count; - } - ostringstream os; - os << PureCategory(rule.GetLHS()) << ':'; - for (int i = 0; i < f.size(); ++i) { - if (i > 0) os << '_'; - os << terms[pi[i]]; - } - return os.str(); -} - -bool CheckPermutationMask(const vector& mask, const vector& pi) { - assert(mask.size() == pi.size()); - - int req_min = -1; - int cur_max = 0; - int cur_mask = -1; - for (int i = 0; i < mask.size(); ++i) { - if (mask[i] != cur_mask) { - cur_mask = mask[i]; - req_min = cur_max - 1; - } - if (pi[i] > req_min) { - if (pi[i] > cur_max) cur_max = pi[i]; - } else { - return false; - } - } - - return true; -} - -void PermuteYKRecursive(int nodeid, const WordID& parent, const int max_reorder, Hypergraph* hg) { - // Hypergraph tmp = *hg; - Hypergraph::Node* node = &hg->nodes_[nodeid]; - if (node->in_edges_.size() != 1) { - cerr << "Multiple rewrites of [" << TD::Convert(node->cat_ * -1) << "] (parent is [" << TD::Convert(parent*-1) << "])\n"; - cerr << " not recursing!\n"; - return; - } -// for (int eii = 0; eii < node->in_edges_.size(); ++eii) { - const int oe_index = node->in_edges_.front(); - const TRule& rule = *hg->edges_[oe_index].rule_; - const Hypergraph::TailNodeVector orig_tail = hg->edges_[oe_index].tail_nodes_; - const int tail_size = orig_tail.size(); - for (int i = 0; i < tail_size; ++i) { - PermuteYKRecursive(hg->edges_[oe_index].tail_nodes_[i], node->cat_, max_reorder, hg); - } - const vector& of = rule.f_; - if (of.size() == 1) return; - // cerr << "Permuting [" << TD::Convert(node->cat_ * -1) << "]\n"; - // cerr << "ORIG: " << rule.AsString() << endl; - vector pi(of.size(), 0); - for (int i = 0; i < pi.size(); ++i) pi[i] = i; - - vector permutation_mask(of.size(), 0); - const bool dont_reorder_across_PU = true; // TODO add configuration - if (dont_reorder_across_PU) { - int cur = 0; - for (int i = 0; i < pi.size(); ++i) { - if (of[i] >= 0) continue; - const string cat = PureCategory(of[i]); - if (cat == "PU" || cat == "PU!H" || cat == "PUNC" || cat == "PUNC!H" || cat == "CC") { - ++cur; - permutation_mask[i] = cur; - ++cur; - } else { - permutation_mask[i] = cur; - } - } - } - int fid = FD::Convert(ConstituentOrderFeature(rule, pi)); - hg->edges_[oe_index].feature_values_.set_value(fid, 1.0); - while (next_permutation(pi.begin(), pi.end())) { - if (!CheckPermutationMask(permutation_mask, pi)) - continue; - vector nf(pi.size(), 0); - Hypergraph::TailNodeVector tail(pi.size(), 0); - bool skip = false; - for (int i = 0; i < pi.size(); ++i) { - int dist = pi[i] - i; if (dist < 0) dist *= -1; - if (dist > max_reorder) { skip = true; break; } - nf[i] = of[pi[i]]; - tail[i] = orig_tail[pi[i]]; - } - if (skip) continue; - TRulePtr nr(new TRule(rule)); - nr->f_ = nf; - int fid = FD::Convert(ConstituentOrderFeature(rule, pi)); - nr->scores_.set_value(fid, 1.0); - // cerr << "PERM: " << nr->AsString() << endl; - CreateEdge(nr, tail, node, hg); - } - // } -} - -void PermuteYamadaAndKnight(Hypergraph* hg, int max_reorder) { - assert(hg->nodes_.back().cat_ == kSTART); - assert(hg->nodes_.back().in_edges_.size() == 1); - PermuteYKRecursive(hg->nodes_.size() - 1, kSTART, max_reorder, hg); -} - -void CollapseWeights(Hypergraph* hg) { - int fid = FD::Convert("Reordering"); - for (int i = 0; i < hg->edges_.size(); ++i) { - Hypergraph::Edge& edge = hg->edges_[i]; - edge.feature_values_.clear(); - if (edge.edge_prob_ != prob_t::Zero()) { - edge.feature_values_.set_value(fid, log(edge.edge_prob_)); - } - } -} - -void ProcessHypergraph(const vector& w, const po::variables_map& conf, const string& ref, Hypergraph* hg) { - if (conf.count("reorder")) - PermuteYamadaAndKnight(hg, conf["max_reorder"].as()); - if (w.size() > 0) { hg->Reweight(w); } - if (conf.count("collapse_weights")) CollapseWeights(hg); - if (conf["output"].as() == "json") { - HypergraphIO::WriteToJSON(*hg, false, &cout); - if (!ref.empty()) { cerr << "REF: " << ref << endl; } - } else { - vector onebest; - ViterbiESentence(*hg, &onebest); - if (ref.empty()) { - cout << TD::GetString(onebest) << endl; - } else { - cout << TD::GetString(onebest) << " ||| " << ref << endl; - } - } - if (conf.count("k_derivations")) { - const int k = conf["k_derivations"].as(); - KBest::KBestDerivations, ESentenceTraversal> kbest(*hg, k); - for (int i = 0; i < k; ++i) { - const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = - kbest.LazyKthBest(hg->nodes_.size() - 1, i); - if (!d) break; - cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl; - } - } -} - -int main(int argc, char **argv) { - kSTART = TD::Convert("S") * -1; - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - string infile = conf["input"].as(); - const bool is_split_input = (conf["format"].as() == "split"); - const bool is_json_input = is_split_input || (conf["format"].as() == "json"); - const bool collapse_weights = conf.count("collapse_weights"); - vector w; - if (conf.count("weights")) - Weights::InitFromFile(conf["weights"].as(), &w); - - if (collapse_weights && !w.size()) { - cerr << "--collapse_weights requires a weights file to be specified!\n"; - exit(1); - } - ReadFile rf(infile); - istream* in = rf.stream(); - assert(*in); - int lc = 0; - Hypergraph hg; - map lhs2node; - while(*in) { - string line; - ++lc; - getline(*in, line); - if (is_json_input) { - if (line.empty() || line[0] == '#') continue; - string ref; - if (is_split_input) { - size_t pos = line.rfind("}}"); - assert(pos != string::npos); - size_t rstart = line.find("||| ", pos); - assert(rstart != string::npos); - ref = line.substr(rstart + 4); - line = line.substr(0, pos + 2); - } - istringstream is(line); - if (HypergraphIO::ReadFromJSON(&is, &hg)) { - ProcessHypergraph(w, conf, ref, &hg); - hg.clear(); - } else { - cerr << "Error reading grammar from JSON: line " << lc << endl; - exit(1); - } - } else { - if (line.empty()) { - int goal = lhs2node[kSTART] - 1; - FilterAndCheckCorrectness(goal, &hg); - ProcessHypergraph(w, conf, "", &hg); - hg.clear(); - lhs2node.clear(); - continue; - } - if (line[0] == '#') continue; - if (line[0] != '[') { - cerr << "Line " << lc << ": bad format\n"; - exit(1); - } - TRulePtr tr(TRule::CreateRuleMonolingual(line)); - Hypergraph::TailNodeVector tail; - for (int i = 0; i < tr->f_.size(); ++i) { - WordID var_cat = tr->f_[i]; - if (var_cat < 0) - tail.push_back(GetOrCreateNode(var_cat, &lhs2node, &hg)); - } - const WordID lhs = tr->GetLHS(); - int head = GetOrCreateNode(lhs, &lhs2node, &hg); - Hypergraph::Edge* edge = hg.AddEdge(tr, tail); - edge->feature_values_ = tr->scores_; - Hypergraph::Node* node = &hg.nodes_[head]; - hg.ConnectEdgeToHeadNode(edge, node); - } - } -} - diff --git a/training/lbfgs.h b/training/lbfgs.h deleted file mode 100644 index e8baecab..00000000 --- a/training/lbfgs.h +++ /dev/null @@ -1,1459 +0,0 @@ -#ifndef SCITBX_LBFGS_H -#define SCITBX_LBFGS_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace scitbx { - -//! Limited-memory Broyden-Fletcher-Goldfarb-Shanno (LBFGS) %minimizer. -/*! Implementation of the - Limited-memory Broyden-Fletcher-Goldfarb-Shanno (LBFGS) - algorithm for large-scale multidimensional minimization - problems. - - This code was manually derived from Java code which was - in turn derived from the Fortran program - lbfgs.f. The Java translation was - effected mostly mechanically, with some manual - clean-up; in particular, array indices start at 0 - instead of 1. Most of the comments from the Fortran - code have been pasted in. - - Information on the original LBFGS Fortran source code is - available at - http://www.netlib.org/opt/lbfgs_um.shar . The following - information is taken verbatim from the Netlib documentation - for the Fortran source. - -
-    file    opt/lbfgs_um.shar
-    for     unconstrained optimization problems
-    alg     limited memory BFGS method
-    by      J. Nocedal
-    contact nocedal@eecs.nwu.edu
-    ref     D. C. Liu and J. Nocedal, ``On the limited memory BFGS method for
-    ,       large scale optimization methods'' Mathematical Programming 45
-    ,       (1989), pp. 503-528.
-    ,       (Postscript file of this paper is available via anonymous ftp
-    ,       to eecs.nwu.edu in the directory pub/%lbfgs/lbfgs_um.)
-    
- - @author Jorge Nocedal: original Fortran version, including comments - (July 1990).
- Robert Dodier: Java translation, August 1997.
- Ralf W. Grosse-Kunstleve: C++ port, March 2002.
- Chris Dyer: serialize/deserialize functionality - */ -namespace lbfgs { - - //! Generic exception class for %lbfgs %error messages. - /*! All exceptions thrown by the minimizer are derived from this class. - */ - class error : public std::exception { - public: - //! Constructor. - error(std::string const& msg) throw() - : msg_("lbfgs error: " + msg) - {} - //! Access to error message. - virtual const char* what() const throw() { return msg_.c_str(); } - protected: - virtual ~error() throw() {} - std::string msg_; - public: - static std::string itoa(unsigned long i) { - std::ostringstream os; - os << i; - return os.str(); - } - }; - - //! Specific exception class. - class error_internal_error : public error { - public: - //! Constructor. - error_internal_error(const char* file, unsigned long line) throw() - : error( - "Internal Error: " + std::string(file) + "(" + itoa(line) + ")") - {} - }; - - //! Specific exception class. - class error_improper_input_parameter : public error { - public: - //! Constructor. - error_improper_input_parameter(std::string const& msg) throw() - : error("Improper input parameter: " + msg) - {} - }; - - //! Specific exception class. - class error_improper_input_data : public error { - public: - //! Constructor. - error_improper_input_data(std::string const& msg) throw() - : error("Improper input data: " + msg) - {} - }; - - //! Specific exception class. - class error_search_direction_not_descent : public error { - public: - //! Constructor. - error_search_direction_not_descent() throw() - : error("The search direction is not a descent direction.") - {} - }; - - //! Specific exception class. - class error_line_search_failed : public error { - public: - //! Constructor. - error_line_search_failed(std::string const& msg) throw() - : error("Line search failed: " + msg) - {} - }; - - //! Specific exception class. - class error_line_search_failed_rounding_errors - : public error_line_search_failed { - public: - //! Constructor. - error_line_search_failed_rounding_errors(std::string const& msg) throw() - : error_line_search_failed(msg) - {} - }; - - namespace detail { - - template - inline - NumType - pow2(NumType const& x) { return x * x; } - - template - inline - NumType - abs(NumType const& x) { - if (x < NumType(0)) return -x; - return x; - } - - // This class implements an algorithm for multi-dimensional line search. - template - class mcsrch - { - protected: - int infoc; - FloatType dginit; - bool brackt; - bool stage1; - FloatType finit; - FloatType dgtest; - FloatType width; - FloatType width1; - FloatType stx; - FloatType fx; - FloatType dgx; - FloatType sty; - FloatType fy; - FloatType dgy; - FloatType stmin; - FloatType stmax; - - static FloatType const& max3( - FloatType const& x, - FloatType const& y, - FloatType const& z) - { - return x < y ? (y < z ? z : y ) : (x < z ? z : x ); - } - - public: - /* Minimize a function along a search direction. This code is - a Java translation of the function MCSRCH from - lbfgs.f, which in turn is a slight modification - of the subroutine CSRCH of More' and Thuente. - The changes are to allow reverse communication, and do not - affect the performance of the routine. This function, in turn, - calls mcstep.

- - The Java translation was effected mostly mechanically, with - some manual clean-up; in particular, array indices start at 0 - instead of 1. Most of the comments from the Fortran code have - been pasted in here as well.

- - The purpose of mcsrch is to find a step which - satisfies a sufficient decrease condition and a curvature - condition.

- - At each stage this function updates an interval of uncertainty - with endpoints stx and sty. The - interval of uncertainty is initially chosen so that it - contains a minimizer of the modified function -

-                f(x+stp*s) - f(x) - ftol*stp*(gradf(x)'s).
-           
- If a step is obtained for which the modified function has a - nonpositive function value and nonnegative derivative, then - the interval of uncertainty is chosen so that it contains a - minimizer of f(x+stp*s).

- - The algorithm is designed to find a step which satisfies - the sufficient decrease condition -

-                 f(x+stp*s) <= f(X) + ftol*stp*(gradf(x)'s),
-           
- and the curvature condition -
-                 abs(gradf(x+stp*s)'s)) <= gtol*abs(gradf(x)'s).
-           
- If ftol is less than gtol and if, - for example, the function is bounded below, then there is - always a step which satisfies both conditions. If no step can - be found which satisfies both conditions, then the algorithm - usually stops when rounding errors prevent further progress. - In this case stp only satisfies the sufficient - decrease condition.

- - @author Original Fortran version by Jorge J. More' and - David J. Thuente as part of the Minpack project, June 1983, - Argonne National Laboratory. Java translation by Robert - Dodier, August 1997. - - @param n The number of variables. - - @param x On entry this contains the base point for the line - search. On exit it contains x + stp*s. - - @param f On entry this contains the value of the objective - function at x. On exit it contains the value - of the objective function at x + stp*s. - - @param g On entry this contains the gradient of the objective - function at x. On exit it contains the gradient - at x + stp*s. - - @param s The search direction. - - @param stp On entry this contains an initial estimate of a - satifactory step length. On exit stp contains - the final estimate. - - @param ftol Tolerance for the sufficient decrease condition. - - @param xtol Termination occurs when the relative width of the - interval of uncertainty is at most xtol. - - @param maxfev Termination occurs when the number of evaluations - of the objective function is at least maxfev by - the end of an iteration. - - @param info This is an output variable, which can have these - values: -

    -
  • info = -1 A return is made to compute - the function and gradient. -
  • info = 1 The sufficient decrease condition - and the directional derivative condition hold. -
- - @param nfev On exit, this is set to the number of function - evaluations. - - @param wa Temporary storage array, of length n. - */ - void run( - FloatType const& gtol, - FloatType const& stpmin, - FloatType const& stpmax, - SizeType n, - FloatType* x, - FloatType f, - const FloatType* g, - FloatType* s, - SizeType is0, - FloatType& stp, - FloatType ftol, - FloatType xtol, - SizeType maxfev, - int& info, - SizeType& nfev, - FloatType* wa); - - /* The purpose of this function is to compute a safeguarded step - for a linesearch and to update an interval of uncertainty for - a minimizer of the function.

- - The parameter stx contains the step with the - least function value. The parameter stp contains - the current step. It is assumed that the derivative at - stx is negative in the direction of the step. If - brackt is true when - mcstep returns then a minimizer has been - bracketed in an interval of uncertainty with endpoints - stx and sty.

- - Variables that must be modified by mcstep are - implemented as 1-element arrays. - - @param stx Step at the best step obtained so far. - This variable is modified by mcstep. - @param fx Function value at the best step obtained so far. - This variable is modified by mcstep. - @param dx Derivative at the best step obtained so far. - The derivative must be negative in the direction of the - step, that is, dx and stp-stx must - have opposite signs. This variable is modified by - mcstep. - - @param sty Step at the other endpoint of the interval of - uncertainty. This variable is modified by mcstep. - @param fy Function value at the other endpoint of the interval - of uncertainty. This variable is modified by - mcstep. - - @param dy Derivative at the other endpoint of the interval of - uncertainty. This variable is modified by mcstep. - - @param stp Step at the current step. If brackt is set - then on input stp must be between stx - and sty. On output stp is set to the - new step. - @param fp Function value at the current step. - @param dp Derivative at the current step. - - @param brackt Tells whether a minimizer has been bracketed. - If the minimizer has not been bracketed, then on input this - variable must be set false. If the minimizer has - been bracketed, then on output this variable is - true. - - @param stpmin Lower bound for the step. - @param stpmax Upper bound for the step. - - If the return value is 1, 2, 3, or 4, then the step has - been computed successfully. A return value of 0 indicates - improper input parameters. - - @author Jorge J. More, David J. Thuente: original Fortran version, - as part of Minpack project. Argonne Nat'l Laboratory, June 1983. - Robert Dodier: Java translation, August 1997. - */ - static int mcstep( - FloatType& stx, - FloatType& fx, - FloatType& dx, - FloatType& sty, - FloatType& fy, - FloatType& dy, - FloatType& stp, - FloatType fp, - FloatType dp, - bool& brackt, - FloatType stpmin, - FloatType stpmax); - - void serialize(std::ostream* out) const { - out->write((const char*)&infoc,sizeof(infoc)); - out->write((const char*)&dginit,sizeof(dginit)); - out->write((const char*)&brackt,sizeof(brackt)); - out->write((const char*)&stage1,sizeof(stage1)); - out->write((const char*)&finit,sizeof(finit)); - out->write((const char*)&dgtest,sizeof(dgtest)); - out->write((const char*)&width,sizeof(width)); - out->write((const char*)&width1,sizeof(width1)); - out->write((const char*)&stx,sizeof(stx)); - out->write((const char*)&fx,sizeof(fx)); - out->write((const char*)&dgx,sizeof(dgx)); - out->write((const char*)&sty,sizeof(sty)); - out->write((const char*)&fy,sizeof(fy)); - out->write((const char*)&dgy,sizeof(dgy)); - out->write((const char*)&stmin,sizeof(stmin)); - out->write((const char*)&stmax,sizeof(stmax)); - } - - void deserialize(std::istream* in) const { - in->read((char*)&infoc, sizeof(infoc)); - in->read((char*)&dginit, sizeof(dginit)); - in->read((char*)&brackt, sizeof(brackt)); - in->read((char*)&stage1, sizeof(stage1)); - in->read((char*)&finit, sizeof(finit)); - in->read((char*)&dgtest, sizeof(dgtest)); - in->read((char*)&width, sizeof(width)); - in->read((char*)&width1, sizeof(width1)); - in->read((char*)&stx, sizeof(stx)); - in->read((char*)&fx, sizeof(fx)); - in->read((char*)&dgx, sizeof(dgx)); - in->read((char*)&sty, sizeof(sty)); - in->read((char*)&fy, sizeof(fy)); - in->read((char*)&dgy, sizeof(dgy)); - in->read((char*)&stmin, sizeof(stmin)); - in->read((char*)&stmax, sizeof(stmax)); - } - }; - - template - void mcsrch::run( - FloatType const& gtol, - FloatType const& stpmin, - FloatType const& stpmax, - SizeType n, - FloatType* x, - FloatType f, - const FloatType* g, - FloatType* s, - SizeType is0, - FloatType& stp, - FloatType ftol, - FloatType xtol, - SizeType maxfev, - int& info, - SizeType& nfev, - FloatType* wa) - { - if (info != -1) { - infoc = 1; - if ( n == 0 - || maxfev == 0 - || gtol < FloatType(0) - || xtol < FloatType(0) - || stpmin < FloatType(0) - || stpmax < stpmin) { - throw error_internal_error(__FILE__, __LINE__); - } - if (stp <= FloatType(0) || ftol < FloatType(0)) { - throw error_internal_error(__FILE__, __LINE__); - } - // Compute the initial gradient in the search direction - // and check that s is a descent direction. - dginit = FloatType(0); - for (SizeType j = 0; j < n; j++) { - dginit += g[j] * s[is0+j]; - } - if (dginit >= FloatType(0)) { - throw error_search_direction_not_descent(); - } - brackt = false; - stage1 = true; - nfev = 0; - finit = f; - dgtest = ftol*dginit; - width = stpmax - stpmin; - width1 = FloatType(2) * width; - std::copy(x, x+n, wa); - // The variables stx, fx, dgx contain the values of the step, - // function, and directional derivative at the best step. - // The variables sty, fy, dgy contain the value of the step, - // function, and derivative at the other endpoint of - // the interval of uncertainty. - // The variables stp, f, dg contain the values of the step, - // function, and derivative at the current step. - stx = FloatType(0); - fx = finit; - dgx = dginit; - sty = FloatType(0); - fy = finit; - dgy = dginit; - } - for (;;) { - if (info != -1) { - // Set the minimum and maximum steps to correspond - // to the present interval of uncertainty. - if (brackt) { - stmin = std::min(stx, sty); - stmax = std::max(stx, sty); - } - else { - stmin = stx; - stmax = stp + FloatType(4) * (stp - stx); - } - // Force the step to be within the bounds stpmax and stpmin. - stp = std::max(stp, stpmin); - stp = std::min(stp, stpmax); - // If an unusual termination is to occur then let - // stp be the lowest point obtained so far. - if ( (brackt && (stp <= stmin || stp >= stmax)) - || nfev >= maxfev - 1 || infoc == 0 - || (brackt && stmax - stmin <= xtol * stmax)) { - stp = stx; - } - // Evaluate the function and gradient at stp - // and compute the directional derivative. - // We return to main program to obtain F and G. - for (SizeType j = 0; j < n; j++) { - x[j] = wa[j] + stp * s[is0+j]; - } - info=-1; - break; - } - info = 0; - nfev++; - FloatType dg(0); - for (SizeType j = 0; j < n; j++) { - dg += g[j] * s[is0+j]; - } - FloatType ftest1 = finit + stp*dgtest; - // Test for convergence. - if ((brackt && (stp <= stmin || stp >= stmax)) || infoc == 0) { - throw error_line_search_failed_rounding_errors( - "Rounding errors prevent further progress." - " There may not be a step which satisfies the" - " sufficient decrease and curvature conditions." - " Tolerances may be too small."); - } - if (stp == stpmax && f <= ftest1 && dg <= dgtest) { - throw error_line_search_failed( - "The step is at the upper bound stpmax()."); - } - if (stp == stpmin && (f > ftest1 || dg >= dgtest)) { - throw error_line_search_failed( - "The step is at the lower bound stpmin()."); - } - if (nfev >= maxfev) { - throw error_line_search_failed( - "Number of function evaluations has reached maxfev()."); - } - if (brackt && stmax - stmin <= xtol * stmax) { - throw error_line_search_failed( - "Relative width of the interval of uncertainty" - " is at most xtol()."); - } - // Check for termination. - if (f <= ftest1 && abs(dg) <= gtol * (-dginit)) { - info = 1; - break; - } - // In the first stage we seek a step for which the modified - // function has a nonpositive value and nonnegative derivative. - if ( stage1 && f <= ftest1 - && dg >= std::min(ftol, gtol) * dginit) { - stage1 = false; - } - // A modified function is used to predict the step only if - // we have not obtained a step for which the modified - // function has a nonpositive function value and nonnegative - // derivative, and if a lower function value has been - // obtained but the decrease is not sufficient. - if (stage1 && f <= fx && f > ftest1) { - // Define the modified function and derivative values. - FloatType fm = f - stp*dgtest; - FloatType fxm = fx - stx*dgtest; - FloatType fym = fy - sty*dgtest; - FloatType dgm = dg - dgtest; - FloatType dgxm = dgx - dgtest; - FloatType dgym = dgy - dgtest; - // Call cstep to update the interval of uncertainty - // and to compute the new step. - infoc = mcstep(stx, fxm, dgxm, sty, fym, dgym, stp, fm, dgm, - brackt, stmin, stmax); - // Reset the function and gradient values for f. - fx = fxm + stx*dgtest; - fy = fym + sty*dgtest; - dgx = dgxm + dgtest; - dgy = dgym + dgtest; - } - else { - // Call mcstep to update the interval of uncertainty - // and to compute the new step. - infoc = mcstep(stx, fx, dgx, sty, fy, dgy, stp, f, dg, - brackt, stmin, stmax); - } - // Force a sufficient decrease in the size of the - // interval of uncertainty. - if (brackt) { - if (abs(sty - stx) >= FloatType(0.66) * width1) { - stp = stx + FloatType(0.5) * (sty - stx); - } - width1 = width; - width = abs(sty - stx); - } - } - } - - template - int mcsrch::mcstep( - FloatType& stx, - FloatType& fx, - FloatType& dx, - FloatType& sty, - FloatType& fy, - FloatType& dy, - FloatType& stp, - FloatType fp, - FloatType dp, - bool& brackt, - FloatType stpmin, - FloatType stpmax) - { - bool bound; - FloatType gamma, p, q, r, s, sgnd, stpc, stpf, stpq, theta; - int info = 0; - if ( ( brackt && (stp <= std::min(stx, sty) - || stp >= std::max(stx, sty))) - || dx * (stp - stx) >= FloatType(0) || stpmax < stpmin) { - return 0; - } - // Determine if the derivatives have opposite sign. - sgnd = dp * (dx / abs(dx)); - if (fp > fx) { - // First case. A higher function value. - // The minimum is bracketed. If the cubic step is closer - // to stx than the quadratic step, the cubic step is taken, - // else the average of the cubic and quadratic steps is taken. - info = 1; - bound = true; - theta = FloatType(3) * (fx - fp) / (stp - stx) + dx + dp; - s = max3(abs(theta), abs(dx), abs(dp)); - gamma = s * std::sqrt(pow2(theta / s) - (dx / s) * (dp / s)); - if (stp < stx) gamma = - gamma; - p = (gamma - dx) + theta; - q = ((gamma - dx) + gamma) + dp; - r = p/q; - stpc = stx + r * (stp - stx); - stpq = stx - + ((dx / ((fx - fp) / (stp - stx) + dx)) / FloatType(2)) - * (stp - stx); - if (abs(stpc - stx) < abs(stpq - stx)) { - stpf = stpc; - } - else { - stpf = stpc + (stpq - stpc) / FloatType(2); - } - brackt = true; - } - else if (sgnd < FloatType(0)) { - // Second case. A lower function value and derivatives of - // opposite sign. The minimum is bracketed. If the cubic - // step is closer to stx than the quadratic (secant) step, - // the cubic step is taken, else the quadratic step is taken. - info = 2; - bound = false; - theta = FloatType(3) * (fx - fp) / (stp - stx) + dx + dp; - s = max3(abs(theta), abs(dx), abs(dp)); - gamma = s * std::sqrt(pow2(theta / s) - (dx / s) * (dp / s)); - if (stp > stx) gamma = - gamma; - p = (gamma - dp) + theta; - q = ((gamma - dp) + gamma) + dx; - r = p/q; - stpc = stp + r * (stx - stp); - stpq = stp + (dp / (dp - dx)) * (stx - stp); - if (abs(stpc - stp) > abs(stpq - stp)) { - stpf = stpc; - } - else { - stpf = stpq; - } - brackt = true; - } - else if (abs(dp) < abs(dx)) { - // Third case. A lower function value, derivatives of the - // same sign, and the magnitude of the derivative decreases. - // The cubic step is only used if the cubic tends to infinity - // in the direction of the step or if the minimum of the cubic - // is beyond stp. Otherwise the cubic step is defined to be - // either stpmin or stpmax. The quadratic (secant) step is also - // computed and if the minimum is bracketed then the the step - // closest to stx is taken, else the step farthest away is taken. - info = 3; - bound = true; - theta = FloatType(3) * (fx - fp) / (stp - stx) + dx + dp; - s = max3(abs(theta), abs(dx), abs(dp)); - gamma = s * std::sqrt( - std::max(FloatType(0), pow2(theta / s) - (dx / s) * (dp / s))); - if (stp > stx) gamma = -gamma; - p = (gamma - dp) + theta; - q = (gamma + (dx - dp)) + gamma; - r = p/q; - if (r < FloatType(0) && gamma != FloatType(0)) { - stpc = stp + r * (stx - stp); - } - else if (stp > stx) { - stpc = stpmax; - } - else { - stpc = stpmin; - } - stpq = stp + (dp / (dp - dx)) * (stx - stp); - if (brackt) { - if (abs(stp - stpc) < abs(stp - stpq)) { - stpf = stpc; - } - else { - stpf = stpq; - } - } - else { - if (abs(stp - stpc) > abs(stp - stpq)) { - stpf = stpc; - } - else { - stpf = stpq; - } - } - } - else { - // Fourth case. A lower function value, derivatives of the - // same sign, and the magnitude of the derivative does - // not decrease. If the minimum is not bracketed, the step - // is either stpmin or stpmax, else the cubic step is taken. - info = 4; - bound = false; - if (brackt) { - theta = FloatType(3) * (fp - fy) / (sty - stp) + dy + dp; - s = max3(abs(theta), abs(dy), abs(dp)); - gamma = s * std::sqrt(pow2(theta / s) - (dy / s) * (dp / s)); - if (stp > sty) gamma = -gamma; - p = (gamma - dp) + theta; - q = ((gamma - dp) + gamma) + dy; - r = p/q; - stpc = stp + r * (sty - stp); - stpf = stpc; - } - else if (stp > stx) { - stpf = stpmax; - } - else { - stpf = stpmin; - } - } - // Update the interval of uncertainty. This update does not - // depend on the new step or the case analysis above. - if (fp > fx) { - sty = stp; - fy = fp; - dy = dp; - } - else { - if (sgnd < FloatType(0)) { - sty = stx; - fy = fx; - dy = dx; - } - stx = stp; - fx = fp; - dx = dp; - } - // Compute the new step and safeguard it. - stpf = std::min(stpmax, stpf); - stpf = std::max(stpmin, stpf); - stp = stpf; - if (brackt && bound) { - if (sty > stx) { - stp = std::min(stx + FloatType(0.66) * (sty - stx), stp); - } - else { - stp = std::max(stx + FloatType(0.66) * (sty - stx), stp); - } - } - return info; - } - - /* Compute the sum of a vector times a scalar plus another vector. - Adapted from the subroutine daxpy in - lbfgs.f. - */ - template - void daxpy( - SizeType n, - FloatType da, - const FloatType* dx, - SizeType ix0, - SizeType incx, - FloatType* dy, - SizeType iy0, - SizeType incy) - { - SizeType i, ix, iy, m; - if (n == 0) return; - if (da == FloatType(0)) return; - if (!(incx == 1 && incy == 1)) { - ix = 0; - iy = 0; - for (i = 0; i < n; i++) { - dy[iy0+iy] += da * dx[ix0+ix]; - ix += incx; - iy += incy; - } - return; - } - m = n % 4; - for (i = 0; i < m; i++) { - dy[iy0+i] += da * dx[ix0+i]; - } - for (; i < n;) { - dy[iy0+i] += da * dx[ix0+i]; i++; - dy[iy0+i] += da * dx[ix0+i]; i++; - dy[iy0+i] += da * dx[ix0+i]; i++; - dy[iy0+i] += da * dx[ix0+i]; i++; - } - } - - template - inline - void daxpy( - SizeType n, - FloatType da, - const FloatType* dx, - SizeType ix0, - FloatType* dy) - { - daxpy(n, da, dx, ix0, SizeType(1), dy, SizeType(0), SizeType(1)); - } - - /* Compute the dot product of two vectors. - Adapted from the subroutine ddot - in lbfgs.f. - */ - template - FloatType ddot( - SizeType n, - const FloatType* dx, - SizeType ix0, - SizeType incx, - const FloatType* dy, - SizeType iy0, - SizeType incy) - { - SizeType i, ix, iy, m; - FloatType dtemp(0); - if (n == 0) return FloatType(0); - if (!(incx == 1 && incy == 1)) { - ix = 0; - iy = 0; - for (i = 0; i < n; i++) { - dtemp += dx[ix0+ix] * dy[iy0+iy]; - ix += incx; - iy += incy; - } - return dtemp; - } - m = n % 5; - for (i = 0; i < m; i++) { - dtemp += dx[ix0+i] * dy[iy0+i]; - } - for (; i < n;) { - dtemp += dx[ix0+i] * dy[iy0+i]; i++; - dtemp += dx[ix0+i] * dy[iy0+i]; i++; - dtemp += dx[ix0+i] * dy[iy0+i]; i++; - dtemp += dx[ix0+i] * dy[iy0+i]; i++; - dtemp += dx[ix0+i] * dy[iy0+i]; i++; - } - return dtemp; - } - - template - inline - FloatType ddot( - SizeType n, - const FloatType* dx, - const FloatType* dy) - { - return ddot( - n, dx, SizeType(0), SizeType(1), dy, SizeType(0), SizeType(1)); - } - - } // namespace detail - - //! Interface to the LBFGS %minimizer. - /*! This class solves the unconstrained minimization problem -

-          min f(x),  x = (x1,x2,...,x_n),
-      
- using the limited-memory BFGS method. The routine is - especially effective on problems involving a large number of - variables. In a typical iteration of this method an - approximation Hk to the inverse of the Hessian - is obtained by applying m BFGS updates to a - diagonal matrix Hk0, using information from the - previous m steps. The user specifies the number - m, which determines the amount of storage - required by the routine. The user may also provide the - diagonal matrices Hk0 (parameter diag in the run() - function) if not satisfied with the default choice. The - algorithm is described in "On the limited memory BFGS method for - large scale optimization", by D. Liu and J. Nocedal, Mathematical - Programming B 45 (1989) 503-528. - - The user is required to calculate the function value - f and its gradient g. In order to - allow the user complete control over these computations, - reverse communication is used. The routine must be called - repeatedly under the control of the member functions - requests_f_and_g(), - requests_diag(). - If neither requests_f_and_g() nor requests_diag() is - true the user should check for convergence - (using class traditional_convergence_test or any - other custom test). If the convergence test is negative, - the minimizer may be called again for the next iteration. - - The steplength (stp()) is determined at each iteration - by means of the line search routine mcsrch, which is - a slight modification of the routine CSRCH written - by More' and Thuente. - - The only variables that are machine-dependent are - xtol, - stpmin and - stpmax. - - Fatal errors cause error exceptions to be thrown. - The generic class error is sub-classed (e.g. - class error_line_search_failed) to facilitate - granular %error handling. - - A note on performance: Using Compaq Fortran V5.4 and - Compaq C++ V6.5, the C++ implementation is about 15% slower - than the Fortran implementation. - */ - template - class minimizer - { - public: - //! Default constructor. Some members are not initialized! - minimizer() - : n_(0), m_(0), maxfev_(0), - gtol_(0), xtol_(0), - stpmin_(0), stpmax_(0), - ispt(0), iypt(0) - {} - - //! Constructor. - /*! @param n The number of variables in the minimization problem. - Restriction: n > 0. - - @param m The number of corrections used in the BFGS update. - Values of m less than 3 are not recommended; - large values of m will result in excessive - computing time. 3 <= m <= 7 is - recommended. - Restriction: m > 0. - - @param maxfev Maximum number of function evaluations - per line search. - Termination occurs when the number of evaluations - of the objective function is at least maxfev by - the end of an iteration. - - @param gtol Controls the accuracy of the line search. - If the function and gradient evaluations are inexpensive with - respect to the cost of the iteration (which is sometimes the - case when solving very large problems) it may be advantageous - to set gtol to a small value. A typical small - value is 0.1. - Restriction: gtol should be greater than 1e-4. - - @param xtol An estimate of the machine precision (e.g. 10e-16 - on a SUN station 3/60). The line search routine will - terminate if the relative width of the interval of - uncertainty is less than xtol. - - @param stpmin Specifies the lower bound for the step - in the line search. - The default value is 1e-20. This value need not be modified - unless the exponent is too large for the machine being used, - or unless the problem is extremely badly scaled (in which - case the exponent should be increased). - - @param stpmax specifies the upper bound for the step - in the line search. - The default value is 1e20. This value need not be modified - unless the exponent is too large for the machine being used, - or unless the problem is extremely badly scaled (in which - case the exponent should be increased). - */ - explicit - minimizer( - SizeType n, - SizeType m = 5, - SizeType maxfev = 20, - FloatType gtol = FloatType(0.9), - FloatType xtol = FloatType(1.e-16), - FloatType stpmin = FloatType(1.e-20), - FloatType stpmax = FloatType(1.e20)) - : n_(n), m_(m), maxfev_(maxfev), - gtol_(gtol), xtol_(xtol), - stpmin_(stpmin), stpmax_(stpmax), - iflag_(0), requests_f_and_g_(false), requests_diag_(false), - iter_(0), nfun_(0), stp_(0), - stp1(0), ftol(0.0001), ys(0), point(0), npt(0), - ispt(n+2*m), iypt((n+2*m)+n*m), - info(0), bound(0), nfev(0) - { - if (n_ == 0) { - throw error_improper_input_parameter("n = 0."); - } - if (m_ == 0) { - throw error_improper_input_parameter("m = 0."); - } - if (maxfev_ == 0) { - throw error_improper_input_parameter("maxfev = 0."); - } - if (gtol_ <= FloatType(1.e-4)) { - throw error_improper_input_parameter("gtol <= 1.e-4."); - } - if (xtol_ < FloatType(0)) { - throw error_improper_input_parameter("xtol < 0."); - } - if (stpmin_ < FloatType(0)) { - throw error_improper_input_parameter("stpmin < 0."); - } - if (stpmax_ < stpmin) { - throw error_improper_input_parameter("stpmax < stpmin"); - } - w_.resize(n_*(2*m_+1)+2*m_); - scratch_array_.resize(n_); - } - - //! Number of free parameters (as passed to the constructor). - SizeType n() const { return n_; } - - //! Number of corrections kept (as passed to the constructor). - SizeType m() const { return m_; } - - /*! \brief Maximum number of evaluations of the objective function - per line search (as passed to the constructor). - */ - SizeType maxfev() const { return maxfev_; } - - /*! \brief Control of the accuracy of the line search. - (as passed to the constructor). - */ - FloatType gtol() const { return gtol_; } - - //! Estimate of the machine precision (as passed to the constructor). - FloatType xtol() const { return xtol_; } - - /*! \brief Lower bound for the step in the line search. - (as passed to the constructor). - */ - FloatType stpmin() const { return stpmin_; } - - /*! \brief Upper bound for the step in the line search. - (as passed to the constructor). - */ - FloatType stpmax() const { return stpmax_; } - - //! Status indicator for reverse communication. - /*! true if the run() function returns to request - evaluation of the objective function (f) and - gradients (g) for the current point - (x). To continue the minimization the - run() function is called again with the updated values for - f and g. -

- See also: requests_diag() - */ - bool requests_f_and_g() const { return requests_f_and_g_; } - - //! Status indicator for reverse communication. - /*! true if the run() function returns to request - evaluation of the diagonal matrix (diag) - for the current point (x). - To continue the minimization the run() function is called - again with the updated values for diag. -

- See also: requests_f_and_g() - */ - bool requests_diag() const { return requests_diag_; } - - //! Number of iterations so far. - /*! Note that one iteration may involve multiple evaluations - of the objective function. -

- See also: nfun() - */ - SizeType iter() const { return iter_; } - - //! Total number of evaluations of the objective function so far. - /*! The total number of function evaluations increases by the - number of evaluations required for the line search. The total - is only increased after a successful line search. -

- See also: iter() - */ - SizeType nfun() const { return nfun_; } - - //! Norm of gradient given gradient array of length n(). - FloatType euclidean_norm(const FloatType* a) const { - return std::sqrt(detail::ddot(n_, a, a)); - } - - //! Current stepsize. - FloatType stp() const { return stp_; } - - //! Execution of one step of the minimization. - /*! @param x On initial entry this must be set by the user to - the values of the initial estimate of the solution vector. - - @param f Before initial entry or on re-entry under the - control of requests_f_and_g(), f must be set - by the user to contain the value of the objective function - at the current point x. - - @param g Before initial entry or on re-entry under the - control of requests_f_and_g(), g must be set - by the user to contain the components of the gradient at - the current point x. - - The return value is true if either - requests_f_and_g() or requests_diag() is true. - Otherwise the user should check for convergence - (e.g. using class traditional_convergence_test) and - call the run() function again to continue the minimization. - If the return value is false the user - should not update f, g or - diag (other overload) before calling - the run() function again. - - Note that x is always modified by the run() - function. Depending on the situation it can therefore be - necessary to evaluate the objective function one more time - after the minimization is terminated. - */ - bool run( - FloatType* x, - FloatType f, - const FloatType* g) - { - return generic_run(x, f, g, false, 0); - } - - //! Execution of one step of the minimization. - /*! @param x See other overload. - - @param f See other overload. - - @param g See other overload. - - @param diag On initial entry or on re-entry under the - control of requests_diag(), diag must be set by - the user to contain the values of the diagonal matrix Hk0. - The routine will return at each iteration of the algorithm - with requests_diag() set to true. -

- Restriction: all elements of diag must be - positive. - */ - bool run( - FloatType* x, - FloatType f, - const FloatType* g, - const FloatType* diag) - { - return generic_run(x, f, g, true, diag); - } - - void serialize(std::ostream* out) const { - out->write((const char*)&n_, sizeof(n_)); // sanity check - out->write((const char*)&m_, sizeof(m_)); // sanity check - SizeType fs = sizeof(FloatType); - out->write((const char*)&fs, sizeof(fs)); // sanity check - - mcsrch_instance.serialize(out); - out->write((const char*)&iflag_, sizeof(iflag_)); - out->write((const char*)&requests_f_and_g_, sizeof(requests_f_and_g_)); - out->write((const char*)&requests_diag_, sizeof(requests_diag_)); - out->write((const char*)&iter_, sizeof(iter_)); - out->write((const char*)&nfun_, sizeof(nfun_)); - out->write((const char*)&stp_, sizeof(stp_)); - out->write((const char*)&stp1, sizeof(stp1)); - out->write((const char*)&ftol, sizeof(ftol)); - out->write((const char*)&ys, sizeof(ys)); - out->write((const char*)&point, sizeof(point)); - out->write((const char*)&npt, sizeof(npt)); - out->write((const char*)&info, sizeof(info)); - out->write((const char*)&bound, sizeof(bound)); - out->write((const char*)&nfev, sizeof(nfev)); - out->write((const char*)&w_[0], sizeof(FloatType) * w_.size()); - out->write((const char*)&scratch_array_[0], sizeof(FloatType) * scratch_array_.size()); - } - - void deserialize(std::istream* in) { - SizeType n, m, fs; - in->read((char*)&n, sizeof(n)); - in->read((char*)&m, sizeof(m)); - in->read((char*)&fs, sizeof(fs)); - assert(n == n_); - assert(m == m_); - assert(fs == sizeof(FloatType)); - - mcsrch_instance.deserialize(in); - in->read((char*)&iflag_, sizeof(iflag_)); - in->read((char*)&requests_f_and_g_, sizeof(requests_f_and_g_)); - in->read((char*)&requests_diag_, sizeof(requests_diag_)); - in->read((char*)&iter_, sizeof(iter_)); - in->read((char*)&nfun_, sizeof(nfun_)); - in->read((char*)&stp_, sizeof(stp_)); - in->read((char*)&stp1, sizeof(stp1)); - in->read((char*)&ftol, sizeof(ftol)); - in->read((char*)&ys, sizeof(ys)); - in->read((char*)&point, sizeof(point)); - in->read((char*)&npt, sizeof(npt)); - in->read((char*)&info, sizeof(info)); - in->read((char*)&bound, sizeof(bound)); - in->read((char*)&nfev, sizeof(nfev)); - in->read((char*)&w_[0], sizeof(FloatType) * w_.size()); - in->read((char*)&scratch_array_[0], sizeof(FloatType) * scratch_array_.size()); - } - - protected: - static void throw_diagonal_element_not_positive(SizeType i) { - throw error_improper_input_data( - "The " + error::itoa(i) + ". diagonal element of the" - " inverse Hessian approximation is not positive."); - } - - bool generic_run( - FloatType* x, - FloatType f, - const FloatType* g, - bool diagco, - const FloatType* diag); - - detail::mcsrch mcsrch_instance; - const SizeType n_; - const SizeType m_; - const SizeType maxfev_; - const FloatType gtol_; - const FloatType xtol_; - const FloatType stpmin_; - const FloatType stpmax_; - int iflag_; - bool requests_f_and_g_; - bool requests_diag_; - SizeType iter_; - SizeType nfun_; - FloatType stp_; - FloatType stp1; - FloatType ftol; - FloatType ys; - SizeType point; - SizeType npt; - const SizeType ispt; - const SizeType iypt; - int info; - SizeType bound; - SizeType nfev; - std::vector w_; - std::vector scratch_array_; - }; - - template - bool minimizer::generic_run( - FloatType* x, - FloatType f, - const FloatType* g, - bool diagco, - const FloatType* diag) - { - bool execute_entire_while_loop = false; - if (!(requests_f_and_g_ || requests_diag_)) { - execute_entire_while_loop = true; - } - requests_f_and_g_ = false; - requests_diag_ = false; - FloatType* w = &(*(w_.begin())); - if (iflag_ == 0) { // Initialize. - nfun_ = 1; - if (diagco) { - for (SizeType i = 0; i < n_; i++) { - if (diag[i] <= FloatType(0)) { - throw_diagonal_element_not_positive(i); - } - } - } - else { - std::fill_n(scratch_array_.begin(), n_, FloatType(1)); - diag = &(*(scratch_array_.begin())); - } - for (SizeType i = 0; i < n_; i++) { - w[ispt + i] = -g[i] * diag[i]; - } - FloatType gnorm = std::sqrt(detail::ddot(n_, g, g)); - if (gnorm == FloatType(0)) return false; - stp1 = FloatType(1) / gnorm; - execute_entire_while_loop = true; - } - if (execute_entire_while_loop) { - bound = iter_; - iter_++; - info = 0; - if (iter_ != 1) { - if (iter_ > m_) bound = m_; - ys = detail::ddot( - n_, w, iypt + npt, SizeType(1), w, ispt + npt, SizeType(1)); - if (!diagco) { - FloatType yy = detail::ddot( - n_, w, iypt + npt, SizeType(1), w, iypt + npt, SizeType(1)); - std::fill_n(scratch_array_.begin(), n_, ys / yy); - diag = &(*(scratch_array_.begin())); - } - else { - iflag_ = 2; - requests_diag_ = true; - return true; - } - } - } - if (execute_entire_while_loop || iflag_ == 2) { - if (iter_ != 1) { - if (diag == 0) { - throw error_internal_error(__FILE__, __LINE__); - } - if (diagco) { - for (SizeType i = 0; i < n_; i++) { - if (diag[i] <= FloatType(0)) { - throw_diagonal_element_not_positive(i); - } - } - } - SizeType cp = point; - if (point == 0) cp = m_; - w[n_ + cp -1] = 1 / ys; - SizeType i; - for (i = 0; i < n_; i++) { - w[i] = -g[i]; - } - cp = point; - for (i = 0; i < bound; i++) { - if (cp == 0) cp = m_; - cp--; - FloatType sq = detail::ddot( - n_, w, ispt + cp * n_, SizeType(1), w, SizeType(0), SizeType(1)); - SizeType inmc=n_+m_+cp; - SizeType iycn=iypt+cp*n_; - w[inmc] = w[n_ + cp] * sq; - detail::daxpy(n_, -w[inmc], w, iycn, w); - } - for (i = 0; i < n_; i++) { - w[i] *= diag[i]; - } - for (i = 0; i < bound; i++) { - FloatType yr = detail::ddot( - n_, w, iypt + cp * n_, SizeType(1), w, SizeType(0), SizeType(1)); - FloatType beta = w[n_ + cp] * yr; - SizeType inmc=n_+m_+cp; - beta = w[inmc] - beta; - SizeType iscn=ispt+cp*n_; - detail::daxpy(n_, beta, w, iscn, w); - cp++; - if (cp == m_) cp = 0; - } - std::copy(w, w+n_, w+(ispt + point * n_)); - } - stp_ = FloatType(1); - if (iter_ == 1) stp_ = stp1; - std::copy(g, g+n_, w); - } - mcsrch_instance.run( - gtol_, stpmin_, stpmax_, n_, x, f, g, w, ispt + point * n_, - stp_, ftol, xtol_, maxfev_, info, nfev, &(*(scratch_array_.begin()))); - if (info == -1) { - iflag_ = 1; - requests_f_and_g_ = true; - return true; - } - if (info != 1) { - throw error_internal_error(__FILE__, __LINE__); - } - nfun_ += nfev; - npt = point*n_; - for (SizeType i = 0; i < n_; i++) { - w[ispt + npt + i] = stp_ * w[ispt + npt + i]; - w[iypt + npt + i] = g[i] - w[i]; - } - point++; - if (point == m_) point = 0; - return false; - } - - //! Traditional LBFGS convergence test. - /*! This convergence test is equivalent to the test embedded - in the lbfgs.f Fortran code. The test assumes that - there is a meaningful relation between the Euclidean norm of the - parameter vector x and the norm of the gradient - vector g. Therefore this test should not be used if - this assumption is not correct for a given problem. - */ - template - class traditional_convergence_test - { - public: - //! Default constructor. - traditional_convergence_test() - : n_(0), eps_(0) - {} - - //! Constructor. - /*! @param n The number of variables in the minimization problem. - Restriction: n > 0. - - @param eps Determines the accuracy with which the solution - is to be found. - */ - explicit - traditional_convergence_test( - SizeType n, - FloatType eps = FloatType(1.e-5)) - : n_(n), eps_(eps) - { - if (n_ == 0) { - throw error_improper_input_parameter("n = 0."); - } - if (eps_ < FloatType(0)) { - throw error_improper_input_parameter("eps < 0."); - } - } - - //! Number of free parameters (as passed to the constructor). - SizeType n() const { return n_; } - - /*! \brief Accuracy with which the solution is to be found - (as passed to the constructor). - */ - FloatType eps() const { return eps_; } - - //! Execution of the convergence test for the given parameters. - /*! Returns true if -

-            ||g|| < eps * max(1,||x||),
-          
- where ||.|| denotes the Euclidean norm. - - @param x Current solution vector. - - @param g Components of the gradient at the current - point x. - */ - bool - operator()(const FloatType* x, const FloatType* g) const - { - FloatType xnorm = std::sqrt(detail::ddot(n_, x, x)); - FloatType gnorm = std::sqrt(detail::ddot(n_, g, g)); - if (gnorm <= eps_ * std::max(FloatType(1), xnorm)) return true; - return false; - } - protected: - const SizeType n_; - const FloatType eps_; - }; - -}} // namespace scitbx::lbfgs - -template -std::ostream& operator<<(std::ostream& os, const scitbx::lbfgs::minimizer& min) { - return os << "ITER=" << min.iter() << "\tNFUN=" << min.nfun() << "\tSTP=" << min.stp() << "\tDIAG=" << min.requests_diag() << "\tF&G=" << min.requests_f_and_g(); -} - - -#endif // SCITBX_LBFGS_H diff --git a/training/lbfgs_test.cc b/training/lbfgs_test.cc deleted file mode 100644 index 9678e788..00000000 --- a/training/lbfgs_test.cc +++ /dev/null @@ -1,117 +0,0 @@ -#include -#include -#include -#include -#include "lbfgs.h" -#include "sparse_vector.h" -#include "fdict.h" - -using namespace std; - -double TestOptimizer() { - cerr << "TESTING NON-PERSISTENT OPTIMIZER\n"; - - // f(x,y) = 4x1^2 + x1*x2 + x2^2 + x3^2 + 6x3 + 5 - // df/dx1 = 8*x1 + x2 - // df/dx2 = 2*x2 + x1 - // df/dx3 = 2*x3 + 6 - double x[3]; - double g[3]; - scitbx::lbfgs::minimizer opt(3); - scitbx::lbfgs::traditional_convergence_test converged(3); - x[0] = 8; - x[1] = 8; - x[2] = 8; - double obj = 0; - do { - g[0] = 8 * x[0] + x[1]; - g[1] = 2 * x[1] + x[0]; - g[2] = 2 * x[2] + 6; - obj = 4 * x[0]*x[0] + x[0] * x[1] + x[1]*x[1] + x[2]*x[2] + 6 * x[2] + 5; - opt.run(x, obj, g); - if (!opt.requests_f_and_g()) { - if (converged(x,g)) break; - opt.run(x, obj, g); - } - cerr << x[0] << " " << x[1] << " " << x[2] << endl; - cerr << " obj=" << obj << "\td/dx1=" << g[0] << " d/dx2=" << g[1] << " d/dx3=" << g[2] << endl; - cerr << opt << endl; - } while (true); - return obj; -} - -double TestPersistentOptimizer() { - cerr << "\nTESTING PERSISTENT OPTIMIZER\n"; - // f(x,y) = 4x1^2 + x1*x2 + x2^2 + x3^2 + 6x3 + 5 - // df/dx1 = 8*x1 + x2 - // df/dx2 = 2*x2 + x1 - // df/dx3 = 2*x3 + 6 - double x[3]; - double g[3]; - scitbx::lbfgs::traditional_convergence_test converged(3); - x[0] = 8; - x[1] = 8; - x[2] = 8; - double obj = 0; - string state; - do { - g[0] = 8 * x[0] + x[1]; - g[1] = 2 * x[1] + x[0]; - g[2] = 2 * x[2] + 6; - obj = 4 * x[0]*x[0] + x[0] * x[1] + x[1]*x[1] + x[2]*x[2] + 6 * x[2] + 5; - - { - scitbx::lbfgs::minimizer opt(3); - if (state.size() > 0) { - istringstream is(state, ios::binary); - opt.deserialize(&is); - } - opt.run(x, obj, g); - ostringstream os(ios::binary); opt.serialize(&os); state = os.str(); - } - - cerr << x[0] << " " << x[1] << " " << x[2] << endl; - cerr << " obj=" << obj << "\td/dx1=" << g[0] << " d/dx2=" << g[1] << " d/dx3=" << g[2] << endl; - } while (!converged(x, g)); - return obj; -} - -void TestSparseVector() { - cerr << "Testing SparseVector serialization.\n"; - int f1 = FD::Convert("Feature_1"); - int f2 = FD::Convert("Feature_2"); - FD::Convert("LanguageModel"); - int f4 = FD::Convert("SomeFeature"); - int f5 = FD::Convert("SomeOtherFeature"); - SparseVector g; - g.set_value(f2, log(0.5)); - g.set_value(f4, log(0.125)); - g.set_value(f1, 0); - g.set_value(f5, 23.777); - ostringstream os; - double iobj = 1.5; - B64::Encode(iobj, g, &os); - cerr << iobj << "\t" << g << endl; - string data = os.str(); - cout << data << endl; - SparseVector v; - double obj; - bool decode_b64 = B64::Decode(&obj, &v, &data[0], data.size()); - cerr << obj << "\t" << v << endl; - assert(decode_b64); - assert(obj == iobj); - assert(g.size() == v.size()); -} - -int main() { - double o1 = TestOptimizer(); - double o2 = TestPersistentOptimizer(); - if (fabs(o1 - o2) > 1e-5) { - cerr << "OPTIMIZERS PERFORMED DIFFERENTLY!\n" << o1 << " vs. " << o2 << endl; - return 1; - } - TestSparseVector(); - cerr << "SUCCESS\n"; - return 0; -} - diff --git a/training/lbl_model.cc b/training/lbl_model.cc deleted file mode 100644 index a46ce33c..00000000 --- a/training/lbl_model.cc +++ /dev/null @@ -1,421 +0,0 @@ -#include - -#include "config.h" -#ifndef HAVE_EIGEN - int main() { std::cerr << "Please rebuild with --with-eigen PATH\n"; return 1; } -#else - -#include -#include -#include -#include -#include // memset -#include - -#ifdef HAVE_MPI -#include -#include -#include -namespace mpi = boost::mpi; -#endif -#include -#include -#include -#include - -#include "corpus_tools.h" -#include "optimize.h" -#include "array2d.h" -#include "m.h" -#include "lattice.h" -#include "stringlib.h" -#include "filelib.h" -#include "tdict.h" - -namespace po = boost::program_options; -using namespace std; - -#define kDIMENSIONS 10 -typedef Eigen::Matrix RVector; -typedef Eigen::Matrix RTVector; -typedef Eigen::Matrix TMatrix; -vector r_src, r_trg; - -#if HAVE_MPI -namespace boost { -namespace serialization { - -template -void serialize(Archive & ar, RVector & v, const unsigned int version) { - for (unsigned i = 0; i < kDIMENSIONS; ++i) - ar & v[i]; -} - -} // namespace serialization -} // namespace boost -#endif - -bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input,i",po::value(),"Input file") - ("iterations,I",po::value()->default_value(1000),"Number of iterations of training") - ("regularization_strength,C",po::value()->default_value(0.1),"L2 regularization strength (0 for no regularization)") - ("eta", po::value()->default_value(0.1f), "Eta for SGD") - ("source_embeddings,f", po::value(), "File containing source embeddings (if unset, random vectors will be used)") - ("target_embeddings,e", po::value(), "File containing target embeddings (if unset, random vectors will be used)") - ("random_seed,s", po::value(), "Random seed") - ("diagonal_tension,T", po::value()->default_value(4.0), "How sharp or flat around the diagonal is the alignment distribution (0 = uniform, >0 sharpens)") - ("testset,x", po::value(), "After training completes, compute the log likelihood of this set of sentence pairs under the learned model"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (argc < 2 || conf->count("help")) { - cerr << "Usage " << argv[0] << " [OPTIONS] -i corpus.fr-en\n"; - cerr << dcmdline_options << endl; - return false; - } - return true; -} - -void Normalize(RVector* v) { - double norm = v->norm(); - assert(norm > 0.0f); - *v /= norm; -} - -void Flatten(const TMatrix& m, vector* v) { - unsigned c = 0; - v->resize(kDIMENSIONS * kDIMENSIONS); - for (unsigned i = 0; i < kDIMENSIONS; ++i) - for (unsigned j = 0; j < kDIMENSIONS; ++j) { - assert(boost::math::isfinite(m(i, j))); - (*v)[c++] = m(i,j); - } -} - -void Unflatten(const vector& v, TMatrix* m) { - unsigned c = 0; - for (unsigned i = 0; i < kDIMENSIONS; ++i) - for (unsigned j = 0; j < kDIMENSIONS; ++j) { - assert(boost::math::isfinite(v[c])); - (*m)(i, j) = v[c++]; - } -} - -double ApplyRegularization(const double C, - const vector& weights, - vector* g) { - assert(weights.size() == g->size()); - double reg = 0; - for (size_t i = 0; i < weights.size(); ++i) { - const double& w_i = weights[i]; - double& g_i = (*g)[i]; - reg += C * w_i * w_i; - g_i += 2 * C * w_i; - } - return reg; -} - -void LoadEmbeddings(const string& filename, vector* pv) { - vector& v = *pv; - cerr << "Reading embeddings from " << filename << " ...\n"; - ReadFile rf(filename); - istream& in = *rf.stream(); - string line; - unsigned lc = 0; - while(getline(in, line)) { - ++lc; - size_t cur = line.find(' '); - if (cur == string::npos || cur == 0) { - cerr << "Parse error reading line " << lc << ":\n" << line << endl; - abort(); - } - WordID w = TD::Convert(line.substr(0, cur)); - if (w >= v.size()) continue; - RVector& curv = v[w]; - line[cur] = 0; - size_t start = cur + 1; - cur = start + 1; - size_t c = 0; - while(cur < line.size()) { - if (line[cur] == ' ') { - line[cur] = 0; - curv[c++] = strtod(&line[start], NULL); - start = cur + 1; - cur = start; - if (c == kDIMENSIONS) break; - } - ++cur; - } - if (c < kDIMENSIONS && cur != start) { - if (cur < line.size()) line[cur] = 0; - curv[c++] = strtod(&line[start], NULL); - } - if (c != kDIMENSIONS) { - static bool first = true; - if (first) { - cerr << " read " << c << " dimensions from embedding file, but built with " << kDIMENSIONS << " (filling in with random values)\n"; - first = false; - } - for (; c < kDIMENSIONS; ++c) curv[c] = rand(); - } - if (c == kDIMENSIONS && cur != line.size()) { - static bool first = true; - if (first) { - cerr << " embedding file contains more dimensions than configured with, truncating.\n"; - first = false; - } - } - } -} - -int main(int argc, char** argv) { -#ifdef HAVE_MPI - std::cerr << "**MPI enabled.\n"; - mpi::environment env(argc, argv); - mpi::communicator world; - const int size = world.size(); - const int rank = world.rank(); -#else - std::cerr << "**MPI disabled.\n"; - const int rank = 0; - const int size = 1; -#endif - po::variables_map conf; - if (!InitCommandLine(argc, argv, &conf)) return 1; - const string fname = conf["input"].as(); - const double reg_strength = conf["regularization_strength"].as(); - const bool has_l2 = reg_strength; - assert(reg_strength >= 0.0f); - const int ITERATIONS = conf["iterations"].as(); - const double eta = conf["eta"].as(); - const double diagonal_tension = conf["diagonal_tension"].as(); - bool SGD = false; - if (diagonal_tension < 0.0) { - cerr << "Invalid value for diagonal_tension: must be >= 0\n"; - return 1; - } - string testset; - if (conf.count("testset")) testset = conf["testset"].as(); - - unsigned lc = 0; - vector unnormed_a_i; - bool flag = false; - vector > srcs, trgs; - vector vocab_e; - { - set svocab_e, svocab_f; - CorpusTools::ReadFromFile(fname, &srcs, NULL, &trgs, &svocab_e, rank, size); - copy(svocab_e.begin(), svocab_e.end(), back_inserter(vocab_e)); - } - cerr << "Number of target word types: " << vocab_e.size() << endl; - const double num_examples = lc; - - boost::shared_ptr lbfgs; - if (rank == 0) - lbfgs.reset(new LBFGSOptimizer(kDIMENSIONS * kDIMENSIONS, 100)); - r_trg.resize(TD::NumWords() + 1); - r_src.resize(TD::NumWords() + 1); - vector > trg_pos(TD::NumWords() + 1); - - if (conf.count("random_seed")) { - srand(conf["random_seed"].as()); - } else { - unsigned seed = time(NULL) + rank * 100; - cerr << "Random seed: " << seed << endl; - srand(seed); - } - - TMatrix t = TMatrix::Zero(); - if (rank == 0) { - t = TMatrix::Random() / 50.0; - for (unsigned i = 1; i < r_trg.size(); ++i) { - r_trg[i] = RVector::Random(); - r_src[i] = RVector::Random(); - } - if (conf.count("source_embeddings")) - LoadEmbeddings(conf["source_embeddings"].as(), &r_src); - if (conf.count("target_embeddings")) - LoadEmbeddings(conf["target_embeddings"].as(), &r_trg); - } - - // do optimization - TMatrix g = TMatrix::Zero(); - vector exp_src; - vector z_src; - vector flat_g, flat_t, rcv_grad; - Flatten(t, &flat_t); - bool converged = false; -#if HAVE_MPI - mpi::broadcast(world, &flat_t[0], flat_t.size(), 0); - mpi::broadcast(world, r_trg, 0); - mpi::broadcast(world, r_src, 0); -#endif - cerr << "rank=" << rank << ": " << r_trg[0][4] << endl; - for (int iter = 0; !converged && iter < ITERATIONS; ++iter) { - if (rank == 0) cerr << "ITERATION " << (iter + 1) << endl; - Unflatten(flat_t, &t); - double likelihood = 0; - double denom = 0.0; - lc = 0; - flag = false; - g *= 0; - for (unsigned i = 0; i < srcs.size(); ++i) { - const vector& src = srcs[i]; - const vector& trg = trgs[i]; - ++lc; - if (rank == 0 && lc % 1000 == 0) { cerr << '.'; flag = true; } - if (rank == 0 && lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; } - denom += trg.size(); - - exp_src.clear(); exp_src.resize(src.size(), TMatrix::Zero()); - z_src.clear(); z_src.resize(src.size(), 0.0); - Array2D exp_refs(src.size(), trg.size(), TMatrix::Zero()); - Array2D z_refs(src.size(), trg.size(), 0.0); - for (unsigned j = 0; j < trg.size(); ++j) - trg_pos[trg[j]].insert(j); - - for (unsigned i = 0; i < src.size(); ++i) { - const RVector& r_s = r_src[src[i]]; - const RTVector pred = r_s.transpose() * t; - TMatrix& exp_m = exp_src[i]; - double& z = z_src[i]; - for (unsigned k = 0; k < vocab_e.size(); ++k) { - const WordID v_k = vocab_e[k]; - const RVector& r_t = r_trg[v_k]; - const double dot_prod = pred * r_t; - const double u = exp(dot_prod); - z += u; - const TMatrix v = r_s * r_t.transpose() * u; - exp_m += v; - set& ref_locs = trg_pos[v_k]; - if (!ref_locs.empty()) { - for (set::iterator it = ref_locs.begin(); it != ref_locs.end(); ++it) { - TMatrix& exp_ref_ij = exp_refs(i, *it); - double& z_ref_ij = z_refs(i, *it); - z_ref_ij += u; - exp_ref_ij += v; - } - } - } - } - for (unsigned j = 0; j < trg.size(); ++j) - trg_pos[trg[j]].clear(); - - // model expectations for a single target generation with - // uniform alignment prior - // TODO: when using a non-uniform alignment, m_exp will be - // a function of j (below) - double m_z = 0; - TMatrix m_exp = TMatrix::Zero(); - for (unsigned i = 0; i < src.size(); ++i) { - m_exp += exp_src[i]; - m_z += z_src[i]; - } - m_exp /= m_z; - - Array2D al(src.size(), trg.size(), false); - for (unsigned j = 0; j < trg.size(); ++j) { - double ref_z = 0; - TMatrix ref_exp = TMatrix::Zero(); - int max_i = 0; - double max_s = -9999999; - for (unsigned i = 0; i < src.size(); ++i) { - ref_exp += exp_refs(i, j); - ref_z += z_refs(i, j); - if (log(z_refs(i, j)) > max_s) { - max_s = log(z_refs(i, j)); - max_i = i; - } - // TODO handle alignment prob - } - if (ref_z <= 0) { - cerr << "TRG=" << TD::Convert(trg[j]) << endl; - cerr << " LINE=" << lc << " (RANK=" << rank << "/" << size << ")" << endl; - cerr << " REF_EXP=\n" << ref_exp << endl; - cerr << " M_EXP=\n" << m_exp << endl; - abort(); - } - al(max_i, j) = true; - ref_exp /= ref_z; - g += m_exp - ref_exp; - likelihood += log(ref_z) - log(m_z); - if (SGD) { - t -= g * eta / num_examples; - g *= 0; - } - } - - if (rank == 0 && (iter == (ITERATIONS - 1) || lc < 12)) { cerr << al << endl; } - } - if (flag && rank == 0) { cerr << endl; } - - double obj = 0; - if (!SGD) { - Flatten(g, &flat_g); - obj = -likelihood; -#if HAVE_MPI - rcv_grad.resize(flat_g.size(), 0.0); - mpi::reduce(world, &flat_g[0], flat_g.size(), &rcv_grad[0], plus(), 0); - swap(flat_g, rcv_grad); - rcv_grad.clear(); - - double to = 0; - mpi::reduce(world, obj, to, plus(), 0); - obj = to; - double tlh = 0; - mpi::reduce(world, likelihood, tlh, plus(), 0); - likelihood = tlh; - double td = 0; - mpi::reduce(world, denom, td, plus(), 0); - denom = td; -#endif - } - - if (rank == 0) { - double gn = 0; - for (unsigned i = 0; i < flat_g.size(); ++i) - gn += flat_g[i]*flat_g[i]; - const double base2_likelihood = likelihood / log(2); - cerr << " log_e likelihood: " << likelihood << endl; - cerr << " log_2 likelihood: " << base2_likelihood << endl; - cerr << " cross entropy: " << (-base2_likelihood / denom) << endl; - cerr << " perplexity: " << pow(2.0, -base2_likelihood / denom) << endl; - cerr << " gradient norm: " << sqrt(gn) << endl; - if (!SGD) { - if (has_l2) { - const double r = ApplyRegularization(reg_strength, - flat_t, - &flat_g); - obj += r; - cerr << " regularization: " << r << endl; - } - lbfgs->Optimize(obj, flat_g, &flat_t); - converged = (lbfgs->HasConverged()); - } - } -#ifdef HAVE_MPI - mpi::broadcast(world, &flat_t[0], flat_t.size(), 0); - mpi::broadcast(world, converged, 0); -#endif - } - if (rank == 0) - cerr << "TRANSLATION MATRIX:" << endl << t << endl; - return 0; -} - -#endif - diff --git a/training/minrisk/Makefile.am b/training/minrisk/Makefile.am new file mode 100644 index 00000000..a15e821e --- /dev/null +++ b/training/minrisk/Makefile.am @@ -0,0 +1,6 @@ +bin_PROGRAMS = minrisk_optimize + +minrisk_optimize_SOURCES = minrisk_optimize.cc +minrisk_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/training/liblbfgs/liblbfgs.a -lz + +AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training -I$(top_srcdir)/training/utils diff --git a/training/minrisk/minrisk.pl b/training/minrisk/minrisk.pl new file mode 100755 index 00000000..0f8bacd0 --- /dev/null +++ b/training/minrisk/minrisk.pl @@ -0,0 +1,540 @@ +#!/usr/bin/env perl +use strict; +my @ORIG_ARGV=@ARGV; +use Cwd qw(getcwd); +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment", "$SCRIPT_DIR/../utils"; } + +# Skip local config (used for distributing jobs) if we're running in local-only mode +use LocalConfig; +use Getopt::Long; +use IPC::Open2; +use POSIX ":sys_wait_h"; +my $QSUB_CMD = qsub_args(mert_memory()); +my $default_jobs = env_default_jobs(); + +my $UTILS_DIR="$SCRIPT_DIR/../utils"; +require "$UTILS_DIR/libcall.pl"; + +# Default settings +my $srcFile; +my $refFiles; +my $bin_dir = $SCRIPT_DIR; +die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; +my $FAST_SCORE="$bin_dir/../../mteval/fast_score"; +die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; +my $MAPINPUT = "$bin_dir/minrisk_generate_input.pl"; +my $MAPPER = "$bin_dir/minrisk_optimize"; +my $parallelize = "$UTILS_DIR/parallelize.pl"; +my $libcall = "$UTILS_DIR/libcall.pl"; +my $sentserver = "$UTILS_DIR/sentserver"; +my $sentclient = "$UTILS_DIR/sentclient"; +my $LocalConfig = "$SCRIPT_DIR/../../environment/LocalConfig.pm"; + +my $SCORER = $FAST_SCORE; +die "Can't find $MAPPER" unless -x $MAPPER; +my $cdec = "$bin_dir/../../decoder/cdec"; +die "Can't find decoder in $cdec" unless -x $cdec; +die "Can't find $parallelize" unless -x $parallelize; +die "Can't find $libcall" unless -e $libcall; +my $decoder = $cdec; +my $lines_per_mapper = 30; +my $iteration = 1; +my $best_weights; +my $psi = 1; +my $default_max_iter = 30; +my $max_iterations = $default_max_iter; +my $jobs = $default_jobs; # number of decode nodes +my $pmem = "4g"; +my $disable_clean = 0; +my %seen_weights; +my $help = 0; +my $epsilon = 0.0001; +my $dryrun = 0; +my $last_score = -10000000; +my $metric = "ibm_bleu"; +my $dir; +my $iniFile; +my $weights; +my $use_make = 1; # use make to parallelize +my $useqsub = 0; +my $initial_weights; +my $pass_suffix = ''; +my $cpbin=1; + +# regularization strength +my $tune_regularizer = 0; +my $reg = 500; +my $reg_previous = 5000; +my $dont_accum = 0; + +# Process command-line options +Getopt::Long::Configure("no_auto_abbrev"); +if (GetOptions( + "jobs=i" => \$jobs, + "dont-clean" => \$disable_clean, + "dont-accumulate" => \$dont_accum, + "pass-suffix=s" => \$pass_suffix, + "qsub" => \$useqsub, + "dry-run" => \$dryrun, + "epsilon=s" => \$epsilon, + "help" => \$help, + "weights=s" => \$initial_weights, + "reg=f" => \$reg, + "use-make=i" => \$use_make, + "max-iterations=i" => \$max_iterations, + "pmem=s" => \$pmem, + "cpbin!" => \$cpbin, + "ref-files=s" => \$refFiles, + "metric=s" => \$metric, + "source-file=s" => \$srcFile, + "workdir=s" => \$dir, +) == 0 || @ARGV!=1 || $help) { + print_help(); + exit; +} + +die "--tune-regularizer is no longer supported with --reg-previous and --reg. Please tune manually.\n" if $tune_regularizer; + +if ($useqsub) { + $use_make = 0; + die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); +} + +my @missing_args = (); +if (!defined $srcFile) { push @missing_args, "--source-file"; } +if (!defined $refFiles) { push @missing_args, "--ref-files"; } +if (!defined $initial_weights) { push @missing_args, "--weights"; } +die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args); + +if ($metric =~ /^(combi|ter)$/i) { + $lines_per_mapper = 5; +} + +($iniFile) = @ARGV; + + +sub write_config; +sub enseg; +sub print_help; + +my $nodelist; +my $host =check_output("hostname"); chomp $host; +my $bleu; +my $interval_count = 0; +my $logfile; +my $projected_score; + +# used in sorting scores +my $DIR_FLAG = '-r'; +if ($metric =~ /^ter$|^aer$/i) { + $DIR_FLAG = ''; +} + +my $refs_comma_sep = get_comma_sep_refs('r',$refFiles); + +unless ($dir){ + $dir = "minrisk"; +} +unless ($dir =~ /^\//){ # convert relative path to absolute path + my $basedir = check_output("pwd"); + chomp $basedir; + $dir = "$basedir/$dir"; +} + + +# Initializations and helper functions +srand; + +my @childpids = (); +my @cleanupcmds = (); + +sub cleanup { + print STDERR "Cleanup...\n"; + for my $pid (@childpids){ unchecked_call("kill $pid"); } + for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); } + exit 1; +}; +# Always call cleanup, no matter how we exit +*CORE::GLOBAL::exit = + sub{ cleanup(); }; +$SIG{INT} = "cleanup"; +$SIG{TERM} = "cleanup"; +$SIG{HUP} = "cleanup"; + +my $decoderBase = check_output("basename $decoder"); chomp $decoderBase; +my $newIniFile = "$dir/$decoderBase.ini"; +my $inputFileName = "$dir/input"; +my $user = $ENV{"USER"}; +# process ini file +-e $iniFile || die "Error: could not open $iniFile for reading\n"; +open(INI, $iniFile); + +use File::Basename qw(basename); +#pass bindir, refs to vars holding bin +sub modbin { + local $_; + my $bindir=shift; + check_call("mkdir -p $bindir"); + -d $bindir || die "couldn't make bindir $bindir"; + for (@_) { + my $src=$$_; + $$_="$bindir/".basename($src); + check_call("cp -p $src $$_"); + } +} +sub dirsize { + opendir ISEMPTY,$_[0]; + return scalar(readdir(ISEMPTY))-1; +} +my @allweights; +if ($dryrun){ + write_config(*STDERR); + exit 0; +} else { + if (-e $dir && dirsize($dir)>1 && -e "$dir/hgs" ){ # allow preexisting logfile, binaries, but not dist-pro.pl outputs + die "ERROR: working dir $dir already exists\n\n"; + } else { + -e $dir || mkdir $dir; + mkdir "$dir/hgs"; + modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$parallelize,\$sentserver,\$sentclient,\$libcall) if $cpbin; + mkdir "$dir/scripts"; + my $cmdfile="$dir/rerun-pro.sh"; + open CMD,'>',$cmdfile; + print CMD "cd ",&getcwd,"\n"; +# print CMD &escaped_cmdline,"\n"; #buggy - last arg is quoted. + my $cline=&cmdline."\n"; + print CMD $cline; + close CMD; + print STDERR $cline; + chmod(0755,$cmdfile); + check_call("cp $initial_weights $dir/weights.0"); + die "Can't find weights.0" unless (-e "$dir/weights.0"); + } + write_config(*STDERR); +} + + +# Generate initial files and values +check_call("cp $iniFile $newIniFile"); +$iniFile = $newIniFile; + +my $newsrc = "$dir/dev.input"; +enseg($srcFile, $newsrc); +$srcFile = $newsrc; +my $devSize = 0; +open F, "<$srcFile" or die "Can't read $srcFile: $!"; +while() { $devSize++; } +close F; + +unless($best_weights){ $best_weights = $weights; } +unless($projected_score){ $projected_score = 0.0; } +$seen_weights{$weights} = 1; +my $kbest = "$dir/kbest"; +if ($dont_accum) { + $kbest = ''; +} else { + check_call("mkdir -p $kbest"); + $kbest = "--kbest_repository $kbest"; +} + +my $random_seed = int(time / 1000); +my $lastWeightsFile; +my $lastPScore = 0; +# main optimization loop +while (1){ + print STDERR "\n\nITERATION $iteration\n==========\n"; + + if ($iteration > $max_iterations){ + print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n"; + last; + } + # iteration-specific files + my $runFile="$dir/run.raw.$iteration"; + my $onebestFile="$dir/1best.$iteration"; + my $logdir="$dir/logs.$iteration"; + my $decoderLog="$logdir/decoder.sentserver.log.$iteration"; + my $scorerLog="$logdir/scorer.log.$iteration"; + check_call("mkdir -p $logdir"); + + + #decode + print STDERR "RUNNING DECODER AT "; + print STDERR unchecked_output("date"); + my $im1 = $iteration - 1; + my $weightsFile="$dir/weights.$im1"; + push @allweights, "-w $dir/weights.$im1"; + `rm -f $dir/hgs/*.gz`; + my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; + my $pcmd; + if ($use_make) { + $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --"; + } else { + $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --"; + } + my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; + print STDERR "COMMAND:\n$cmd\n"; + check_bash_call($cmd); + my $num_hgs; + my $num_topbest; + my $retries = 0; + while($retries < 5) { + $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); + $num_topbest = check_output("wc -l < $runFile"); + print STDERR "NUMBER OF HGs: $num_hgs\n"; + print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n"; + if($devSize == $num_hgs && $devSize == $num_topbest) { + last; + } else { + print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n"; + sleep(3); + } + $retries++; + } + die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); + my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric"); + chomp $dec_score; + print STDERR "DECODER SCORE: $dec_score\n"; + + # save space + check_call("gzip -f $runFile"); + check_call("gzip -f $decoderLog"); + + # run optimizer + print STDERR "RUNNING OPTIMIZER AT "; + print STDERR unchecked_output("date"); + print STDERR " - GENERATE TRAINING EXEMPLARS\n"; + my $mergeLog="$logdir/prune-merge.log.$iteration"; + + my $score = 0; + my $icc = 0; + my $inweights="$dir/weights.$im1"; + my $outweights="$dir/weights.$iteration"; + $cmd="$MAPINPUT $dir/hgs > $dir/agenda.$im1"; + print STDERR "COMMAND:\n$cmd\n"; + check_call($cmd); + $cmd="$MAPPER $refs_comma_sep -m $metric -i $dir/agenda.$im1 $kbest -w $inweights > $outweights"; + check_call($cmd); + $lastWeightsFile = $outweights; + $iteration++; + `rm hgs/*.gz`; + print STDERR "\n==========\n"; +} + +print STDERR "\nFINAL WEIGHTS: $lastWeightsFile\n(Use -w with the decoder)\n\n"; + +print STDOUT "$lastWeightsFile\n"; + +exit 0; + +sub get_lines { + my $fn = shift @_; + open FL, "<$fn" or die "Couldn't read $fn: $!"; + my $lc = 0; + while() { $lc++; } + return $lc; +} + +sub get_comma_sep_refs { + my ($r,$p) = @_; + my $o = check_output("echo $p"); + chomp $o; + my @files = split /\s+/, $o; + return "-$r " . join(" -$r ", @files); +} + +sub read_weights_file { + my ($file) = @_; + open F, "<$file" or die "Couldn't read $file: $!"; + my @r = (); + my $pm = -1; + while() { + next if /^#/; + next if /^\s*$/; + chomp; + if (/^(.+)\s+(.+)$/) { + my $m = $1; + my $w = $2; + die "Weights out of order: $m <= $pm" unless $m > $pm; + push @r, $w; + } else { + warn "Unexpected feature name in weight file: $_"; + } + } + close F; + return join ' ', @r; +} + +# subs +sub write_config { + my $fh = shift; + my $cleanup = "yes"; + if ($disable_clean) {$cleanup = "no";} + + print $fh "\n"; + print $fh "DECODER: $decoder\n"; + print $fh "INI FILE: $iniFile\n"; + print $fh "WORKING DIR: $dir\n"; + print $fh "SOURCE (DEV): $srcFile\n"; + print $fh "REFS (DEV): $refFiles\n"; + print $fh "EVAL METRIC: $metric\n"; + print $fh "MAX ITERATIONS: $max_iterations\n"; + print $fh "JOBS: $jobs\n"; + print $fh "HEAD NODE: $host\n"; + print $fh "PMEM (DECODING): $pmem\n"; + print $fh "CLEANUP: $cleanup\n"; +} + +sub update_weights_file { + my ($neww, $rfn, $rpts) = @_; + my @feats = @$rfn; + my @pts = @$rpts; + my $num_feats = scalar @feats; + my $num_pts = scalar @pts; + die "$num_feats (num_feats) != $num_pts (num_pts)" unless $num_feats == $num_pts; + open G, ">$neww" or die; + for (my $i = 0; $i < $num_feats; $i++) { + my $f = $feats[$i]; + my $lambda = $pts[$i]; + print G "$f $lambda\n"; + } + close G; +} + +sub enseg { + my $src = shift; + my $newsrc = shift; + open(SRC, $src); + open(NEWSRC, ">$newsrc"); + my $i=0; + while (my $line=){ + chomp $line; + if ($line =~ /^\s* tags, you must include a zero-based id attribute"; + } + } else { + print NEWSRC "$line\n"; + } + $i++; + } + close SRC; + close NEWSRC; + die "Empty dev set!" if ($i == 0); +} + +sub print_help { + + my $executable = check_output("basename $0"); chomp $executable; + print << "Help"; + +Usage: $executable [options] + + $executable [options] + Runs a complete PRO optimization using the ini file specified. + +Required: + + --ref-files + Dev set ref files. This option takes only a single string argument. + To use multiple files (including file globbing), this argument should + be quoted. + + --source-file + Dev set source file. + + --weights + Initial weights file (use empty file to start from 0) + +General options: + + --help + Print this message and exit. + + --dont-accumulate + Don't accumulate k-best lists from multiple iterations. + + --max-iterations + Maximum number of iterations to run. If not specified, defaults + to $default_max_iter. + + --metric + Metric to optimize. + Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi + + --pass-suffix + If the decoder is doing multi-pass decoding, the pass suffix "2", + "3", etc., is used to control what iteration of weights is set. + + --workdir + Directory for intermediate and output files. If not specified, the + name is derived from the ini filename. Assuming that the ini + filename begins with the decoder name and ends with ini, the default + name of the working directory is inferred from the middle part of + the filename. E.g. an ini file named decoder.foo.ini would have + a default working directory name foo. + +Regularization options: + + --reg + l2 regularization strength [default=500]. The greater this value, + the closer to zero the weights will be. + +Job control options: + + --jobs + Number of decoder processes to run in parallel. [default=$default_jobs] + + --qsub + Use qsub to run jobs in parallel (qsub must be configured in + environment/LocalEnvironment.pm) + + --pmem + Amount of physical memory requested for parallel decoding jobs + (used with qsub requests only) + +Help +} + +sub convert { + my ($str) = @_; + my @ps = split /;/, $str; + my %dict = (); + for my $p (@ps) { + my ($k, $v) = split /=/, $p; + $dict{$k} = $v; + } + return %dict; +} + + +sub cmdline { + return join ' ',($0,@ORIG_ARGV); +} + +#buggy: last arg gets quoted sometimes? +my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; +my $shell_escape_in_quote=qr{[\\"\$`!]}; + +sub escape_shell { + my ($arg)=@_; + return undef unless defined $arg; + if ($arg =~ /$is_shell_special/) { + $arg =~ s/($shell_escape_in_quote)/\\$1/g; + return "\"$arg\""; + } + return $arg; +} + +sub escaped_shell_args { + return map {local $_=$_;chomp;escape_shell($_)} @_; +} + +sub escaped_shell_args_str { + return join ' ',&escaped_shell_args(@_); +} + +sub escaped_cmdline { + return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); +} diff --git a/training/minrisk/minrisk_generate_input.pl b/training/minrisk/minrisk_generate_input.pl new file mode 100755 index 00000000..b30fc4fd --- /dev/null +++ b/training/minrisk/minrisk_generate_input.pl @@ -0,0 +1,18 @@ +#!/usr/bin/perl -w +use strict; + +die "Usage: $0 HG_DIR\n" unless scalar @ARGV == 1; +my $d = shift @ARGV; +die "Can't find directory $d" unless -d $d; + +opendir(DIR, $d) or die "Can't read $d: $!"; +my @hgs = grep { /\.gz$/ } readdir(DIR); +closedir DIR; + +for my $hg (@hgs) { + my $file = $hg; + my $id = $hg; + $id =~ s/(\.json)?\.gz//; + print "$d/$file $id\n"; +} + diff --git a/training/minrisk/minrisk_optimize.cc b/training/minrisk/minrisk_optimize.cc new file mode 100644 index 00000000..da8b5260 --- /dev/null +++ b/training/minrisk/minrisk_optimize.cc @@ -0,0 +1,197 @@ +#include +#include +#include +#include + +#include +#include + +#include "liblbfgs/lbfgs++.h" +#include "filelib.h" +#include "stringlib.h" +#include "weights.h" +#include "hg_io.h" +#include "kbest.h" +#include "viterbi.h" +#include "ns.h" +#include "ns_docscorer.h" +#include "candidate_set.h" +#include "risk.h" +#include "entropy.h" + +using namespace std; +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("reference,r",po::value >(), "[REQD] Reference translation (tokenized text)") + ("weights,w",po::value(), "[REQD] Weights files from current iterations") + ("input,i",po::value()->default_value("-"), "Input file to map (- is STDIN)") + ("evaluation_metric,m",po::value()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)") + ("temperature,T",po::value()->default_value(0.0), "Temperature parameter for objective (>0 increases the entropy)") + ("l1_strength,C",po::value()->default_value(0.0), "L1 regularization strength") + ("memory_buffers,M",po::value()->default_value(20), "Memory buffers used in LBFGS") + ("kbest_repository,R",po::value(), "Accumulate k-best lists from previous iterations (parameter is path to repository)") + ("kbest_size,k",po::value()->default_value(500u), "Top k-hypotheses to extract") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = false; + if (!conf->count("reference")) { + cerr << "Please specify one or more references using -r \n"; + flag = true; + } + if (!conf->count("weights")) { + cerr << "Please specify weights using -w \n"; + flag = true; + } + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +EvaluationMetric* metric = NULL; + +struct RiskObjective { + explicit RiskObjective(const vector& tr, const double temp) : training(tr), T(temp) {} + double operator()(const vector& x, double* g) const { + fill(g, g + x.size(), 0.0); + double obj = 0; + double h = 0; + for (unsigned i = 0; i < training.size(); ++i) { + training::CandidateSetRisk risk(training[i], *metric); + training::CandidateSetEntropy entropy(training[i]); + SparseVector tg, hg; + double r = risk(x, &tg); + double hh = entropy(x, &hg); + h += hh; + obj += r; + for (SparseVector::iterator it = tg.begin(); it != tg.end(); ++it) + g[it->first] += it->second; + if (T) { + for (SparseVector::iterator it = hg.begin(); it != hg.end(); ++it) + g[it->first] += T * it->second; + } + } + cerr << (1-(obj / training.size())) << " H=" << h << endl; + return obj - T * h; + } + const vector& training; + const double T; // temperature for entropy regularization +}; + +double LearnParameters(const vector& training, + const double temp, // > 0 increases the entropy, < 0 decreases the entropy + const double C1, + const unsigned memory_buffers, + vector* px) { + RiskObjective obj(training, temp); + LBFGS lbfgs(px, obj, memory_buffers, C1); + lbfgs.MinimizeFunction(); + return 0; +} + +#if 0 +struct FooLoss { + double operator()(const vector& x, double* g) const { + fill(g, g + x.size(), 0.0); + training::CandidateSet cs; + training::CandidateSetEntropy cse(cs); + cs.cs.resize(3); + cs.cs[0].fmap.set_value(FD::Convert("F1"), -1.0); + cs.cs[1].fmap.set_value(FD::Convert("F2"), 1.0); + cs.cs[2].fmap.set_value(FD::Convert("F1"), 2.0); + cs.cs[2].fmap.set_value(FD::Convert("F2"), 0.5); + SparseVector xx; + double h = cse(x, &xx); + cerr << cse(x, &xx) << endl; cerr << "G: " << xx << endl; + for (SparseVector::iterator i = xx.begin(); i != xx.end(); ++i) + g[i->first] += i->second; + return -h; + } +}; +#endif + +int main(int argc, char** argv) { +#if 0 + training::CandidateSet cs; + training::CandidateSetEntropy cse(cs); + cs.cs.resize(3); + cs.cs[0].fmap.set_value(FD::Convert("F1"), -1.0); + cs.cs[1].fmap.set_value(FD::Convert("F2"), 1.0); + cs.cs[2].fmap.set_value(FD::Convert("F1"), 2.0); + cs.cs[2].fmap.set_value(FD::Convert("F2"), 0.5); + FooLoss foo; + vector ww(FD::NumFeats()); ww[FD::Convert("F1")] = 1.0; + LBFGS lbfgs(&ww, foo, 100, 0.0); + lbfgs.MinimizeFunction(); + return 1; +#endif + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + const string evaluation_metric = conf["evaluation_metric"].as(); + + metric = EvaluationMetric::Instance(evaluation_metric); + DocumentScorer ds(metric, conf["reference"].as >()); + cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl; + + Hypergraph hg; + string last_file; + ReadFile in_read(conf["input"].as()); + string kbest_repo; + if (conf.count("kbest_repository")) { + kbest_repo = conf["kbest_repository"].as(); + MkDirP(kbest_repo); + } + istream &in=*in_read.stream(); + const unsigned kbest_size = conf["kbest_size"].as(); + vector weights; + const string weightsf = conf["weights"].as(); + Weights::InitFromFile(weightsf, &weights); + double t = 0; + for (unsigned i = 0; i < weights.size(); ++i) + t += weights[i] * weights[i]; + if (t > 0) { + for (unsigned i = 0; i < weights.size(); ++i) + weights[i] /= sqrt(t); + } + string line, file; + vector kis; + cerr << "Loading hypergraphs...\n"; + while(getline(in, line)) { + istringstream is(line); + int sent_id; + kis.resize(kis.size() + 1); + training::CandidateSet& curkbest = kis.back(); + string kbest_file; + if (kbest_repo.size()) { + ostringstream os; + os << kbest_repo << "/kbest." << sent_id << ".txt.gz"; + kbest_file = os.str(); + if (FileExists(kbest_file)) + curkbest.ReadFromFile(kbest_file); + } + is >> file >> sent_id; + ReadFile rf(file); + if (kis.size() % 5 == 0) { cerr << '.'; } + if (kis.size() % 200 == 0) { cerr << " [" << kis.size() << "]\n"; } + HypergraphIO::ReadFromJSON(rf.stream(), &hg); + hg.Reweight(weights); + curkbest.AddKBestCandidates(hg, kbest_size, ds[sent_id]); + if (kbest_file.size()) + curkbest.WriteToFile(kbest_file); + } + cerr << "\nHypergraphs loaded.\n"; + weights.resize(FD::NumFeats()); + + double c1 = conf["l1_strength"].as(); + double temp = conf["temperature"].as(); + unsigned m = conf["memory_buffers"].as(); + LearnParameters(kis, temp, c1, m, &weights); + Weights::WriteToFile("-", weights); + return 0; +} + diff --git a/training/mira/Makefile.am b/training/mira/Makefile.am new file mode 100644 index 00000000..ae609ede --- /dev/null +++ b/training/mira/Makefile.am @@ -0,0 +1,6 @@ +bin_PROGRAMS = kbest_mira + +kbest_mira_SOURCES = kbest_mira.cc +kbest_mira_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz + +AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/mira/kbest_mira.cc b/training/mira/kbest_mira.cc new file mode 100644 index 00000000..8b7993dd --- /dev/null +++ b/training/mira/kbest_mira.cc @@ -0,0 +1,309 @@ +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "hg_sampler.h" +#include "sentence_metadata.h" +#include "scorer.h" +#include "verbose.h" +#include "viterbi.h" +#include "hg.h" +#include "prob.h" +#include "kbest.h" +#include "ff_register.h" +#include "decoder.h" +#include "filelib.h" +#include "fdict.h" +#include "weights.h" +#include "sparse_vector.h" +#include "sampler.h" + +using namespace std; +namespace po = boost::program_options; + +bool invert_score; +std::tr1::shared_ptr rng; + +void RandomPermutation(int len, vector* p_ids) { + vector& ids = *p_ids; + ids.resize(len); + for (int i = 0; i < len; ++i) ids[i] = i; + for (int i = len; i > 0; --i) { + int j = rng->next() * i; + if (j == i) i--; + swap(ids[i-1], ids[j]); + } +} + +bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("input_weights,w",po::value(),"Input feature weights file") + ("source,i",po::value(),"Source file for development set") + ("passes,p", po::value()->default_value(15), "Number of passes through the training data") + ("reference,r",po::value >(), "[REQD] Reference translation(s) (tokenized text file)") + ("mt_metric,m",po::value()->default_value("ibm_bleu"), "Scoring metric (ibm_bleu, nist_bleu, koehn_bleu, ter, combi)") + ("max_step_size,C", po::value()->default_value(0.01), "regularization strength (C)") + ("mt_metric_scale,s", po::value()->default_value(1.0), "Amount to scale MT loss function by") + ("k_best_size,k", po::value()->default_value(250), "Size of hypothesis list to search for oracles") + ("sample_forest,f", "Instead of a k-best list, sample k hypotheses from the decoder's forest") + ("sample_forest_unit_weight_vector,x", "Before sampling (must use -f option), rescale the weight vector used so it has unit length; this may improve the quality of the samples") + ("random_seed,S", po::value(), "Random seed (if not specified, /dev/random will be used)") + ("decoder_config,c",po::value(),"Decoder configuration file"); + po::options_description clo("Command line options"); + clo.add_options() + ("config", po::value(), "Configuration file") + ("help,h", "Print this help message and exit"); + po::options_description dconfig_options, dcmdline_options; + dconfig_options.add(opts); + dcmdline_options.add(opts).add(clo); + + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (conf->count("config")) { + ifstream config((*conf)["config"].as().c_str()); + po::store(po::parse_config_file(config, dconfig_options), *conf); + } + po::notify(*conf); + + if (conf->count("help") || !conf->count("input_weights") || !conf->count("source") || !conf->count("decoder_config") || !conf->count("reference")) { + cerr << dcmdline_options << endl; + return false; + } + return true; +} + +static const double kMINUS_EPSILON = -1e-6; + +struct HypothesisInfo { + SparseVector features; + double mt_metric; +}; + +struct GoodBadOracle { + std::tr1::shared_ptr good; + std::tr1::shared_ptr bad; +}; + +struct TrainingObserver : public DecoderObserver { + TrainingObserver(const int k, const DocScorer& d, bool sf, vector* o) : ds(d), oracles(*o), kbest_size(k), sample_forest(sf) {} + const DocScorer& ds; + vector& oracles; + std::tr1::shared_ptr cur_best; + const int kbest_size; + const bool sample_forest; + + const HypothesisInfo& GetCurrentBestHypothesis() const { + return *cur_best; + } + + virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { + UpdateOracles(smeta.GetSentenceID(), *hg); + } + + std::tr1::shared_ptr MakeHypothesisInfo(const SparseVector& feats, const double score) { + std::tr1::shared_ptr h(new HypothesisInfo); + h->features = feats; + h->mt_metric = score; + return h; + } + + void UpdateOracles(int sent_id, const Hypergraph& forest) { + std::tr1::shared_ptr& cur_good = oracles[sent_id].good; + std::tr1::shared_ptr& cur_bad = oracles[sent_id].bad; + cur_bad.reset(); // TODO get rid of?? + + if (sample_forest) { + vector cur_prediction; + ViterbiESentence(forest, &cur_prediction); + float sentscore = ds[sent_id]->ScoreCandidate(cur_prediction)->ComputeScore(); + cur_best = MakeHypothesisInfo(ViterbiFeatures(forest), sentscore); + + vector samples; + HypergraphSampler::sample_hypotheses(forest, kbest_size, &*rng, &samples); + for (unsigned i = 0; i < samples.size(); ++i) { + sentscore = ds[sent_id]->ScoreCandidate(samples[i].words)->ComputeScore(); + if (invert_score) sentscore *= -1.0; + if (!cur_good || sentscore > cur_good->mt_metric) + cur_good = MakeHypothesisInfo(samples[i].fmap, sentscore); + if (!cur_bad || sentscore < cur_bad->mt_metric) + cur_bad = MakeHypothesisInfo(samples[i].fmap, sentscore); + } + } else { + KBest::KBestDerivations, ESentenceTraversal> kbest(forest, kbest_size); + for (int i = 0; i < kbest_size; ++i) { + const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = + kbest.LazyKthBest(forest.nodes_.size() - 1, i); + if (!d) break; + float sentscore = ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore(); + if (invert_score) sentscore *= -1.0; + // cerr << TD::GetString(d->yield) << " ||| " << d->score << " ||| " << sentscore << endl; + if (i == 0) + cur_best = MakeHypothesisInfo(d->feature_values, sentscore); + if (!cur_good || sentscore > cur_good->mt_metric) + cur_good = MakeHypothesisInfo(d->feature_values, sentscore); + if (!cur_bad || sentscore < cur_bad->mt_metric) + cur_bad = MakeHypothesisInfo(d->feature_values, sentscore); + } + //cerr << "GOOD: " << cur_good->mt_metric << endl; + //cerr << " CUR: " << cur_best->mt_metric << endl; + //cerr << " BAD: " << cur_bad->mt_metric << endl; + } + } +}; + +void ReadTrainingCorpus(const string& fname, vector* c) { + ReadFile rf(fname); + istream& in = *rf.stream(); + string line; + while(in) { + getline(in, line); + if (!in) break; + c->push_back(line); + } +} + +bool ApproxEqual(double a, double b) { + if (a == b) return true; + return (fabs(a-b)/fabs(b)) < 0.000001; +} + +int main(int argc, char** argv) { + register_feature_functions(); + SetSilent(true); // turn off verbose decoder output + + po::variables_map conf; + if (!InitCommandLine(argc, argv, &conf)) return 1; + + if (conf.count("random_seed")) + rng.reset(new MT19937(conf["random_seed"].as())); + else + rng.reset(new MT19937); + const bool sample_forest = conf.count("sample_forest") > 0; + const bool sample_forest_unit_weight_vector = conf.count("sample_forest_unit_weight_vector") > 0; + if (sample_forest_unit_weight_vector && !sample_forest) { + cerr << "Cannot --sample_forest_unit_weight_vector without --sample_forest" << endl; + return 1; + } + vector corpus; + ReadTrainingCorpus(conf["source"].as(), &corpus); + const string metric_name = conf["mt_metric"].as(); + ScoreType type = ScoreTypeFromString(metric_name); + if (type == TER) { + invert_score = true; + } else { + invert_score = false; + } + DocScorer ds(type, conf["reference"].as >(), ""); + cerr << "Loaded " << ds.size() << " references for scoring with " << metric_name << endl; + if (ds.size() != corpus.size()) { + cerr << "Mismatched number of references (" << ds.size() << ") and sources (" << corpus.size() << ")\n"; + return 1; + } + + ReadFile ini_rf(conf["decoder_config"].as()); + Decoder decoder(ini_rf.stream()); + + // load initial weights + vector& dense_weights = decoder.CurrentWeightVector(); + SparseVector lambdas; + Weights::InitFromFile(conf["input_weights"].as(), &dense_weights); + Weights::InitSparseVector(dense_weights, &lambdas); + + const double max_step_size = conf["max_step_size"].as(); + const double mt_metric_scale = conf["mt_metric_scale"].as(); + + assert(corpus.size() > 0); + vector oracles(corpus.size()); + + TrainingObserver observer(conf["k_best_size"].as(), ds, sample_forest, &oracles); + int cur_sent = 0; + int lcount = 0; + int normalizer = 0; + double tot_loss = 0; + int dots = 0; + int cur_pass = 0; + SparseVector tot; + tot += lambdas; // initial weights + normalizer++; // count for initial weights + int max_iteration = conf["passes"].as() * corpus.size(); + string msg = "# MIRA tuned weights"; + string msga = "# MIRA tuned weights AVERAGED"; + vector order; + RandomPermutation(corpus.size(), &order); + while (lcount <= max_iteration) { + lambdas.init_vector(&dense_weights); + if ((cur_sent * 40 / corpus.size()) > dots) { ++dots; cerr << '.'; } + if (corpus.size() == cur_sent) { + cerr << " [AVG METRIC LAST PASS=" << (tot_loss / corpus.size()) << "]\n"; + Weights::ShowLargestFeatures(dense_weights); + cur_sent = 0; + tot_loss = 0; + dots = 0; + ostringstream os; + os << "weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << ".gz"; + SparseVector x = tot; + x /= normalizer; + ostringstream sa; + sa << "weights.mira-pass" << (cur_pass < 10 ? "0" : "") << cur_pass << "-avg.gz"; + x.init_vector(&dense_weights); + Weights::WriteToFile(os.str(), dense_weights, true, &msg); + ++cur_pass; + RandomPermutation(corpus.size(), &order); + } + if (cur_sent == 0) { + cerr << "PASS " << (lcount / corpus.size() + 1) << endl; + } + decoder.SetId(order[cur_sent]); + double sc = 1.0; + if (sample_forest_unit_weight_vector) { + sc = lambdas.l2norm(); + if (sc > 0) { + for (unsigned i = 0; i < dense_weights.size(); ++i) + dense_weights[i] /= sc; + } + } + decoder.Decode(corpus[order[cur_sent]], &observer); // update oracles + if (sc && sc != 1.0) { + for (unsigned i = 0; i < dense_weights.size(); ++i) + dense_weights[i] *= sc; + } + const HypothesisInfo& cur_hyp = observer.GetCurrentBestHypothesis(); + const HypothesisInfo& cur_good = *oracles[order[cur_sent]].good; + const HypothesisInfo& cur_bad = *oracles[order[cur_sent]].bad; + tot_loss += cur_hyp.mt_metric; + if (!ApproxEqual(cur_hyp.mt_metric, cur_good.mt_metric)) { + const double loss = cur_bad.features.dot(dense_weights) - cur_good.features.dot(dense_weights) + + mt_metric_scale * (cur_good.mt_metric - cur_bad.mt_metric); + //cerr << "LOSS: " << loss << endl; + if (loss > 0.0) { + SparseVector diff = cur_good.features; + diff -= cur_bad.features; + double step_size = loss / diff.l2norm_sq(); + //cerr << loss << " " << step_size << " " << diff << endl; + if (step_size > max_step_size) step_size = max_step_size; + lambdas += (cur_good.features * step_size); + lambdas -= (cur_bad.features * step_size); + //cerr << "L: " << lambdas << endl; + } + } + tot += lambdas; + ++normalizer; + ++lcount; + ++cur_sent; + } + cerr << endl; + Weights::WriteToFile("weights.mira-final.gz", dense_weights, true, &msg); + tot /= normalizer; + tot.init_vector(dense_weights); + msg = "# MIRA tuned weights (averaged vector)"; + Weights::WriteToFile("weights.mira-final-avg.gz", dense_weights, true, &msg); + cerr << "Optimization complete.\nAVERAGED WEIGHTS: weights.mira-final-avg.gz\n"; + return 0; +} + diff --git a/training/mpi_batch_optimize.cc b/training/mpi_batch_optimize.cc deleted file mode 100644 index 2eff07e4..00000000 --- a/training/mpi_batch_optimize.cc +++ /dev/null @@ -1,372 +0,0 @@ -#include -#include -#include -#include -#include - -#include "config.h" -#ifdef HAVE_MPI -#include -#include -namespace mpi = boost::mpi; -#endif - -#include -#include -#include - -#include "sentence_metadata.h" -#include "cllh_observer.h" -#include "verbose.h" -#include "hg.h" -#include "prob.h" -#include "inside_outside.h" -#include "ff_register.h" -#include "decoder.h" -#include "filelib.h" -#include "stringlib.h" -#include "optimize.h" -#include "fdict.h" -#include "weights.h" -#include "sparse_vector.h" - -using namespace std; -namespace po = boost::program_options; - -bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input_weights,w",po::value(),"Input feature weights file") - ("training_data,t",po::value(),"Training data") - ("test_data,T",po::value(),"(optional) test data") - ("decoder_config,c",po::value(),"Decoder configuration file") - ("output_weights,o",po::value()->default_value("-"),"Output feature weights file") - ("optimization_method,m", po::value()->default_value("lbfgs"), "Optimization method (sgd, lbfgs, rprop)") - ("correction_buffers,M", po::value()->default_value(10), "Number of gradients for LBFGS to maintain in memory") - ("gaussian_prior,p","Use a Gaussian prior on the weights") - ("sigma_squared", po::value()->default_value(1.0), "Sigma squared term for spherical Gaussian prior") - ("means,u", po::value(), "(optional) file containing the means for Gaussian prior"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || !conf->count("input_weights") || !(conf->count("training_data")) || !conf->count("decoder_config")) { - cerr << dcmdline_options << endl; - return false; - } - return true; -} - -void ReadTrainingCorpus(const string& fname, int rank, int size, vector* c) { - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - int lc = 0; - while(in) { - getline(in, line); - if (!in) break; - if (lc % size == rank) c->push_back(line); - ++lc; - } -} - -static const double kMINUS_EPSILON = -1e-6; - -struct TrainingObserver : public DecoderObserver { - void Reset() { - acc_grad.clear(); - acc_obj = 0; - total_complete = 0; - trg_words = 0; - } - - void SetLocalGradientAndObjective(vector* g, double* o) const { - *o = acc_obj; - for (SparseVector::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it) - (*g)[it->first] = it->second.as_float(); - } - - virtual void NotifyDecodingStart(const SentenceMetadata& smeta) { - cur_model_exp.clear(); - cur_obj = 0; - state = 1; - } - - // compute model expectations, denominator of objective - virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { - assert(state == 1); - state = 2; - const prob_t z = InsideOutside, - EdgeFeaturesAndProbWeightFunction>(*hg, &cur_model_exp); - cur_obj = log(z); - cur_model_exp /= z; - } - - // compute "empirical" expectations, numerator of objective - virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) { - assert(state == 2); - state = 3; - SparseVector ref_exp; - const prob_t ref_z = InsideOutside, - EdgeFeaturesAndProbWeightFunction>(*hg, &ref_exp); - ref_exp /= ref_z; - - double log_ref_z; -#if 0 - if (crf_uniform_empirical) { - log_ref_z = ref_exp.dot(feature_weights); - } else { - log_ref_z = log(ref_z); - } -#else - log_ref_z = log(ref_z); -#endif - - // rounding errors means that <0 is too strict - if ((cur_obj - log_ref_z) < kMINUS_EPSILON) { - cerr << "DIFF. ERR! log_model_z < log_ref_z: " << cur_obj << " " << log_ref_z << endl; - exit(1); - } - assert(!std::isnan(log_ref_z)); - ref_exp -= cur_model_exp; - acc_grad -= ref_exp; - acc_obj += (cur_obj - log_ref_z); - trg_words += smeta.GetReference().size(); - } - - virtual void NotifyDecodingComplete(const SentenceMetadata& smeta) { - if (state == 3) { - ++total_complete; - } else { - } - } - - int total_complete; - SparseVector cur_model_exp; - SparseVector acc_grad; - double acc_obj; - double cur_obj; - unsigned trg_words; - int state; -}; - -void ReadConfig(const string& ini, vector* out) { - ReadFile rf(ini); - istream& in = *rf.stream(); - while(in) { - string line; - getline(in, line); - if (!in) continue; - out->push_back(line); - } -} - -void StoreConfig(const vector& cfg, istringstream* o) { - ostringstream os; - for (int i = 0; i < cfg.size(); ++i) { os << cfg[i] << endl; } - o->str(os.str()); -} - -template -struct VectorPlus : public binary_function, vector, vector > { - vector operator()(const vector& a, const vector& b) const { - assert(a.size() == b.size()); - vector v(a.size()); - transform(a.begin(), a.end(), b.begin(), v.begin(), plus()); - return v; - } -}; - -int main(int argc, char** argv) { -#ifdef HAVE_MPI - mpi::environment env(argc, argv); - mpi::communicator world; - const int size = world.size(); - const int rank = world.rank(); -#else - const int size = 1; - const int rank = 0; -#endif - SetSilent(true); // turn off verbose decoder output - register_feature_functions(); - - po::variables_map conf; - if (!InitCommandLine(argc, argv, &conf)) return 1; - - // load cdec.ini and set up decoder - vector cdec_ini; - ReadConfig(conf["decoder_config"].as(), &cdec_ini); - istringstream ini; - StoreConfig(cdec_ini, &ini); - if (rank == 0) cerr << "Loading grammar...\n"; - Decoder* decoder = new Decoder(&ini); - if (decoder->GetConf()["input"].as() != "-") { - cerr << "cdec.ini must not set an input file\n"; - return 1; - } - if (rank == 0) cerr << "Done loading grammar!\n"; - - // load initial weights - if (rank == 0) { cerr << "Loading weights...\n"; } - vector& lambdas = decoder->CurrentWeightVector(); - Weights::InitFromFile(conf["input_weights"].as(), &lambdas); - if (rank == 0) { cerr << "Done loading weights.\n"; } - - // freeze feature set (should be optional?) - const bool freeze_feature_set = true; - if (freeze_feature_set) FD::Freeze(); - - const int num_feats = FD::NumFeats(); - if (rank == 0) cerr << "Number of features: " << num_feats << endl; - lambdas.resize(num_feats); - - const bool gaussian_prior = conf.count("gaussian_prior"); - vector means(num_feats, 0); - if (conf.count("means")) { - if (!gaussian_prior) { - cerr << "Don't use --means without --gaussian_prior!\n"; - exit(1); - } - Weights::InitFromFile(conf["means"].as(), &means); - } - boost::shared_ptr o; - if (rank == 0) { - const string omethod = conf["optimization_method"].as(); - if (omethod == "rprop") - o.reset(new RPropOptimizer(num_feats)); // TODO add configuration - else - o.reset(new LBFGSOptimizer(num_feats, conf["correction_buffers"].as())); - cerr << "Optimizer: " << o->Name() << endl; - } - double objective = 0; - vector gradient(num_feats, 0.0); - vector rcv_grad; - rcv_grad.clear(); - bool converged = false; - - vector corpus, test_corpus; - ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus); - assert(corpus.size() > 0); - if (conf.count("test_data")) - ReadTrainingCorpus(conf["test_data"].as(), rank, size, &test_corpus); - - TrainingObserver observer; - ConditionalLikelihoodObserver cllh_observer; - while (!converged) { - observer.Reset(); - cllh_observer.Reset(); -#ifdef HAVE_MPI - mpi::timer timer; - world.barrier(); -#endif - if (rank == 0) { - cerr << "Starting decoding... (~" << corpus.size() << " sentences / proc)\n"; - cerr << " Testset size: " << test_corpus.size() << " sentences / proc)\n"; - } - for (int i = 0; i < corpus.size(); ++i) - decoder->Decode(corpus[i], &observer); - cerr << " process " << rank << '/' << size << " done\n"; - fill(gradient.begin(), gradient.end(), 0); - observer.SetLocalGradientAndObjective(&gradient, &objective); - - unsigned total_words = 0; -#ifdef HAVE_MPI - double to = 0; - rcv_grad.resize(num_feats, 0.0); - mpi::reduce(world, &gradient[0], gradient.size(), &rcv_grad[0], plus(), 0); - swap(gradient, rcv_grad); - rcv_grad.clear(); - - reduce(world, observer.trg_words, total_words, std::plus(), 0); - mpi::reduce(world, objective, to, plus(), 0); - objective = to; -#else - total_words = observer.trg_words; -#endif - if (rank == 0) - cerr << "TRAINING CORPUS: ln p(f|e)=" << objective << "\t log_2 p(f|e) = " << (objective/log(2)) << "\t cond. entropy = " << (objective/log(2) / total_words) << "\t ppl = " << pow(2, (objective/log(2) / total_words)) << endl; - - for (int i = 0; i < test_corpus.size(); ++i) - decoder->Decode(test_corpus[i], &cllh_observer); - - double test_objective = 0; - unsigned test_total_words = 0; -#ifdef HAVE_MPI - reduce(world, cllh_observer.acc_obj, test_objective, std::plus(), 0); - reduce(world, cllh_observer.trg_words, test_total_words, std::plus(), 0); -#else - test_objective = cllh_observer.acc_obj; - test_total_words = cllh_observer.trg_words; -#endif - - if (rank == 0) { // run optimizer only on rank=0 node - if (test_corpus.size()) - cerr << " TEST CORPUS: ln p(f|e)=" << test_objective << "\t log_2 p(f|e) = " << (test_objective/log(2)) << "\t cond. entropy = " << (test_objective/log(2) / test_total_words) << "\t ppl = " << pow(2, (test_objective/log(2) / test_total_words)) << endl; - if (gaussian_prior) { - const double sigsq = conf["sigma_squared"].as(); - double norm = 0; - for (int k = 1; k < lambdas.size(); ++k) { - const double& lambda_k = lambdas[k]; - if (lambda_k) { - const double param = (lambda_k - means[k]); - norm += param * param; - gradient[k] += param / sigsq; - } - } - const double reg = norm / (2.0 * sigsq); - cerr << "REGULARIZATION TERM: " << reg << endl; - objective += reg; - } - cerr << "EVALUATION #" << o->EvaluationCount() << " OBJECTIVE: " << objective << endl; - double gnorm = 0; - for (int i = 0; i < gradient.size(); ++i) - gnorm += gradient[i] * gradient[i]; - cerr << " GNORM=" << sqrt(gnorm) << endl; - vector old = lambdas; - int c = 0; - while (old == lambdas) { - ++c; - if (c > 1) { cerr << "Same lambdas, repeating optimization\n"; } - o->Optimize(objective, gradient, &lambdas); - assert(c < 5); - } - old.clear(); - Weights::SanityCheck(lambdas); - Weights::ShowLargestFeatures(lambdas); - - converged = o->HasConverged(); - if (converged) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; } - - string fname = "weights.cur.gz"; - if (converged) { fname = "weights.final.gz"; } - ostringstream vv; - vv << "Objective = " << objective << " (eval count=" << o->EvaluationCount() << ")"; - const string svv = vv.str(); - Weights::WriteToFile(fname, lambdas, true, &svv); - } // rank == 0 - int cint = converged; -#ifdef HAVE_MPI - mpi::broadcast(world, &lambdas[0], lambdas.size(), 0); - mpi::broadcast(world, cint, 0); - if (rank == 0) { cerr << " ELAPSED TIME THIS ITERATION=" << timer.elapsed() << endl; } -#endif - converged = cint; - } - return 0; -} - diff --git a/training/mpi_compute_cllh.cc b/training/mpi_compute_cllh.cc deleted file mode 100644 index 066389d0..00000000 --- a/training/mpi_compute_cllh.cc +++ /dev/null @@ -1,134 +0,0 @@ -#include -#include -#include -#include - -#include "config.h" -#ifdef HAVE_MPI -#include -#endif -#include -#include - -#include "cllh_observer.h" -#include "sentence_metadata.h" -#include "verbose.h" -#include "hg.h" -#include "prob.h" -#include "inside_outside.h" -#include "ff_register.h" -#include "decoder.h" -#include "filelib.h" -#include "weights.h" - -using namespace std; -namespace po = boost::program_options; - -bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("weights,w",po::value(),"Input feature weights file") - ("training_data,t",po::value(),"Training data corpus") - ("decoder_config,c",po::value(),"Decoder configuration file"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || !conf->count("training_data") || !conf->count("decoder_config")) { - cerr << dcmdline_options << endl; - return false; - } - return true; -} - -void ReadInstances(const string& fname, int rank, int size, vector* c) { - assert(fname != "-"); - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - int lc = 0; - while(in) { - getline(in, line); - if (!in) break; - if (lc % size == rank) c->push_back(line); - ++lc; - } -} - -static const double kMINUS_EPSILON = -1e-6; - -#ifdef HAVE_MPI -namespace mpi = boost::mpi; -#endif - -int main(int argc, char** argv) { -#ifdef HAVE_MPI - mpi::environment env(argc, argv); - mpi::communicator world; - const int size = world.size(); - const int rank = world.rank(); -#else - const int size = 1; - const int rank = 0; -#endif - if (size > 1) SetSilent(true); // turn off verbose decoder output - register_feature_functions(); - - po::variables_map conf; - if (!InitCommandLine(argc, argv, &conf)) - return false; - - // load cdec.ini and set up decoder - ReadFile ini_rf(conf["decoder_config"].as()); - Decoder decoder(ini_rf.stream()); - if (decoder.GetConf()["input"].as() != "-") { - cerr << "cdec.ini must not set an input file\n"; - abort(); - } - - // load weights - vector& weights = decoder.CurrentWeightVector(); - if (conf.count("weights")) - Weights::InitFromFile(conf["weights"].as(), &weights); - - vector corpus; - ReadInstances(conf["training_data"].as(), rank, size, &corpus); - assert(corpus.size() > 0); - - if (rank == 0) - cerr << "Each processor is decoding ~" << corpus.size() << " training examples...\n"; - - ConditionalLikelihoodObserver observer; - for (int i = 0; i < corpus.size(); ++i) - decoder.Decode(corpus[i], &observer); - - double objective = 0; - unsigned total_words = 0; -#ifdef HAVE_MPI - reduce(world, observer.acc_obj, objective, std::plus(), 0); - reduce(world, observer.trg_words, total_words, std::plus(), 0); -#else - objective = observer.acc_obj; -#endif - - if (rank == 0) { - cout << "CONDITIONAL LOG_e LIKELIHOOD: " << objective << endl; - cout << "CONDITIONAL LOG_2 LIKELIHOOD: " << (objective/log(2)) << endl; - cout << " CONDITIONAL ENTROPY: " << (objective/log(2) / total_words) << endl; - cout << " PERPLEXITY: " << pow(2, (objective/log(2) / total_words)) << endl; - } - - return 0; -} - diff --git a/training/mpi_em_optimize.cc b/training/mpi_em_optimize.cc deleted file mode 100644 index 48683b15..00000000 --- a/training/mpi_em_optimize.cc +++ /dev/null @@ -1,389 +0,0 @@ -#include -#include -#include -#include -#include - -#ifdef HAVE_MPI -#include -#endif - -#include -#include -#include - -#include "verbose.h" -#include "hg.h" -#include "prob.h" -#include "inside_outside.h" -#include "ff_register.h" -#include "decoder.h" -#include "filelib.h" -#include "optimize.h" -#include "fdict.h" -#include "weights.h" -#include "sparse_vector.h" - -using namespace std; -using boost::shared_ptr; -namespace po = boost::program_options; - -void SanityCheck(const vector& w) { - for (int i = 0; i < w.size(); ++i) { - assert(!isnan(w[i])); - assert(!isinf(w[i])); - } -} - -struct FComp { - const vector& w_; - FComp(const vector& w) : w_(w) {} - bool operator()(int a, int b) const { - return fabs(w_[a]) > fabs(w_[b]); - } -}; - -void ShowLargestFeatures(const vector& w) { - vector fnums(w.size()); - for (int i = 0; i < w.size(); ++i) - fnums[i] = i; - vector::iterator mid = fnums.begin(); - mid += (w.size() > 10 ? 10 : w.size()); - partial_sort(fnums.begin(), mid, fnums.end(), FComp(w)); - cerr << "TOP FEATURES:"; - for (vector::iterator i = fnums.begin(); i != mid; ++i) { - cerr << ' ' << FD::Convert(*i) << '=' << w[*i]; - } - cerr << endl; -} - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input_weights,w",po::value(),"Input feature weights file") - ("training_data,t",po::value(),"Training data") - ("decoder_config,c",po::value(),"Decoder configuration file") - ("output_weights,o",po::value()->default_value("-"),"Output feature weights file"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || !(conf->count("training_data")) || !conf->count("decoder_config")) { - cerr << dcmdline_options << endl; -#ifdef HAVE_MPI - MPI::Finalize(); -#endif - exit(1); - } -} - -void ReadTrainingCorpus(const string& fname, int rank, int size, vector* c) { - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - int lc = 0; - while(in) { - getline(in, line); - if (!in) break; - if (lc % size == rank) c->push_back(line); - ++lc; - } -} - -static const double kMINUS_EPSILON = -1e-6; - -struct TrainingObserver : public DecoderObserver { - void Reset() { - total_complete = 0; - cur_obj = 0; - tot_obj = 0; - tot.clear(); - } - - void SetLocalGradientAndObjective(SparseVector* g, double* o) const { - *o = tot_obj; - *g = tot; - } - - virtual void NotifyDecodingStart(const SentenceMetadata& smeta) { - cur_obj = 0; - state = 1; - } - - void ExtractExpectedCounts(Hypergraph* hg) { - vector posts; - cur.clear(); - const prob_t z = hg->ComputeEdgePosteriors(1.0, &posts); - cur_obj = log(z); - for (int i = 0; i < posts.size(); ++i) { - const SparseVector& efeats = hg->edges_[i].feature_values_; - const double post = static_cast(posts[i] / z); - for (SparseVector::const_iterator j = efeats.begin(); j != efeats.end(); ++j) - cur.add_value(j->first, post); - } - } - - // compute model expectations, denominator of objective - virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { - assert(state == 1); - state = 2; - ExtractExpectedCounts(hg); - } - - // replace translation forest, since we're doing EM training (we don't know which) - virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) { - assert(state == 2); - state = 3; - ExtractExpectedCounts(hg); - } - - virtual void NotifyDecodingComplete(const SentenceMetadata& smeta) { - ++total_complete; - tot_obj += cur_obj; - tot += cur; - } - - int total_complete; - double cur_obj; - double tot_obj; - SparseVector cur, tot; - int state; -}; - -void ReadConfig(const string& ini, vector* out) { - ReadFile rf(ini); - istream& in = *rf.stream(); - while(in) { - string line; - getline(in, line); - if (!in) continue; - out->push_back(line); - } -} - -void StoreConfig(const vector& cfg, istringstream* o) { - ostringstream os; - for (int i = 0; i < cfg.size(); ++i) { os << cfg[i] << endl; } - o->str(os.str()); -} - -struct OptimizableMultinomialFamily { - struct CPD { - CPD() : z() {} - double z; - map c2counts; - }; - map counts; - double Value(WordID conditioning, WordID generated) const { - map::const_iterator it = counts.find(conditioning); - assert(it != counts.end()); - map::const_iterator r = it->second.c2counts.find(generated); - if (r == it->second.c2counts.end()) return 0; - return r->second; - } - void Increment(WordID conditioning, WordID generated, double count) { - CPD& cc = counts[conditioning]; - cc.z += count; - cc.c2counts[generated] += count; - } - void Optimize() { - for (map::iterator i = counts.begin(); i != counts.end(); ++i) { - CPD& cpd = i->second; - for (map::iterator j = cpd.c2counts.begin(); j != cpd.c2counts.end(); ++j) { - j->second /= cpd.z; - // cerr << "P(" << TD::Convert(j->first) << " | " << TD::Convert(i->first) << " ) = " << j->second << endl; - } - } - } - void Clear() { - counts.clear(); - } -}; - -struct CountManager { - CountManager(size_t num_types) : oms_(num_types) {} - virtual ~CountManager(); - virtual void AddCounts(const SparseVector& c) = 0; - void Optimize(SparseVector* weights) { - for (int i = 0; i < oms_.size(); ++i) { - oms_[i].Optimize(); - } - GetOptimalValues(weights); - for (int i = 0; i < oms_.size(); ++i) { - oms_[i].Clear(); - } - } - virtual void GetOptimalValues(SparseVector* wv) const = 0; - vector oms_; -}; -CountManager::~CountManager() {} - -struct TaggerCountManager : public CountManager { - // 0 = transitions, 2 = emissions - TaggerCountManager() : CountManager(2) {} - void AddCounts(const SparseVector& c); - void GetOptimalValues(SparseVector* wv) const { - for (set::const_iterator it = fids_.begin(); it != fids_.end(); ++it) { - int ftype; - WordID cond, gen; - bool is_optimized = TaggerCountManager::GetFeature(*it, &ftype, &cond, &gen); - assert(is_optimized); - wv->set_value(*it, log(oms_[ftype].Value(cond, gen))); - } - } - // Id:0:a=1 Bi:a_b=1 Bi:b_c=1 Bi:c_d=1 Uni:a=1 Uni:b=1 Uni:c=1 Uni:d=1 Id:1:b=1 Bi:BOS_a=1 Id:2:c=1 - static bool GetFeature(const int fid, int* feature_type, WordID* cond, WordID* gen) { - const string& feat = FD::Convert(fid); - if (feat.size() > 5 && feat[0] == 'I' && feat[1] == 'd' && feat[2] == ':') { - // emission - const size_t p = feat.rfind(':'); - assert(p != string::npos); - *cond = TD::Convert(feat.substr(p+1)); - *gen = TD::Convert(feat.substr(3, p - 3)); - *feature_type = 1; - return true; - } else if (feat[0] == 'B' && feat.size() > 5 && feat[2] == ':' && feat[1] == 'i') { - // transition - const size_t p = feat.rfind('_'); - assert(p != string::npos); - *gen = TD::Convert(feat.substr(p+1)); - *cond = TD::Convert(feat.substr(3, p - 3)); - *feature_type = 0; - return true; - } else if (feat[0] == 'U' && feat.size() > 4 && feat[1] == 'n' && feat[2] == 'i' && feat[3] == ':') { - // ignore - return false; - } else { - cerr << "Don't know how to deal with feature of type: " << feat << endl; - abort(); - } - } - set fids_; -}; - -void TaggerCountManager::AddCounts(const SparseVector& c) { - for (SparseVector::const_iterator it = c.begin(); it != c.end(); ++it) { - const double& val = it->second; - int ftype; - WordID cond, gen; - if (GetFeature(it->first, &ftype, &cond, &gen)) { - oms_[ftype].Increment(cond, gen, val); - fids_.insert(it->first); - } - } -} - -int main(int argc, char** argv) { -#ifdef HAVE_MPI - MPI::Init(argc, argv); - const int size = MPI::COMM_WORLD.Get_size(); - const int rank = MPI::COMM_WORLD.Get_rank(); -#else - const int size = 1; - const int rank = 0; -#endif - SetSilent(true); // turn off verbose decoder output - register_feature_functions(); - - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - - TaggerCountManager tcm; - - // load cdec.ini and set up decoder - vector cdec_ini; - ReadConfig(conf["decoder_config"].as(), &cdec_ini); - istringstream ini; - StoreConfig(cdec_ini, &ini); - if (rank == 0) cerr << "Loading grammar...\n"; - Decoder* decoder = new Decoder(&ini); - if (decoder->GetConf()["input"].as() != "-") { - cerr << "cdec.ini must not set an input file\n"; -#ifdef HAVE_MPI - MPI::COMM_WORLD.Abort(1); -#endif - } - if (rank == 0) cerr << "Done loading grammar!\n"; - Weights w; - if (conf.count("input_weights")) - w.InitFromFile(conf["input_weights"].as()); - - double objective = 0; - bool converged = false; - - vector lambdas; - w.InitVector(&lambdas); - vector corpus; - ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus); - assert(corpus.size() > 0); - - int iteration = 0; - TrainingObserver observer; - while (!converged) { - ++iteration; - observer.Reset(); - if (rank == 0) { - cerr << "Starting decoding... (~" << corpus.size() << " sentences / proc)\n"; - } - decoder->SetWeights(lambdas); - for (int i = 0; i < corpus.size(); ++i) - decoder->Decode(corpus[i], &observer); - - SparseVector x; - observer.SetLocalGradientAndObjective(&x, &objective); - cerr << "COUNTS = " << x << endl; - cerr << " OBJ = " << objective << endl; - tcm.AddCounts(x); - -#if 0 -#ifdef HAVE_MPI - MPI::COMM_WORLD.Reduce(const_cast(&gradient.data()[0]), &rcv_grad[0], num_feats, MPI::DOUBLE, MPI::SUM, 0); - MPI::COMM_WORLD.Reduce(&objective, &to, 1, MPI::DOUBLE, MPI::SUM, 0); - swap(gradient, rcv_grad); - objective = to; -#endif -#endif - - if (rank == 0) { - SparseVector wsv; - tcm.Optimize(&wsv); - - w.InitFromVector(wsv); - w.InitVector(&lambdas); - - ShowLargestFeatures(lambdas); - - converged = iteration > 100; - if (converged) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; } - - string fname = "weights.cur.gz"; - if (converged) { fname = "weights.final.gz"; } - ostringstream vv; - vv << "Objective = " << objective << " (ITERATION=" << iteration << ")"; - const string svv = vv.str(); - w.WriteToFile(fname, true, &svv); - } // rank == 0 - int cint = converged; -#ifdef HAVE_MPI - MPI::COMM_WORLD.Bcast(const_cast(&lambdas.data()[0]), num_feats, MPI::DOUBLE, 0); - MPI::COMM_WORLD.Bcast(&cint, 1, MPI::INT, 0); - MPI::COMM_WORLD.Barrier(); -#endif - converged = cint; - } -#ifdef HAVE_MPI - MPI::Finalize(); -#endif - return 0; -} diff --git a/training/mpi_extract_features.cc b/training/mpi_extract_features.cc deleted file mode 100644 index 6750aa15..00000000 --- a/training/mpi_extract_features.cc +++ /dev/null @@ -1,151 +0,0 @@ -#include -#include -#include -#include - -#include "config.h" -#ifdef HAVE_MPI -#include -#endif -#include -#include - -#include "ff_register.h" -#include "verbose.h" -#include "filelib.h" -#include "fdict.h" -#include "decoder.h" -#include "weights.h" - -using namespace std; -namespace po = boost::program_options; - -bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("training_data,t",po::value(),"Training data corpus") - ("decoder_config,c",po::value(),"Decoder configuration file") - ("weights,w", po::value(), "(Optional) weights file; weights may affect what features are encountered in pruning configurations") - ("output_prefix,o",po::value()->default_value("features"),"Output path prefix"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || !conf->count("training_data") || !conf->count("decoder_config")) { - cerr << "Decode an input set (optionally in parallel using MPI) and write\nout the feature strings encountered.\n"; - cerr << dcmdline_options << endl; - return false; - } - return true; -} - -void ReadTrainingCorpus(const string& fname, int rank, int size, vector* c) { - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - int lc = 0; - while(in) { - getline(in, line); - if (!in) break; - if (lc % size == rank) c->push_back(line); - ++lc; - } -} - -static const double kMINUS_EPSILON = -1e-6; - -struct TrainingObserver : public DecoderObserver { - - virtual void NotifyDecodingStart(const SentenceMetadata&) { - } - - // compute model expectations, denominator of objective - virtual void NotifyTranslationForest(const SentenceMetadata&, Hypergraph* hg) { - } - - // compute "empirical" expectations, numerator of objective - virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) { - } -}; - -#ifdef HAVE_MPI -namespace mpi = boost::mpi; -#endif - -int main(int argc, char** argv) { -#ifdef HAVE_MPI - mpi::environment env(argc, argv); - mpi::communicator world; - const int size = world.size(); - const int rank = world.rank(); -#else - const int size = 1; - const int rank = 0; -#endif - if (size > 1) SetSilent(true); // turn off verbose decoder output - register_feature_functions(); - - po::variables_map conf; - if (!InitCommandLine(argc, argv, &conf)) - return false; - - // load cdec.ini and set up decoder - ReadFile ini_rf(conf["decoder_config"].as()); - Decoder decoder(ini_rf.stream()); - if (decoder.GetConf()["input"].as() != "-") { - cerr << "cdec.ini must not set an input file\n"; - abort(); - } - - if (FD::UsingPerfectHashFunction()) { - cerr << "Your configuration file has enabled a cmph hash function. Please disable.\n"; - return 1; - } - - // load optional weights - if (conf.count("weights")) - Weights::InitFromFile(conf["weights"].as(), &decoder.CurrentWeightVector()); - - vector corpus; - ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus); - assert(corpus.size() > 0); - - TrainingObserver observer; - - if (rank == 0) - cerr << "Each processor is decoding ~" << corpus.size() << " training examples...\n"; - - for (int i = 0; i < corpus.size(); ++i) - decoder.Decode(corpus[i], &observer); - - { - ostringstream os; - os << conf["output_prefix"].as() << '.' << rank << "_of_" << size; - WriteFile wf(os.str()); - ostream& out = *wf.stream(); - const unsigned num_feats = FD::NumFeats(); - for (unsigned i = 1; i < num_feats; ++i) { - out << FD::Convert(i) << endl; - } - cerr << "Wrote " << os.str() << endl; - } - -#ifdef HAVE_MPI - world.barrier(); -#else -#endif - - return 0; -} - diff --git a/training/mpi_extract_reachable.cc b/training/mpi_extract_reachable.cc deleted file mode 100644 index 2a7c2b9d..00000000 --- a/training/mpi_extract_reachable.cc +++ /dev/null @@ -1,163 +0,0 @@ -#include -#include -#include -#include - -#include "config.h" -#ifdef HAVE_MPI -#include -#endif -#include -#include - -#include "ff_register.h" -#include "verbose.h" -#include "filelib.h" -#include "fdict.h" -#include "decoder.h" -#include "weights.h" - -using namespace std; -namespace po = boost::program_options; - -bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("training_data,t",po::value(),"Training data corpus") - ("decoder_config,c",po::value(),"Decoder configuration file") - ("weights,w", po::value(), "(Optional) weights file; weights may affect what features are encountered in pruning configurations") - ("output_prefix,o",po::value()->default_value("reachable"),"Output path prefix"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || !conf->count("training_data") || !conf->count("decoder_config")) { - cerr << "Decode an input set (optionally in parallel using MPI) and write\nout the inputs that produce reachable parallel parses.\n"; - cerr << dcmdline_options << endl; - return false; - } - return true; -} - -void ReadTrainingCorpus(const string& fname, int rank, int size, vector* c) { - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - int lc = 0; - while(in) { - getline(in, line); - if (!in) break; - if (lc % size == rank) c->push_back(line); - ++lc; - } -} - -static const double kMINUS_EPSILON = -1e-6; - -struct ReachabilityObserver : public DecoderObserver { - - virtual void NotifyDecodingStart(const SentenceMetadata&) { - reachable = false; - } - - // compute model expectations, denominator of objective - virtual void NotifyTranslationForest(const SentenceMetadata&, Hypergraph* hg) { - } - - // compute "empirical" expectations, numerator of objective - virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) { - reachable = true; - } - - bool reachable; -}; - -#ifdef HAVE_MPI -namespace mpi = boost::mpi; -#endif - -int main(int argc, char** argv) { -#ifdef HAVE_MPI - mpi::environment env(argc, argv); - mpi::communicator world; - const int size = world.size(); - const int rank = world.rank(); -#else - const int size = 1; - const int rank = 0; -#endif - if (size > 1) SetSilent(true); // turn off verbose decoder output - register_feature_functions(); - - po::variables_map conf; - if (!InitCommandLine(argc, argv, &conf)) - return false; - - // load cdec.ini and set up decoder - ReadFile ini_rf(conf["decoder_config"].as()); - Decoder decoder(ini_rf.stream()); - if (decoder.GetConf()["input"].as() != "-") { - cerr << "cdec.ini must not set an input file\n"; - abort(); - } - - if (FD::UsingPerfectHashFunction()) { - cerr << "Your configuration file has enabled a cmph hash function. Please disable.\n"; - return 1; - } - - // load optional weights - if (conf.count("weights")) - Weights::InitFromFile(conf["weights"].as(), &decoder.CurrentWeightVector()); - - vector corpus; - ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus); - assert(corpus.size() > 0); - - - if (rank == 0) - cerr << "Each processor is decoding ~" << corpus.size() << " training examples...\n"; - - size_t num_reached = 0; - { - ostringstream os; - os << conf["output_prefix"].as() << '.' << rank << "_of_" << size; - WriteFile wf(os.str()); - ostream& out = *wf.stream(); - ReachabilityObserver observer; - for (int i = 0; i < corpus.size(); ++i) { - decoder.Decode(corpus[i], &observer); - if (observer.reachable) { - out << corpus[i] << endl; - ++num_reached; - } - corpus[i].clear(); - } - cerr << "Shard " << rank << '/' << size << " finished, wrote " - << num_reached << " instances to " << os.str() << endl; - } - - size_t total = 0; -#ifdef HAVE_MPI - reduce(world, num_reached, total, std::plus(), 0); -#else - total = num_reached; -#endif - if (rank == 0) { - cerr << "-----------------------------------------\n"; - cerr << "TOTAL = " << total << " instances\n"; - } - return 0; -} - diff --git a/training/mpi_flex_optimize.cc b/training/mpi_flex_optimize.cc deleted file mode 100644 index b52decdc..00000000 --- a/training/mpi_flex_optimize.cc +++ /dev/null @@ -1,386 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "stringlib.h" -#include "verbose.h" -#include "hg.h" -#include "prob.h" -#include "inside_outside.h" -#include "ff_register.h" -#include "decoder.h" -#include "filelib.h" -#include "optimize.h" -#include "fdict.h" -#include "weights.h" -#include "sparse_vector.h" -#include "sampler.h" - -#ifdef HAVE_MPI -#include -#include -namespace mpi = boost::mpi; -#endif - -using namespace std; -namespace po = boost::program_options; - -bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("cdec_config,c",po::value(),"Decoder configuration file") - ("weights,w",po::value(),"Initial feature weights") - ("training_data,d",po::value(),"Training data") - ("minibatch_size_per_proc,s", po::value()->default_value(6), "Number of training instances evaluated per processor in each minibatch") - ("minibatch_iterations,i", po::value()->default_value(10), "Number of optimization iterations per minibatch") - ("iterations,I", po::value()->default_value(50), "Number of passes through the training data before termination") - ("regularization_strength,C", po::value()->default_value(0.2), "Regularization strength") - ("time_series_strength,T", po::value()->default_value(0.0), "Time series regularization strength") - ("random_seed,S", po::value(), "Random seed (if not specified, /dev/random will be used)") - ("lbfgs_memory_buffers,M", po::value()->default_value(10), "Number of memory buffers for LBFGS history"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || !conf->count("training_data") || !conf->count("cdec_config")) { - cerr << "LBFGS minibatch online optimizer (MPI support " -#if HAVE_MPI - << "enabled" -#else - << "not enabled" -#endif - << ")\n" << dcmdline_options << endl; - return false; - } - return true; -} - -void ReadTrainingCorpus(const string& fname, int rank, int size, vector* c, vector* order) { - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - int id = 0; - while(in) { - getline(in, line); - if (!in) break; - if (id % size == rank) { - c->push_back(line); - order->push_back(id); - } - ++id; - } -} - -static const double kMINUS_EPSILON = -1e-6; - -struct CopyHGsObserver : public DecoderObserver { - Hypergraph* hg_; - Hypergraph* gold_hg_; - - // this can free up some memory - void RemoveRules(Hypergraph* h) { - for (unsigned i = 0; i < h->edges_.size(); ++i) - h->edges_[i].rule_.reset(); - } - - void SetCurrentHypergraphs(Hypergraph* h, Hypergraph* gold_h) { - hg_ = h; - gold_hg_ = gold_h; - } - - virtual void NotifyDecodingStart(const SentenceMetadata&) { - state = 1; - } - - // compute model expectations, denominator of objective - virtual void NotifyTranslationForest(const SentenceMetadata&, Hypergraph* hg) { - *hg_ = *hg; - RemoveRules(hg_); - assert(state == 1); - state = 2; - } - - // compute "empirical" expectations, numerator of objective - virtual void NotifyAlignmentForest(const SentenceMetadata&, Hypergraph* hg) { - assert(state == 2); - state = 3; - *gold_hg_ = *hg; - RemoveRules(gold_hg_); - } - - virtual void NotifyDecodingComplete(const SentenceMetadata&) { - if (state == 3) { - } else { - hg_->clear(); - gold_hg_->clear(); - } - } - - int state; -}; - -void ReadConfig(const string& ini, istringstream* out) { - ReadFile rf(ini); - istream& in = *rf.stream(); - ostringstream os; - while(in) { - string line; - getline(in, line); - if (!in) continue; - os << line << endl; - } - out->str(os.str()); -} - -#ifdef HAVE_MPI -namespace boost { namespace mpi { - template<> - struct is_commutative >, SparseVector > - : mpl::true_ { }; -} } // end namespace boost::mpi -#endif - -void AddGrad(const SparseVector x, double s, SparseVector* acc) { - for (SparseVector::const_iterator it = x.begin(); it != x.end(); ++it) - acc->add_value(it->first, it->second.as_float() * s); -} - -double PNorm(const vector& v, const double p) { - double acc = 0; - for (int i = 0; i < v.size(); ++i) - acc += pow(v[i], p); - return pow(acc, 1.0 / p); -} - -void VV(ostream&os, const vector& v) { - for (int i = 1; i < v.size(); ++i) - if (v[i]) os << FD::Convert(i) << "=" << v[i] << " "; -} - -double ApplyRegularizationTerms(const double C, - const double T, - const vector& weights, - const vector& prev_weights, - double* g) { - double reg = 0; - for (size_t i = 0; i < weights.size(); ++i) { - const double prev_w_i = (i < prev_weights.size() ? prev_weights[i] : 0.0); - const double& w_i = weights[i]; - reg += C * w_i * w_i; - g[i] += 2 * C * w_i; - - reg += T * (w_i - prev_w_i) * (w_i - prev_w_i); - g[i] += 2 * T * (w_i - prev_w_i); - } - return reg; -} - -int main(int argc, char** argv) { -#ifdef HAVE_MPI - mpi::environment env(argc, argv); - mpi::communicator world; - const int size = world.size(); - const int rank = world.rank(); -#else - const int size = 1; - const int rank = 0; -#endif - if (size > 1) SetSilent(true); // turn off verbose decoder output - register_feature_functions(); - MT19937* rng = NULL; - - po::variables_map conf; - if (!InitCommandLine(argc, argv, &conf)) - return 1; - - boost::shared_ptr o; - const unsigned lbfgs_memory_buffers = conf["lbfgs_memory_buffers"].as(); - const unsigned size_per_proc = conf["minibatch_size_per_proc"].as(); - const unsigned minibatch_iterations = conf["minibatch_iterations"].as(); - const double regularization_strength = conf["regularization_strength"].as(); - const double time_series_strength = conf["time_series_strength"].as(); - const bool use_time_series_reg = time_series_strength > 0.0; - const unsigned max_iteration = conf["iterations"].as(); - - vector corpus; - vector ids; - ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus, &ids); - assert(corpus.size() > 0); - - if (size_per_proc > corpus.size()) { - cerr << "Minibatch size (per processor) must be smaller or equal to the local corpus size!\n"; - return 1; - } - - // initialize decoder (loads hash functions if necessary) - istringstream ins; - ReadConfig(conf["cdec_config"].as(), &ins); - Decoder decoder(&ins); - - // load initial weights - vector prev_weights; - if (conf.count("weights")) - Weights::InitFromFile(conf["weights"].as(), &prev_weights); - - if (conf.count("random_seed")) - rng = new MT19937(conf["random_seed"].as()); - else - rng = new MT19937; - - size_t total_corpus_size = 0; -#ifdef HAVE_MPI - reduce(world, corpus.size(), total_corpus_size, std::plus(), 0); -#else - total_corpus_size = corpus.size(); -#endif - - if (rank == 0) - cerr << "Total corpus size: " << total_corpus_size << endl; - - CopyHGsObserver observer; - - int write_weights_every_ith = 100; // TODO configure - int titer = -1; - - vector& cur_weights = decoder.CurrentWeightVector(); - if (use_time_series_reg) { - cur_weights = prev_weights; - } else { - cur_weights.swap(prev_weights); - prev_weights.clear(); - } - - int iter = -1; - bool converged = false; - vector gg; - while (!converged) { -#ifdef HAVE_MPI - mpi::timer timer; -#endif - ++iter; ++titer; - if (rank == 0) { - converged = (iter == max_iteration); - string fname = "weights.cur.gz"; - if (iter % write_weights_every_ith == 0) { - ostringstream o; o << "weights.epoch_" << iter << ".gz"; - fname = o.str(); - } - if (converged) { fname = "weights.final.gz"; } - ostringstream vv; - vv << "total iter=" << titer << " (of current config iter=" << iter << ") minibatch=" << size_per_proc << " sentences/proc x " << size << " procs. num_feats=" << FD::NumFeats() << " passes_thru_data=" << (titer * size_per_proc / static_cast(corpus.size())); - const string svv = vv.str(); - Weights::WriteToFile(fname, cur_weights, true, &svv); - } - - vector hgs(size_per_proc); - vector gold_hgs(size_per_proc); - for (int i = 0; i < size_per_proc; ++i) { - int ei = corpus.size() * rng->next(); - int id = ids[ei]; - observer.SetCurrentHypergraphs(&hgs[i], &gold_hgs[i]); - decoder.SetId(id); - decoder.Decode(corpus[ei], &observer); - } - - SparseVector local_grad, g; - double local_obj = 0; - o.reset(); - for (unsigned mi = 0; mi < minibatch_iterations; ++mi) { - local_grad.clear(); - g.clear(); - local_obj = 0; - - for (unsigned i = 0; i < size_per_proc; ++i) { - Hypergraph& hg = hgs[i]; - Hypergraph& hg_gold = gold_hgs[i]; - if (hg.edges_.size() < 2) continue; - - hg.Reweight(cur_weights); - hg_gold.Reweight(cur_weights); - SparseVector model_exp, gold_exp; - const prob_t z = InsideOutside, - EdgeFeaturesAndProbWeightFunction>(hg, &model_exp); - local_obj += log(z); - model_exp /= z; - AddGrad(model_exp, 1.0, &local_grad); - model_exp.clear(); - - const prob_t goldz = InsideOutside, - EdgeFeaturesAndProbWeightFunction>(hg_gold, &gold_exp); - local_obj -= log(goldz); - - if (log(z) - log(goldz) < kMINUS_EPSILON) { - cerr << "DIFF. ERR! log_model_z < log_gold_z: " << log(z) << " " << log(goldz) << endl; - return 1; - } - - gold_exp /= goldz; - AddGrad(gold_exp, -1.0, &local_grad); - } - - double obj = 0; -#ifdef HAVE_MPI - reduce(world, local_obj, obj, std::plus(), 0); - reduce(world, local_grad, g, std::plus >(), 0); -#else - obj = local_obj; - g.swap(local_grad); -#endif - local_grad.clear(); - if (rank == 0) { - // g /= (size_per_proc * size); - if (!o) - o.reset(new LBFGSOptimizer(FD::NumFeats(), lbfgs_memory_buffers)); - gg.clear(); - gg.resize(FD::NumFeats()); - if (gg.size() != cur_weights.size()) { cur_weights.resize(gg.size()); } - for (SparseVector::iterator it = g.begin(); it != g.end(); ++it) - if (it->first) { gg[it->first] = it->second; } - g.clear(); - double r = ApplyRegularizationTerms(regularization_strength, - time_series_strength, // * (iter == 0 ? 0.0 : 1.0), - cur_weights, - prev_weights, - &gg[0]); - obj += r; - if (mi == 0 || mi == (minibatch_iterations - 1)) { - if (!mi) cerr << iter << ' '; else cerr << ' '; - cerr << "OBJ=" << obj << " (REG=" << r << ")" << " |g|=" << PNorm(gg, 2) << " |w|=" << PNorm(cur_weights, 2); - if (mi > 0) cerr << endl << flush; else cerr << ' '; - } else { cerr << '.' << flush; } - // cerr << "w = "; VV(cerr, cur_weights); cerr << endl; - // cerr << "g = "; VV(cerr, gg); cerr << endl; - o->Optimize(obj, gg, &cur_weights); - } -#ifdef HAVE_MPI - broadcast(world, cur_weights, 0); - broadcast(world, converged, 0); - world.barrier(); -#endif - } - prev_weights = cur_weights; - } - return 0; -} diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc deleted file mode 100644 index d6968848..00000000 --- a/training/mpi_online_optimize.cc +++ /dev/null @@ -1,374 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "stringlib.h" -#include "verbose.h" -#include "hg.h" -#include "prob.h" -#include "inside_outside.h" -#include "ff_register.h" -#include "decoder.h" -#include "filelib.h" -#include "online_optimizer.h" -#include "fdict.h" -#include "weights.h" -#include "sparse_vector.h" -#include "sampler.h" - -#ifdef HAVE_MPI -#include -#include -namespace mpi = boost::mpi; -#endif - -using namespace std; -namespace po = boost::program_options; - -bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input_weights,w",po::value(),"Input feature weights file") - ("frozen_features,z",po::value(), "List of features not to optimize") - ("training_data,t",po::value(),"Training data corpus") - ("training_agenda,a",po::value(), "Text file listing a series of configuration files and the number of iterations to train using each configuration successively") - ("minibatch_size_per_proc,s", po::value()->default_value(5), "Number of training instances evaluated per processor in each minibatch") - ("optimization_method,m", po::value()->default_value("sgd"), "Optimization method (sgd)") - ("random_seed,S", po::value(), "Random seed (if not specified, /dev/random will be used)") - ("eta_0,e", po::value()->default_value(0.2), "Initial learning rate for SGD (eta_0)") - ("L1,1","Use L1 regularization") - ("regularization_strength,C", po::value()->default_value(1.0), "Regularization strength (C)"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || !conf->count("training_data") || !conf->count("training_agenda")) { - cerr << dcmdline_options << endl; - return false; - } - return true; -} - -void ReadTrainingCorpus(const string& fname, int rank, int size, vector* c, vector* order) { - ReadFile rf(fname); - istream& in = *rf.stream(); - string line; - int id = 0; - while(in) { - getline(in, line); - if (!in) break; - if (id % size == rank) { - c->push_back(line); - order->push_back(id); - } - ++id; - } -} - -static const double kMINUS_EPSILON = -1e-6; - -struct TrainingObserver : public DecoderObserver { - void Reset() { - acc_grad.clear(); - acc_obj = 0; - total_complete = 0; - } - - void SetLocalGradientAndObjective(vector* g, double* o) const { - *o = acc_obj; - for (SparseVector::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it) - (*g)[it->first] = it->second.as_float(); - } - - virtual void NotifyDecodingStart(const SentenceMetadata& smeta) { - cur_model_exp.clear(); - cur_obj = 0; - state = 1; - } - - // compute model expectations, denominator of objective - virtual void NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) { - assert(state == 1); - state = 2; - const prob_t z = InsideOutside, - EdgeFeaturesAndProbWeightFunction>(*hg, &cur_model_exp); - cur_obj = log(z); - cur_model_exp /= z; - } - - // compute "empirical" expectations, numerator of objective - virtual void NotifyAlignmentForest(const SentenceMetadata& smeta, Hypergraph* hg) { - assert(state == 2); - state = 3; - SparseVector ref_exp; - const prob_t ref_z = InsideOutside, - EdgeFeaturesAndProbWeightFunction>(*hg, &ref_exp); - ref_exp /= ref_z; - - double log_ref_z; -#if 0 - if (crf_uniform_empirical) { - log_ref_z = ref_exp.dot(feature_weights); - } else { - log_ref_z = log(ref_z); - } -#else - log_ref_z = log(ref_z); -#endif - - // rounding errors means that <0 is too strict - if ((cur_obj - log_ref_z) < kMINUS_EPSILON) { - cerr << "DIFF. ERR! log_model_z < log_ref_z: " << cur_obj << " " << log_ref_z << endl; - exit(1); - } - assert(!std::isnan(log_ref_z)); - ref_exp -= cur_model_exp; - acc_grad += ref_exp; - acc_obj += (cur_obj - log_ref_z); - } - - virtual void NotifyDecodingComplete(const SentenceMetadata& smeta) { - if (state == 3) { - ++total_complete; - } else { - } - } - - void GetGradient(SparseVector* g) const { - g->clear(); - for (SparseVector::const_iterator it = acc_grad.begin(); it != acc_grad.end(); ++it) - g->set_value(it->first, it->second.as_float()); - } - - int total_complete; - SparseVector cur_model_exp; - SparseVector acc_grad; - double acc_obj; - double cur_obj; - int state; -}; - -#ifdef HAVE_MPI -namespace boost { namespace mpi { - template<> - struct is_commutative >, SparseVector > - : mpl::true_ { }; -} } // end namespace boost::mpi -#endif - -bool LoadAgenda(const string& file, vector >* a) { - ReadFile rf(file); - istream& in = *rf.stream(); - string line; - while(in) { - getline(in, line); - if (!in) break; - if (line.empty()) continue; - if (line[0] == '#') continue; - int sc = 0; - if (line.size() < 3) return false; - for (int i = 0; i < line.size(); ++i) { if (line[i] == ' ') ++sc; } - if (sc != 1) { cerr << "Too many spaces in line: " << line << endl; return false; } - size_t d = line.find(" "); - pair x; - x.first = line.substr(0,d); - x.second = atoi(line.substr(d+1).c_str()); - a->push_back(x); - if (!FileExists(x.first)) { - cerr << "Can't find file " << x.first << endl; - return false; - } - } - return true; -} - -int main(int argc, char** argv) { - cerr << "THIS SOFTWARE IS DEPRECATED YOU SHOULD USE mpi_flex_optimize\n"; -#ifdef HAVE_MPI - mpi::environment env(argc, argv); - mpi::communicator world; - const int size = world.size(); - const int rank = world.rank(); -#else - const int size = 1; - const int rank = 0; -#endif - if (size > 1) SetSilent(true); // turn off verbose decoder output - register_feature_functions(); - std::tr1::shared_ptr rng; - - po::variables_map conf; - if (!InitCommandLine(argc, argv, &conf)) - return 1; - - vector > agenda; - if (!LoadAgenda(conf["training_agenda"].as(), &agenda)) - return 1; - if (rank == 0) - cerr << "Loaded agenda defining " << agenda.size() << " training epochs\n"; - - assert(agenda.size() > 0); - - if (1) { // hack to load the feature hash functions -- TODO this should not be in cdec.ini - const string& cur_config = agenda[0].first; - const unsigned max_iteration = agenda[0].second; - ReadFile ini_rf(cur_config); - Decoder decoder(ini_rf.stream()); - } - - // load initial weights - vector init_weights; - if (conf.count("input_weights")) - Weights::InitFromFile(conf["input_weights"].as(), &init_weights); - - vector frozen_fids; - if (conf.count("frozen_features")) { - ReadFile rf(conf["frozen_features"].as()); - istream& in = *rf.stream(); - string line; - while(in) { - getline(in, line); - if (line.empty()) continue; - if (line[0] == ' ' || line[line.size() - 1] == ' ') { line = Trim(line); } - frozen_fids.push_back(FD::Convert(line)); - } - if (rank == 0) cerr << "Freezing " << frozen_fids.size() << " features.\n"; - } - - vector corpus; - vector ids; - ReadTrainingCorpus(conf["training_data"].as(), rank, size, &corpus, &ids); - assert(corpus.size() > 0); - - std::tr1::shared_ptr o; - std::tr1::shared_ptr lr; - - const unsigned size_per_proc = conf["minibatch_size_per_proc"].as(); - if (size_per_proc > corpus.size()) { - cerr << "Minibatch size must be smaller than corpus size!\n"; - return 1; - } - - size_t total_corpus_size = 0; -#ifdef HAVE_MPI - reduce(world, corpus.size(), total_corpus_size, std::plus(), 0); -#else - total_corpus_size = corpus.size(); -#endif - - if (rank == 0) { - cerr << "Total corpus size: " << total_corpus_size << endl; - const unsigned batch_size = size_per_proc * size; - // TODO config - lr.reset(new ExponentialDecayLearningRate(batch_size, conf["eta_0"].as())); - - const string omethod = conf["optimization_method"].as(); - if (omethod == "sgd") { - const double C = conf["regularization_strength"].as(); - o.reset(new CumulativeL1OnlineOptimizer(lr, total_corpus_size, C, frozen_fids)); - } else { - assert(!"fail"); - } - } - if (conf.count("random_seed")) - rng.reset(new MT19937(conf["random_seed"].as())); - else - rng.reset(new MT19937); - - SparseVector x; - Weights::InitSparseVector(init_weights, &x); - TrainingObserver observer; - - int write_weights_every_ith = 100; // TODO configure - int titer = -1; - - for (int ai = 0; ai < agenda.size(); ++ai) { - const string& cur_config = agenda[ai].first; - const unsigned max_iteration = agenda[ai].second; - if (rank == 0) - cerr << "STARTING TRAINING EPOCH " << (ai+1) << ". CONFIG=" << cur_config << endl; - // load cdec.ini and set up decoder - ReadFile ini_rf(cur_config); - Decoder decoder(ini_rf.stream()); - vector& lambdas = decoder.CurrentWeightVector(); - if (ai == 0) { lambdas.swap(init_weights); init_weights.clear(); } - - if (rank == 0) - o->ResetEpoch(); // resets the learning rate-- TODO is this good? - - int iter = -1; - bool converged = false; - while (!converged) { -#ifdef HAVE_MPI - mpi::timer timer; -#endif - x.init_vector(&lambdas); - ++iter; ++titer; - observer.Reset(); - if (rank == 0) { - converged = (iter == max_iteration); - Weights::SanityCheck(lambdas); - static int cc = 0; ++cc; if (cc > 1) { Weights::ShowLargestFeatures(lambdas); } - string fname = "weights.cur.gz"; - if (iter % write_weights_every_ith == 0) { - ostringstream o; o << "weights.epoch_" << (ai+1) << '.' << iter << ".gz"; - fname = o.str(); - } - if (converged && ((ai+1)==agenda.size())) { fname = "weights.final.gz"; } - ostringstream vv; - vv << "total iter=" << titer << " (of current config iter=" << iter << ") minibatch=" << size_per_proc << " sentences/proc x " << size << " procs. num_feats=" << x.size() << '/' << FD::NumFeats() << " passes_thru_data=" << (titer * size_per_proc / static_cast(corpus.size())) << " eta=" << lr->eta(titer); - const string svv = vv.str(); - cerr << svv << endl; - Weights::WriteToFile(fname, lambdas, true, &svv); - } - - for (int i = 0; i < size_per_proc; ++i) { - int ei = corpus.size() * rng->next(); - int id = ids[ei]; - decoder.SetId(id); - decoder.Decode(corpus[ei], &observer); - } - SparseVector local_grad, g; - observer.GetGradient(&local_grad); -#ifdef HAVE_MPI - reduce(world, local_grad, g, std::plus >(), 0); -#else - g.swap(local_grad); -#endif - local_grad.clear(); - if (rank == 0) { - g /= (size_per_proc * size); - o->UpdateWeights(g, FD::NumFeats(), &x); - } -#ifdef HAVE_MPI - broadcast(world, x, 0); - broadcast(world, converged, 0); - world.barrier(); - if (rank == 0) { cerr << " ELAPSED TIME THIS ITERATION=" << timer.elapsed() << endl; } -#endif - } - } - return 0; -} diff --git a/training/mr_em_adapted_reduce.cc b/training/mr_em_adapted_reduce.cc deleted file mode 100644 index f65b5440..00000000 --- a/training/mr_em_adapted_reduce.cc +++ /dev/null @@ -1,173 +0,0 @@ -#include -#include -#include -#include - -#include -#include - -#include "filelib.h" -#include "fdict.h" -#include "weights.h" -#include "sparse_vector.h" -#include "m.h" - -using namespace std; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("optimization_method,m", po::value()->default_value("em"), "Optimization method (em, vb)") - ("input_format,f",po::value()->default_value("b64"),"Encoding of the input (b64 or text)"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -double NoZero(const double& x) { - if (x) return x; - return 1e-35; -} - -void Maximize(const bool use_vb, - const double& alpha, - const int total_event_types, - SparseVector* pc) { - const SparseVector& counts = *pc; - - if (use_vb) - assert(total_event_types >= counts.size()); - - double tot = 0; - for (SparseVector::const_iterator it = counts.begin(); - it != counts.end(); ++it) - tot += it->second; -// cerr << " = " << tot << endl; - assert(tot > 0.0); - double ltot = log(tot); - if (use_vb) - ltot = Md::digamma(tot + total_event_types * alpha); - for (SparseVector::const_iterator it = counts.begin(); - it != counts.end(); ++it) { - if (use_vb) { - pc->set_value(it->first, NoZero(Md::digamma(it->second + alpha) - ltot)); - } else { - pc->set_value(it->first, NoZero(log(it->second) - ltot)); - } - } -#if 0 - if (counts.size() < 50) { - for (SparseVector::const_iterator it = counts.begin(); - it != counts.end(); ++it) { - cerr << " p(" << FD::Convert(it->first) << ")=" << exp(it->second); - } - cerr << endl; - } -#endif -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - - const bool use_b64 = conf["input_format"].as() == "b64"; - const bool use_vb = conf["optimization_method"].as() == "vb"; - const double alpha = 1e-09; - if (use_vb) - cerr << "Using variational Bayes, make sure alphas are set\n"; - - const string s_obj = "**OBJ**"; - // E-step - string cur_key = ""; - SparseVector acc; - double logprob = 0; - while(cin) { - string line; - getline(cin, line); - if (line.empty()) continue; - int feat; - double val; - size_t i = line.find("\t"); - const string key = line.substr(0, i); - assert(i != string::npos); - ++i; - if (key != cur_key) { - if (cur_key.size() > 0) { - // TODO shouldn't be num_active, should be total number - // of events - Maximize(use_vb, alpha, acc.size(), &acc); - cout << cur_key << '\t'; - if (use_b64) - B64::Encode(0.0, acc, &cout); - else - cout << acc; - cout << endl; - acc.clear(); - } - cur_key = key; - } - if (use_b64) { - SparseVector g; - double obj; - if (!B64::Decode(&obj, &g, &line[i], line.size() - i)) { - cerr << "B64 decoder returned error, skipping!\n"; - continue; - } - logprob += obj; - acc += g; - } else { // text encoding - your counts will not be accurate! - while (i < line.size()) { - size_t start = i; - while (line[i] != '=' && i < line.size()) ++i; - if (i == line.size()) { cerr << "FORMAT ERROR\n"; break; } - string fname = line.substr(start, i - start); - if (fname == s_obj) { - feat = -1; - } else { - feat = FD::Convert(line.substr(start, i - start)); - } - ++i; - start = i; - while (line[i] != ';' && i < line.size()) ++i; - if (i - start == 0) continue; - val = atof(line.substr(start, i - start).c_str()); - ++i; - if (feat == -1) { - logprob += val; - } else { - acc.add_value(feat, val); - } - } - } - } - // TODO shouldn't be num_active, should be total number - // of events - Maximize(use_vb, alpha, acc.size(), &acc); - cout << cur_key << '\t'; - if (use_b64) - B64::Encode(0.0, acc, &cout); - else - cout << acc; - cout << endl << flush; - - cerr << "LOGPROB: " << logprob << endl; - - return 0; -} diff --git a/training/mr_em_map_adapter.cc b/training/mr_em_map_adapter.cc deleted file mode 100644 index ead4598d..00000000 --- a/training/mr_em_map_adapter.cc +++ /dev/null @@ -1,160 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include -#include "boost/tuple/tuple.hpp" - -#include "fdict.h" -#include "sparse_vector.h" - -using namespace std; -namespace po = boost::program_options; - -// useful for EM models parameterized by a bunch of multinomials -// this converts event counts (returned from cdec as feature expectations) -// into different keys and values (which are lists of all the events, -// conditioned on the key) for summing and normalization by a reducer - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("buffer_size,b", po::value()->default_value(1), "Buffer size (in # of counts) before emitting counts") - ("format,f",po::value()->default_value("b64"), "Encoding of the input (b64 or text)"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -struct EventMapper { - int Map(int fid) { - int& cv = map_[fid]; - if (!cv) { - cv = GetConditioningVariable(fid); - } - return cv; - } - void Clear() { map_.clear(); } - protected: - virtual int GetConditioningVariable(int fid) const = 0; - private: - map map_; -}; - -struct LexAlignEventMapper : public EventMapper { - protected: - virtual int GetConditioningVariable(int fid) const { - const string& str = FD::Convert(fid); - size_t pos = str.rfind("_"); - if (pos == string::npos || pos == 0 || pos >= str.size() - 1) { - cerr << "Bad feature for EM adapter: " << str << endl; - abort(); - } - return FD::Convert(str.substr(0, pos)); - } -}; - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - - const bool use_b64 = conf["format"].as() == "b64"; - const int buffer_size = conf["buffer_size"].as(); - - const string s_obj = "**OBJ**"; - // 0**OBJ**=12.2;Feat1=2.3;Feat2=-0.2; - // 0**OBJ**=1.1;Feat1=1.0; - - EventMapper* event_mapper = new LexAlignEventMapper; - map > counts; - size_t total = 0; - while(cin) { - string line; - getline(cin, line); - if (line.empty()) continue; - int feat; - double val; - size_t i = line.find("\t"); - assert(i != string::npos); - ++i; - SparseVector g; - double obj = 0; - if (use_b64) { - if (!B64::Decode(&obj, &g, &line[i], line.size() - i)) { - cerr << "B64 decoder returned error, skipping!\n"; - continue; - } - } else { // text encoding - your counts will not be accurate! - while (i < line.size()) { - size_t start = i; - while (line[i] != '=' && i < line.size()) ++i; - if (i == line.size()) { cerr << "FORMAT ERROR\n"; break; } - string fname = line.substr(start, i - start); - if (fname == s_obj) { - feat = -1; - } else { - feat = FD::Convert(line.substr(start, i - start)); - } - ++i; - start = i; - while (line[i] != ';' && i < line.size()) ++i; - if (i - start == 0) continue; - val = atof(line.substr(start, i - start).c_str()); - ++i; - if (feat == -1) { - obj = val; - } else { - g.set_value(feat, val); - } - } - } - //cerr << "OBJ: " << obj << endl; - const SparseVector& cg = g; - for (SparseVector::const_iterator it = cg.begin(); it != cg.end(); ++it) { - const int cond_var = event_mapper->Map(it->first); - SparseVector& cond_counts = counts[cond_var]; - int delta = cond_counts.size(); - cond_counts.add_value(it->first, it->second); - delta = cond_counts.size() - delta; - total += delta; - } - if (total > buffer_size) { - for (map >::iterator it = counts.begin(); - it != counts.end(); ++it) { - const SparseVector& cc = it->second; - cout << FD::Convert(it->first) << '\t'; - if (use_b64) { - B64::Encode(0.0, cc, &cout); - } else { - abort(); - } - cout << endl; - } - cout << flush; - total = 0; - counts.clear(); - } - } - - return 0; -} - diff --git a/training/mr_optimize_reduce.cc b/training/mr_optimize_reduce.cc deleted file mode 100644 index d490192f..00000000 --- a/training/mr_optimize_reduce.cc +++ /dev/null @@ -1,231 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "optimize.h" -#include "fdict.h" -#include "weights.h" -#include "sparse_vector.h" - -using namespace std; -namespace po = boost::program_options; - -void SanityCheck(const vector& w) { - for (int i = 0; i < w.size(); ++i) { - assert(!std::isnan(w[i])); - assert(!std::isinf(w[i])); - } -} - -struct FComp { - const vector& w_; - FComp(const vector& w) : w_(w) {} - bool operator()(int a, int b) const { - return fabs(w_[a]) > fabs(w_[b]); - } -}; - -void ShowLargestFeatures(const vector& w) { - vector fnums(w.size()); - for (int i = 0; i < w.size(); ++i) - fnums[i] = i; - vector::iterator mid = fnums.begin(); - mid += (w.size() > 10 ? 10 : w.size()); - partial_sort(fnums.begin(), mid, fnums.end(), FComp(w)); - cerr << "TOP FEATURES:"; - for (vector::iterator i = fnums.begin(); i != mid; ++i) { - cerr << ' ' << FD::Convert(*i) << '=' << w[*i]; - } - cerr << endl; -} - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input_weights,i",po::value(),"Input feature weights file") - ("output_weights,o",po::value()->default_value("-"),"Output feature weights file") - ("optimization_method,m", po::value()->default_value("lbfgs"), "Optimization method (sgd, lbfgs, rprop)") - ("state,s",po::value(),"Read (and write if output_state is not set) optimizer state from this state file. In the first iteration, the file should not exist.") - ("input_format,f",po::value()->default_value("b64"),"Encoding of the input (b64 or text)") - ("output_state,S", po::value(), "Output state file (optional override)") - ("correction_buffers,M", po::value()->default_value(10), "Number of gradients for LBFGS to maintain in memory") - ("eta,e", po::value()->default_value(0.1), "Learning rate for SGD (eta)") - ("gaussian_prior,p","Use a Gaussian prior on the weights") - ("means,u", po::value(), "File containing the means for Gaussian prior") - ("sigma_squared", po::value()->default_value(1.0), "Sigma squared term for spherical Gaussian prior"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help") || !conf->count("input_weights") || !conf->count("state")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - - const bool use_b64 = conf["input_format"].as() == "b64"; - - vector lambdas; - Weights::InitFromFile(conf["input_weights"].as(), &lambdas); - const string s_obj = "**OBJ**"; - int num_feats = FD::NumFeats(); - cerr << "Number of features: " << num_feats << endl; - const bool gaussian_prior = conf.count("gaussian_prior"); - vector means(num_feats, 0); - if (conf.count("means")) { - if (!gaussian_prior) { - cerr << "Don't use --means without --gaussian_prior!\n"; - exit(1); - } - Weights::InitFromFile(conf["means"].as(), &means); - } - boost::shared_ptr o; - const string omethod = conf["optimization_method"].as(); - if (omethod == "rprop") - o.reset(new RPropOptimizer(num_feats)); // TODO add configuration - else - o.reset(new LBFGSOptimizer(num_feats, conf["correction_buffers"].as())); - cerr << "Optimizer: " << o->Name() << endl; - string state_file = conf["state"].as(); - { - ifstream in(state_file.c_str(), ios::binary); - if (in) - o->Load(&in); - else - cerr << "No state file found, assuming ITERATION 1\n"; - } - - double objective = 0; - vector gradient(num_feats, 0); - // 0**OBJ**=12.2;Feat1=2.3;Feat2=-0.2; - // 0**OBJ**=1.1;Feat1=1.0; - int total_lines = 0; // TODO - this should be a count of the - // training instances!! - while(cin) { - string line; - getline(cin, line); - if (line.empty()) continue; - ++total_lines; - int feat; - double val; - size_t i = line.find("\t"); - assert(i != string::npos); - ++i; - if (use_b64) { - SparseVector g; - double obj; - if (!B64::Decode(&obj, &g, &line[i], line.size() - i)) { - cerr << "B64 decoder returned error, skipping gradient!\n"; - cerr << " START: " << line.substr(0,line.size() > 200 ? 200 : line.size()) << endl; - if (line.size() > 200) - cerr << " END: " << line.substr(line.size() - 200, 200) << endl; - cout << "-1\tRESTART\n"; - exit(99); - } - objective += obj; - const SparseVector& cg = g; - for (SparseVector::const_iterator it = cg.begin(); it != cg.end(); ++it) { - if (it->first >= num_feats) { - cerr << "Unexpected feature in gradient: " << FD::Convert(it->first) << endl; - abort(); - } - gradient[it->first] -= it->second; - } - } else { // text encoding - your gradients will not be accurate! - while (i < line.size()) { - size_t start = i; - while (line[i] != '=' && i < line.size()) ++i; - if (i == line.size()) { cerr << "FORMAT ERROR\n"; break; } - string fname = line.substr(start, i - start); - if (fname == s_obj) { - feat = -1; - } else { - feat = FD::Convert(line.substr(start, i - start)); - if (feat >= num_feats) { - cerr << "Unexpected feature in gradient: " << line.substr(start, i - start) << endl; - abort(); - } - } - ++i; - start = i; - while (line[i] != ';' && i < line.size()) ++i; - if (i - start == 0) continue; - val = atof(line.substr(start, i - start).c_str()); - ++i; - if (feat == -1) { - objective += val; - } else { - gradient[feat] -= val; - } - } - } - } - - if (gaussian_prior) { - const double sigsq = conf["sigma_squared"].as(); - double norm = 0; - for (int k = 1; k < lambdas.size(); ++k) { - const double& lambda_k = lambdas[k]; - if (lambda_k) { - const double param = (lambda_k - means[k]); - norm += param * param; - gradient[k] += param / sigsq; - } - } - const double reg = norm / (2.0 * sigsq); - cerr << "REGULARIZATION TERM: " << reg << endl; - objective += reg; - } - cerr << "EVALUATION #" << o->EvaluationCount() << " OBJECTIVE: " << objective << endl; - double gnorm = 0; - for (int i = 0; i < gradient.size(); ++i) - gnorm += gradient[i] * gradient[i]; - cerr << " GNORM=" << sqrt(gnorm) << endl; - vector old = lambdas; - int c = 0; - while (old == lambdas) { - ++c; - if (c > 1) { cerr << "Same lambdas, repeating optimization\n"; } - o->Optimize(objective, gradient, &lambdas); - assert(c < 5); - } - old.clear(); - SanityCheck(lambdas); - ShowLargestFeatures(lambdas); - Weights::WriteToFile(conf["output_weights"].as(), lambdas, false); - - const bool conv = o->HasConverged(); - if (conv) { cerr << "OPTIMIZER REPORTS CONVERGENCE!\n"; } - - if (conf.count("output_state")) - state_file = conf["output_state"].as(); - ofstream out(state_file.c_str(), ios::binary); - cerr << "Writing state to: " << state_file << endl; - o->Save(&out); - out.close(); - - cout << o->EvaluationCount() << "\t" << conv << endl; - return 0; -} diff --git a/training/mr_reduce_to_weights.cc b/training/mr_reduce_to_weights.cc deleted file mode 100644 index 16b47720..00000000 --- a/training/mr_reduce_to_weights.cc +++ /dev/null @@ -1,109 +0,0 @@ -#include -#include -#include -#include - -#include -#include - -#include "filelib.h" -#include "fdict.h" -#include "weights.h" -#include "sparse_vector.h" - -using namespace std; -namespace po = boost::program_options; - -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { - po::options_description opts("Configuration options"); - opts.add_options() - ("input_format,f",po::value()->default_value("b64"),"Encoding of the input (b64 or text)") - ("input,i",po::value()->default_value("-"),"Read file from") - ("output,o",po::value()->default_value("-"),"Write weights to"); - po::options_description clo("Command line options"); - clo.add_options() - ("config", po::value(), "Configuration file") - ("help,h", "Print this help message and exit"); - po::options_description dconfig_options, dcmdline_options; - dconfig_options.add(opts); - dcmdline_options.add(opts).add(clo); - - po::store(parse_command_line(argc, argv, dcmdline_options), *conf); - if (conf->count("config")) { - ifstream config((*conf)["config"].as().c_str()); - po::store(po::parse_config_file(config, dconfig_options), *conf); - } - po::notify(*conf); - - if (conf->count("help")) { - cerr << dcmdline_options << endl; - exit(1); - } -} - -void WriteWeights(const SparseVector& weights, ostream* out) { - for (SparseVector::const_iterator it = weights.begin(); - it != weights.end(); ++it) { - (*out) << FD::Convert(it->first) << " " << it->second << endl; - } -} - -int main(int argc, char** argv) { - po::variables_map conf; - InitCommandLine(argc, argv, &conf); - - const bool use_b64 = conf["input_format"].as() == "b64"; - - const string s_obj = "**OBJ**"; - // E-step - ReadFile rf(conf["input"].as()); - istream* in = rf.stream(); - assert(*in); - WriteFile wf(conf["output"].as()); - ostream* out = wf.stream(); - out->precision(17); - while(*in) { - string line; - getline(*in, line); - if (line.empty()) continue; - int feat; - double val; - size_t i = line.find("\t"); - assert(i != string::npos); - ++i; - if (use_b64) { - SparseVector g; - double obj; - if (!B64::Decode(&obj, &g, &line[i], line.size() - i)) { - cerr << "B64 decoder returned error, skipping!\n"; - continue; - } - WriteWeights(g, out); - } else { // text encoding - your counts will not be accurate! - SparseVector weights; - while (i < line.size()) { - size_t start = i; - while (line[i] != '=' && i < line.size()) ++i; - if (i == line.size()) { cerr << "FORMAT ERROR\n"; break; } - string fname = line.substr(start, i - start); - if (fname == s_obj) { - feat = -1; - } else { - feat = FD::Convert(line.substr(start, i - start)); - } - ++i; - start = i; - while (line[i] != ';' && i < line.size()) ++i; - if (i - start == 0) continue; - val = atof(line.substr(start, i - start).c_str()); - ++i; - if (feat != -1) { - weights.set_value(feat, val); - } - } - WriteWeights(weights, out); - } - } - - return 0; -} diff --git a/training/online_optimizer.cc b/training/online_optimizer.cc deleted file mode 100644 index 3ed95452..00000000 --- a/training/online_optimizer.cc +++ /dev/null @@ -1,16 +0,0 @@ -#include "online_optimizer.h" - -LearningRateSchedule::~LearningRateSchedule() {} - -double StandardLearningRate::eta(int k) const { - return eta_0_ / (1.0 + k / N_); -} - -double ExponentialDecayLearningRate::eta(int k) const { - return eta_0_ * pow(alpha_, k / N_); -} - -OnlineOptimizer::~OnlineOptimizer() {} - -void OnlineOptimizer::ResetEpochImpl() {} - diff --git a/training/online_optimizer.h b/training/online_optimizer.h deleted file mode 100644 index 28d89344..00000000 --- a/training/online_optimizer.h +++ /dev/null @@ -1,129 +0,0 @@ -#ifndef _ONL_OPTIMIZE_H_ -#define _ONL_OPTIMIZE_H_ - -#include -#include -#include -#include -#include "sparse_vector.h" - -struct LearningRateSchedule { - virtual ~LearningRateSchedule(); - // returns the learning rate for the kth iteration - virtual double eta(int k) const = 0; -}; - -// TODO in the Tsoruoaka et al. (ACL 2009) paper, they use N -// to mean the batch size in most places, but it doesn't completely -// make sense to me in the learning rate schedules-- this needs -// to be worked out to make sure they didn't mean corpus size -// in some places and batch size in others (since in the paper they -// only ever work with batch sizes of 1) -struct StandardLearningRate : public LearningRateSchedule { - StandardLearningRate( - size_t batch_size, // batch size, not corpus size! - double eta_0 = 0.2) : - eta_0_(eta_0), - N_(static_cast(batch_size)) {} - - virtual double eta(int k) const; - - private: - const double eta_0_; - const double N_; -}; - -struct ExponentialDecayLearningRate : public LearningRateSchedule { - ExponentialDecayLearningRate( - size_t batch_size, // batch size, not corpus size! - double eta_0 = 0.2, - double alpha = 0.85 // recommended by Tsuruoka et al. (ACL 2009) - ) : eta_0_(eta_0), - N_(static_cast(batch_size)), - alpha_(alpha) { - assert(alpha > 0); - assert(alpha < 1.0); - } - - virtual double eta(int k) const; - - private: - const double eta_0_; - const double N_; - const double alpha_; -}; - -class OnlineOptimizer { - public: - virtual ~OnlineOptimizer(); - OnlineOptimizer(const std::tr1::shared_ptr& s, - size_t batch_size, - const std::vector& frozen_feats = std::vector()) - : N_(batch_size),schedule_(s),k_() { - for (int i = 0; i < frozen_feats.size(); ++i) - frozen_.insert(frozen_feats[i]); - } - void ResetEpoch() { k_ = 0; ResetEpochImpl(); } - void UpdateWeights(const SparseVector& approx_g, int max_feat, SparseVector* weights) { - ++k_; - const double eta = schedule_->eta(k_); - UpdateWeightsImpl(eta, approx_g, max_feat, weights); - } - - protected: - virtual void ResetEpochImpl(); - virtual void UpdateWeightsImpl(const double& eta, const SparseVector& approx_g, int max_feat, SparseVector* weights) = 0; - const size_t N_; // number of training instances per batch - std::set frozen_; // frozen (non-optimizing) features - - private: - std::tr1::shared_ptr schedule_; - int k_; // iteration count -}; - -class CumulativeL1OnlineOptimizer : public OnlineOptimizer { - public: - CumulativeL1OnlineOptimizer(const std::tr1::shared_ptr& s, - size_t training_instances, double C, - const std::vector& frozen) : - OnlineOptimizer(s, training_instances, frozen), C_(C), u_() {} - - protected: - void ResetEpochImpl() { u_ = 0; } - void UpdateWeightsImpl(const double& eta, const SparseVector& approx_g, int max_feat, SparseVector* weights) { - u_ += eta * C_ / N_; - for (SparseVector::const_iterator it = approx_g.begin(); - it != approx_g.end(); ++it) { - if (frozen_.count(it->first) == 0) - weights->add_value(it->first, eta * it->second); - } - for (int i = 1; i < max_feat; ++i) - if (frozen_.count(i) == 0) ApplyPenalty(i, weights); - } - - private: - void ApplyPenalty(int i, SparseVector* w) { - const double z = w->value(i); - double w_i = z; - double q_i = q_.value(i); - if (w_i > 0.0) - w_i = std::max(0.0, w_i - (u_ + q_i)); - else if (w_i < 0.0) - w_i = std::min(0.0, w_i + (u_ - q_i)); - q_i += w_i - z; - if (q_i == 0.0) - q_.erase(i); - else - q_.set_value(i, q_i); - if (w_i == 0.0) - w->erase(i); - else - w->set_value(i, w_i); - } - - const double C_; // reguarlization strength - double u_; - SparseVector q_; -}; - -#endif diff --git a/training/optimize.cc b/training/optimize.cc deleted file mode 100644 index 41ac90d8..00000000 --- a/training/optimize.cc +++ /dev/null @@ -1,102 +0,0 @@ -#include "optimize.h" - -#include -#include - -#include "lbfgs.h" - -using namespace std; - -BatchOptimizer::~BatchOptimizer() {} - -void BatchOptimizer::Save(ostream* out) const { - out->write((const char*)&eval_, sizeof(eval_)); - out->write((const char*)&has_converged_, sizeof(has_converged_)); - SaveImpl(out); - unsigned int magic = 0xABCDDCBA; // should be uint32_t - out->write((const char*)&magic, sizeof(magic)); -} - -void BatchOptimizer::Load(istream* in) { - in->read((char*)&eval_, sizeof(eval_)); - in->read((char*)&has_converged_, sizeof(has_converged_)); - LoadImpl(in); - unsigned int magic = 0; // should be uint32_t - in->read((char*)&magic, sizeof(magic)); - assert(magic == 0xABCDDCBA); - cerr << Name() << " EVALUATION #" << eval_ << endl; -} - -void BatchOptimizer::SaveImpl(ostream* out) const { - (void)out; -} - -void BatchOptimizer::LoadImpl(istream* in) { - (void)in; -} - -string RPropOptimizer::Name() const { - return "RPropOptimizer"; -} - -void RPropOptimizer::OptimizeImpl(const double& obj, - const vector& g, - vector* x) { - for (int i = 0; i < g.size(); ++i) { - const double g_i = g[i]; - const double sign_i = (signbit(g_i) ? -1.0 : 1.0); - const double prod = g_i * prev_g_[i]; - if (prod > 0.0) { - const double dij = min(delta_ij_[i] * eta_plus_, delta_max_); - (*x)[i] -= dij * sign_i; - delta_ij_[i] = dij; - prev_g_[i] = g_i; - } else if (prod < 0.0) { - delta_ij_[i] = max(delta_ij_[i] * eta_minus_, delta_min_); - prev_g_[i] = 0.0; - } else { - (*x)[i] -= delta_ij_[i] * sign_i; - prev_g_[i] = g_i; - } - } -} - -void RPropOptimizer::SaveImpl(ostream* out) const { - const size_t n = prev_g_.size(); - out->write((const char*)&n, sizeof(n)); - out->write((const char*)&prev_g_[0], sizeof(double) * n); - out->write((const char*)&delta_ij_[0], sizeof(double) * n); -} - -void RPropOptimizer::LoadImpl(istream* in) { - size_t n; - in->read((char*)&n, sizeof(n)); - assert(n == prev_g_.size()); - assert(n == delta_ij_.size()); - in->read((char*)&prev_g_[0], sizeof(double) * n); - in->read((char*)&delta_ij_[0], sizeof(double) * n); -} - -string LBFGSOptimizer::Name() const { - return "LBFGSOptimizer"; -} - -LBFGSOptimizer::LBFGSOptimizer(int num_feats, int memory_buffers) : - opt_(num_feats, memory_buffers) {} - -void LBFGSOptimizer::SaveImpl(ostream* out) const { - opt_.serialize(out); -} - -void LBFGSOptimizer::LoadImpl(istream* in) { - opt_.deserialize(in); -} - -void LBFGSOptimizer::OptimizeImpl(const double& obj, - const vector& g, - vector* x) { - opt_.run(&(*x)[0], obj, &g[0]); - if (!opt_.requests_f_and_g()) opt_.run(&(*x)[0], obj, &g[0]); - // cerr << opt_ << endl; -} - diff --git a/training/optimize.h b/training/optimize.h deleted file mode 100644 index 07943b44..00000000 --- a/training/optimize.h +++ /dev/null @@ -1,92 +0,0 @@ -#ifndef _OPTIMIZE_H_ -#define _OPTIMIZE_H_ - -#include -#include -#include -#include - -#include "lbfgs.h" - -// abstract base class for first order optimizers -// order of invocation: new, Load(), Optimize(), Save(), delete -class BatchOptimizer { - public: - BatchOptimizer() : eval_(1), has_converged_(false) {} - virtual ~BatchOptimizer(); - virtual std::string Name() const = 0; - int EvaluationCount() const { return eval_; } - bool HasConverged() const { return has_converged_; } - - void Optimize(const double& obj, - const std::vector& g, - std::vector* x) { - assert(g.size() == x->size()); - ++eval_; - OptimizeImpl(obj, g, x); - scitbx::lbfgs::traditional_convergence_test converged(g.size()); - has_converged_ = converged(&(*x)[0], &g[0]); - } - - void Save(std::ostream* out) const; - void Load(std::istream* in); - protected: - virtual void SaveImpl(std::ostream* out) const; - virtual void LoadImpl(std::istream* in); - virtual void OptimizeImpl(const double& obj, - const std::vector& g, - std::vector* x) = 0; - - int eval_; - private: - bool has_converged_; -}; - -class RPropOptimizer : public BatchOptimizer { - public: - explicit RPropOptimizer(int num_vars, - double eta_plus = 1.2, - double eta_minus = 0.5, - double delta_0 = 0.1, - double delta_max = 50.0, - double delta_min = 1e-6) : - prev_g_(num_vars, 0.0), - delta_ij_(num_vars, delta_0), - eta_plus_(eta_plus), - eta_minus_(eta_minus), - delta_max_(delta_max), - delta_min_(delta_min) { - assert(eta_plus > 1.0); - assert(eta_minus > 0.0 && eta_minus < 1.0); - assert(delta_max > 0.0); - assert(delta_min > 0.0); - } - std::string Name() const; - void OptimizeImpl(const double& obj, - const std::vector& g, - std::vector* x); - void SaveImpl(std::ostream* out) const; - void LoadImpl(std::istream* in); - private: - std::vector prev_g_; - std::vector delta_ij_; - const double eta_plus_; - const double eta_minus_; - const double delta_max_; - const double delta_min_; -}; - -class LBFGSOptimizer : public BatchOptimizer { - public: - explicit LBFGSOptimizer(int num_vars, int memory_buffers = 10); - std::string Name() const; - void SaveImpl(std::ostream* out) const; - void LoadImpl(std::istream* in); - void OptimizeImpl(const double& obj, - const std::vector& g, - std::vector* x); - private: - scitbx::lbfgs::minimizer opt_; -}; - -#endif diff --git a/training/optimize_test.cc b/training/optimize_test.cc deleted file mode 100644 index bff2ca03..00000000 --- a/training/optimize_test.cc +++ /dev/null @@ -1,118 +0,0 @@ -#include -#include -#include -#include -#include "optimize.h" -#include "online_optimizer.h" -#include "sparse_vector.h" -#include "fdict.h" - -using namespace std; - -double TestOptimizer(BatchOptimizer* opt) { - cerr << "TESTING NON-PERSISTENT OPTIMIZER\n"; - - // f(x,y) = 4x1^2 + x1*x2 + x2^2 + x3^2 + 6x3 + 5 - // df/dx1 = 8*x1 + x2 - // df/dx2 = 2*x2 + x1 - // df/dx3 = 2*x3 + 6 - vector x(3); - vector g(3); - x[0] = 8; - x[1] = 8; - x[2] = 8; - double obj = 0; - do { - g[0] = 8 * x[0] + x[1]; - g[1] = 2 * x[1] + x[0]; - g[2] = 2 * x[2] + 6; - obj = 4 * x[0]*x[0] + x[0] * x[1] + x[1]*x[1] + x[2]*x[2] + 6 * x[2] + 5; - opt->Optimize(obj, g, &x); - - cerr << x[0] << " " << x[1] << " " << x[2] << endl; - cerr << " obj=" << obj << "\td/dx1=" << g[0] << " d/dx2=" << g[1] << " d/dx3=" << g[2] << endl; - } while (!opt->HasConverged()); - return obj; -} - -double TestPersistentOptimizer(BatchOptimizer* opt) { - cerr << "\nTESTING PERSISTENT OPTIMIZER\n"; - // f(x,y) = 4x1^2 + x1*x2 + x2^2 + x3^2 + 6x3 + 5 - // df/dx1 = 8*x1 + x2 - // df/dx2 = 2*x2 + x1 - // df/dx3 = 2*x3 + 6 - vector x(3); - vector g(3); - x[0] = 8; - x[1] = 8; - x[2] = 8; - double obj = 0; - string state; - bool converged = false; - while (!converged) { - g[0] = 8 * x[0] + x[1]; - g[1] = 2 * x[1] + x[0]; - g[2] = 2 * x[2] + 6; - obj = 4 * x[0]*x[0] + x[0] * x[1] + x[1]*x[1] + x[2]*x[2] + 6 * x[2] + 5; - - { - if (state.size() > 0) { - istringstream is(state, ios::binary); - opt->Load(&is); - } - opt->Optimize(obj, g, &x); - ostringstream os(ios::binary); opt->Save(&os); state = os.str(); - - } - - cerr << x[0] << " " << x[1] << " " << x[2] << endl; - cerr << " obj=" << obj << "\td/dx1=" << g[0] << " d/dx2=" << g[1] << " d/dx3=" << g[2] << endl; - converged = opt->HasConverged(); - if (!converged) { - // now screw up the state (should be undone by Load) - obj += 2.0; - g[1] = -g[2]; - vector x2 = x; - try { - opt->Optimize(obj, g, &x2); - } catch (...) { } - } - } - return obj; -} - -template -void TestOptimizerVariants(int num_vars) { - O oa(num_vars); - cerr << "-------------------------------------------------------------------------\n"; - cerr << "TESTING: " << oa.Name() << endl; - double o1 = TestOptimizer(&oa); - O ob(num_vars); - double o2 = TestPersistentOptimizer(&ob); - if (o1 != o2) { - cerr << oa.Name() << " VARIANTS PERFORMED DIFFERENTLY!\n" << o1 << " vs. " << o2 << endl; - exit(1); - } - cerr << oa.Name() << " SUCCESS\n"; -} - -using namespace std::tr1; - -void TestOnline() { - size_t N = 20; - double C = 1.0; - double eta0 = 0.2; - std::tr1::shared_ptr r(new ExponentialDecayLearningRate(N, eta0, 0.85)); - //shared_ptr r(new StandardLearningRate(N, eta0)); - CumulativeL1OnlineOptimizer opt(r, N, C, std::vector()); - assert(r->eta(10) < r->eta(1)); -} - -int main() { - int n = 3; - TestOptimizerVariants(n); - TestOptimizerVariants(n); - TestOnline(); - return 0; -} - diff --git a/training/pro/Makefile.am b/training/pro/Makefile.am new file mode 100644 index 00000000..1916b6b2 --- /dev/null +++ b/training/pro/Makefile.am @@ -0,0 +1,11 @@ +bin_PROGRAMS = \ + mr_pro_map \ + mr_pro_reduce + +mr_pro_map_SOURCES = mr_pro_map.cc +mr_pro_map_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz + +mr_pro_reduce_SOURCES = mr_pro_reduce.cc +mr_pro_reduce_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a -lz + +AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils -I$(top_srcdir)/training diff --git a/training/pro/mr_pro_generate_mapper_input.pl b/training/pro/mr_pro_generate_mapper_input.pl new file mode 100755 index 00000000..b30fc4fd --- /dev/null +++ b/training/pro/mr_pro_generate_mapper_input.pl @@ -0,0 +1,18 @@ +#!/usr/bin/perl -w +use strict; + +die "Usage: $0 HG_DIR\n" unless scalar @ARGV == 1; +my $d = shift @ARGV; +die "Can't find directory $d" unless -d $d; + +opendir(DIR, $d) or die "Can't read $d: $!"; +my @hgs = grep { /\.gz$/ } readdir(DIR); +closedir DIR; + +for my $hg (@hgs) { + my $file = $hg; + my $id = $hg; + $id =~ s/(\.json)?\.gz//; + print "$d/$file $id\n"; +} + diff --git a/training/pro/mr_pro_map.cc b/training/pro/mr_pro_map.cc new file mode 100644 index 00000000..eef40b8a --- /dev/null +++ b/training/pro/mr_pro_map.cc @@ -0,0 +1,201 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "candidate_set.h" +#include "sampler.h" +#include "filelib.h" +#include "stringlib.h" +#include "weights.h" +#include "inside_outside.h" +#include "hg_io.h" +#include "ns.h" +#include "ns_docscorer.h" + +// This is Figure 4 (Algorithm Sampler) from Hopkins&May (2011) + +using namespace std; +namespace po = boost::program_options; + +boost::shared_ptr rng; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("reference,r",po::value >(), "[REQD] Reference translation (tokenized text)") + ("weights,w",po::value(), "[REQD] Weights files from current iterations") + ("kbest_repository,K",po::value()->default_value("./kbest"),"K-best list repository (directory)") + ("input,i",po::value()->default_value("-"), "Input file to map (- is STDIN)") + ("source,s",po::value()->default_value(""), "Source file (ignored, except for AER)") + ("evaluation_metric,m",po::value()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)") + ("kbest_size,k",po::value()->default_value(1500u), "Top k-hypotheses to extract") + ("candidate_pairs,G", po::value()->default_value(5000u), "Number of pairs to sample per hypothesis (Gamma)") + ("best_pairs,X", po::value()->default_value(50u), "Number of pairs, ranked by magnitude of objective delta, to retain (Xi)") + ("random_seed,S", po::value(), "Random seed (if not specified, /dev/random will be used)") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = false; + if (!conf->count("reference")) { + cerr << "Please specify one or more references using -r \n"; + flag = true; + } + if (!conf->count("weights")) { + cerr << "Please specify weights using -w \n"; + flag = true; + } + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +struct ThresholdAlpha { + explicit ThresholdAlpha(double t = 0.05) : threshold(t) {} + double operator()(double mag) const { + if (mag < threshold) return 0.0; else return 1.0; + } + const double threshold; +}; + +struct TrainingInstance { + TrainingInstance(const SparseVector& feats, bool positive, float diff) : x(feats), y(positive), gdiff(diff) {} + SparseVector x; +#undef DEBUGGING_PRO +#ifdef DEBUGGING_PRO + vector a; + vector b; +#endif + bool y; + float gdiff; +}; +#ifdef DEBUGGING_PRO +ostream& operator<<(ostream& os, const TrainingInstance& d) { + return os << d.gdiff << " y=" << d.y << "\tA:" << TD::GetString(d.a) << "\n\tB: " << TD::GetString(d.b) << "\n\tX: " << d.x; +} +#endif + +struct DiffOrder { + bool operator()(const TrainingInstance& a, const TrainingInstance& b) const { + return a.gdiff > b.gdiff; + } +}; + +void Sample(const unsigned gamma, + const unsigned xi, + const training::CandidateSet& J_i, + const EvaluationMetric* metric, + vector* pv) { + const bool invert_score = metric->IsErrorMetric(); + vector v1, v2; + float avg_diff = 0; + for (unsigned i = 0; i < gamma; ++i) { + const size_t a = rng->inclusive(0, J_i.size() - 1)(); + const size_t b = rng->inclusive(0, J_i.size() - 1)(); + if (a == b) continue; + float ga = metric->ComputeScore(J_i[a].eval_feats); + float gb = metric->ComputeScore(J_i[b].eval_feats); + bool positive = gb < ga; + if (invert_score) positive = !positive; + const float gdiff = fabs(ga - gb); + if (!gdiff) continue; + avg_diff += gdiff; + SparseVector xdiff = (J_i[a].fmap - J_i[b].fmap).erase_zeros(); + if (xdiff.empty()) { + cerr << "Empty diff:\n " << TD::GetString(J_i[a].ewords) << endl << "x=" << J_i[a].fmap << endl; + cerr << " " << TD::GetString(J_i[b].ewords) << endl << "x=" << J_i[b].fmap << endl; + continue; + } + v1.push_back(TrainingInstance(xdiff, positive, gdiff)); +#ifdef DEBUGGING_PRO + v1.back().a = J_i[a].hyp; + v1.back().b = J_i[b].hyp; + cerr << "N: " << v1.back() << endl; +#endif + } + avg_diff /= v1.size(); + + for (unsigned i = 0; i < v1.size(); ++i) { + double p = 1.0 / (1.0 + exp(-avg_diff - v1[i].gdiff)); + // cerr << "avg_diff=" << avg_diff << " gdiff=" << v1[i].gdiff << " p=" << p << endl; + if (rng->next() < p) v2.push_back(v1[i]); + } + vector::iterator mid = v2.begin() + xi; + if (xi > v2.size()) mid = v2.end(); + partial_sort(v2.begin(), mid, v2.end(), DiffOrder()); + copy(v2.begin(), mid, back_inserter(*pv)); +#ifdef DEBUGGING_PRO + if (v2.size() >= 5) { + for (int i =0; i < (mid - v2.begin()); ++i) { + cerr << v2[i] << endl; + } + cerr << pv->back() << endl; + } +#endif +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + if (conf.count("random_seed")) + rng.reset(new MT19937(conf["random_seed"].as())); + else + rng.reset(new MT19937); + const string evaluation_metric = conf["evaluation_metric"].as(); + + EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); + DocumentScorer ds(metric, conf["reference"].as >()); + cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl; + + Hypergraph hg; + string last_file; + ReadFile in_read(conf["input"].as()); + istream &in=*in_read.stream(); + const unsigned kbest_size = conf["kbest_size"].as(); + const unsigned gamma = conf["candidate_pairs"].as(); + const unsigned xi = conf["best_pairs"].as(); + string weightsf = conf["weights"].as(); + vector weights; + Weights::InitFromFile(weightsf, &weights); + string kbest_repo = conf["kbest_repository"].as(); + MkDirP(kbest_repo); + while(in) { + vector v; + string line; + getline(in, line); + if (line.empty()) continue; + istringstream is(line); + int sent_id; + string file; + // path-to-file (JSON) sent_id + is >> file >> sent_id; + ReadFile rf(file); + ostringstream os; + training::CandidateSet J_i; + os << kbest_repo << "/kbest." << sent_id << ".txt.gz"; + const string kbest_file = os.str(); + if (FileExists(kbest_file)) + J_i.ReadFromFile(kbest_file); + HypergraphIO::ReadFromJSON(rf.stream(), &hg); + hg.Reweight(weights); + J_i.AddKBestCandidates(hg, kbest_size, ds[sent_id]); + J_i.WriteToFile(kbest_file); + + Sample(gamma, xi, J_i, metric, &v); + for (unsigned i = 0; i < v.size(); ++i) { + const TrainingInstance& vi = v[i]; + cout << vi.y << "\t" << vi.x << endl; + cout << (!vi.y) << "\t" << (vi.x * -1.0) << endl; + } + } + return 0; +} + diff --git a/training/pro/mr_pro_reduce.cc b/training/pro/mr_pro_reduce.cc new file mode 100644 index 00000000..5ef9b470 --- /dev/null +++ b/training/pro/mr_pro_reduce.cc @@ -0,0 +1,286 @@ +#include +#include +#include +#include +#include + +#include +#include + +#include "filelib.h" +#include "weights.h" +#include "sparse_vector.h" +#include "optimize.h" +#include "liblbfgs/lbfgs++.h" + +using namespace std; +namespace po = boost::program_options; + +// since this is a ranking model, there should be equal numbers of +// positive and negative examples, so the bias should be 0 +static const double MAX_BIAS = 1e-10; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("weights,w", po::value(), "Weights from previous iteration (used as initialization and interpolation") + ("regularization_strength,C",po::value()->default_value(500.0), "l2 regularization strength") + ("l1",po::value()->default_value(0.0), "l1 regularization strength") + ("regularize_to_weights,y",po::value()->default_value(5000.0), "Differences in learned weights to previous weights are penalized with an l2 penalty with this strength; 0.0 = no effect") + ("memory_buffers,m",po::value()->default_value(100), "Number of memory buffers (LBFGS)") + ("min_reg,r",po::value()->default_value(0.01), "When tuning (-T) regularization strength, minimum regularization strenght") + ("max_reg,R",po::value()->default_value(1e6), "When tuning (-T) regularization strength, maximum regularization strenght") + ("testset,t",po::value(), "Optional held-out test set") + ("tune_regularizer,T", "Use the held out test set (-t) to tune the regularization strength") + ("interpolate_with_weights,p",po::value()->default_value(1.0), "[deprecated] Output weights are p*w + (1-p)*w_prev; 1.0 = no effect") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +void ParseSparseVector(string& line, size_t cur, SparseVector* out) { + SparseVector& x = *out; + size_t last_start = cur; + size_t last_comma = string::npos; + while(cur <= line.size()) { + if (line[cur] == ' ' || cur == line.size()) { + if (!(cur > last_start && last_comma != string::npos && cur > last_comma)) { + cerr << "[ERROR] " << line << endl << " position = " << cur << endl; + exit(1); + } + const int fid = FD::Convert(line.substr(last_start, last_comma - last_start)); + if (cur < line.size()) line[cur] = 0; + const weight_t val = strtod(&line[last_comma + 1], NULL); + x.set_value(fid, val); + + last_comma = string::npos; + last_start = cur+1; + } else { + if (line[cur] == '=') + last_comma = cur; + } + ++cur; + } +} + +void ReadCorpus(istream* pin, vector > >* corpus) { + istream& in = *pin; + corpus->clear(); + bool flag = false; + int lc = 0; + string line; + SparseVector x; + while(getline(in, line)) { + ++lc; + if (lc % 1000 == 0) { cerr << '.'; flag = true; } + if (lc % 40000 == 0) { cerr << " [" << lc << "]\n"; flag = false; } + if (line.empty()) continue; + const size_t ks = line.find("\t"); + assert(string::npos != ks); + assert(ks == 1); + const bool y = line[0] == '1'; + x.clear(); + ParseSparseVector(line, ks + 1, &x); + corpus->push_back(make_pair(y, x)); + } + if (flag) cerr << endl; +} + +void GradAdd(const SparseVector& v, const double scale, weight_t* acc) { + for (SparseVector::const_iterator it = v.begin(); + it != v.end(); ++it) { + acc[it->first] += it->second * scale; + } +} + +double ApplyRegularizationTerms(const double C, + const double T, + const vector& weights, + const vector& prev_weights, + weight_t* g) { + double reg = 0; + for (size_t i = 0; i < weights.size(); ++i) { + const double prev_w_i = (i < prev_weights.size() ? prev_weights[i] : 0.0); + const double& w_i = weights[i]; + reg += C * w_i * w_i; + g[i] += 2 * C * w_i; + + const double diff_i = w_i - prev_w_i; + reg += T * diff_i * diff_i; + g[i] += 2 * T * diff_i; + } + return reg; +} + +double TrainingInference(const vector& x, + const vector > >& corpus, + weight_t* g = NULL) { + double cll = 0; + for (int i = 0; i < corpus.size(); ++i) { + const double dotprod = corpus[i].second.dot(x) + (x.size() ? x[0] : weight_t()); // x[0] is bias + double lp_false = dotprod; + double lp_true = -dotprod; + if (0 < lp_true) { + lp_true += log1p(exp(-lp_true)); + lp_false = log1p(exp(lp_false)); + } else { + lp_true = log1p(exp(lp_true)); + lp_false += log1p(exp(-lp_false)); + } + lp_true*=-1; + lp_false*=-1; + if (corpus[i].first) { // true label + cll -= lp_true; + if (g) { + // g -= corpus[i].second * exp(lp_false); + GradAdd(corpus[i].second, -exp(lp_false), g); + g[0] -= exp(lp_false); // bias + } + } else { // false label + cll -= lp_false; + if (g) { + // g += corpus[i].second * exp(lp_true); + GradAdd(corpus[i].second, exp(lp_true), g); + g[0] += exp(lp_true); // bias + } + } + } + return cll; +} + +struct ProLoss { + ProLoss(const vector > >& tr, + const vector > >& te, + const double c, + const double t, + const vector& px) : training(tr), testing(te), C(c), T(t), prev_x(px){} + double operator()(const vector& x, double* g) const { + fill(g, g + x.size(), 0.0); + double cll = TrainingInference(x, training, g); + tppl = 0; + if (testing.size()) + tppl = pow(2.0, TrainingInference(x, testing, g) / (log(2) * testing.size())); + double ppl = cll / log(2); + ppl /= training.size(); + ppl = pow(2.0, ppl); + double reg = ApplyRegularizationTerms(C, T, x, prev_x, g); + return cll + reg; + } + const vector > >& training, testing; + const double C, T; + const vector& prev_x; + mutable double tppl; +}; + +// return held-out log likelihood +double LearnParameters(const vector > >& training, + const vector > >& testing, + const double C, + const double C1, + const double T, + const unsigned memory_buffers, + const vector& prev_x, + vector* px) { + assert(px->size() == prev_x.size()); + ProLoss loss(training, testing, C, T, prev_x); + LBFGS lbfgs(px, loss, memory_buffers, C1); + lbfgs.MinimizeFunction(); + return loss.tppl; +} + +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + string line; + vector > > training, testing; + const bool tune_regularizer = conf.count("tune_regularizer"); + if (tune_regularizer && !conf.count("testset")) { + cerr << "--tune_regularizer requires --testset to be set\n"; + return 1; + } + const double min_reg = conf["min_reg"].as(); + const double max_reg = conf["max_reg"].as(); + double C = conf["regularization_strength"].as(); // will be overridden if parameter is tuned + double C1 = conf["l1"].as(); // will be overridden if parameter is tuned + const double T = conf["regularize_to_weights"].as(); + assert(C >= 0.0); + assert(min_reg >= 0.0); + assert(max_reg >= 0.0); + assert(max_reg > min_reg); + const double psi = conf["interpolate_with_weights"].as(); + if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; return 1; } + ReadCorpus(&cin, &training); + if (conf.count("testset")) { + ReadFile rf(conf["testset"].as()); + ReadCorpus(rf.stream(), &testing); + } + cerr << "Number of features: " << FD::NumFeats() << endl; + + vector x, prev_x; // x[0] is bias + if (conf.count("weights")) { + Weights::InitFromFile(conf["weights"].as(), &x); + x.resize(FD::NumFeats()); + prev_x = x; + } else { + x.resize(FD::NumFeats()); + prev_x = x; + } + cerr << " Number of features: " << x.size() << endl; + cerr << "Number of training examples: " << training.size() << endl; + cerr << "Number of testing examples: " << testing.size() << endl; + double tppl = 0.0; + vector > sp; + vector smoothed; + if (tune_regularizer) { + C = min_reg; + const double steps = 18; + double sweep_factor = exp((log(max_reg) - log(min_reg)) / steps); + cerr << "SWEEP FACTOR: " << sweep_factor << endl; + while(C < max_reg) { + cerr << "C=" << C << "\tT=" <(), prev_x, &x); + sp.push_back(make_pair(C, tppl)); + C *= sweep_factor; + } + smoothed.resize(sp.size(), 0); + smoothed[0] = sp[0].second; + smoothed.back() = sp.back().second; + for (int i = 1; i < sp.size()-1; ++i) { + double prev = sp[i-1].second; + double next = sp[i+1].second; + double cur = sp[i].second; + smoothed[i] = (prev*0.2) + cur * 0.6 + (0.2*next); + } + double best_ppl = 9999999; + unsigned best_i = 0; + for (unsigned i = 0; i < sp.size(); ++i) { + if (smoothed[i] < best_ppl) { + best_ppl = smoothed[i]; + best_i = i; + } + } + C = sp[best_i].first; + } // tune regularizer + tppl = LearnParameters(training, testing, C, C1, T, conf["memory_buffers"].as(), prev_x, &x); + if (conf.count("weights")) { + for (int i = 1; i < x.size(); ++i) { + x[i] = (x[i] * psi) + prev_x[i] * (1.0 - psi); + } + } + cout.precision(15); + cout << "# C=" << C << "\theld out perplexity="; + if (tppl) { cout << tppl << endl; } else { cout << "N/A\n"; } + if (sp.size()) { + cout << "# Parameter sweep:\n"; + for (int i = 0; i < sp.size(); ++i) { + cout << "# " << sp[i].first << "\t" << sp[i].second << "\t" << smoothed[i] << endl; + } + } + Weights::WriteToFile("-", x); + return 0; +} diff --git a/training/pro/pro.pl b/training/pro/pro.pl new file mode 100755 index 00000000..3b30c379 --- /dev/null +++ b/training/pro/pro.pl @@ -0,0 +1,555 @@ +#!/usr/bin/env perl +use strict; +use File::Basename qw(basename); +my @ORIG_ARGV=@ARGV; +use Cwd qw(getcwd); +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment", "$SCRIPT_DIR/../utils"; } + +# Skip local config (used for distributing jobs) if we're running in local-only mode +use LocalConfig; +use Getopt::Long; +use IPC::Open2; +use POSIX ":sys_wait_h"; +my $QSUB_CMD = qsub_args(mert_memory()); +my $default_jobs = env_default_jobs(); + +my $UTILS_DIR="$SCRIPT_DIR/../utils"; +require "$UTILS_DIR/libcall.pl"; + +# Default settings +my $srcFile; +my $refFiles; +my $bin_dir = $SCRIPT_DIR; +die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; +my $FAST_SCORE="$bin_dir/../../mteval/fast_score"; +die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; +my $MAPINPUT = "$bin_dir/mr_pro_generate_mapper_input.pl"; +my $MAPPER = "$bin_dir/mr_pro_map"; +my $REDUCER = "$bin_dir/mr_pro_reduce"; +my $parallelize = "$UTILS_DIR/parallelize.pl"; +my $libcall = "$UTILS_DIR/libcall.pl"; +my $sentserver = "$UTILS_DIR/sentserver"; +my $sentclient = "$UTILS_DIR/sentclient"; +my $LocalConfig = "$SCRIPT_DIR/../../environment/LocalConfig.pm"; + +my $SCORER = $FAST_SCORE; +die "Can't find $MAPPER" unless -x $MAPPER; +my $cdec = "$bin_dir/../../decoder/cdec"; +die "Can't find decoder in $cdec" unless -x $cdec; +die "Can't find $parallelize" unless -x $parallelize; +die "Can't find $libcall" unless -e $libcall; +my $decoder = $cdec; +my $lines_per_mapper = 30; +my $iteration = 1; +my $best_weights; +my $psi = 1; +my $default_max_iter = 30; +my $max_iterations = $default_max_iter; +my $jobs = $default_jobs; # number of decode nodes +my $pmem = "4g"; +my $disable_clean = 0; +my %seen_weights; +my $help = 0; +my $epsilon = 0.0001; +my $dryrun = 0; +my $last_score = -10000000; +my $metric = "ibm_bleu"; +my $dir; +my $iniFile; +my $weights; +my $use_make = 1; # use make to parallelize +my $useqsub = 0; +my $initial_weights; +my $pass_suffix = ''; +my $devset; + +# regularization strength +my $reg = 500; +my $reg_previous = 5000; + +# Process command-line options +if (GetOptions( + "config=s" => \$iniFile, + "weights=s" => \$initial_weights, + "devset=s" => \$devset, + "jobs=i" => \$jobs, + "metric=s" => \$metric, + "pass-suffix=s" => \$pass_suffix, + "qsub" => \$useqsub, + "help" => \$help, + "reg=f" => \$reg, + "reg-previous=f" => \$reg_previous, + "output-dir=s" => \$dir, +) == 0 || @ARGV!=0 || $help) { + print_help(); + exit; +} + +if ($useqsub) { + $use_make = 0; + die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); +} + +my @missing_args = (); +if (!defined $iniFile) { push @missing_args, "--config"; } +if (!defined $devset) { push @missing_args, "--devset"; } +if (!defined $initial_weights) { push @missing_args, "--weights"; } +die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args); + +if ($metric =~ /^(combi|ter)$/i) { + $lines_per_mapper = 5; +} + +my $host =check_output("hostname"); chomp $host; +my $bleu; +my $interval_count = 0; +my $logfile; +my $projected_score; + +# used in sorting scores +my $DIR_FLAG = '-r'; +if ($metric =~ /^ter$|^aer$/i) { + $DIR_FLAG = ''; +} + +unless ($dir){ + $dir = 'pro'; +} +unless ($dir =~ /^\//){ # convert relative path to absolute path + my $basedir = check_output("pwd"); + chomp $basedir; + $dir = "$basedir/$dir"; +} + +# Initializations and helper functions +srand; + +my @childpids = (); +my @cleanupcmds = (); + +sub cleanup { + print STDERR "Cleanup...\n"; + for my $pid (@childpids){ unchecked_call("kill $pid"); } + for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); } + exit 1; +}; +# Always call cleanup, no matter how we exit +*CORE::GLOBAL::exit = + sub{ cleanup(); }; +$SIG{INT} = "cleanup"; +$SIG{TERM} = "cleanup"; +$SIG{HUP} = "cleanup"; + +my $decoderBase = check_output("basename $decoder"); chomp $decoderBase; +my $newIniFile = "$dir/$decoderBase.ini"; +my $inputFileName = "$dir/input"; +my $user = $ENV{"USER"}; + + +# process ini file +-e $iniFile || die "Error: could not open $iniFile for reading\n"; +open(INI, $iniFile); + +if (-e $dir) { + die "ERROR: working dir $dir already exists\n\n"; +} else { + mkdir "$dir" or die "Can't mkdir $dir: $!"; + mkdir "$dir/hgs" or die; + mkdir "$dir/scripts" or die; + print STDERR <) { $devSize++; } +close F; + +unless($best_weights){ $best_weights = $weights; } +unless($projected_score){ $projected_score = 0.0; } +$seen_weights{$weights} = 1; + +my $random_seed = int(time / 1000); +my $lastWeightsFile; +my $lastPScore = 0; +# main optimization loop +my @allweights; +while (1){ + print STDERR "\n\nITERATION $iteration\n==========\n"; + + if ($iteration > $max_iterations){ + print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n"; + last; + } + # iteration-specific files + my $runFile="$dir/run.raw.$iteration"; + my $onebestFile="$dir/1best.$iteration"; + my $logdir="$dir/logs.$iteration"; + my $decoderLog="$logdir/decoder.sentserver.log.$iteration"; + my $scorerLog="$logdir/scorer.log.$iteration"; + check_call("mkdir -p $logdir"); + + + #decode + print STDERR "RUNNING DECODER AT "; + print STDERR unchecked_output("date"); + my $im1 = $iteration - 1; + my $weightsFile="$dir/weights.$im1"; + push @allweights, "-w $dir/weights.$im1"; + `rm -f $dir/hgs/*.gz`; + my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; + my $pcmd; + if ($use_make) { + $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --"; + } else { + $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --"; + } + my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; + print STDERR "COMMAND:\n$cmd\n"; + check_bash_call($cmd); + my $num_hgs; + my $num_topbest; + my $retries = 0; + while($retries < 5) { + $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); + $num_topbest = check_output("wc -l < $runFile"); + print STDERR "NUMBER OF HGs: $num_hgs\n"; + print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n"; + if($devSize == $num_hgs && $devSize == $num_topbest) { + last; + } else { + print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n"; + sleep(3); + } + $retries++; + } + die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); + my $dec_score = check_output("cat $runFile | $SCORER -r $refs -m $metric"); + chomp $dec_score; + print STDERR "DECODER SCORE: $dec_score\n"; + + # save space + check_call("gzip -f $runFile"); + check_call("gzip -f $decoderLog"); + + # run optimizer + print STDERR "RUNNING OPTIMIZER AT "; + print STDERR unchecked_output("date"); + print STDERR " - GENERATE TRAINING EXEMPLARS\n"; + my $mergeLog="$logdir/prune-merge.log.$iteration"; + + my $score = 0; + my $icc = 0; + my $inweights="$dir/weights.$im1"; + $cmd="$MAPINPUT $dir/hgs > $dir/agenda.$im1"; + print STDERR "COMMAND:\n$cmd\n"; + check_call($cmd); + check_call("mkdir -p $dir/splag.$im1"); + $cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1 $dir/splag.$im1/mapinput."; + print STDERR "COMMAND:\n$cmd\n"; + check_call($cmd); + opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!"; + my @shards = grep { /^mapinput\./ } readdir(DIR); + closedir DIR; + die "No shards!" unless scalar @shards > 0; + my $joblist = ""; + my $nmappers = 0; + @cleanupcmds = (); + my %o2i = (); + my $first_shard = 1; + my $mkfile; # only used with makefiles + my $mkfilename; + if ($use_make) { + $mkfilename = "$dir/splag.$im1/domap.mk"; + open $mkfile, ">$mkfilename" or die "Couldn't write $mkfilename: $!"; + print $mkfile "all: $dir/splag.$im1/map.done\n\n"; + } + my @mkouts = (); # only used with makefiles + my @mapoutputs = (); + for my $shard (@shards) { + my $mapoutput = $shard; + my $client_name = $shard; + $client_name =~ s/mapinput.//; + $client_name = "pro.$client_name"; + $mapoutput =~ s/mapinput/mapoutput/; + push @mapoutputs, "$dir/splag.$im1/$mapoutput"; + $o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard"; + my $script = "$MAPPER -s $srcFile -m $metric -r $refs -w $inweights -K $dir/kbest < $dir/splag.$im1/$shard > $dir/splag.$im1/$mapoutput"; + if ($use_make) { + my $script_file = "$dir/scripts/map.$shard"; + open F, ">$script_file" or die "Can't write $script_file: $!"; + print F "#!/bin/bash\n"; + print F "$script\n"; + close F; + my $output = "$dir/splag.$im1/$mapoutput"; + push @mkouts, $output; + chmod(0755, $script_file) or die "Can't chmod $script_file: $!"; + if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } + print $mkfile "$output: $dir/splag.$im1/$shard\n\t$script_file\n\n"; + } else { + my $script_file = "$dir/scripts/map.$shard"; + open F, ">$script_file" or die "Can't write $script_file: $!"; + print F "$script\n"; + close F; + if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } + + $nmappers++; + my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; + my $jobid = check_output("$qcmd"); + chomp $jobid; + $jobid =~ s/^(\d+)(.*?)$/\1/g; + $jobid =~ s/^Your job (\d+) .*$/\1/; + push(@cleanupcmds, "qdel $jobid 2> /dev/null"); + print STDERR " $jobid"; + if ($joblist == "") { $joblist = $jobid; } + else {$joblist = $joblist . "\|" . $jobid; } + } + } + my @dev_outs = (); + my @devtest_outs = (); + @dev_outs = @mapoutputs; + if ($use_make) { + print $mkfile "$dir/splag.$im1/map.done: @mkouts\n\ttouch $dir/splag.$im1/map.done\n\n"; + close $mkfile; + my $mcmd = "make -j $jobs -f $mkfilename"; + print STDERR "\nExecuting: $mcmd\n"; + check_call($mcmd); + } else { + print STDERR "\nLaunched $nmappers mappers.\n"; + sleep 8; + print STDERR "Waiting for mappers to complete...\n"; + while ($nmappers > 0) { + sleep 5; + my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '"))); + $nmappers = scalar @livejobs; + } + print STDERR "All mappers complete.\n"; + } + my $tol = 0; + my $til = 0; + my $dev_test_file = "$dir/splag.$im1/devtest.gz"; + print STDERR "\nRUNNING CLASSIFIER (REDUCER)\n"; + print STDERR unchecked_output("date"); + $cmd="cat @dev_outs | $REDUCER -w $dir/weights.$im1 -C $reg -y $reg_previous --interpolate_with_weights $psi"; + $cmd .= " > $dir/weights.$iteration"; + print STDERR "COMMAND:\n$cmd\n"; + check_bash_call($cmd); + $lastWeightsFile = "$dir/weights.$iteration"; + $lastPScore = $score; + $iteration++; + print STDERR "\n==========\n"; +} + + +check_call("cp $lastWeightsFile $dir/weights.final"); +print STDERR "\nFINAL WEIGHTS: $dir/weights.final\n(Use -w with the decoder)\n\n"; +print STDOUT "$dir/weights.final\n"; + +exit 0; + +sub read_weights_file { + my ($file) = @_; + open F, "<$file" or die "Couldn't read $file: $!"; + my @r = (); + my $pm = -1; + while() { + next if /^#/; + next if /^\s*$/; + chomp; + if (/^(.+)\s+(.+)$/) { + my $m = $1; + my $w = $2; + die "Weights out of order: $m <= $pm" unless $m > $pm; + push @r, $w; + } else { + warn "Unexpected feature name in weight file: $_"; + } + } + close F; + return join ' ', @r; +} + +sub enseg { + my $src = shift; + my $newsrc = shift; + open(SRC, $src); + open(NEWSRC, ">$newsrc"); + my $i=0; + while (my $line=){ + chomp $line; + if ($line =~ /^\s* tags, you must include a zero-based id attribute"; + } + } else { + print NEWSRC "$line\n"; + } + $i++; + } + close SRC; + close NEWSRC; + die "Empty dev set!" if ($i == 0); +} + +sub print_help { + + my $executable = basename($0); chomp $executable; + print << "Help"; + +Usage: $executable [options] + + $executable [options] + Runs a complete PRO optimization using the ini file specified. + +Required: + + --config + Decoder configuration file. + + --devset + Dev set source and reference data. + + --weights + Initial weights file (use empty file to start from 0) + +General options: + + --help + Print this message and exit. + + --max-iterations + Maximum number of iterations to run. If not specified, defaults + to $default_max_iter. + + --metric + Metric to optimize. + Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi + + --pass-suffix + If the decoder is doing multi-pass decoding, the pass suffix "2", + "3", etc., is used to control what iteration of weights is set. + + --workdir + Directory for intermediate and output files. If not specified, the + name is derived from the ini filename. Assuming that the ini + filename begins with the decoder name and ends with ini, the default + name of the working directory is inferred from the middle part of + the filename. E.g. an ini file named decoder.foo.ini would have + a default working directory name foo. + +Regularization options: + + --reg + l2 regularization strength [default=500]. The greater this value, + the closer to zero the weights will be. + + --reg-previous + l2 penalty for moving away from the weights from the previous + iteration. [default=5000]. The greater this value, the closer + to the previous iteration's weights the next iteration's weights + will be. + +Job control options: + + --jobs + Number of decoder processes to run in parallel. [default=$default_jobs] + + --qsub + Use qsub to run jobs in parallel (qsub must be configured in + environment/LocalEnvironment.pm) + + --pmem + Amount of physical memory requested for parallel decoding jobs + (used with qsub requests only) + +Deprecated options: + + --interpolate-with-weights + [deprecated] At each iteration the resulting weights are + interpolated with the weights from the previous iteration, with + this factor. [default=1.0, i.e., no effect] + +Help +} + +sub convert { + my ($str) = @_; + my @ps = split /;/, $str; + my %dict = (); + for my $p (@ps) { + my ($k, $v) = split /=/, $p; + $dict{$k} = $v; + } + return %dict; +} + + +sub cmdline { + return join ' ',($0,@ORIG_ARGV); +} + +#buggy: last arg gets quoted sometimes? +my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; +my $shell_escape_in_quote=qr{[\\"\$`!]}; + +sub escape_shell { + my ($arg)=@_; + return undef unless defined $arg; + if ($arg =~ /$is_shell_special/) { + $arg =~ s/($shell_escape_in_quote)/\\$1/g; + return "\"$arg\""; + } + return $arg; +} + +sub escaped_shell_args { + return map {local $_=$_;chomp;escape_shell($_)} @_; +} + +sub escaped_shell_args_str { + return join ' ',&escaped_shell_args(@_); +} + +sub escaped_cmdline { + return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); +} + +sub split_devset { + my ($infile, $outsrc, $outref) = @_; + open F, "<$infile" or die "Can't read $infile: $!"; + open S, ">$outsrc" or die "Can't write $outsrc: $!"; + open R, ">$outref" or die "Can't write $outref: $!"; + while() { + chomp; + my ($src, @refs) = split /\s*\|\|\|\s*/; + die "Malformed devset line: $_\n" unless scalar @refs > 0; + print S "$src\n"; + print R join(' ||| ', @refs) . "\n"; + } + close R; + close S; + close F; +} + diff --git a/training/rampion/Makefile.am b/training/rampion/Makefile.am new file mode 100644 index 00000000..1633d0f7 --- /dev/null +++ b/training/rampion/Makefile.am @@ -0,0 +1,6 @@ +bin_PROGRAMS = rampion_cccp + +rampion_cccp_SOURCES = rampion_cccp.cc +rampion_cccp_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz + +AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils diff --git a/training/rampion/rampion.pl b/training/rampion/rampion.pl new file mode 100755 index 00000000..ae084db6 --- /dev/null +++ b/training/rampion/rampion.pl @@ -0,0 +1,540 @@ +#!/usr/bin/env perl +use strict; +my @ORIG_ARGV=@ARGV; +use Cwd qw(getcwd); +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment", "$SCRIPT_DIR/../utils"; } + +# Skip local config (used for distributing jobs) if we're running in local-only mode +use LocalConfig; +use Getopt::Long; +use IPC::Open2; +use POSIX ":sys_wait_h"; +my $QSUB_CMD = qsub_args(mert_memory()); +my $default_jobs = env_default_jobs(); + +my $UTILS_DIR="$SCRIPT_DIR/../utils"; +require "$UTILS_DIR/libcall.pl"; + +# Default settings +my $srcFile; +my $refFiles; +my $bin_dir = $SCRIPT_DIR; +die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; +my $FAST_SCORE="$bin_dir/../../mteval/fast_score"; +die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; +my $MAPINPUT = "$bin_dir/rampion_generate_input.pl"; +my $MAPPER = "$bin_dir/rampion_cccp"; +my $parallelize = "$UTILS_DIR/parallelize.pl"; +my $libcall = "$UTILS_DIR/libcall.pl"; +my $sentserver = "$UTILS_DIR/sentserver"; +my $sentclient = "$UTILS_DIR/sentclient"; +my $LocalConfig = "$SCRIPT_DIR/../../environment/LocalConfig.pm"; + +my $SCORER = $FAST_SCORE; +die "Can't find $MAPPER" unless -x $MAPPER; +my $cdec = "$bin_dir/../../decoder/cdec"; +die "Can't find decoder in $cdec" unless -x $cdec; +die "Can't find $parallelize" unless -x $parallelize; +die "Can't find $libcall" unless -e $libcall; +my $decoder = $cdec; +my $lines_per_mapper = 30; +my $iteration = 1; +my $best_weights; +my $psi = 1; +my $default_max_iter = 30; +my $max_iterations = $default_max_iter; +my $jobs = $default_jobs; # number of decode nodes +my $pmem = "4g"; +my $disable_clean = 0; +my %seen_weights; +my $help = 0; +my $epsilon = 0.0001; +my $dryrun = 0; +my $last_score = -10000000; +my $metric = "ibm_bleu"; +my $dir; +my $iniFile; +my $weights; +my $use_make = 1; # use make to parallelize +my $useqsub = 0; +my $initial_weights; +my $pass_suffix = ''; +my $cpbin=1; + +# regularization strength +my $tune_regularizer = 0; +my $reg = 500; +my $reg_previous = 5000; +my $dont_accum = 0; + +# Process command-line options +Getopt::Long::Configure("no_auto_abbrev"); +if (GetOptions( + "jobs=i" => \$jobs, + "dont-clean" => \$disable_clean, + "dont-accumulate" => \$dont_accum, + "pass-suffix=s" => \$pass_suffix, + "qsub" => \$useqsub, + "dry-run" => \$dryrun, + "epsilon=s" => \$epsilon, + "help" => \$help, + "weights=s" => \$initial_weights, + "reg=f" => \$reg, + "use-make=i" => \$use_make, + "max-iterations=i" => \$max_iterations, + "pmem=s" => \$pmem, + "cpbin!" => \$cpbin, + "ref-files=s" => \$refFiles, + "metric=s" => \$metric, + "source-file=s" => \$srcFile, + "workdir=s" => \$dir, +) == 0 || @ARGV!=1 || $help) { + print_help(); + exit; +} + +die "--tune-regularizer is no longer supported with --reg-previous and --reg. Please tune manually.\n" if $tune_regularizer; + +if ($useqsub) { + $use_make = 0; + die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); +} + +my @missing_args = (); +if (!defined $srcFile) { push @missing_args, "--source-file"; } +if (!defined $refFiles) { push @missing_args, "--ref-files"; } +if (!defined $initial_weights) { push @missing_args, "--weights"; } +die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args); + +if ($metric =~ /^(combi|ter)$/i) { + $lines_per_mapper = 5; +} + +($iniFile) = @ARGV; + + +sub write_config; +sub enseg; +sub print_help; + +my $nodelist; +my $host =check_output("hostname"); chomp $host; +my $bleu; +my $interval_count = 0; +my $logfile; +my $projected_score; + +# used in sorting scores +my $DIR_FLAG = '-r'; +if ($metric =~ /^ter$|^aer$/i) { + $DIR_FLAG = ''; +} + +my $refs_comma_sep = get_comma_sep_refs('r',$refFiles); + +unless ($dir){ + $dir = "rampion"; +} +unless ($dir =~ /^\//){ # convert relative path to absolute path + my $basedir = check_output("pwd"); + chomp $basedir; + $dir = "$basedir/$dir"; +} + + +# Initializations and helper functions +srand; + +my @childpids = (); +my @cleanupcmds = (); + +sub cleanup { + print STDERR "Cleanup...\n"; + for my $pid (@childpids){ unchecked_call("kill $pid"); } + for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); } + exit 1; +}; +# Always call cleanup, no matter how we exit +*CORE::GLOBAL::exit = + sub{ cleanup(); }; +$SIG{INT} = "cleanup"; +$SIG{TERM} = "cleanup"; +$SIG{HUP} = "cleanup"; + +my $decoderBase = check_output("basename $decoder"); chomp $decoderBase; +my $newIniFile = "$dir/$decoderBase.ini"; +my $inputFileName = "$dir/input"; +my $user = $ENV{"USER"}; +# process ini file +-e $iniFile || die "Error: could not open $iniFile for reading\n"; +open(INI, $iniFile); + +use File::Basename qw(basename); +#pass bindir, refs to vars holding bin +sub modbin { + local $_; + my $bindir=shift; + check_call("mkdir -p $bindir"); + -d $bindir || die "couldn't make bindir $bindir"; + for (@_) { + my $src=$$_; + $$_="$bindir/".basename($src); + check_call("cp -p $src $$_"); + } +} +sub dirsize { + opendir ISEMPTY,$_[0]; + return scalar(readdir(ISEMPTY))-1; +} +my @allweights; +if ($dryrun){ + write_config(*STDERR); + exit 0; +} else { + if (-e $dir && dirsize($dir)>1 && -e "$dir/hgs" ){ # allow preexisting logfile, binaries, but not dist-pro.pl outputs + die "ERROR: working dir $dir already exists\n\n"; + } else { + -e $dir || mkdir $dir; + mkdir "$dir/hgs"; + modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$parallelize,\$sentserver,\$sentclient,\$libcall) if $cpbin; + mkdir "$dir/scripts"; + my $cmdfile="$dir/rerun-pro.sh"; + open CMD,'>',$cmdfile; + print CMD "cd ",&getcwd,"\n"; +# print CMD &escaped_cmdline,"\n"; #buggy - last arg is quoted. + my $cline=&cmdline."\n"; + print CMD $cline; + close CMD; + print STDERR $cline; + chmod(0755,$cmdfile); + check_call("cp $initial_weights $dir/weights.0"); + die "Can't find weights.0" unless (-e "$dir/weights.0"); + } + write_config(*STDERR); +} + + +# Generate initial files and values +check_call("cp $iniFile $newIniFile"); +$iniFile = $newIniFile; + +my $newsrc = "$dir/dev.input"; +enseg($srcFile, $newsrc); +$srcFile = $newsrc; +my $devSize = 0; +open F, "<$srcFile" or die "Can't read $srcFile: $!"; +while() { $devSize++; } +close F; + +unless($best_weights){ $best_weights = $weights; } +unless($projected_score){ $projected_score = 0.0; } +$seen_weights{$weights} = 1; +my $kbest = "$dir/kbest"; +if ($dont_accum) { + $kbest = ''; +} else { + check_call("mkdir -p $kbest"); + $kbest = "--kbest_repository $kbest"; +} + +my $random_seed = int(time / 1000); +my $lastWeightsFile; +my $lastPScore = 0; +# main optimization loop +while (1){ + print STDERR "\n\nITERATION $iteration\n==========\n"; + + if ($iteration > $max_iterations){ + print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n"; + last; + } + # iteration-specific files + my $runFile="$dir/run.raw.$iteration"; + my $onebestFile="$dir/1best.$iteration"; + my $logdir="$dir/logs.$iteration"; + my $decoderLog="$logdir/decoder.sentserver.log.$iteration"; + my $scorerLog="$logdir/scorer.log.$iteration"; + check_call("mkdir -p $logdir"); + + + #decode + print STDERR "RUNNING DECODER AT "; + print STDERR unchecked_output("date"); + my $im1 = $iteration - 1; + my $weightsFile="$dir/weights.$im1"; + push @allweights, "-w $dir/weights.$im1"; + `rm -f $dir/hgs/*.gz`; + my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; + my $pcmd; + if ($use_make) { + $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --"; + } else { + $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --"; + } + my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; + print STDERR "COMMAND:\n$cmd\n"; + check_bash_call($cmd); + my $num_hgs; + my $num_topbest; + my $retries = 0; + while($retries < 5) { + $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); + $num_topbest = check_output("wc -l < $runFile"); + print STDERR "NUMBER OF HGs: $num_hgs\n"; + print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n"; + if($devSize == $num_hgs && $devSize == $num_topbest) { + last; + } else { + print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n"; + sleep(3); + } + $retries++; + } + die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); + my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric"); + chomp $dec_score; + print STDERR "DECODER SCORE: $dec_score\n"; + + # save space + check_call("gzip -f $runFile"); + check_call("gzip -f $decoderLog"); + + # run optimizer + print STDERR "RUNNING OPTIMIZER AT "; + print STDERR unchecked_output("date"); + print STDERR " - GENERATE TRAINING EXEMPLARS\n"; + my $mergeLog="$logdir/prune-merge.log.$iteration"; + + my $score = 0; + my $icc = 0; + my $inweights="$dir/weights.$im1"; + my $outweights="$dir/weights.$iteration"; + $cmd="$MAPINPUT $dir/hgs > $dir/agenda.$im1"; + print STDERR "COMMAND:\n$cmd\n"; + check_call($cmd); + $cmd="$MAPPER $refs_comma_sep -m $metric -i $dir/agenda.$im1 $kbest -w $inweights > $outweights"; + check_call($cmd); + $lastWeightsFile = $outweights; + $iteration++; + `rm hgs/*.gz`; + print STDERR "\n==========\n"; +} + +print STDERR "\nFINAL WEIGHTS: $lastWeightsFile\n(Use -w with the decoder)\n\n"; + +print STDOUT "$lastWeightsFile\n"; + +exit 0; + +sub get_lines { + my $fn = shift @_; + open FL, "<$fn" or die "Couldn't read $fn: $!"; + my $lc = 0; + while() { $lc++; } + return $lc; +} + +sub get_comma_sep_refs { + my ($r,$p) = @_; + my $o = check_output("echo $p"); + chomp $o; + my @files = split /\s+/, $o; + return "-$r " . join(" -$r ", @files); +} + +sub read_weights_file { + my ($file) = @_; + open F, "<$file" or die "Couldn't read $file: $!"; + my @r = (); + my $pm = -1; + while() { + next if /^#/; + next if /^\s*$/; + chomp; + if (/^(.+)\s+(.+)$/) { + my $m = $1; + my $w = $2; + die "Weights out of order: $m <= $pm" unless $m > $pm; + push @r, $w; + } else { + warn "Unexpected feature name in weight file: $_"; + } + } + close F; + return join ' ', @r; +} + +# subs +sub write_config { + my $fh = shift; + my $cleanup = "yes"; + if ($disable_clean) {$cleanup = "no";} + + print $fh "\n"; + print $fh "DECODER: $decoder\n"; + print $fh "INI FILE: $iniFile\n"; + print $fh "WORKING DIR: $dir\n"; + print $fh "SOURCE (DEV): $srcFile\n"; + print $fh "REFS (DEV): $refFiles\n"; + print $fh "EVAL METRIC: $metric\n"; + print $fh "MAX ITERATIONS: $max_iterations\n"; + print $fh "JOBS: $jobs\n"; + print $fh "HEAD NODE: $host\n"; + print $fh "PMEM (DECODING): $pmem\n"; + print $fh "CLEANUP: $cleanup\n"; +} + +sub update_weights_file { + my ($neww, $rfn, $rpts) = @_; + my @feats = @$rfn; + my @pts = @$rpts; + my $num_feats = scalar @feats; + my $num_pts = scalar @pts; + die "$num_feats (num_feats) != $num_pts (num_pts)" unless $num_feats == $num_pts; + open G, ">$neww" or die; + for (my $i = 0; $i < $num_feats; $i++) { + my $f = $feats[$i]; + my $lambda = $pts[$i]; + print G "$f $lambda\n"; + } + close G; +} + +sub enseg { + my $src = shift; + my $newsrc = shift; + open(SRC, $src); + open(NEWSRC, ">$newsrc"); + my $i=0; + while (my $line=){ + chomp $line; + if ($line =~ /^\s* tags, you must include a zero-based id attribute"; + } + } else { + print NEWSRC "$line\n"; + } + $i++; + } + close SRC; + close NEWSRC; + die "Empty dev set!" if ($i == 0); +} + +sub print_help { + + my $executable = check_output("basename $0"); chomp $executable; + print << "Help"; + +Usage: $executable [options] + + $executable [options] + Runs a complete PRO optimization using the ini file specified. + +Required: + + --ref-files + Dev set ref files. This option takes only a single string argument. + To use multiple files (including file globbing), this argument should + be quoted. + + --source-file + Dev set source file. + + --weights + Initial weights file (use empty file to start from 0) + +General options: + + --help + Print this message and exit. + + --dont-accumulate + Don't accumulate k-best lists from multiple iterations. + + --max-iterations + Maximum number of iterations to run. If not specified, defaults + to $default_max_iter. + + --metric + Metric to optimize. + Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi + + --pass-suffix + If the decoder is doing multi-pass decoding, the pass suffix "2", + "3", etc., is used to control what iteration of weights is set. + + --workdir + Directory for intermediate and output files. If not specified, the + name is derived from the ini filename. Assuming that the ini + filename begins with the decoder name and ends with ini, the default + name of the working directory is inferred from the middle part of + the filename. E.g. an ini file named decoder.foo.ini would have + a default working directory name foo. + +Regularization options: + + --reg + l2 regularization strength [default=500]. The greater this value, + the closer to zero the weights will be. + +Job control options: + + --jobs + Number of decoder processes to run in parallel. [default=$default_jobs] + + --qsub + Use qsub to run jobs in parallel (qsub must be configured in + environment/LocalEnvironment.pm) + + --pmem + Amount of physical memory requested for parallel decoding jobs + (used with qsub requests only) + +Help +} + +sub convert { + my ($str) = @_; + my @ps = split /;/, $str; + my %dict = (); + for my $p (@ps) { + my ($k, $v) = split /=/, $p; + $dict{$k} = $v; + } + return %dict; +} + + +sub cmdline { + return join ' ',($0,@ORIG_ARGV); +} + +#buggy: last arg gets quoted sometimes? +my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; +my $shell_escape_in_quote=qr{[\\"\$`!]}; + +sub escape_shell { + my ($arg)=@_; + return undef unless defined $arg; + if ($arg =~ /$is_shell_special/) { + $arg =~ s/($shell_escape_in_quote)/\\$1/g; + return "\"$arg\""; + } + return $arg; +} + +sub escaped_shell_args { + return map {local $_=$_;chomp;escape_shell($_)} @_; +} + +sub escaped_shell_args_str { + return join ' ',&escaped_shell_args(@_); +} + +sub escaped_cmdline { + return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); +} diff --git a/training/rampion/rampion_cccp.cc b/training/rampion/rampion_cccp.cc new file mode 100644 index 00000000..1e36dc51 --- /dev/null +++ b/training/rampion/rampion_cccp.cc @@ -0,0 +1,168 @@ +#include +#include +#include +#include + +#include +#include + +#include "filelib.h" +#include "stringlib.h" +#include "weights.h" +#include "hg_io.h" +#include "kbest.h" +#include "viterbi.h" +#include "ns.h" +#include "ns_docscorer.h" +#include "candidate_set.h" + +using namespace std; +namespace po = boost::program_options; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("reference,r",po::value >(), "[REQD] Reference translation (tokenized text)") + ("weights,w",po::value(), "[REQD] Weights files from current iterations") + ("input,i",po::value()->default_value("-"), "Input file to map (- is STDIN)") + ("evaluation_metric,m",po::value()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)") + ("kbest_repository,R",po::value(), "Accumulate k-best lists from previous iterations (parameter is path to repository)") + ("kbest_size,k",po::value()->default_value(500u), "Top k-hypotheses to extract") + ("cccp_iterations,I", po::value()->default_value(10u), "CCCP iterations (T')") + ("ssd_iterations,J", po::value()->default_value(5u), "Stochastic subgradient iterations (T'')") + ("eta", po::value()->default_value(1e-4), "Step size") + ("regularization_strength,C", po::value()->default_value(1.0), "L2 regularization strength") + ("alpha,a", po::value()->default_value(10.0), "Cost scale (alpha); alpha * [1-metric(y,y')]") + ("help,h", "Help"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + bool flag = false; + if (!conf->count("reference")) { + cerr << "Please specify one or more references using -r \n"; + flag = true; + } + if (!conf->count("weights")) { + cerr << "Please specify weights using -w \n"; + flag = true; + } + if (flag || conf->count("help")) { + cerr << dcmdline_options << endl; + exit(1); + } +} + +struct GainFunction { + explicit GainFunction(const EvaluationMetric* m) : metric(m) {} + float operator()(const SufficientStats& eval_feats) const { + float g = metric->ComputeScore(eval_feats); + if (!metric->IsErrorMetric()) g = 1 - g; + return g; + } + const EvaluationMetric* metric; +}; + +template +void CostAugmentedSearch(const GainFunc& gain, + const training::CandidateSet& cs, + const SparseVector& w, + double alpha, + SparseVector* fmap) { + unsigned best_i = 0; + double best = -numeric_limits::infinity(); + for (unsigned i = 0; i < cs.size(); ++i) { + double s = cs[i].fmap.dot(w) + alpha * gain(cs[i].eval_feats); + if (s > best) { + best = s; + best_i = i; + } + } + *fmap = cs[best_i].fmap; +} + + + +// runs lines 4--15 of rampion algorithm +int main(int argc, char** argv) { + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + const string evaluation_metric = conf["evaluation_metric"].as(); + + EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric); + DocumentScorer ds(metric, conf["reference"].as >()); + cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl; + double goodsign = -1; + double badsign = -goodsign; + + Hypergraph hg; + string last_file; + ReadFile in_read(conf["input"].as()); + string kbest_repo; + if (conf.count("kbest_repository")) { + kbest_repo = conf["kbest_repository"].as(); + MkDirP(kbest_repo); + } + istream &in=*in_read.stream(); + const unsigned kbest_size = conf["kbest_size"].as(); + const unsigned tp = conf["cccp_iterations"].as(); + const unsigned tpp = conf["ssd_iterations"].as(); + const double eta = conf["eta"].as(); + const double reg = conf["regularization_strength"].as(); + const double alpha = conf["alpha"].as(); + SparseVector weights; + { + vector vweights; + const string weightsf = conf["weights"].as(); + Weights::InitFromFile(weightsf, &vweights); + Weights::InitSparseVector(vweights, &weights); + } + string line, file; + vector kis; + cerr << "Loading hypergraphs...\n"; + while(getline(in, line)) { + istringstream is(line); + int sent_id; + kis.resize(kis.size() + 1); + training::CandidateSet& curkbest = kis.back(); + string kbest_file; + if (kbest_repo.size()) { + ostringstream os; + os << kbest_repo << "/kbest." << sent_id << ".txt.gz"; + kbest_file = os.str(); + if (FileExists(kbest_file)) + curkbest.ReadFromFile(kbest_file); + } + is >> file >> sent_id; + ReadFile rf(file); + if (kis.size() % 5 == 0) { cerr << '.'; } + if (kis.size() % 200 == 0) { cerr << " [" << kis.size() << "]\n"; } + HypergraphIO::ReadFromJSON(rf.stream(), &hg); + hg.Reweight(weights); + curkbest.AddKBestCandidates(hg, kbest_size, ds[sent_id]); + if (kbest_file.size()) + curkbest.WriteToFile(kbest_file); + } + cerr << "\nHypergraphs loaded.\n"; + + vector > goals(kis.size()); // f(x_i,y+,h+) + SparseVector fear; // f(x,y-,h-) + const GainFunction gain(metric); + for (unsigned iterp = 1; iterp <= tp; ++iterp) { + cerr << "CCCP Iteration " << iterp << endl; + for (unsigned i = 0; i < goals.size(); ++i) + CostAugmentedSearch(gain, kis[i], weights, goodsign * alpha, &goals[i]); + for (unsigned iterpp = 1; iterpp <= tpp; ++iterpp) { + cerr << " SSD Iteration " << iterpp << endl; + for (unsigned i = 0; i < goals.size(); ++i) { + CostAugmentedSearch(gain, kis[i], weights, badsign * alpha, &fear); + weights -= weights * (eta * reg / goals.size()); + weights += (goals[i] - fear) * eta; + } + } + } + vector w; + weights.init_vector(&w); + Weights::WriteToFile("-", w); + return 0; +} + diff --git a/training/rampion/rampion_generate_input.pl b/training/rampion/rampion_generate_input.pl new file mode 100755 index 00000000..b30fc4fd --- /dev/null +++ b/training/rampion/rampion_generate_input.pl @@ -0,0 +1,18 @@ +#!/usr/bin/perl -w +use strict; + +die "Usage: $0 HG_DIR\n" unless scalar @ARGV == 1; +my $d = shift @ARGV; +die "Can't find directory $d" unless -d $d; + +opendir(DIR, $d) or die "Can't read $d: $!"; +my @hgs = grep { /\.gz$/ } readdir(DIR); +closedir DIR; + +for my $hg (@hgs) { + my $file = $hg; + my $id = $hg; + $id =~ s/(\.json)?\.gz//; + print "$d/$file $id\n"; +} + diff --git a/training/risk.cc b/training/risk.cc deleted file mode 100644 index d5a12cfd..00000000 --- a/training/risk.cc +++ /dev/null @@ -1,45 +0,0 @@ -#include "risk.h" - -#include "prob.h" -#include "candidate_set.h" -#include "ns.h" - -using namespace std; - -namespace training { - -// g = \sum_e p(e|f) * loss(e) * (phi(e,f) - E[phi(e,f)]) -double CandidateSetRisk::operator()(const vector& params, - SparseVector* g) const { - prob_t z; - for (unsigned i = 0; i < cands_.size(); ++i) { - const prob_t u(cands_[i].fmap.dot(params), init_lnx()); - z += u; - } - const double log_z = log(z); - - SparseVector exp_feats; - if (g) { - for (unsigned i = 0; i < cands_.size(); ++i) { - const double log_prob = cands_[i].fmap.dot(params) - log_z; - const double prob = exp(log_prob); - exp_feats += cands_[i].fmap * prob; - } - } - - double risk = 0; - for (unsigned i = 0; i < cands_.size(); ++i) { - const double log_prob = cands_[i].fmap.dot(params) - log_z; - const double prob = exp(log_prob); - const double cost = metric_.IsErrorMetric() ? metric_.ComputeScore(cands_[i].eval_feats) - : 1.0 - metric_.ComputeScore(cands_[i].eval_feats); - const double r = prob * cost; - risk += r; - if (g) (*g) += (cands_[i].fmap - exp_feats) * r; - } - return risk; -} - -} - - diff --git a/training/risk.h b/training/risk.h deleted file mode 100644 index 2e8db0fb..00000000 --- a/training/risk.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef _RISK_H_ -#define _RISK_H_ - -#include -#include "sparse_vector.h" -class EvaluationMetric; - -namespace training { - class CandidateSet; - - class CandidateSetRisk { - public: - explicit CandidateSetRisk(const CandidateSet& cs, const EvaluationMetric& metric) : - cands_(cs), - metric_(metric) {} - // compute the risk (expected loss) of a CandidateSet - // (optional) the gradient of the risk with respect to params - double operator()(const std::vector& params, - SparseVector* g = NULL) const; - private: - const CandidateSet& cands_; - const EvaluationMetric& metric_; - }; -}; - -#endif diff --git a/training/ttables.cc b/training/ttables.cc deleted file mode 100644 index 45bf14c5..00000000 --- a/training/ttables.cc +++ /dev/null @@ -1,31 +0,0 @@ -#include "ttables.h" - -#include - -#include "dict.h" - -using namespace std; -using namespace std::tr1; - -void TTable::DeserializeProbsFromText(std::istream* in) { - int c = 0; - while(*in) { - string e; - string f; - double p; - (*in) >> e >> f >> p; - if (e.empty()) break; - ++c; - ttable[TD::Convert(e)][TD::Convert(f)] = p; - } - cerr << "Loaded " << c << " translation parameters.\n"; -} - -void TTable::SerializeHelper(string* out, const Word2Word2Double& o) { - assert(!"not implemented"); -} - -void TTable::DeserializeHelper(const string& in, Word2Word2Double* o) { - assert(!"not implemented"); -} - diff --git a/training/ttables.h b/training/ttables.h deleted file mode 100644 index 9baa13ca..00000000 --- a/training/ttables.h +++ /dev/null @@ -1,101 +0,0 @@ -#ifndef _TTABLES_H_ -#define _TTABLES_H_ - -#include -#include - -#include "sparse_vector.h" -#include "m.h" -#include "wordid.h" -#include "tdict.h" - -class TTable { - public: - TTable() {} - typedef std::tr1::unordered_map Word2Double; - typedef std::tr1::unordered_map Word2Word2Double; - inline double prob(const int& e, const int& f) const { - const Word2Word2Double::const_iterator cit = ttable.find(e); - if (cit != ttable.end()) { - const Word2Double& cpd = cit->second; - const Word2Double::const_iterator it = cpd.find(f); - if (it == cpd.end()) return 1e-9; - return it->second; - } else { - return 1e-9; - } - } - inline void Increment(const int& e, const int& f) { - counts[e][f] += 1.0; - } - inline void Increment(const int& e, const int& f, double x) { - counts[e][f] += x; - } - void NormalizeVB(const double alpha) { - ttable.swap(counts); - for (Word2Word2Double::iterator cit = ttable.begin(); - cit != ttable.end(); ++cit) { - double tot = 0; - Word2Double& cpd = cit->second; - for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it) - tot += it->second + alpha; - for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it) - it->second = exp(Md::digamma(it->second + alpha) - Md::digamma(tot)); - } - counts.clear(); - } - void Normalize() { - ttable.swap(counts); - for (Word2Word2Double::iterator cit = ttable.begin(); - cit != ttable.end(); ++cit) { - double tot = 0; - Word2Double& cpd = cit->second; - for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it) - tot += it->second; - for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it) - it->second /= tot; - } - counts.clear(); - } - // adds counts from another TTable - probabilities remain unchanged - TTable& operator+=(const TTable& rhs) { - for (Word2Word2Double::const_iterator it = rhs.counts.begin(); - it != rhs.counts.end(); ++it) { - const Word2Double& cpd = it->second; - Word2Double& tgt = counts[it->first]; - for (Word2Double::const_iterator j = cpd.begin(); j != cpd.end(); ++j) { - tgt[j->first] += j->second; - } - } - return *this; - } - void ShowTTable() const { - for (Word2Word2Double::const_iterator it = ttable.begin(); it != ttable.end(); ++it) { - const Word2Double& cpd = it->second; - for (Word2Double::const_iterator j = cpd.begin(); j != cpd.end(); ++j) { - std::cerr << "P(" << TD::Convert(j->first) << '|' << TD::Convert(it->first) << ") = " << j->second << std::endl; - } - } - } - void ShowCounts() const { - for (Word2Word2Double::const_iterator it = counts.begin(); it != counts.end(); ++it) { - const Word2Double& cpd = it->second; - for (Word2Double::const_iterator j = cpd.begin(); j != cpd.end(); ++j) { - std::cerr << "c(" << TD::Convert(j->first) << '|' << TD::Convert(it->first) << ") = " << j->second << std::endl; - } - } - } - void DeserializeProbsFromText(std::istream* in); - void SerializeCounts(std::string* out) const { SerializeHelper(out, counts); } - void DeserializeCounts(const std::string& in) { DeserializeHelper(in, &counts); } - void SerializeProbs(std::string* out) const { SerializeHelper(out, ttable); } - void DeserializeProbs(const std::string& in) { DeserializeHelper(in, &ttable); } - private: - static void SerializeHelper(std::string*, const Word2Word2Double& o); - static void DeserializeHelper(const std::string&, Word2Word2Double* o); - public: - Word2Word2Double ttable; - Word2Word2Double counts; -}; - -#endif diff --git a/training/utils/candidate_set.cc b/training/utils/candidate_set.cc new file mode 100644 index 00000000..087efec3 --- /dev/null +++ b/training/utils/candidate_set.cc @@ -0,0 +1,169 @@ +#include "candidate_set.h" + +#include + +#include + +#include "verbose.h" +#include "ns.h" +#include "filelib.h" +#include "wordid.h" +#include "tdict.h" +#include "hg.h" +#include "kbest.h" +#include "viterbi.h" + +using namespace std; + +namespace training { + +struct ApproxVectorHasher { + static const size_t MASK = 0xFFFFFFFFull; + union UType { + double f; // leave as double + size_t i; + }; + static inline double round(const double x) { + UType t; + t.f = x; + size_t r = t.i & MASK; + if ((r << 1) > MASK) + t.i += MASK - r + 1; + else + t.i &= (1ull - MASK); + return t.f; + } + size_t operator()(const SparseVector& x) const { + size_t h = 0x573915839; + for (SparseVector::const_iterator it = x.begin(); it != x.end(); ++it) { + UType t; + t.f = it->second; + if (t.f) { + size_t z = (t.i >> 32); + boost::hash_combine(h, it->first); + boost::hash_combine(h, z); + } + } + return h; + } +}; + +struct ApproxVectorEquals { + bool operator()(const SparseVector& a, const SparseVector& b) const { + SparseVector::const_iterator bit = b.begin(); + for (SparseVector::const_iterator ait = a.begin(); ait != a.end(); ++ait) { + if (bit == b.end() || + ait->first != bit->first || + ApproxVectorHasher::round(ait->second) != ApproxVectorHasher::round(bit->second)) + return false; + ++bit; + } + if (bit != b.end()) return false; + return true; + } +}; + +struct CandidateCompare { + bool operator()(const Candidate& a, const Candidate& b) const { + ApproxVectorEquals eq; + return (a.ewords == b.ewords && eq(a.fmap,b.fmap)); + } +}; + +struct CandidateHasher { + size_t operator()(const Candidate& x) const { + boost::hash > hhasher; + ApproxVectorHasher vhasher; + size_t ha = hhasher(x.ewords); + boost::hash_combine(ha, vhasher(x.fmap)); + return ha; + } +}; + +static void ParseSparseVector(string& line, size_t cur, SparseVector* out) { + SparseVector& x = *out; + size_t last_start = cur; + size_t last_comma = string::npos; + while(cur <= line.size()) { + if (line[cur] == ' ' || cur == line.size()) { + if (!(cur > last_start && last_comma != string::npos && cur > last_comma)) { + cerr << "[ERROR] " << line << endl << " position = " << cur << endl; + exit(1); + } + const int fid = FD::Convert(line.substr(last_start, last_comma - last_start)); + if (cur < line.size()) line[cur] = 0; + const double val = strtod(&line[last_comma + 1], NULL); + x.set_value(fid, val); + + last_comma = string::npos; + last_start = cur+1; + } else { + if (line[cur] == '=') + last_comma = cur; + } + ++cur; + } +} + +void CandidateSet::WriteToFile(const string& file) const { + WriteFile wf(file); + ostream& out = *wf.stream(); + out.precision(10); + string ss; + for (unsigned i = 0; i < cs.size(); ++i) { + out << TD::GetString(cs[i].ewords) << endl; + out << cs[i].fmap << endl; + cs[i].eval_feats.Encode(&ss); + out << ss << endl; + } +} + +void CandidateSet::ReadFromFile(const string& file) { + if(!SILENT) cerr << "Reading candidates from " << file << endl; + ReadFile rf(file); + istream& in = *rf.stream(); + string cand; + string feats; + string ss; + while(getline(in, cand)) { + getline(in, feats); + getline(in, ss); + assert(in); + cs.push_back(Candidate()); + TD::ConvertSentence(cand, &cs.back().ewords); + ParseSparseVector(feats, 0, &cs.back().fmap); + cs.back().eval_feats = SufficientStats(ss); + } + if(!SILENT) cerr << " read " << cs.size() << " candidates\n"; +} + +void CandidateSet::Dedup() { + if(!SILENT) cerr << "Dedup in=" << cs.size(); + tr1::unordered_set u; + while(cs.size() > 0) { + u.insert(cs.back()); + cs.pop_back(); + } + tr1::unordered_set::iterator it = u.begin(); + while (it != u.end()) { + cs.push_back(*it); + it = u.erase(it); + } + if(!SILENT) cerr << " out=" << cs.size() << endl; +} + +void CandidateSet::AddKBestCandidates(const Hypergraph& hg, size_t kbest_size, const SegmentEvaluator* scorer) { + KBest::KBestDerivations, ESentenceTraversal> kbest(hg, kbest_size); + + for (unsigned i = 0; i < kbest_size; ++i) { + const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = + kbest.LazyKthBest(hg.nodes_.size() - 1, i); + if (!d) break; + cs.push_back(Candidate(d->yield, d->feature_values)); + if (scorer) + scorer->Evaluate(d->yield, &cs.back().eval_feats); + } + Dedup(); +} + +} diff --git a/training/utils/candidate_set.h b/training/utils/candidate_set.h new file mode 100644 index 00000000..9d326ed0 --- /dev/null +++ b/training/utils/candidate_set.h @@ -0,0 +1,60 @@ +#ifndef _CANDIDATE_SET_H_ +#define _CANDIDATE_SET_H_ + +#include +#include + +#include "ns.h" +#include "wordid.h" +#include "sparse_vector.h" + +class Hypergraph; + +namespace training { + +struct Candidate { + Candidate() {} + Candidate(const std::vector& e, const SparseVector& fm) : + ewords(e), + fmap(fm) {} + Candidate(const std::vector& e, + const SparseVector& fm, + const SegmentEvaluator& se) : + ewords(e), + fmap(fm) { + se.Evaluate(ewords, &eval_feats); + } + + void swap(Candidate& other) { + eval_feats.swap(other.eval_feats); + ewords.swap(other.ewords); + fmap.swap(other.fmap); + } + + std::vector ewords; + SparseVector fmap; + SufficientStats eval_feats; +}; + +// represents some kind of collection of translation candidates, e.g. +// aggregated k-best lists, sample lists, etc. +class CandidateSet { + public: + CandidateSet() {} + inline size_t size() const { return cs.size(); } + const Candidate& operator[](size_t i) const { return cs[i]; } + + void ReadFromFile(const std::string& file); + void WriteToFile(const std::string& file) const; + void AddKBestCandidates(const Hypergraph& hg, size_t kbest_size, const SegmentEvaluator* scorer = NULL); + // TODO add code to do unique k-best + // TODO add code to draw k samples + + private: + void Dedup(); + std::vector cs; +}; + +} + +#endif diff --git a/training/utils/decode-and-evaluate.pl b/training/utils/decode-and-evaluate.pl new file mode 100755 index 00000000..1a332c08 --- /dev/null +++ b/training/utils/decode-and-evaluate.pl @@ -0,0 +1,246 @@ +#!/usr/bin/env perl +use strict; +my @ORIG_ARGV=@ARGV; +use Cwd qw(getcwd); +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment"; } + +# Skip local config (used for distributing jobs) if we're running in local-only mode +use LocalConfig; +use Getopt::Long; +use File::Basename qw(basename); +my $QSUB_CMD = qsub_args(mert_memory()); + +require "libcall.pl"; + +# Default settings +my $default_jobs = env_default_jobs(); +my $bin_dir = $SCRIPT_DIR; +die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; +my $FAST_SCORE="$bin_dir/../../mteval/fast_score"; +die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE; +my $parallelize = "$bin_dir/parallelize.pl"; +my $libcall = "$bin_dir/libcall.pl"; +my $sentserver = "$bin_dir/sentserver"; +my $sentclient = "$bin_dir/sentclient"; +my $LocalConfig = "$SCRIPT_DIR/../../environment/LocalConfig.pm"; + +my $SCORER = $FAST_SCORE; +my $cdec = "$bin_dir/../../decoder/cdec"; +die "Can't find decoder in $cdec" unless -x $cdec; +die "Can't find $parallelize" unless -x $parallelize; +die "Can't find $libcall" unless -e $libcall; +my $decoder = $cdec; +my $jobs = $default_jobs; # number of decode nodes +my $pmem = "9g"; +my $help = 0; +my $config; +my $test_set; +my $weights; +my $use_make = 1; +my $useqsub; +my $cpbin=1; +# Process command-line options +if (GetOptions( + "jobs=i" => \$jobs, + "help" => \$help, + "qsub" => \$useqsub, + "input=s" => \$test_set, + "config=s" => \$config, + "weights=s" => \$weights, +) == 0 || @ARGV!=0 || $help) { + print_help(); + exit; +} + +if ($useqsub) { + $use_make = 0; + die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); +} + +my @missing_args = (); + +if (!defined $test_set) { push @missing_args, "--input"; } +if (!defined $config) { push @missing_args, "--config"; } +if (!defined $weights) { push @missing_args, "--weights"; } +die "Please specify missing arguments: " . join (', ', @missing_args) . "\nUse --help for more information.\n" if (@missing_args); + +my @tf = localtime(time); +my $tname = basename($test_set); +$tname =~ s/\.(sgm|sgml|xml)$//i; +my $dir = "eval.$tname." . sprintf('%d%02d%02d-%02d%02d%02d', 1900+$tf[5], $tf[4], $tf[3], $tf[2], $tf[1], $tf[0]); + +my $time = unchecked_output("date"); + +check_call("mkdir -p $dir"); + +split_devset($test_set, "$dir/test.input.raw", "$dir/test.refs"); +my $refs = "-r $dir/test.refs"; +my $newsrc = "$dir/test.input"; +enseg("$dir/test.input.raw", $newsrc); +my $src_file = $newsrc; +open F, "<$src_file" or die "Can't read $src_file: $!"; close F; + +my $test_trans="$dir/test.trans"; +my $logdir="$dir/logs"; +my $decoderLog="$logdir/decoder.sentserver.log"; +check_call("mkdir -p $logdir"); + +#decode +print STDERR "RUNNING DECODER AT "; +print STDERR unchecked_output("date"); +my $decoder_cmd = "$decoder -c $config --weights $weights"; +my $pcmd; +if ($use_make) { + $pcmd = "cat $src_file | $parallelize --workdir $dir --use-fork -p $pmem -e $logdir -j $jobs --"; +} else { + $pcmd = "cat $src_file | $parallelize --workdir $dir -p $pmem -e $logdir -j $jobs --"; +} +my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $test_trans"; +check_bash_call($cmd); +print STDERR "DECODER COMPLETED AT "; +print STDERR unchecked_output("date"); +print STDERR "\nOUTPUT: $test_trans\n\n"; +my $bleu = check_output("cat $test_trans | $SCORER $refs -m ibm_bleu"); +chomp $bleu; +print STDERR "BLEU: $bleu\n"; +my $ter = check_output("cat $test_trans | $SCORER $refs -m ter"); +chomp $ter; +print STDERR " TER: $ter\n"; +open TR, ">$dir/test.scores" or die "Can't write $dir/test.scores: $!"; +print TR <$newsrc"); + my $i=0; + while (my $line=){ + chomp $line; + if ($line =~ /^\s* tags, you must include a zero-based id attribute"; + } + } else { + print NEWSRC "$line\n"; + } + $i++; + } + close SRC; + close NEWSRC; +} + +sub print_help { + my $executable = basename($0); chomp $executable; + print << "Help"; + +Usage: $executable [options] + + $executable --config cdec.ini --weights weights.txt [--jobs N] [--qsub] + +Options: + + --help + Print this message and exit. + + --config + A path to the cdec.ini file. + + --weights + A file specifying feature weights. + + --dir + Directory for intermediate and output files. + +Job control options: + + --jobs + Number of decoder processes to run in parallel. [default=$default_jobs] + + --qsub + Use qsub to run jobs in parallel (qsub must be configured in + environment/LocalEnvironment.pm) + + --pmem + Amount of physical memory requested for parallel decoding jobs + (used with qsub requests only) + +Help +} + +sub convert { + my ($str) = @_; + my @ps = split /;/, $str; + my %dict = (); + for my $p (@ps) { + my ($k, $v) = split /=/, $p; + $dict{$k} = $v; + } + return %dict; +} + + + +sub cmdline { + return join ' ',($0,@ORIG_ARGV); +} + +#buggy: last arg gets quoted sometimes? +my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]}; +my $shell_escape_in_quote=qr{[\\"\$`!]}; + +sub escape_shell { + my ($arg)=@_; + return undef unless defined $arg; + if ($arg =~ /$is_shell_special/) { + $arg =~ s/($shell_escape_in_quote)/\\$1/g; + return "\"$arg\""; + } + return $arg; +} + +sub escaped_shell_args { + return map {local $_=$_;chomp;escape_shell($_)} @_; +} + +sub escaped_shell_args_str { + return join ' ',&escaped_shell_args(@_); +} + +sub escaped_cmdline { + return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); +} + +sub split_devset { + my ($infile, $outsrc, $outref) = @_; + open F, "<$infile" or die "Can't read $infile: $!"; + open S, ">$outsrc" or die "Can't write $outsrc: $!"; + open R, ">$outref" or die "Can't write $outref: $!"; + while() { + chomp; + my ($src, @refs) = split /\s*\|\|\|\s*/; + die "Malformed devset line: $_\n" unless scalar @refs > 0; + print S "$src\n"; + print R join(' ||| ', @refs) . "\n"; + } + close R; + close S; + close F; +} + diff --git a/training/utils/entropy.cc b/training/utils/entropy.cc new file mode 100644 index 00000000..4fdbe2be --- /dev/null +++ b/training/utils/entropy.cc @@ -0,0 +1,41 @@ +#include "entropy.h" + +#include "prob.h" +#include "candidate_set.h" + +using namespace std; + +namespace training { + +// see Mann and McCallum "Efficient Computation of Entropy Gradient ..." for +// a mostly clear derivation of: +// g = E[ F(x,y) * log p(y|x) ] + H(y | x) * E[ F(x,y) ] +double CandidateSetEntropy::operator()(const vector& params, + SparseVector* g) const { + prob_t z; + vector dps(cands_.size()); + for (unsigned i = 0; i < cands_.size(); ++i) { + dps[i] = cands_[i].fmap.dot(params); + const prob_t u(dps[i], init_lnx()); + z += u; + } + const double log_z = log(z); + + SparseVector exp_feats; + double entropy = 0; + for (unsigned i = 0; i < cands_.size(); ++i) { + const double log_prob = cands_[i].fmap.dot(params) - log_z; + const double prob = exp(log_prob); + const double e_logprob = prob * log_prob; + entropy -= e_logprob; + if (g) { + (*g) += cands_[i].fmap * e_logprob; + exp_feats += cands_[i].fmap * prob; + } + } + if (g) (*g) += exp_feats * entropy; + return entropy; +} + +} + diff --git a/training/utils/entropy.h b/training/utils/entropy.h new file mode 100644 index 00000000..796589ca --- /dev/null +++ b/training/utils/entropy.h @@ -0,0 +1,22 @@ +#ifndef _CSENTROPY_H_ +#define _CSENTROPY_H_ + +#include +#include "sparse_vector.h" + +namespace training { + class CandidateSet; + + class CandidateSetEntropy { + public: + explicit CandidateSetEntropy(const CandidateSet& cs) : cands_(cs) {} + // compute the entropy (expected log likelihood) of a CandidateSet + // (optional) the gradient of the entropy with respect to params + double operator()(const std::vector& params, + SparseVector* g = NULL) const; + private: + const CandidateSet& cands_; + }; +}; + +#endif diff --git a/training/utils/grammar_convert.cc b/training/utils/grammar_convert.cc new file mode 100644 index 00000000..607a7cb9 --- /dev/null +++ b/training/utils/grammar_convert.cc @@ -0,0 +1,348 @@ +/* + this program modifies cfg hypergraphs (forests) and extracts kbests? + what are: json, split ? + */ +#include +#include +#include + +#include +#include + +#include "inside_outside.h" +#include "tdict.h" +#include "filelib.h" +#include "hg.h" +#include "hg_io.h" +#include "kbest.h" +#include "viterbi.h" +#include "weights.h" + +namespace po = boost::program_options; +using namespace std; + +WordID kSTART; + +void InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("input,i", po::value()->default_value("-"), "Input file") + ("format,f", po::value()->default_value("cfg"), "Input format. Values: cfg, json, split") + ("output,o", po::value()->default_value("json"), "Output command. Values: json, 1best") + ("reorder,r", "Add Yamada & Knight (2002) reorderings") + ("weights,w", po::value(), "Feature weights for k-best derivations [optional]") + ("collapse_weights,C", "Collapse order features into a single feature whose value is all of the locally applying feature weights") + ("k_derivations,k", po::value(), "Show k derivations and their features") + ("max_reorder,m", po::value()->default_value(999), "Move a constituent at most this far") + ("help,h", "Print this help message and exit"); + po::options_description clo("Command line options"); + po::options_description dcmdline_options; + dcmdline_options.add(opts); + + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + po::notify(*conf); + + if (conf->count("help") || conf->count("input") == 0) { + cerr << "\nUsage: grammar_convert [-options]\n\nConverts a grammar file (in Hiero format) into JSON hypergraph.\n"; + cerr << dcmdline_options << endl; + exit(1); + } +} + +int GetOrCreateNode(const WordID& lhs, map* lhs2node, Hypergraph* hg) { + int& node_id = (*lhs2node)[lhs]; + if (!node_id) + node_id = hg->AddNode(lhs)->id_ + 1; + return node_id - 1; +} + +void FilterAndCheckCorrectness(int goal, Hypergraph* hg) { + if (goal < 0) { + cerr << "Error! [S] not found in grammar!\n"; + exit(1); + } + if (hg->nodes_[goal].in_edges_.size() != 1) { + cerr << "Error! [S] has more than one rewrite!\n"; + exit(1); + } + int old_size = hg->nodes_.size(); + hg->TopologicallySortNodesAndEdges(goal); + if (hg->nodes_.size() != old_size) { + cerr << "Warning! During sorting " << (old_size - hg->nodes_.size()) << " disappeared!\n"; + } + vector inside; // inside score at each node + double p = Inside(*hg, &inside); + if (!p) { + cerr << "Warning! Grammar defines the empty language!\n"; + hg->clear(); + return; + } + vector prune(hg->edges_.size(), false); + int bad_edges = 0; + for (unsigned i = 0; i < hg->edges_.size(); ++i) { + Hypergraph::Edge& edge = hg->edges_[i]; + bool bad = false; + for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { + if (!inside[edge.tail_nodes_[j]]) { + bad = true; + ++bad_edges; + } + } + prune[i] = bad; + } + cerr << "Removing " << bad_edges << " bad edges from the grammar.\n"; + for (unsigned i = 0; i < hg->edges_.size(); ++i) { + if (prune[i]) + cerr << " " << hg->edges_[i].rule_->AsString() << endl; + } + hg->PruneEdges(prune); +} + +void CreateEdge(const TRulePtr& r, const Hypergraph::TailNodeVector& tail, Hypergraph::Node* head_node, Hypergraph* hg) { + Hypergraph::Edge* new_edge = hg->AddEdge(r, tail); + hg->ConnectEdgeToHeadNode(new_edge, head_node); + new_edge->feature_values_ = r->scores_; +} + +// from a category label like "NP_2", return "NP" +string PureCategory(WordID cat) { + assert(cat < 0); + string c = TD::Convert(cat*-1); + size_t p = c.find("_"); + if (p == string::npos) return c; + return c.substr(0, p); +}; + +string ConstituentOrderFeature(const TRule& rule, const vector& pi) { + const static string kTERM_VAR = "x"; + const vector& f = rule.f(); + map used; + vector terms(f.size()); + for (int i = 0; i < f.size(); ++i) { + const string term = (f[i] < 0 ? PureCategory(f[i]) : kTERM_VAR); + int& count = used[term]; + if (!count) { + terms[i] = term; + } else { + ostringstream os; + os << term << count; + terms[i] = os.str(); + } + ++count; + } + ostringstream os; + os << PureCategory(rule.GetLHS()) << ':'; + for (int i = 0; i < f.size(); ++i) { + if (i > 0) os << '_'; + os << terms[pi[i]]; + } + return os.str(); +} + +bool CheckPermutationMask(const vector& mask, const vector& pi) { + assert(mask.size() == pi.size()); + + int req_min = -1; + int cur_max = 0; + int cur_mask = -1; + for (int i = 0; i < mask.size(); ++i) { + if (mask[i] != cur_mask) { + cur_mask = mask[i]; + req_min = cur_max - 1; + } + if (pi[i] > req_min) { + if (pi[i] > cur_max) cur_max = pi[i]; + } else { + return false; + } + } + + return true; +} + +void PermuteYKRecursive(int nodeid, const WordID& parent, const int max_reorder, Hypergraph* hg) { + // Hypergraph tmp = *hg; + Hypergraph::Node* node = &hg->nodes_[nodeid]; + if (node->in_edges_.size() != 1) { + cerr << "Multiple rewrites of [" << TD::Convert(node->cat_ * -1) << "] (parent is [" << TD::Convert(parent*-1) << "])\n"; + cerr << " not recursing!\n"; + return; + } +// for (int eii = 0; eii < node->in_edges_.size(); ++eii) { + const int oe_index = node->in_edges_.front(); + const TRule& rule = *hg->edges_[oe_index].rule_; + const Hypergraph::TailNodeVector orig_tail = hg->edges_[oe_index].tail_nodes_; + const int tail_size = orig_tail.size(); + for (int i = 0; i < tail_size; ++i) { + PermuteYKRecursive(hg->edges_[oe_index].tail_nodes_[i], node->cat_, max_reorder, hg); + } + const vector& of = rule.f_; + if (of.size() == 1) return; + // cerr << "Permuting [" << TD::Convert(node->cat_ * -1) << "]\n"; + // cerr << "ORIG: " << rule.AsString() << endl; + vector pi(of.size(), 0); + for (int i = 0; i < pi.size(); ++i) pi[i] = i; + + vector permutation_mask(of.size(), 0); + const bool dont_reorder_across_PU = true; // TODO add configuration + if (dont_reorder_across_PU) { + int cur = 0; + for (int i = 0; i < pi.size(); ++i) { + if (of[i] >= 0) continue; + const string cat = PureCategory(of[i]); + if (cat == "PU" || cat == "PU!H" || cat == "PUNC" || cat == "PUNC!H" || cat == "CC") { + ++cur; + permutation_mask[i] = cur; + ++cur; + } else { + permutation_mask[i] = cur; + } + } + } + int fid = FD::Convert(ConstituentOrderFeature(rule, pi)); + hg->edges_[oe_index].feature_values_.set_value(fid, 1.0); + while (next_permutation(pi.begin(), pi.end())) { + if (!CheckPermutationMask(permutation_mask, pi)) + continue; + vector nf(pi.size(), 0); + Hypergraph::TailNodeVector tail(pi.size(), 0); + bool skip = false; + for (int i = 0; i < pi.size(); ++i) { + int dist = pi[i] - i; if (dist < 0) dist *= -1; + if (dist > max_reorder) { skip = true; break; } + nf[i] = of[pi[i]]; + tail[i] = orig_tail[pi[i]]; + } + if (skip) continue; + TRulePtr nr(new TRule(rule)); + nr->f_ = nf; + int fid = FD::Convert(ConstituentOrderFeature(rule, pi)); + nr->scores_.set_value(fid, 1.0); + // cerr << "PERM: " << nr->AsString() << endl; + CreateEdge(nr, tail, node, hg); + } + // } +} + +void PermuteYamadaAndKnight(Hypergraph* hg, int max_reorder) { + assert(hg->nodes_.back().cat_ == kSTART); + assert(hg->nodes_.back().in_edges_.size() == 1); + PermuteYKRecursive(hg->nodes_.size() - 1, kSTART, max_reorder, hg); +} + +void CollapseWeights(Hypergraph* hg) { + int fid = FD::Convert("Reordering"); + for (int i = 0; i < hg->edges_.size(); ++i) { + Hypergraph::Edge& edge = hg->edges_[i]; + edge.feature_values_.clear(); + if (edge.edge_prob_ != prob_t::Zero()) { + edge.feature_values_.set_value(fid, log(edge.edge_prob_)); + } + } +} + +void ProcessHypergraph(const vector& w, const po::variables_map& conf, const string& ref, Hypergraph* hg) { + if (conf.count("reorder")) + PermuteYamadaAndKnight(hg, conf["max_reorder"].as()); + if (w.size() > 0) { hg->Reweight(w); } + if (conf.count("collapse_weights")) CollapseWeights(hg); + if (conf["output"].as() == "json") { + HypergraphIO::WriteToJSON(*hg, false, &cout); + if (!ref.empty()) { cerr << "REF: " << ref << endl; } + } else { + vector onebest; + ViterbiESentence(*hg, &onebest); + if (ref.empty()) { + cout << TD::GetString(onebest) << endl; + } else { + cout << TD::GetString(onebest) << " ||| " << ref << endl; + } + } + if (conf.count("k_derivations")) { + const int k = conf["k_derivations"].as(); + KBest::KBestDerivations, ESentenceTraversal> kbest(*hg, k); + for (int i = 0; i < k; ++i) { + const KBest::KBestDerivations, ESentenceTraversal>::Derivation* d = + kbest.LazyKthBest(hg->nodes_.size() - 1, i); + if (!d) break; + cerr << log(d->score) << " ||| " << TD::GetString(d->yield) << " ||| " << d->feature_values << endl; + } + } +} + +int main(int argc, char **argv) { + kSTART = TD::Convert("S") * -1; + po::variables_map conf; + InitCommandLine(argc, argv, &conf); + string infile = conf["input"].as(); + const bool is_split_input = (conf["format"].as() == "split"); + const bool is_json_input = is_split_input || (conf["format"].as() == "json"); + const bool collapse_weights = conf.count("collapse_weights"); + vector w; + if (conf.count("weights")) + Weights::InitFromFile(conf["weights"].as(), &w); + + if (collapse_weights && !w.size()) { + cerr << "--collapse_weights requires a weights file to be specified!\n"; + exit(1); + } + ReadFile rf(infile); + istream* in = rf.stream(); + assert(*in); + int lc = 0; + Hypergraph hg; + map lhs2node; + while(*in) { + string line; + ++lc; + getline(*in, line); + if (is_json_input) { + if (line.empty() || line[0] == '#') continue; + string ref; + if (is_split_input) { + size_t pos = line.rfind("}}"); + assert(pos != string::npos); + size_t rstart = line.find("||| ", pos); + assert(rstart != string::npos); + ref = line.substr(rstart + 4); + line = line.substr(0, pos + 2); + } + istringstream is(line); + if (HypergraphIO::ReadFromJSON(&is, &hg)) { + ProcessHypergraph(w, conf, ref, &hg); + hg.clear(); + } else { + cerr << "Error reading grammar from JSON: line " << lc << endl; + exit(1); + } + } else { + if (line.empty()) { + int goal = lhs2node[kSTART] - 1; + FilterAndCheckCorrectness(goal, &hg); + ProcessHypergraph(w, conf, "", &hg); + hg.clear(); + lhs2node.clear(); + continue; + } + if (line[0] == '#') continue; + if (line[0] != '[') { + cerr << "Line " << lc << ": bad format\n"; + exit(1); + } + TRulePtr tr(TRule::CreateRuleMonolingual(line)); + Hypergraph::TailNodeVector tail; + for (int i = 0; i < tr->f_.size(); ++i) { + WordID var_cat = tr->f_[i]; + if (var_cat < 0) + tail.push_back(GetOrCreateNode(var_cat, &lhs2node, &hg)); + } + const WordID lhs = tr->GetLHS(); + int head = GetOrCreateNode(lhs, &lhs2node, &hg); + Hypergraph::Edge* edge = hg.AddEdge(tr, tail); + edge->feature_values_ = tr->scores_; + Hypergraph::Node* node = &hg.nodes_[head]; + hg.ConnectEdgeToHeadNode(edge, node); + } + } +} + diff --git a/training/utils/lbfgs.h b/training/utils/lbfgs.h new file mode 100644 index 00000000..e8baecab --- /dev/null +++ b/training/utils/lbfgs.h @@ -0,0 +1,1459 @@ +#ifndef SCITBX_LBFGS_H +#define SCITBX_LBFGS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace scitbx { + +//! Limited-memory Broyden-Fletcher-Goldfarb-Shanno (LBFGS) %minimizer. +/*! Implementation of the + Limited-memory Broyden-Fletcher-Goldfarb-Shanno (LBFGS) + algorithm for large-scale multidimensional minimization + problems. + + This code was manually derived from Java code which was + in turn derived from the Fortran program + lbfgs.f. The Java translation was + effected mostly mechanically, with some manual + clean-up; in particular, array indices start at 0 + instead of 1. Most of the comments from the Fortran + code have been pasted in. + + Information on the original LBFGS Fortran source code is + available at + http://www.netlib.org/opt/lbfgs_um.shar . The following + information is taken verbatim from the Netlib documentation + for the Fortran source. + +
+    file    opt/lbfgs_um.shar
+    for     unconstrained optimization problems
+    alg     limited memory BFGS method
+    by      J. Nocedal
+    contact nocedal@eecs.nwu.edu
+    ref     D. C. Liu and J. Nocedal, ``On the limited memory BFGS method for
+    ,       large scale optimization methods'' Mathematical Programming 45
+    ,       (1989), pp. 503-528.
+    ,       (Postscript file of this paper is available via anonymous ftp
+    ,       to eecs.nwu.edu in the directory pub/%lbfgs/lbfgs_um.)
+    
+ + @author Jorge Nocedal: original Fortran version, including comments + (July 1990).
+ Robert Dodier: Java translation, August 1997.
+ Ralf W. Grosse-Kunstleve: C++ port, March 2002.
+ Chris Dyer: serialize/deserialize functionality + */ +namespace lbfgs { + + //! Generic exception class for %lbfgs %error messages. + /*! All exceptions thrown by the minimizer are derived from this class. + */ + class error : public std::exception { + public: + //! Constructor. + error(std::string const& msg) throw() + : msg_("lbfgs error: " + msg) + {} + //! Access to error message. + virtual const char* what() const throw() { return msg_.c_str(); } + protected: + virtual ~error() throw() {} + std::string msg_; + public: + static std::string itoa(unsigned long i) { + std::ostringstream os; + os << i; + return os.str(); + } + }; + + //! Specific exception class. + class error_internal_error : public error { + public: + //! Constructor. + error_internal_error(const char* file, unsigned long line) throw() + : error( + "Internal Error: " + std::string(file) + "(" + itoa(line) + ")") + {} + }; + + //! Specific exception class. + class error_improper_input_parameter : public error { + public: + //! Constructor. + error_improper_input_parameter(std::string const& msg) throw() + : error("Improper input parameter: " + msg) + {} + }; + + //! Specific exception class. + class error_improper_input_data : public error { + public: + //! Constructor. + error_improper_input_data(std::string const& msg) throw() + : error("Improper input data: " + msg) + {} + }; + + //! Specific exception class. + class error_search_direction_not_descent : public error { + public: + //! Constructor. + error_search_direction_not_descent() throw() + : error("The search direction is not a descent direction.") + {} + }; + + //! Specific exception class. + class error_line_search_failed : public error { + public: + //! Constructor. + error_line_search_failed(std::string const& msg) throw() + : error("Line search failed: " + msg) + {} + }; + + //! Specific exception class. + class error_line_search_failed_rounding_errors + : public error_line_search_failed { + public: + //! Constructor. + error_line_search_failed_rounding_errors(std::string const& msg) throw() + : error_line_search_failed(msg) + {} + }; + + namespace detail { + + template + inline + NumType + pow2(NumType const& x) { return x * x; } + + template + inline + NumType + abs(NumType const& x) { + if (x < NumType(0)) return -x; + return x; + } + + // This class implements an algorithm for multi-dimensional line search. + template + class mcsrch + { + protected: + int infoc; + FloatType dginit; + bool brackt; + bool stage1; + FloatType finit; + FloatType dgtest; + FloatType width; + FloatType width1; + FloatType stx; + FloatType fx; + FloatType dgx; + FloatType sty; + FloatType fy; + FloatType dgy; + FloatType stmin; + FloatType stmax; + + static FloatType const& max3( + FloatType const& x, + FloatType const& y, + FloatType const& z) + { + return x < y ? (y < z ? z : y ) : (x < z ? z : x ); + } + + public: + /* Minimize a function along a search direction. This code is + a Java translation of the function MCSRCH from + lbfgs.f, which in turn is a slight modification + of the subroutine CSRCH of More' and Thuente. + The changes are to allow reverse communication, and do not + affect the performance of the routine. This function, in turn, + calls mcstep.

+ + The Java translation was effected mostly mechanically, with + some manual clean-up; in particular, array indices start at 0 + instead of 1. Most of the comments from the Fortran code have + been pasted in here as well.

+ + The purpose of mcsrch is to find a step which + satisfies a sufficient decrease condition and a curvature + condition.

+ + At each stage this function updates an interval of uncertainty + with endpoints stx and sty. The + interval of uncertainty is initially chosen so that it + contains a minimizer of the modified function +

+                f(x+stp*s) - f(x) - ftol*stp*(gradf(x)'s).
+           
+ If a step is obtained for which the modified function has a + nonpositive function value and nonnegative derivative, then + the interval of uncertainty is chosen so that it contains a + minimizer of f(x+stp*s).

+ + The algorithm is designed to find a step which satisfies + the sufficient decrease condition +

+                 f(x+stp*s) <= f(X) + ftol*stp*(gradf(x)'s),
+           
+ and the curvature condition +
+                 abs(gradf(x+stp*s)'s)) <= gtol*abs(gradf(x)'s).
+           
+ If ftol is less than gtol and if, + for example, the function is bounded below, then there is + always a step which satisfies both conditions. If no step can + be found which satisfies both conditions, then the algorithm + usually stops when rounding errors prevent further progress. + In this case stp only satisfies the sufficient + decrease condition.

+ + @author Original Fortran version by Jorge J. More' and + David J. Thuente as part of the Minpack project, June 1983, + Argonne National Laboratory. Java translation by Robert + Dodier, August 1997. + + @param n The number of variables. + + @param x On entry this contains the base point for the line + search. On exit it contains x + stp*s. + + @param f On entry this contains the value of the objective + function at x. On exit it contains the value + of the objective function at x + stp*s. + + @param g On entry this contains the gradient of the objective + function at x. On exit it contains the gradient + at x + stp*s. + + @param s The search direction. + + @param stp On entry this contains an initial estimate of a + satifactory step length. On exit stp contains + the final estimate. + + @param ftol Tolerance for the sufficient decrease condition. + + @param xtol Termination occurs when the relative width of the + interval of uncertainty is at most xtol. + + @param maxfev Termination occurs when the number of evaluations + of the objective function is at least maxfev by + the end of an iteration. + + @param info This is an output variable, which can have these + values: +

    +
  • info = -1 A return is made to compute + the function and gradient. +
  • info = 1 The sufficient decrease condition + and the directional derivative condition hold. +
+ + @param nfev On exit, this is set to the number of function + evaluations. + + @param wa Temporary storage array, of length n. + */ + void run( + FloatType const& gtol, + FloatType const& stpmin, + FloatType const& stpmax, + SizeType n, + FloatType* x, + FloatType f, + const FloatType* g, + FloatType* s, + SizeType is0, + FloatType& stp, + FloatType ftol, + FloatType xtol, + SizeType maxfev, + int& info, + SizeType& nfev, + FloatType* wa); + + /* The purpose of this function is to compute a safeguarded step + for a linesearch and to update an interval of uncertainty for + a minimizer of the function.

+ + The parameter stx contains the step with the + least function value. The parameter stp contains + the current step. It is assumed that the derivative at + stx is negative in the direction of the step. If + brackt is true when + mcstep returns then a minimizer has been + bracketed in an interval of uncertainty with endpoints + stx and sty.

+ + Variables that must be modified by mcstep are + implemented as 1-element arrays. + + @param stx Step at the best step obtained so far. + This variable is modified by mcstep. + @param fx Function value at the best step obtained so far. + This variable is modified by mcstep. + @param dx Derivative at the best step obtained so far. + The derivative must be negative in the direction of the + step, that is, dx and stp-stx must + have opposite signs. This variable is modified by + mcstep. + + @param sty Step at the other endpoint of the interval of + uncertainty. This variable is modified by mcstep. + @param fy Function value at the other endpoint of the interval + of uncertainty. This variable is modified by + mcstep. + + @param dy Derivative at the other endpoint of the interval of + uncertainty. This variable is modified by mcstep. + + @param stp Step at the current step. If brackt is set + then on input stp must be between stx + and sty. On output stp is set to the + new step. + @param fp Function value at the current step. + @param dp Derivative at the current step. + + @param brackt Tells whether a minimizer has been bracketed. + If the minimizer has not been bracketed, then on input this + variable must be set false. If the minimizer has + been bracketed, then on output this variable is + true. + + @param stpmin Lower bound for the step. + @param stpmax Upper bound for the step. + + If the return value is 1, 2, 3, or 4, then the step has + been computed successfully. A return value of 0 indicates + improper input parameters. + + @author Jorge J. More, David J. Thuente: original Fortran version, + as part of Minpack project. Argonne Nat'l Laboratory, June 1983. + Robert Dodier: Java translation, August 1997. + */ + static int mcstep( + FloatType& stx, + FloatType& fx, + FloatType& dx, + FloatType& sty, + FloatType& fy, + FloatType& dy, + FloatType& stp, + FloatType fp, + FloatType dp, + bool& brackt, + FloatType stpmin, + FloatType stpmax); + + void serialize(std::ostream* out) const { + out->write((const char*)&infoc,sizeof(infoc)); + out->write((const char*)&dginit,sizeof(dginit)); + out->write((const char*)&brackt,sizeof(brackt)); + out->write((const char*)&stage1,sizeof(stage1)); + out->write((const char*)&finit,sizeof(finit)); + out->write((const char*)&dgtest,sizeof(dgtest)); + out->write((const char*)&width,sizeof(width)); + out->write((const char*)&width1,sizeof(width1)); + out->write((const char*)&stx,sizeof(stx)); + out->write((const char*)&fx,sizeof(fx)); + out->write((const char*)&dgx,sizeof(dgx)); + out->write((const char*)&sty,sizeof(sty)); + out->write((const char*)&fy,sizeof(fy)); + out->write((const char*)&dgy,sizeof(dgy)); + out->write((const char*)&stmin,sizeof(stmin)); + out->write((const char*)&stmax,sizeof(stmax)); + } + + void deserialize(std::istream* in) const { + in->read((char*)&infoc, sizeof(infoc)); + in->read((char*)&dginit, sizeof(dginit)); + in->read((char*)&brackt, sizeof(brackt)); + in->read((char*)&stage1, sizeof(stage1)); + in->read((char*)&finit, sizeof(finit)); + in->read((char*)&dgtest, sizeof(dgtest)); + in->read((char*)&width, sizeof(width)); + in->read((char*)&width1, sizeof(width1)); + in->read((char*)&stx, sizeof(stx)); + in->read((char*)&fx, sizeof(fx)); + in->read((char*)&dgx, sizeof(dgx)); + in->read((char*)&sty, sizeof(sty)); + in->read((char*)&fy, sizeof(fy)); + in->read((char*)&dgy, sizeof(dgy)); + in->read((char*)&stmin, sizeof(stmin)); + in->read((char*)&stmax, sizeof(stmax)); + } + }; + + template + void mcsrch::run( + FloatType const& gtol, + FloatType const& stpmin, + FloatType const& stpmax, + SizeType n, + FloatType* x, + FloatType f, + const FloatType* g, + FloatType* s, + SizeType is0, + FloatType& stp, + FloatType ftol, + FloatType xtol, + SizeType maxfev, + int& info, + SizeType& nfev, + FloatType* wa) + { + if (info != -1) { + infoc = 1; + if ( n == 0 + || maxfev == 0 + || gtol < FloatType(0) + || xtol < FloatType(0) + || stpmin < FloatType(0) + || stpmax < stpmin) { + throw error_internal_error(__FILE__, __LINE__); + } + if (stp <= FloatType(0) || ftol < FloatType(0)) { + throw error_internal_error(__FILE__, __LINE__); + } + // Compute the initial gradient in the search direction + // and check that s is a descent direction. + dginit = FloatType(0); + for (SizeType j = 0; j < n; j++) { + dginit += g[j] * s[is0+j]; + } + if (dginit >= FloatType(0)) { + throw error_search_direction_not_descent(); + } + brackt = false; + stage1 = true; + nfev = 0; + finit = f; + dgtest = ftol*dginit; + width = stpmax - stpmin; + width1 = FloatType(2) * width; + std::copy(x, x+n, wa); + // The variables stx, fx, dgx contain the values of the step, + // function, and directional derivative at the best step. + // The variables sty, fy, dgy contain the value of the step, + // function, and derivative at the other endpoint of + // the interval of uncertainty. + // The variables stp, f, dg contain the values of the step, + // function, and derivative at the current step. + stx = FloatType(0); + fx = finit; + dgx = dginit; + sty = FloatType(0); + fy = finit; + dgy = dginit; + } + for (;;) { + if (info != -1) { + // Set the minimum and maximum steps to correspond + // to the present interval of uncertainty. + if (brackt) { + stmin = std::min(stx, sty); + stmax = std::max(stx, sty); + } + else { + stmin = stx; + stmax = stp + FloatType(4) * (stp - stx); + } + // Force the step to be within the bounds stpmax and stpmin. + stp = std::max(stp, stpmin); + stp = std::min(stp, stpmax); + // If an unusual termination is to occur then let + // stp be the lowest point obtained so far. + if ( (brackt && (stp <= stmin || stp >= stmax)) + || nfev >= maxfev - 1 || infoc == 0 + || (brackt && stmax - stmin <= xtol * stmax)) { + stp = stx; + } + // Evaluate the function and gradient at stp + // and compute the directional derivative. + // We return to main program to obtain F and G. + for (SizeType j = 0; j < n; j++) { + x[j] = wa[j] + stp * s[is0+j]; + } + info=-1; + break; + } + info = 0; + nfev++; + FloatType dg(0); + for (SizeType j = 0; j < n; j++) { + dg += g[j] * s[is0+j]; + } + FloatType ftest1 = finit + stp*dgtest; + // Test for convergence. + if ((brackt && (stp <= stmin || stp >= stmax)) || infoc == 0) { + throw error_line_search_failed_rounding_errors( + "Rounding errors prevent further progress." + " There may not be a step which satisfies the" + " sufficient decrease and curvature conditions." + " Tolerances may be too small."); + } + if (stp == stpmax && f <= ftest1 && dg <= dgtest) { + throw error_line_search_failed( + "The step is at the upper bound stpmax()."); + } + if (stp == stpmin && (f > ftest1 || dg >= dgtest)) { + throw error_line_search_failed( + "The step is at the lower bound stpmin()."); + } + if (nfev >= maxfev) { + throw error_line_search_failed( + "Number of function evaluations has reached maxfev()."); + } + if (brackt && stmax - stmin <= xtol * stmax) { + throw error_line_search_failed( + "Relative width of the interval of uncertainty" + " is at most xtol()."); + } + // Check for termination. + if (f <= ftest1 && abs(dg) <= gtol * (-dginit)) { + info = 1; + break; + } + // In the first stage we seek a step for which the modified + // function has a nonpositive value and nonnegative derivative. + if ( stage1 && f <= ftest1 + && dg >= std::min(ftol, gtol) * dginit) { + stage1 = false; + } + // A modified function is used to predict the step only if + // we have not obtained a step for which the modified + // function has a nonpositive function value and nonnegative + // derivative, and if a lower function value has been + // obtained but the decrease is not sufficient. + if (stage1 && f <= fx && f > ftest1) { + // Define the modified function and derivative values. + FloatType fm = f - stp*dgtest; + FloatType fxm = fx - stx*dgtest; + FloatType fym = fy - sty*dgtest; + FloatType dgm = dg - dgtest; + FloatType dgxm = dgx - dgtest; + FloatType dgym = dgy - dgtest; + // Call cstep to update the interval of uncertainty + // and to compute the new step. + infoc = mcstep(stx, fxm, dgxm, sty, fym, dgym, stp, fm, dgm, + brackt, stmin, stmax); + // Reset the function and gradient values for f. + fx = fxm + stx*dgtest; + fy = fym + sty*dgtest; + dgx = dgxm + dgtest; + dgy = dgym + dgtest; + } + else { + // Call mcstep to update the interval of uncertainty + // and to compute the new step. + infoc = mcstep(stx, fx, dgx, sty, fy, dgy, stp, f, dg, + brackt, stmin, stmax); + } + // Force a sufficient decrease in the size of the + // interval of uncertainty. + if (brackt) { + if (abs(sty - stx) >= FloatType(0.66) * width1) { + stp = stx + FloatType(0.5) * (sty - stx); + } + width1 = width; + width = abs(sty - stx); + } + } + } + + template + int mcsrch::mcstep( + FloatType& stx, + FloatType& fx, + FloatType& dx, + FloatType& sty, + FloatType& fy, + FloatType& dy, + FloatType& stp, + FloatType fp, + FloatType dp, + bool& brackt, + FloatType stpmin, + FloatType stpmax) + { + bool bound; + FloatType gamma, p, q, r, s, sgnd, stpc, stpf, stpq, theta; + int info = 0; + if ( ( brackt && (stp <= std::min(stx, sty) + || stp >= std::max(stx, sty))) + || dx * (stp - stx) >= FloatType(0) || stpmax < stpmin) { + return 0; + } + // Determine if the derivatives have opposite sign. + sgnd = dp * (dx / abs(dx)); + if (fp > fx) { + // First case. A higher function value. + // The minimum is bracketed. If the cubic step is closer + // to stx than the quadratic step, the cubic step is taken, + // else the average of the cubic and quadratic steps is taken. + info = 1; + bound = true; + theta = FloatType(3) * (fx - fp) / (stp - stx) + dx + dp; + s = max3(abs(theta), abs(dx), abs(dp)); + gamma = s * std::sqrt(pow2(theta / s) - (dx / s) * (dp / s)); + if (stp < stx) gamma = - gamma; + p = (gamma - dx) + theta; + q = ((gamma - dx) + gamma) + dp; + r = p/q; + stpc = stx + r * (stp - stx); + stpq = stx + + ((dx / ((fx - fp) / (stp - stx) + dx)) / FloatType(2)) + * (stp - stx); + if (abs(stpc - stx) < abs(stpq - stx)) { + stpf = stpc; + } + else { + stpf = stpc + (stpq - stpc) / FloatType(2); + } + brackt = true; + } + else if (sgnd < FloatType(0)) { + // Second case. A lower function value and derivatives of + // opposite sign. The minimum is bracketed. If the cubic + // step is closer to stx than the quadratic (secant) step, + // the cubic step is taken, else the quadratic step is taken. + info = 2; + bound = false; + theta = FloatType(3) * (fx - fp) / (stp - stx) + dx + dp; + s = max3(abs(theta), abs(dx), abs(dp)); + gamma = s * std::sqrt(pow2(theta / s) - (dx / s) * (dp / s)); + if (stp > stx) gamma = - gamma; + p = (gamma - dp) + theta; + q = ((gamma - dp) + gamma) + dx; + r = p/q; + stpc = stp + r * (stx - stp); + stpq = stp + (dp / (dp - dx)) * (stx - stp); + if (abs(stpc - stp) > abs(stpq - stp)) { + stpf = stpc; + } + else { + stpf = stpq; + } + brackt = true; + } + else if (abs(dp) < abs(dx)) { + // Third case. A lower function value, derivatives of the + // same sign, and the magnitude of the derivative decreases. + // The cubic step is only used if the cubic tends to infinity + // in the direction of the step or if the minimum of the cubic + // is beyond stp. Otherwise the cubic step is defined to be + // either stpmin or stpmax. The quadratic (secant) step is also + // computed and if the minimum is bracketed then the the step + // closest to stx is taken, else the step farthest away is taken. + info = 3; + bound = true; + theta = FloatType(3) * (fx - fp) / (stp - stx) + dx + dp; + s = max3(abs(theta), abs(dx), abs(dp)); + gamma = s * std::sqrt( + std::max(FloatType(0), pow2(theta / s) - (dx / s) * (dp / s))); + if (stp > stx) gamma = -gamma; + p = (gamma - dp) + theta; + q = (gamma + (dx - dp)) + gamma; + r = p/q; + if (r < FloatType(0) && gamma != FloatType(0)) { + stpc = stp + r * (stx - stp); + } + else if (stp > stx) { + stpc = stpmax; + } + else { + stpc = stpmin; + } + stpq = stp + (dp / (dp - dx)) * (stx - stp); + if (brackt) { + if (abs(stp - stpc) < abs(stp - stpq)) { + stpf = stpc; + } + else { + stpf = stpq; + } + } + else { + if (abs(stp - stpc) > abs(stp - stpq)) { + stpf = stpc; + } + else { + stpf = stpq; + } + } + } + else { + // Fourth case. A lower function value, derivatives of the + // same sign, and the magnitude of the derivative does + // not decrease. If the minimum is not bracketed, the step + // is either stpmin or stpmax, else the cubic step is taken. + info = 4; + bound = false; + if (brackt) { + theta = FloatType(3) * (fp - fy) / (sty - stp) + dy + dp; + s = max3(abs(theta), abs(dy), abs(dp)); + gamma = s * std::sqrt(pow2(theta / s) - (dy / s) * (dp / s)); + if (stp > sty) gamma = -gamma; + p = (gamma - dp) + theta; + q = ((gamma - dp) + gamma) + dy; + r = p/q; + stpc = stp + r * (sty - stp); + stpf = stpc; + } + else if (stp > stx) { + stpf = stpmax; + } + else { + stpf = stpmin; + } + } + // Update the interval of uncertainty. This update does not + // depend on the new step or the case analysis above. + if (fp > fx) { + sty = stp; + fy = fp; + dy = dp; + } + else { + if (sgnd < FloatType(0)) { + sty = stx; + fy = fx; + dy = dx; + } + stx = stp; + fx = fp; + dx = dp; + } + // Compute the new step and safeguard it. + stpf = std::min(stpmax, stpf); + stpf = std::max(stpmin, stpf); + stp = stpf; + if (brackt && bound) { + if (sty > stx) { + stp = std::min(stx + FloatType(0.66) * (sty - stx), stp); + } + else { + stp = std::max(stx + FloatType(0.66) * (sty - stx), stp); + } + } + return info; + } + + /* Compute the sum of a vector times a scalar plus another vector. + Adapted from the subroutine daxpy in + lbfgs.f. + */ + template + void daxpy( + SizeType n, + FloatType da, + const FloatType* dx, + SizeType ix0, + SizeType incx, + FloatType* dy, + SizeType iy0, + SizeType incy) + { + SizeType i, ix, iy, m; + if (n == 0) return; + if (da == FloatType(0)) return; + if (!(incx == 1 && incy == 1)) { + ix = 0; + iy = 0; + for (i = 0; i < n; i++) { + dy[iy0+iy] += da * dx[ix0+ix]; + ix += incx; + iy += incy; + } + return; + } + m = n % 4; + for (i = 0; i < m; i++) { + dy[iy0+i] += da * dx[ix0+i]; + } + for (; i < n;) { + dy[iy0+i] += da * dx[ix0+i]; i++; + dy[iy0+i] += da * dx[ix0+i]; i++; + dy[iy0+i] += da * dx[ix0+i]; i++; + dy[iy0+i] += da * dx[ix0+i]; i++; + } + } + + template + inline + void daxpy( + SizeType n, + FloatType da, + const FloatType* dx, + SizeType ix0, + FloatType* dy) + { + daxpy(n, da, dx, ix0, SizeType(1), dy, SizeType(0), SizeType(1)); + } + + /* Compute the dot product of two vectors. + Adapted from the subroutine ddot + in lbfgs.f. + */ + template + FloatType ddot( + SizeType n, + const FloatType* dx, + SizeType ix0, + SizeType incx, + const FloatType* dy, + SizeType iy0, + SizeType incy) + { + SizeType i, ix, iy, m; + FloatType dtemp(0); + if (n == 0) return FloatType(0); + if (!(incx == 1 && incy == 1)) { + ix = 0; + iy = 0; + for (i = 0; i < n; i++) { + dtemp += dx[ix0+ix] * dy[iy0+iy]; + ix += incx; + iy += incy; + } + return dtemp; + } + m = n % 5; + for (i = 0; i < m; i++) { + dtemp += dx[ix0+i] * dy[iy0+i]; + } + for (; i < n;) { + dtemp += dx[ix0+i] * dy[iy0+i]; i++; + dtemp += dx[ix0+i] * dy[iy0+i]; i++; + dtemp += dx[ix0+i] * dy[iy0+i]; i++; + dtemp += dx[ix0+i] * dy[iy0+i]; i++; + dtemp += dx[ix0+i] * dy[iy0+i]; i++; + } + return dtemp; + } + + template + inline + FloatType ddot( + SizeType n, + const FloatType* dx, + const FloatType* dy) + { + return ddot( + n, dx, SizeType(0), SizeType(1), dy, SizeType(0), SizeType(1)); + } + + } // namespace detail + + //! Interface to the LBFGS %minimizer. + /*! This class solves the unconstrained minimization problem +

+          min f(x),  x = (x1,x2,...,x_n),
+      
+ using the limited-memory BFGS method. The routine is + especially effective on problems involving a large number of + variables. In a typical iteration of this method an + approximation Hk to the inverse of the Hessian + is obtained by applying m BFGS updates to a + diagonal matrix Hk0, using information from the + previous m steps. The user specifies the number + m, which determines the amount of storage + required by the routine. The user may also provide the + diagonal matrices Hk0 (parameter diag in the run() + function) if not satisfied with the default choice. The + algorithm is described in "On the limited memory BFGS method for + large scale optimization", by D. Liu and J. Nocedal, Mathematical + Programming B 45 (1989) 503-528. + + The user is required to calculate the function value + f and its gradient g. In order to + allow the user complete control over these computations, + reverse communication is used. The routine must be called + repeatedly under the control of the member functions + requests_f_and_g(), + requests_diag(). + If neither requests_f_and_g() nor requests_diag() is + true the user should check for convergence + (using class traditional_convergence_test or any + other custom test). If the convergence test is negative, + the minimizer may be called again for the next iteration. + + The steplength (stp()) is determined at each iteration + by means of the line search routine mcsrch, which is + a slight modification of the routine CSRCH written + by More' and Thuente. + + The only variables that are machine-dependent are + xtol, + stpmin and + stpmax. + + Fatal errors cause error exceptions to be thrown. + The generic class error is sub-classed (e.g. + class error_line_search_failed) to facilitate + granular %error handling. + + A note on performance: Using Compaq Fortran V5.4 and + Compaq C++ V6.5, the C++ implementation is about 15% slower + than the Fortran implementation. + */ + template + class minimizer + { + public: + //! Default constructor. Some members are not initialized! + minimizer() + : n_(0), m_(0), maxfev_(0), + gtol_(0), xtol_(0), + stpmin_(0), stpmax_(0), + ispt(0), iypt(0) + {} + + //! Constructor. + /*! @param n The number of variables in the minimization problem. + Restriction: n > 0. + + @param m The number of corrections used in the BFGS update. + Values of m less than 3 are not recommended; + large values of m will result in excessive + computing time. 3 <= m <= 7 is + recommended. + Restriction: m > 0. + + @param maxfev Maximum number of function evaluations + per line search. + Termination occurs when the number of evaluations + of the objective function is at least maxfev by + the end of an iteration. + + @param gtol Controls the accuracy of the line search. + If the function and gradient evaluations are inexpensive with + respect to the cost of the iteration (which is sometimes the + case when solving very large problems) it may be advantageous + to set gtol to a small value. A typical small + value is 0.1. + Restriction: gtol should be greater than 1e-4. + + @param xtol An estimate of the machine precision (e.g. 10e-16 + on a SUN station 3/60). The line search routine will + terminate if the relative width of the interval of + uncertainty is less than xtol. + + @param stpmin Specifies the lower bound for the step + in the line search. + The default value is 1e-20. This value need not be modified + unless the exponent is too large for the machine being used, + or unless the problem is extremely badly scaled (in which + case the exponent should be increased). + + @param stpmax specifies the upper bound for the step + in the line search. + The default value is 1e20. This value need not be modified + unless the exponent is too large for the machine being used, + or unless the problem is extremely badly scaled (in which + case the exponent should be increased). + */ + explicit + minimizer( + SizeType n, + SizeType m = 5, + SizeType maxfev = 20, + FloatType gtol = FloatType(0.9), + FloatType xtol = FloatType(1.e-16), + FloatType stpmin = FloatType(1.e-20), + FloatType stpmax = FloatType(1.e20)) + : n_(n), m_(m), maxfev_(maxfev), + gtol_(gtol), xtol_(xtol), + stpmin_(stpmin), stpmax_(stpmax), + iflag_(0), requests_f_and_g_(false), requests_diag_(false), + iter_(0), nfun_(0), stp_(0), + stp1(0), ftol(0.0001), ys(0), point(0), npt(0), + ispt(n+2*m), iypt((n+2*m)+n*m), + info(0), bound(0), nfev(0) + { + if (n_ == 0) { + throw error_improper_input_parameter("n = 0."); + } + if (m_ == 0) { + throw error_improper_input_parameter("m = 0."); + } + if (maxfev_ == 0) { + throw error_improper_input_parameter("maxfev = 0."); + } + if (gtol_ <= FloatType(1.e-4)) { + throw error_improper_input_parameter("gtol <= 1.e-4."); + } + if (xtol_ < FloatType(0)) { + throw error_improper_input_parameter("xtol < 0."); + } + if (stpmin_ < FloatType(0)) { + throw error_improper_input_parameter("stpmin < 0."); + } + if (stpmax_ < stpmin) { + throw error_improper_input_parameter("stpmax < stpmin"); + } + w_.resize(n_*(2*m_+1)+2*m_); + scratch_array_.resize(n_); + } + + //! Number of free parameters (as passed to the constructor). + SizeType n() const { return n_; } + + //! Number of corrections kept (as passed to the constructor). + SizeType m() const { return m_; } + + /*! \brief Maximum number of evaluations of the objective function + per line search (as passed to the constructor). + */ + SizeType maxfev() const { return maxfev_; } + + /*! \brief Control of the accuracy of the line search. + (as passed to the constructor). + */ + FloatType gtol() const { return gtol_; } + + //! Estimate of the machine precision (as passed to the constructor). + FloatType xtol() const { return xtol_; } + + /*! \brief Lower bound for the step in the line search. + (as passed to the constructor). + */ + FloatType stpmin() const { return stpmin_; } + + /*! \brief Upper bound for the step in the line search. + (as passed to the constructor). + */ + FloatType stpmax() const { return stpmax_; } + + //! Status indicator for reverse communication. + /*! true if the run() function returns to request + evaluation of the objective function (f) and + gradients (g) for the current point + (x). To continue the minimization the + run() function is called again with the updated values for + f and g. +

+ See also: requests_diag() + */ + bool requests_f_and_g() const { return requests_f_and_g_; } + + //! Status indicator for reverse communication. + /*! true if the run() function returns to request + evaluation of the diagonal matrix (diag) + for the current point (x). + To continue the minimization the run() function is called + again with the updated values for diag. +

+ See also: requests_f_and_g() + */ + bool requests_diag() const { return requests_diag_; } + + //! Number of iterations so far. + /*! Note that one iteration may involve multiple evaluations + of the objective function. +

+ See also: nfun() + */ + SizeType iter() const { return iter_; } + + //! Total number of evaluations of the objective function so far. + /*! The total number of function evaluations increases by the + number of evaluations required for the line search. The total + is only increased after a successful line search. +

+ See also: iter() + */ + SizeType nfun() const { return nfun_; } + + //! Norm of gradient given gradient array of length n(). + FloatType euclidean_norm(const FloatType* a) const { + return std::sqrt(detail::ddot(n_, a, a)); + } + + //! Current stepsize. + FloatType stp() const { return stp_; } + + //! Execution of one step of the minimization. + /*! @param x On initial entry this must be set by the user to + the values of the initial estimate of the solution vector. + + @param f Before initial entry or on re-entry under the + control of requests_f_and_g(), f must be set + by the user to contain the value of the objective function + at the current point x. + + @param g Before initial entry or on re-entry under the + control of requests_f_and_g(), g must be set + by the user to contain the components of the gradient at + the current point x. + + The return value is true if either + requests_f_and_g() or requests_diag() is true. + Otherwise the user should check for convergence + (e.g. using class traditional_convergence_test) and + call the run() function again to continue the minimization. + If the return value is false the user + should not update f, g or + diag (other overload) before calling + the run() function again. + + Note that x is always modified by the run() + function. Depending on the situation it can therefore be + necessary to evaluate the objective function one more time + after the minimization is terminated. + */ + bool run( + FloatType* x, + FloatType f, + const FloatType* g) + { + return generic_run(x, f, g, false, 0); + } + + //! Execution of one step of the minimization. + /*! @param x See other overload. + + @param f See other overload. + + @param g See other overload. + + @param diag On initial entry or on re-entry under the + control of requests_diag(), diag must be set by + the user to contain the values of the diagonal matrix Hk0. + The routine will return at each iteration of the algorithm + with requests_diag() set to true. +

+ Restriction: all elements of diag must be + positive. + */ + bool run( + FloatType* x, + FloatType f, + const FloatType* g, + const FloatType* diag) + { + return generic_run(x, f, g, true, diag); + } + + void serialize(std::ostream* out) const { + out->write((const char*)&n_, sizeof(n_)); // sanity check + out->write((const char*)&m_, sizeof(m_)); // sanity check + SizeType fs = sizeof(FloatType); + out->write((const char*)&fs, sizeof(fs)); // sanity check + + mcsrch_instance.serialize(out); + out->write((const char*)&iflag_, sizeof(iflag_)); + out->write((const char*)&requests_f_and_g_, sizeof(requests_f_and_g_)); + out->write((const char*)&requests_diag_, sizeof(requests_diag_)); + out->write((const char*)&iter_, sizeof(iter_)); + out->write((const char*)&nfun_, sizeof(nfun_)); + out->write((const char*)&stp_, sizeof(stp_)); + out->write((const char*)&stp1, sizeof(stp1)); + out->write((const char*)&ftol, sizeof(ftol)); + out->write((const char*)&ys, sizeof(ys)); + out->write((const char*)&point, sizeof(point)); + out->write((const char*)&npt, sizeof(npt)); + out->write((const char*)&info, sizeof(info)); + out->write((const char*)&bound, sizeof(bound)); + out->write((const char*)&nfev, sizeof(nfev)); + out->write((const char*)&w_[0], sizeof(FloatType) * w_.size()); + out->write((const char*)&scratch_array_[0], sizeof(FloatType) * scratch_array_.size()); + } + + void deserialize(std::istream* in) { + SizeType n, m, fs; + in->read((char*)&n, sizeof(n)); + in->read((char*)&m, sizeof(m)); + in->read((char*)&fs, sizeof(fs)); + assert(n == n_); + assert(m == m_); + assert(fs == sizeof(FloatType)); + + mcsrch_instance.deserialize(in); + in->read((char*)&iflag_, sizeof(iflag_)); + in->read((char*)&requests_f_and_g_, sizeof(requests_f_and_g_)); + in->read((char*)&requests_diag_, sizeof(requests_diag_)); + in->read((char*)&iter_, sizeof(iter_)); + in->read((char*)&nfun_, sizeof(nfun_)); + in->read((char*)&stp_, sizeof(stp_)); + in->read((char*)&stp1, sizeof(stp1)); + in->read((char*)&ftol, sizeof(ftol)); + in->read((char*)&ys, sizeof(ys)); + in->read((char*)&point, sizeof(point)); + in->read((char*)&npt, sizeof(npt)); + in->read((char*)&info, sizeof(info)); + in->read((char*)&bound, sizeof(bound)); + in->read((char*)&nfev, sizeof(nfev)); + in->read((char*)&w_[0], sizeof(FloatType) * w_.size()); + in->read((char*)&scratch_array_[0], sizeof(FloatType) * scratch_array_.size()); + } + + protected: + static void throw_diagonal_element_not_positive(SizeType i) { + throw error_improper_input_data( + "The " + error::itoa(i) + ". diagonal element of the" + " inverse Hessian approximation is not positive."); + } + + bool generic_run( + FloatType* x, + FloatType f, + const FloatType* g, + bool diagco, + const FloatType* diag); + + detail::mcsrch mcsrch_instance; + const SizeType n_; + const SizeType m_; + const SizeType maxfev_; + const FloatType gtol_; + const FloatType xtol_; + const FloatType stpmin_; + const FloatType stpmax_; + int iflag_; + bool requests_f_and_g_; + bool requests_diag_; + SizeType iter_; + SizeType nfun_; + FloatType stp_; + FloatType stp1; + FloatType ftol; + FloatType ys; + SizeType point; + SizeType npt; + const SizeType ispt; + const SizeType iypt; + int info; + SizeType bound; + SizeType nfev; + std::vector w_; + std::vector scratch_array_; + }; + + template + bool minimizer::generic_run( + FloatType* x, + FloatType f, + const FloatType* g, + bool diagco, + const FloatType* diag) + { + bool execute_entire_while_loop = false; + if (!(requests_f_and_g_ || requests_diag_)) { + execute_entire_while_loop = true; + } + requests_f_and_g_ = false; + requests_diag_ = false; + FloatType* w = &(*(w_.begin())); + if (iflag_ == 0) { // Initialize. + nfun_ = 1; + if (diagco) { + for (SizeType i = 0; i < n_; i++) { + if (diag[i] <= FloatType(0)) { + throw_diagonal_element_not_positive(i); + } + } + } + else { + std::fill_n(scratch_array_.begin(), n_, FloatType(1)); + diag = &(*(scratch_array_.begin())); + } + for (SizeType i = 0; i < n_; i++) { + w[ispt + i] = -g[i] * diag[i]; + } + FloatType gnorm = std::sqrt(detail::ddot(n_, g, g)); + if (gnorm == FloatType(0)) return false; + stp1 = FloatType(1) / gnorm; + execute_entire_while_loop = true; + } + if (execute_entire_while_loop) { + bound = iter_; + iter_++; + info = 0; + if (iter_ != 1) { + if (iter_ > m_) bound = m_; + ys = detail::ddot( + n_, w, iypt + npt, SizeType(1), w, ispt + npt, SizeType(1)); + if (!diagco) { + FloatType yy = detail::ddot( + n_, w, iypt + npt, SizeType(1), w, iypt + npt, SizeType(1)); + std::fill_n(scratch_array_.begin(), n_, ys / yy); + diag = &(*(scratch_array_.begin())); + } + else { + iflag_ = 2; + requests_diag_ = true; + return true; + } + } + } + if (execute_entire_while_loop || iflag_ == 2) { + if (iter_ != 1) { + if (diag == 0) { + throw error_internal_error(__FILE__, __LINE__); + } + if (diagco) { + for (SizeType i = 0; i < n_; i++) { + if (diag[i] <= FloatType(0)) { + throw_diagonal_element_not_positive(i); + } + } + } + SizeType cp = point; + if (point == 0) cp = m_; + w[n_ + cp -1] = 1 / ys; + SizeType i; + for (i = 0; i < n_; i++) { + w[i] = -g[i]; + } + cp = point; + for (i = 0; i < bound; i++) { + if (cp == 0) cp = m_; + cp--; + FloatType sq = detail::ddot( + n_, w, ispt + cp * n_, SizeType(1), w, SizeType(0), SizeType(1)); + SizeType inmc=n_+m_+cp; + SizeType iycn=iypt+cp*n_; + w[inmc] = w[n_ + cp] * sq; + detail::daxpy(n_, -w[inmc], w, iycn, w); + } + for (i = 0; i < n_; i++) { + w[i] *= diag[i]; + } + for (i = 0; i < bound; i++) { + FloatType yr = detail::ddot( + n_, w, iypt + cp * n_, SizeType(1), w, SizeType(0), SizeType(1)); + FloatType beta = w[n_ + cp] * yr; + SizeType inmc=n_+m_+cp; + beta = w[inmc] - beta; + SizeType iscn=ispt+cp*n_; + detail::daxpy(n_, beta, w, iscn, w); + cp++; + if (cp == m_) cp = 0; + } + std::copy(w, w+n_, w+(ispt + point * n_)); + } + stp_ = FloatType(1); + if (iter_ == 1) stp_ = stp1; + std::copy(g, g+n_, w); + } + mcsrch_instance.run( + gtol_, stpmin_, stpmax_, n_, x, f, g, w, ispt + point * n_, + stp_, ftol, xtol_, maxfev_, info, nfev, &(*(scratch_array_.begin()))); + if (info == -1) { + iflag_ = 1; + requests_f_and_g_ = true; + return true; + } + if (info != 1) { + throw error_internal_error(__FILE__, __LINE__); + } + nfun_ += nfev; + npt = point*n_; + for (SizeType i = 0; i < n_; i++) { + w[ispt + npt + i] = stp_ * w[ispt + npt + i]; + w[iypt + npt + i] = g[i] - w[i]; + } + point++; + if (point == m_) point = 0; + return false; + } + + //! Traditional LBFGS convergence test. + /*! This convergence test is equivalent to the test embedded + in the lbfgs.f Fortran code. The test assumes that + there is a meaningful relation between the Euclidean norm of the + parameter vector x and the norm of the gradient + vector g. Therefore this test should not be used if + this assumption is not correct for a given problem. + */ + template + class traditional_convergence_test + { + public: + //! Default constructor. + traditional_convergence_test() + : n_(0), eps_(0) + {} + + //! Constructor. + /*! @param n The number of variables in the minimization problem. + Restriction: n > 0. + + @param eps Determines the accuracy with which the solution + is to be found. + */ + explicit + traditional_convergence_test( + SizeType n, + FloatType eps = FloatType(1.e-5)) + : n_(n), eps_(eps) + { + if (n_ == 0) { + throw error_improper_input_parameter("n = 0."); + } + if (eps_ < FloatType(0)) { + throw error_improper_input_parameter("eps < 0."); + } + } + + //! Number of free parameters (as passed to the constructor). + SizeType n() const { return n_; } + + /*! \brief Accuracy with which the solution is to be found + (as passed to the constructor). + */ + FloatType eps() const { return eps_; } + + //! Execution of the convergence test for the given parameters. + /*! Returns true if +

+            ||g|| < eps * max(1,||x||),
+          
+ where ||.|| denotes the Euclidean norm. + + @param x Current solution vector. + + @param g Components of the gradient at the current + point x. + */ + bool + operator()(const FloatType* x, const FloatType* g) const + { + FloatType xnorm = std::sqrt(detail::ddot(n_, x, x)); + FloatType gnorm = std::sqrt(detail::ddot(n_, g, g)); + if (gnorm <= eps_ * std::max(FloatType(1), xnorm)) return true; + return false; + } + protected: + const SizeType n_; + const FloatType eps_; + }; + +}} // namespace scitbx::lbfgs + +template +std::ostream& operator<<(std::ostream& os, const scitbx::lbfgs::minimizer& min) { + return os << "ITER=" << min.iter() << "\tNFUN=" << min.nfun() << "\tSTP=" << min.stp() << "\tDIAG=" << min.requests_diag() << "\tF&G=" << min.requests_f_and_g(); +} + + +#endif // SCITBX_LBFGS_H diff --git a/training/utils/lbfgs_test.cc b/training/utils/lbfgs_test.cc new file mode 100644 index 00000000..9678e788 --- /dev/null +++ b/training/utils/lbfgs_test.cc @@ -0,0 +1,117 @@ +#include +#include +#include +#include +#include "lbfgs.h" +#include "sparse_vector.h" +#include "fdict.h" + +using namespace std; + +double TestOptimizer() { + cerr << "TESTING NON-PERSISTENT OPTIMIZER\n"; + + // f(x,y) = 4x1^2 + x1*x2 + x2^2 + x3^2 + 6x3 + 5 + // df/dx1 = 8*x1 + x2 + // df/dx2 = 2*x2 + x1 + // df/dx3 = 2*x3 + 6 + double x[3]; + double g[3]; + scitbx::lbfgs::minimizer opt(3); + scitbx::lbfgs::traditional_convergence_test converged(3); + x[0] = 8; + x[1] = 8; + x[2] = 8; + double obj = 0; + do { + g[0] = 8 * x[0] + x[1]; + g[1] = 2 * x[1] + x[0]; + g[2] = 2 * x[2] + 6; + obj = 4 * x[0]*x[0] + x[0] * x[1] + x[1]*x[1] + x[2]*x[2] + 6 * x[2] + 5; + opt.run(x, obj, g); + if (!opt.requests_f_and_g()) { + if (converged(x,g)) break; + opt.run(x, obj, g); + } + cerr << x[0] << " " << x[1] << " " << x[2] << endl; + cerr << " obj=" << obj << "\td/dx1=" << g[0] << " d/dx2=" << g[1] << " d/dx3=" << g[2] << endl; + cerr << opt << endl; + } while (true); + return obj; +} + +double TestPersistentOptimizer() { + cerr << "\nTESTING PERSISTENT OPTIMIZER\n"; + // f(x,y) = 4x1^2 + x1*x2 + x2^2 + x3^2 + 6x3 + 5 + // df/dx1 = 8*x1 + x2 + // df/dx2 = 2*x2 + x1 + // df/dx3 = 2*x3 + 6 + double x[3]; + double g[3]; + scitbx::lbfgs::traditional_convergence_test converged(3); + x[0] = 8; + x[1] = 8; + x[2] = 8; + double obj = 0; + string state; + do { + g[0] = 8 * x[0] + x[1]; + g[1] = 2 * x[1] + x[0]; + g[2] = 2 * x[2] + 6; + obj = 4 * x[0]*x[0] + x[0] * x[1] + x[1]*x[1] + x[2]*x[2] + 6 * x[2] + 5; + + { + scitbx::lbfgs::minimizer opt(3); + if (state.size() > 0) { + istringstream is(state, ios::binary); + opt.deserialize(&is); + } + opt.run(x, obj, g); + ostringstream os(ios::binary); opt.serialize(&os); state = os.str(); + } + + cerr << x[0] << " " << x[1] << " " << x[2] << endl; + cerr << " obj=" << obj << "\td/dx1=" << g[0] << " d/dx2=" << g[1] << " d/dx3=" << g[2] << endl; + } while (!converged(x, g)); + return obj; +} + +void TestSparseVector() { + cerr << "Testing SparseVector serialization.\n"; + int f1 = FD::Convert("Feature_1"); + int f2 = FD::Convert("Feature_2"); + FD::Convert("LanguageModel"); + int f4 = FD::Convert("SomeFeature"); + int f5 = FD::Convert("SomeOtherFeature"); + SparseVector g; + g.set_value(f2, log(0.5)); + g.set_value(f4, log(0.125)); + g.set_value(f1, 0); + g.set_value(f5, 23.777); + ostringstream os; + double iobj = 1.5; + B64::Encode(iobj, g, &os); + cerr << iobj << "\t" << g << endl; + string data = os.str(); + cout << data << endl; + SparseVector v; + double obj; + bool decode_b64 = B64::Decode(&obj, &v, &data[0], data.size()); + cerr << obj << "\t" << v << endl; + assert(decode_b64); + assert(obj == iobj); + assert(g.size() == v.size()); +} + +int main() { + double o1 = TestOptimizer(); + double o2 = TestPersistentOptimizer(); + if (fabs(o1 - o2) > 1e-5) { + cerr << "OPTIMIZERS PERFORMED DIFFERENTLY!\n" << o1 << " vs. " << o2 << endl; + return 1; + } + TestSparseVector(); + cerr << "SUCCESS\n"; + return 0; +} + diff --git a/training/utils/libcall.pl b/training/utils/libcall.pl new file mode 100644 index 00000000..c7d0f128 --- /dev/null +++ b/training/utils/libcall.pl @@ -0,0 +1,71 @@ +use IPC::Open3; +use Symbol qw(gensym); + +$DUMMY_STDERR = gensym(); +$DUMMY_STDIN = gensym(); + +# Run the command and ignore failures +sub unchecked_call { + system("@_") +} + +# Run the command and return its output, if any ignoring failures +sub unchecked_output { + return `@_` +} + +# WARNING: Do not use this for commands that will return large amounts +# of stdout or stderr -- they might block indefinitely +sub check_output { + print STDERR "Executing and gathering output: @_\n"; + + my $pid = open3($DUMMY_STDIN, \*PH, $DUMMY_STDERR, @_); + my $proc_output = ""; + while( ) { + $proc_output .= $_; + } + waitpid($pid, 0); + # TODO: Grab signal that the process died from + my $child_exit_status = $? >> 8; + if($child_exit_status == 0) { + return $proc_output; + } else { + print STDERR "ERROR: Execution of @_ failed.\n"; + exit(1); + } +} + +# Based on Moses' safesystem sub +sub check_call { + print STDERR "Executing: @_\n"; + system(@_); + my $exitcode = $? >> 8; + if($exitcode == 0) { + return 0; + } elsif ($? == -1) { + print STDERR "ERROR: Failed to execute: @_\n $!\n"; + exit(1); + + } elsif ($? & 127) { + printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + exit(1); + + } else { + print STDERR "Failed with exit code: $exitcode\n" if $exitcode; + exit($exitcode); + } +} + +sub check_bash_call { + my @args = ( "bash", "-auxeo", "pipefail", "-c", "@_"); + check_call(@args); +} + +sub check_bash_output { + my @args = ( "bash", "-auxeo", "pipefail", "-c", "@_"); + return check_output(@args); +} + +# perl module weirdness... +return 1; diff --git a/training/utils/online_optimizer.cc b/training/utils/online_optimizer.cc new file mode 100644 index 00000000..3ed95452 --- /dev/null +++ b/training/utils/online_optimizer.cc @@ -0,0 +1,16 @@ +#include "online_optimizer.h" + +LearningRateSchedule::~LearningRateSchedule() {} + +double StandardLearningRate::eta(int k) const { + return eta_0_ / (1.0 + k / N_); +} + +double ExponentialDecayLearningRate::eta(int k) const { + return eta_0_ * pow(alpha_, k / N_); +} + +OnlineOptimizer::~OnlineOptimizer() {} + +void OnlineOptimizer::ResetEpochImpl() {} + diff --git a/training/utils/online_optimizer.h b/training/utils/online_optimizer.h new file mode 100644 index 00000000..28d89344 --- /dev/null +++ b/training/utils/online_optimizer.h @@ -0,0 +1,129 @@ +#ifndef _ONL_OPTIMIZE_H_ +#define _ONL_OPTIMIZE_H_ + +#include +#include +#include +#include +#include "sparse_vector.h" + +struct LearningRateSchedule { + virtual ~LearningRateSchedule(); + // returns the learning rate for the kth iteration + virtual double eta(int k) const = 0; +}; + +// TODO in the Tsoruoaka et al. (ACL 2009) paper, they use N +// to mean the batch size in most places, but it doesn't completely +// make sense to me in the learning rate schedules-- this needs +// to be worked out to make sure they didn't mean corpus size +// in some places and batch size in others (since in the paper they +// only ever work with batch sizes of 1) +struct StandardLearningRate : public LearningRateSchedule { + StandardLearningRate( + size_t batch_size, // batch size, not corpus size! + double eta_0 = 0.2) : + eta_0_(eta_0), + N_(static_cast(batch_size)) {} + + virtual double eta(int k) const; + + private: + const double eta_0_; + const double N_; +}; + +struct ExponentialDecayLearningRate : public LearningRateSchedule { + ExponentialDecayLearningRate( + size_t batch_size, // batch size, not corpus size! + double eta_0 = 0.2, + double alpha = 0.85 // recommended by Tsuruoka et al. (ACL 2009) + ) : eta_0_(eta_0), + N_(static_cast(batch_size)), + alpha_(alpha) { + assert(alpha > 0); + assert(alpha < 1.0); + } + + virtual double eta(int k) const; + + private: + const double eta_0_; + const double N_; + const double alpha_; +}; + +class OnlineOptimizer { + public: + virtual ~OnlineOptimizer(); + OnlineOptimizer(const std::tr1::shared_ptr& s, + size_t batch_size, + const std::vector& frozen_feats = std::vector()) + : N_(batch_size),schedule_(s),k_() { + for (int i = 0; i < frozen_feats.size(); ++i) + frozen_.insert(frozen_feats[i]); + } + void ResetEpoch() { k_ = 0; ResetEpochImpl(); } + void UpdateWeights(const SparseVector& approx_g, int max_feat, SparseVector* weights) { + ++k_; + const double eta = schedule_->eta(k_); + UpdateWeightsImpl(eta, approx_g, max_feat, weights); + } + + protected: + virtual void ResetEpochImpl(); + virtual void UpdateWeightsImpl(const double& eta, const SparseVector& approx_g, int max_feat, SparseVector* weights) = 0; + const size_t N_; // number of training instances per batch + std::set frozen_; // frozen (non-optimizing) features + + private: + std::tr1::shared_ptr schedule_; + int k_; // iteration count +}; + +class CumulativeL1OnlineOptimizer : public OnlineOptimizer { + public: + CumulativeL1OnlineOptimizer(const std::tr1::shared_ptr& s, + size_t training_instances, double C, + const std::vector& frozen) : + OnlineOptimizer(s, training_instances, frozen), C_(C), u_() {} + + protected: + void ResetEpochImpl() { u_ = 0; } + void UpdateWeightsImpl(const double& eta, const SparseVector& approx_g, int max_feat, SparseVector* weights) { + u_ += eta * C_ / N_; + for (SparseVector::const_iterator it = approx_g.begin(); + it != approx_g.end(); ++it) { + if (frozen_.count(it->first) == 0) + weights->add_value(it->first, eta * it->second); + } + for (int i = 1; i < max_feat; ++i) + if (frozen_.count(i) == 0) ApplyPenalty(i, weights); + } + + private: + void ApplyPenalty(int i, SparseVector* w) { + const double z = w->value(i); + double w_i = z; + double q_i = q_.value(i); + if (w_i > 0.0) + w_i = std::max(0.0, w_i - (u_ + q_i)); + else if (w_i < 0.0) + w_i = std::min(0.0, w_i + (u_ - q_i)); + q_i += w_i - z; + if (q_i == 0.0) + q_.erase(i); + else + q_.set_value(i, q_i); + if (w_i == 0.0) + w->erase(i); + else + w->set_value(i, w_i); + } + + const double C_; // reguarlization strength + double u_; + SparseVector q_; +}; + +#endif diff --git a/training/utils/optimize.cc b/training/utils/optimize.cc new file mode 100644 index 00000000..41ac90d8 --- /dev/null +++ b/training/utils/optimize.cc @@ -0,0 +1,102 @@ +#include "optimize.h" + +#include +#include + +#include "lbfgs.h" + +using namespace std; + +BatchOptimizer::~BatchOptimizer() {} + +void BatchOptimizer::Save(ostream* out) const { + out->write((const char*)&eval_, sizeof(eval_)); + out->write((const char*)&has_converged_, sizeof(has_converged_)); + SaveImpl(out); + unsigned int magic = 0xABCDDCBA; // should be uint32_t + out->write((const char*)&magic, sizeof(magic)); +} + +void BatchOptimizer::Load(istream* in) { + in->read((char*)&eval_, sizeof(eval_)); + in->read((char*)&has_converged_, sizeof(has_converged_)); + LoadImpl(in); + unsigned int magic = 0; // should be uint32_t + in->read((char*)&magic, sizeof(magic)); + assert(magic == 0xABCDDCBA); + cerr << Name() << " EVALUATION #" << eval_ << endl; +} + +void BatchOptimizer::SaveImpl(ostream* out) const { + (void)out; +} + +void BatchOptimizer::LoadImpl(istream* in) { + (void)in; +} + +string RPropOptimizer::Name() const { + return "RPropOptimizer"; +} + +void RPropOptimizer::OptimizeImpl(const double& obj, + const vector& g, + vector* x) { + for (int i = 0; i < g.size(); ++i) { + const double g_i = g[i]; + const double sign_i = (signbit(g_i) ? -1.0 : 1.0); + const double prod = g_i * prev_g_[i]; + if (prod > 0.0) { + const double dij = min(delta_ij_[i] * eta_plus_, delta_max_); + (*x)[i] -= dij * sign_i; + delta_ij_[i] = dij; + prev_g_[i] = g_i; + } else if (prod < 0.0) { + delta_ij_[i] = max(delta_ij_[i] * eta_minus_, delta_min_); + prev_g_[i] = 0.0; + } else { + (*x)[i] -= delta_ij_[i] * sign_i; + prev_g_[i] = g_i; + } + } +} + +void RPropOptimizer::SaveImpl(ostream* out) const { + const size_t n = prev_g_.size(); + out->write((const char*)&n, sizeof(n)); + out->write((const char*)&prev_g_[0], sizeof(double) * n); + out->write((const char*)&delta_ij_[0], sizeof(double) * n); +} + +void RPropOptimizer::LoadImpl(istream* in) { + size_t n; + in->read((char*)&n, sizeof(n)); + assert(n == prev_g_.size()); + assert(n == delta_ij_.size()); + in->read((char*)&prev_g_[0], sizeof(double) * n); + in->read((char*)&delta_ij_[0], sizeof(double) * n); +} + +string LBFGSOptimizer::Name() const { + return "LBFGSOptimizer"; +} + +LBFGSOptimizer::LBFGSOptimizer(int num_feats, int memory_buffers) : + opt_(num_feats, memory_buffers) {} + +void LBFGSOptimizer::SaveImpl(ostream* out) const { + opt_.serialize(out); +} + +void LBFGSOptimizer::LoadImpl(istream* in) { + opt_.deserialize(in); +} + +void LBFGSOptimizer::OptimizeImpl(const double& obj, + const vector& g, + vector* x) { + opt_.run(&(*x)[0], obj, &g[0]); + if (!opt_.requests_f_and_g()) opt_.run(&(*x)[0], obj, &g[0]); + // cerr << opt_ << endl; +} + diff --git a/training/utils/optimize.h b/training/utils/optimize.h new file mode 100644 index 00000000..07943b44 --- /dev/null +++ b/training/utils/optimize.h @@ -0,0 +1,92 @@ +#ifndef _OPTIMIZE_H_ +#define _OPTIMIZE_H_ + +#include +#include +#include +#include + +#include "lbfgs.h" + +// abstract base class for first order optimizers +// order of invocation: new, Load(), Optimize(), Save(), delete +class BatchOptimizer { + public: + BatchOptimizer() : eval_(1), has_converged_(false) {} + virtual ~BatchOptimizer(); + virtual std::string Name() const = 0; + int EvaluationCount() const { return eval_; } + bool HasConverged() const { return has_converged_; } + + void Optimize(const double& obj, + const std::vector& g, + std::vector* x) { + assert(g.size() == x->size()); + ++eval_; + OptimizeImpl(obj, g, x); + scitbx::lbfgs::traditional_convergence_test converged(g.size()); + has_converged_ = converged(&(*x)[0], &g[0]); + } + + void Save(std::ostream* out) const; + void Load(std::istream* in); + protected: + virtual void SaveImpl(std::ostream* out) const; + virtual void LoadImpl(std::istream* in); + virtual void OptimizeImpl(const double& obj, + const std::vector& g, + std::vector* x) = 0; + + int eval_; + private: + bool has_converged_; +}; + +class RPropOptimizer : public BatchOptimizer { + public: + explicit RPropOptimizer(int num_vars, + double eta_plus = 1.2, + double eta_minus = 0.5, + double delta_0 = 0.1, + double delta_max = 50.0, + double delta_min = 1e-6) : + prev_g_(num_vars, 0.0), + delta_ij_(num_vars, delta_0), + eta_plus_(eta_plus), + eta_minus_(eta_minus), + delta_max_(delta_max), + delta_min_(delta_min) { + assert(eta_plus > 1.0); + assert(eta_minus > 0.0 && eta_minus < 1.0); + assert(delta_max > 0.0); + assert(delta_min > 0.0); + } + std::string Name() const; + void OptimizeImpl(const double& obj, + const std::vector& g, + std::vector* x); + void SaveImpl(std::ostream* out) const; + void LoadImpl(std::istream* in); + private: + std::vector prev_g_; + std::vector delta_ij_; + const double eta_plus_; + const double eta_minus_; + const double delta_max_; + const double delta_min_; +}; + +class LBFGSOptimizer : public BatchOptimizer { + public: + explicit LBFGSOptimizer(int num_vars, int memory_buffers = 10); + std::string Name() const; + void SaveImpl(std::ostream* out) const; + void LoadImpl(std::istream* in); + void OptimizeImpl(const double& obj, + const std::vector& g, + std::vector* x); + private: + scitbx::lbfgs::minimizer opt_; +}; + +#endif diff --git a/training/utils/optimize_test.cc b/training/utils/optimize_test.cc new file mode 100644 index 00000000..bff2ca03 --- /dev/null +++ b/training/utils/optimize_test.cc @@ -0,0 +1,118 @@ +#include +#include +#include +#include +#include "optimize.h" +#include "online_optimizer.h" +#include "sparse_vector.h" +#include "fdict.h" + +using namespace std; + +double TestOptimizer(BatchOptimizer* opt) { + cerr << "TESTING NON-PERSISTENT OPTIMIZER\n"; + + // f(x,y) = 4x1^2 + x1*x2 + x2^2 + x3^2 + 6x3 + 5 + // df/dx1 = 8*x1 + x2 + // df/dx2 = 2*x2 + x1 + // df/dx3 = 2*x3 + 6 + vector x(3); + vector g(3); + x[0] = 8; + x[1] = 8; + x[2] = 8; + double obj = 0; + do { + g[0] = 8 * x[0] + x[1]; + g[1] = 2 * x[1] + x[0]; + g[2] = 2 * x[2] + 6; + obj = 4 * x[0]*x[0] + x[0] * x[1] + x[1]*x[1] + x[2]*x[2] + 6 * x[2] + 5; + opt->Optimize(obj, g, &x); + + cerr << x[0] << " " << x[1] << " " << x[2] << endl; + cerr << " obj=" << obj << "\td/dx1=" << g[0] << " d/dx2=" << g[1] << " d/dx3=" << g[2] << endl; + } while (!opt->HasConverged()); + return obj; +} + +double TestPersistentOptimizer(BatchOptimizer* opt) { + cerr << "\nTESTING PERSISTENT OPTIMIZER\n"; + // f(x,y) = 4x1^2 + x1*x2 + x2^2 + x3^2 + 6x3 + 5 + // df/dx1 = 8*x1 + x2 + // df/dx2 = 2*x2 + x1 + // df/dx3 = 2*x3 + 6 + vector x(3); + vector g(3); + x[0] = 8; + x[1] = 8; + x[2] = 8; + double obj = 0; + string state; + bool converged = false; + while (!converged) { + g[0] = 8 * x[0] + x[1]; + g[1] = 2 * x[1] + x[0]; + g[2] = 2 * x[2] + 6; + obj = 4 * x[0]*x[0] + x[0] * x[1] + x[1]*x[1] + x[2]*x[2] + 6 * x[2] + 5; + + { + if (state.size() > 0) { + istringstream is(state, ios::binary); + opt->Load(&is); + } + opt->Optimize(obj, g, &x); + ostringstream os(ios::binary); opt->Save(&os); state = os.str(); + + } + + cerr << x[0] << " " << x[1] << " " << x[2] << endl; + cerr << " obj=" << obj << "\td/dx1=" << g[0] << " d/dx2=" << g[1] << " d/dx3=" << g[2] << endl; + converged = opt->HasConverged(); + if (!converged) { + // now screw up the state (should be undone by Load) + obj += 2.0; + g[1] = -g[2]; + vector x2 = x; + try { + opt->Optimize(obj, g, &x2); + } catch (...) { } + } + } + return obj; +} + +template +void TestOptimizerVariants(int num_vars) { + O oa(num_vars); + cerr << "-------------------------------------------------------------------------\n"; + cerr << "TESTING: " << oa.Name() << endl; + double o1 = TestOptimizer(&oa); + O ob(num_vars); + double o2 = TestPersistentOptimizer(&ob); + if (o1 != o2) { + cerr << oa.Name() << " VARIANTS PERFORMED DIFFERENTLY!\n" << o1 << " vs. " << o2 << endl; + exit(1); + } + cerr << oa.Name() << " SUCCESS\n"; +} + +using namespace std::tr1; + +void TestOnline() { + size_t N = 20; + double C = 1.0; + double eta0 = 0.2; + std::tr1::shared_ptr r(new ExponentialDecayLearningRate(N, eta0, 0.85)); + //shared_ptr r(new StandardLearningRate(N, eta0)); + CumulativeL1OnlineOptimizer opt(r, N, C, std::vector()); + assert(r->eta(10) < r->eta(1)); +} + +int main() { + int n = 3; + TestOptimizerVariants(n); + TestOptimizerVariants(n); + TestOnline(); + return 0; +} + diff --git a/training/utils/parallelize.pl b/training/utils/parallelize.pl new file mode 100755 index 00000000..4197e0e5 --- /dev/null +++ b/training/utils/parallelize.pl @@ -0,0 +1,423 @@ +#!/usr/bin/env perl + +# Author: Adam Lopez +# +# This script takes a command that processes input +# from stdin one-line-at-time, and parallelizes it +# on the cluster using David Chiang's sentserver/ +# sentclient architecture. +# +# Prerequisites: the command *must* read each line +# without waiting for subsequent lines of input +# (for instance, a command which must read all lines +# of input before processing will not work) and +# return it to the output *without* buffering +# multiple lines. + +#TODO: if -j 1, run immediately, not via sentserver? possible differences in environment might make debugging harder + +#ANNOYANCE: if input is shorter than -j n lines, or at the very last few lines, repeatedly sleeps. time cut down to 15s from 60s + +my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment"; } +use LocalConfig; + +use Cwd qw/ abs_path cwd getcwd /; +use File::Temp qw/ tempfile /; +use Getopt::Long; +use IPC::Open2; +use strict; +use POSIX ":sys_wait_h"; + +use File::Basename; +my $myDir = dirname(__FILE__); +print STDERR __FILE__." -> $myDir\n"; +push(@INC, $myDir); +require "libcall.pl"; + +my $tailn=5; # +0 = concatenate all the client logs. 5 = last 5 lines +my $recycle_clients; # spawn new clients when previous ones terminate +my $stay_alive; # dont let server die when having zero clients +my $joblist = ""; +my $errordir=""; +my $multiline; +my $workdir = '.'; +my $numnodes = 8; +my $user = $ENV{"USER"}; +my $pmem = "9g"; +my $basep=50300; +my $randp=300; +my $tryp=50; +my $no_which; +my $no_cd; + +my $DEBUG=$ENV{DEBUG}; +print STDERR "DEBUG=$DEBUG output enabled.\n" if $DEBUG; +my $verbose = 1; +sub verbose { + if ($verbose) { + print STDERR @_,"\n"; + } +} +sub debug { + if ($DEBUG) { + my ($package, $filename, $line) = caller; + print STDERR "DEBUG: $filename($line): ",join(' ',@_),"\n"; + } +} +my $is_shell_special=qr.[ \t\n\\><|&;"'`~*?{}$!()].; +my $shell_escape_in_quote=qr.[\\"\$`!].; +sub escape_shell { + my ($arg)=@_; + return undef unless defined $arg; + return '""' unless $arg; + if ($arg =~ /$is_shell_special/) { + $arg =~ s/($shell_escape_in_quote)/\\$1/g; + return "\"$arg\""; + } + return $arg; +} +sub preview_files { + my ($l,$skipempty,$footer,$n)=@_; + $n=$tailn unless defined $n; + my @f=grep { ! ($skipempty && -z $_) } @$l; + my $fn=join(' ',map {escape_shell($_)} @f); + my $cmd="tail -n $n $fn"; + unchecked_output("$cmd").($footer?"\nNONEMPTY FILES:\n$fn\n":""); +} +sub prefix_dirname($) { + #like `dirname but if ends in / then return the whole thing + local ($_)=@_; + if (/\/$/) { + $_; + } else { + s#/[^/]$##; + $_ ? $_ : ''; + } +} +sub ensure_final_slash($) { + local ($_)=@_; + m#/$# ? $_ : ($_."/"); +} +sub extend_path($$;$$) { + my ($base,$ext,$mkdir,$baseisdir)=@_; + if (-d $base) { + $base.="/"; + } else { + my $dir; + if ($baseisdir) { + $dir=$base; + $base.='/' unless $base =~ /\/$/; + } else { + $dir=prefix_dirname($base); + } + my @cmd=("/bin/mkdir","-p",$dir); + check_call(@cmd) if $mkdir; + } + return $base.$ext; +} + +my $abscwd=abs_path(&getcwd); +sub print_help; + +my $use_fork; +my @pids; + +# Process command-line options +unless (GetOptions( + "stay-alive" => \$stay_alive, + "recycle-clients" => \$recycle_clients, + "error-dir=s" => \$errordir, + "multi-line" => \$multiline, + "workdir=s" => \$workdir, + "use-fork" => \$use_fork, + "verbose" => \$verbose, + "jobs=i" => \$numnodes, + "pmem=s" => \$pmem, + "baseport=i" => \$basep, +# "iport=i" => \$randp, #for short name -i + "no-which!" => \$no_which, + "no-cd!" => \$no_cd, + "tailn=s" => \$tailn, +) && scalar @ARGV){ + print_help(); + die "bad options."; +} + +my $cmd = ""; +my $prog=shift; +if ($no_which) { + $cmd=$prog; +} else { + $cmd=check_output("which $prog"); + chomp $cmd; + die "$prog not found - $cmd" unless $cmd; +} +#$cmd=abs_path($cmd); +for my $arg (@ARGV) { + $cmd .= " ".escape_shell($arg); +} +die "Please specify a command to parallelize\n" if $cmd eq ''; + +my $cdcmd=$no_cd ? '' : ("cd ".escape_shell($abscwd)."\n"); + +my $executable = $cmd; +$executable =~ s/^\s*(\S+)($|\s.*)/$1/; +$executable=check_output("basename $executable"); +chomp $executable; + + +print STDERR "Parallelizing ($numnodes ways): $cmd\n\n"; + +# create -e dir and save .sh +use File::Temp qw/tempdir/; +unless ($errordir) { + $errordir=tempdir("$executable.XXXXXX",CLEANUP=>1); +} +if ($errordir) { + my $scriptfile=extend_path("$errordir/","$executable.sh",1,1); + -d $errordir || die "should have created -e dir $errordir"; + open SF,">",$scriptfile || die; + print SF "$cdcmd$cmd\n"; + close SF; + chmod 0755,$scriptfile; + $errordir=abs_path($errordir); + &verbose("-e dir: $errordir"); +} + +# set cleanup handler +my @cleanup_cmds; +sub cleanup; +sub cleanup_and_die; +$SIG{INT} = "cleanup_and_die"; +$SIG{TERM} = "cleanup_and_die"; +$SIG{HUP} = "cleanup_and_die"; + +# other subs: +sub numof_live_jobs; +sub launch_job_on_node; + + +# vars +my $mydir = check_output("dirname $0"); chomp $mydir; +my $sentserver = "$mydir/sentserver"; +my $sentclient = "$mydir/sentclient"; +my $host = check_output("hostname"); +chomp $host; + + +# find open port +srand; +my $port = 50300+int(rand($randp)); +my $endp=$port+$tryp; +sub listening_port_lines { + my $quiet=$verbose?'':'2>/dev/null'; + return unchecked_output("netstat -a -n $quiet | grep LISTENING | grep -i tcp"); +} +my $netstat=&listening_port_lines; + +if ($verbose){ print STDERR "Testing port $port...";} + +while ($netstat=~/$port/ || &listening_port_lines=~/$port/){ + if ($verbose){ print STDERR "port is busy\n";} + $port++; + if ($port > $endp){ + die "Unable to find open port\n"; + } + if ($verbose){ print STDERR "Testing port $port... "; } +} +if ($verbose){ + print STDERR "port $port is available\n"; +} + +my $key = int(rand()*1000000); + +my $multiflag = ""; +if ($multiline){ $multiflag = "-m"; print STDERR "expecting multiline output.\n"; } +my $stay_alive_flag = ""; +if ($stay_alive){ $stay_alive_flag = "--stay-alive"; print STDERR "staying alive while no clients are connected.\n"; } + +my $node_count = 0; +my $script = ""; +# fork == one thread runs the sentserver, while the +# other spawns the sentclient commands. +my $pid = fork; +if ($pid == 0) { # child + sleep 8; # give other thread time to start sentserver + $script = "$cdcmd$sentclient $host:$port:$key $cmd"; + + if ($verbose){ + print STDERR "Client script:\n====\n"; + print STDERR $script; + print STDERR "====\n"; + } + for (my $jobn=0; $jobn<$numnodes; $jobn++){ + launch_job(); + } + if ($recycle_clients) { + my $ret; + my $livejobs; + while (1) { + $ret = waitpid($pid, WNOHANG); + #print STDERR "waitpid $pid ret = $ret \n"; + last if ($ret != 0); + $livejobs = numof_live_jobs(); + if ($numnodes >= $livejobs ) { # a client terminated, OR # lines of input was less than -j + print STDERR "num of requested nodes = $numnodes; num of currently live jobs = $livejobs; Client terminated - launching another.\n"; + launch_job(); + } else { + sleep 15; + } + } + } + print STDERR "CHILD PROCESSES SPAWNED ... WAITING\n"; + for my $p (@pids) { + waitpid($p, 0); + } +} else { +# my $todo = "$sentserver -k $key $multiflag $port "; + my $todo = "$sentserver -k $key $multiflag $port $stay_alive_flag "; + if ($verbose){ print STDERR "Running: $todo\n"; } + check_call($todo); + print STDERR "Call to $sentserver returned.\n"; + cleanup(); + exit(0); +} + +sub numof_live_jobs { + if ($use_fork) { + die "not implemented"; + } else { + # We can probably continue decoding if the qstat error is only temporary + my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat"))); + return ($#livejobs + 1); + } +} +my (@errors,@outs,@cmds); + +sub launch_job { + if ($use_fork) { return launch_job_fork(); } + my $errorfile = "/dev/null"; + my $outfile = "/dev/null"; + $node_count++; + my $clientname = $executable; + $clientname =~ s/^(.{4}).*$/$1/; + $clientname = "$clientname.$node_count"; + if ($errordir){ + $errorfile = "$errordir/$clientname.ER"; + $outfile = "$errordir/$clientname.OU"; + push @errors,$errorfile; + push @outs,$outfile; + } + my $todo = qsub_args($pmem) . " -N $clientname -o $outfile -e $errorfile"; + push @cmds,$todo; + + print STDERR "Running: $todo\n"; + local(*QOUT, *QIN); + open2(\*QOUT, \*QIN, $todo) or die "Failed to open2: $!"; + print QIN $script; + close QIN; + while (my $jobid=){ + chomp $jobid; + if ($verbose){ print STDERR "Launched client job: $jobid"; } + $jobid =~ s/^(\d+)(.*?)$/\1/g; + $jobid =~ s/^Your job (\d+) .*$/\1/; + print STDERR " short job id $jobid\n"; + if ($verbose){ + print STDERR "cd: $abscwd\n"; + print STDERR "cmd: $cmd\n"; + } + if ($joblist == "") { $joblist = $jobid; } + else {$joblist = $joblist . "\|" . $jobid; } + my $cleanfn="qdel $jobid 2> /dev/null"; + push(@cleanup_cmds, $cleanfn); + } + close QOUT; +} + +sub launch_job_fork { + my $errorfile = "/dev/null"; + my $outfile = "/dev/null"; + $node_count++; + my $clientname = $executable; + $clientname =~ s/^(.{4}).*$/$1/; + $clientname = "$clientname.$node_count"; + if ($errordir){ + $errorfile = "$errordir/$clientname.ER"; + $outfile = "$errordir/$clientname.OU"; + push @errors,$errorfile; + push @outs,$outfile; + } + my $pid = fork; + if ($pid == 0) { + my ($fh, $scr_name) = get_temp_script(); + print $fh $script; + close $fh; + my $todo = "/bin/bash -xeo pipefail $scr_name 1> $outfile 2> $errorfile"; + print STDERR "EXEC: $todo\n"; + my $out = check_output("$todo"); + unlink $scr_name or warn "Failed to remove $scr_name"; + exit 0; + } else { + push @pids, $pid; + } +} + +sub get_temp_script { + my ($fh, $filename) = tempfile( "$workdir/workXXXX", SUFFIX => '.sh'); + return ($fh, $filename); +} + +sub cleanup_and_die { + cleanup(); + die "\n"; +} + +sub cleanup { + print STDERR "Cleaning up...\n"; + for $cmd (@cleanup_cmds){ + print STDERR " Cleanup command: $cmd\n"; + eval $cmd; + } + print STDERR "outputs:\n",preview_files(\@outs,1),"\n"; + print STDERR "errors:\n",preview_files(\@errors,1),"\n"; + print STDERR "cmd:\n",$cmd,"\n"; + print STDERR " cat $errordir/*.ER\nfor logs.\n"; + print STDERR "Cleanup finished.\n"; +} + +sub print_help +{ + my $name = check_output("basename $0"); chomp $name; + print << "Help"; + +usage: $name [options] + + Automatic black-box parallelization of commands. + +options: + + --use-fork + Instead of using qsub, use fork. + + -e, --error-dir + Retain output files from jobs in , rather + than silently deleting them. + + -m, --multi-line + Expect that command may produce multiple output + lines for a single input line. $name makes a + reasonable attempt to obtain all output before + processing additional inputs. However, use of this + option is inherently unsafe. + + -v, --verbose + Print diagnostic informatoin on stderr. + + -j, --jobs + Number of jobs to use. + + -p, --pmem + pmem setting for each job. + +Help +} diff --git a/training/utils/risk.cc b/training/utils/risk.cc new file mode 100644 index 00000000..d5a12cfd --- /dev/null +++ b/training/utils/risk.cc @@ -0,0 +1,45 @@ +#include "risk.h" + +#include "prob.h" +#include "candidate_set.h" +#include "ns.h" + +using namespace std; + +namespace training { + +// g = \sum_e p(e|f) * loss(e) * (phi(e,f) - E[phi(e,f)]) +double CandidateSetRisk::operator()(const vector& params, + SparseVector* g) const { + prob_t z; + for (unsigned i = 0; i < cands_.size(); ++i) { + const prob_t u(cands_[i].fmap.dot(params), init_lnx()); + z += u; + } + const double log_z = log(z); + + SparseVector exp_feats; + if (g) { + for (unsigned i = 0; i < cands_.size(); ++i) { + const double log_prob = cands_[i].fmap.dot(params) - log_z; + const double prob = exp(log_prob); + exp_feats += cands_[i].fmap * prob; + } + } + + double risk = 0; + for (unsigned i = 0; i < cands_.size(); ++i) { + const double log_prob = cands_[i].fmap.dot(params) - log_z; + const double prob = exp(log_prob); + const double cost = metric_.IsErrorMetric() ? metric_.ComputeScore(cands_[i].eval_feats) + : 1.0 - metric_.ComputeScore(cands_[i].eval_feats); + const double r = prob * cost; + risk += r; + if (g) (*g) += (cands_[i].fmap - exp_feats) * r; + } + return risk; +} + +} + + diff --git a/training/utils/risk.h b/training/utils/risk.h new file mode 100644 index 00000000..2e8db0fb --- /dev/null +++ b/training/utils/risk.h @@ -0,0 +1,26 @@ +#ifndef _RISK_H_ +#define _RISK_H_ + +#include +#include "sparse_vector.h" +class EvaluationMetric; + +namespace training { + class CandidateSet; + + class CandidateSetRisk { + public: + explicit CandidateSetRisk(const CandidateSet& cs, const EvaluationMetric& metric) : + cands_(cs), + metric_(metric) {} + // compute the risk (expected loss) of a CandidateSet + // (optional) the gradient of the risk with respect to params + double operator()(const std::vector& params, + SparseVector* g = NULL) const; + private: + const CandidateSet& cands_; + const EvaluationMetric& metric_; + }; +}; + +#endif diff --git a/training/utils/sentclient.c b/training/utils/sentclient.c new file mode 100644 index 00000000..91d994ab --- /dev/null +++ b/training/utils/sentclient.c @@ -0,0 +1,76 @@ +/* Copyright (c) 2001 by David Chiang. All rights reserved.*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sentserver.h" + +int main (int argc, char *argv[]) { + int sock, port; + char *s, *key; + struct hostent *hp; + struct sockaddr_in server; + int errors = 0; + + if (argc < 3) { + fprintf(stderr, "Usage: sentclient host[:port[:key]] command [args ...]\n"); + exit(1); + } + + s = strchr(argv[1], ':'); + key = NULL; + + if (s == NULL) { + port = DEFAULT_PORT; + } else { + *s = '\0'; + s+=1; + /* dumb hack */ + key = strchr(s, ':'); + if (key != NULL){ + *key = '\0'; + key += 1; + } + port = atoi(s); + } + + sock = socket(AF_INET, SOCK_STREAM, 0); + + hp = gethostbyname(argv[1]); + if (hp == NULL) { + fprintf(stderr, "unknown host %s\n", argv[1]); + exit(1); + } + + bzero((char *)&server, sizeof(server)); + bcopy(hp->h_addr, (char *)&server.sin_addr, hp->h_length); + server.sin_family = hp->h_addrtype; + server.sin_port = htons(port); + + while (connect(sock, (struct sockaddr *)&server, sizeof(server)) < 0) { + perror("connect()"); + sleep(1); + errors++; + if (errors > 5) + exit(1); + } + + close(0); + close(1); + dup2(sock, 0); + dup2(sock, 1); + + if (key != NULL){ + write(1, key, strlen(key)); + write(1, "\n", 1); + } + + execvp(argv[2], argv+2); + return 0; +} diff --git a/training/utils/sentserver.c b/training/utils/sentserver.c new file mode 100644 index 00000000..c20b4fa6 --- /dev/null +++ b/training/utils/sentserver.c @@ -0,0 +1,515 @@ +/* Copyright (c) 2001 by David Chiang. All rights reserved.*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sentserver.h" + +#define MAX_CLIENTS 64 + +struct clientinfo { + int s; + struct sockaddr_in sin; +}; + +struct line { + int id; + char *s; + int status; + struct line *next; +} *head, **ptail; + +int n_sent = 0, n_received=0, n_flushed=0; + +#define STATUS_RUNNING 0 +#define STATUS_ABORTED 1 +#define STATUS_FINISHED 2 + +pthread_mutex_t queue_mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t clients_mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t input_mutex = PTHREAD_MUTEX_INITIALIZER; + +int n_clients = 0; +int s; +int expect_multiline_output = 0; +int log_mutex = 0; +int stay_alive = 0; /* dont panic and die with zero clients */ + +void queue_finish(struct line *node, char *s, int fid); +char * read_line(int fd, int multiline); +void done (int code); + +struct line * queue_get(int fid) { + struct line *cur; + char *s, *synch; + + if (log_mutex) fprintf(stderr, "Getting for data for fid %d\n", fid); + if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid); + pthread_mutex_lock(&queue_mutex); + + /* First, check for aborted sentences. */ + + if (log_mutex) fprintf(stderr, " Checking queue for aborted jobs (fid %d)\n", fid); + for (cur = head; cur != NULL; cur = cur->next) { + if (cur->status == STATUS_ABORTED) { + cur->status = STATUS_RUNNING; + + if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid); + pthread_mutex_unlock(&queue_mutex); + + return cur; + } + } + if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid); + pthread_mutex_unlock(&queue_mutex); + + /* Otherwise, read a new one. */ + if (log_mutex) fprintf(stderr, "Locking input mutex (%d)\n", fid); + if (log_mutex) fprintf(stderr, " Reading input for new data (fid %d)\n", fid); + pthread_mutex_lock(&input_mutex); + s = read_line(0,0); + + while (s) { + if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid); + pthread_mutex_lock(&queue_mutex); + if (log_mutex) fprintf(stderr, "Unlocking input mutex (%d)\n", fid); + pthread_mutex_unlock(&input_mutex); + + cur = malloc(sizeof (struct line)); + cur->id = n_sent; + cur->s = s; + cur->next = NULL; + + *ptail = cur; + ptail = &cur->next; + + n_sent++; + + if (strcmp(s,"===SYNCH===\n")==0){ + fprintf(stderr, "Received ===SYNCH=== signal (fid %d)\n", fid); + // Note: queue_finish calls free(cur->s). + // Therefore we need to create a new string here. + synch = malloc((strlen("===SYNCH===\n")+2) * sizeof (char)); + synch = strcpy(synch, s); + + if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid); + pthread_mutex_unlock(&queue_mutex); + queue_finish(cur, synch, fid); /* handles its own lock */ + + if (log_mutex) fprintf(stderr, "Locking input mutex (%d)\n", fid); + if (log_mutex) fprintf(stderr, " Reading input for new data (fid %d)\n", fid); + pthread_mutex_lock(&input_mutex); + + s = read_line(0,0); + } else { + if (log_mutex) fprintf(stderr, " Received new data %d (fid %d)\n", cur->id, fid); + cur->status = STATUS_RUNNING; + if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid); + pthread_mutex_unlock(&queue_mutex); + return cur; + } + } + + if (log_mutex) fprintf(stderr, "Unlocking input mutex (%d)\n", fid); + pthread_mutex_unlock(&input_mutex); + /* Only way to reach this point: no more output */ + + if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid); + pthread_mutex_lock(&queue_mutex); + if (head == NULL) { + fprintf(stderr, "Reached end of file. Exiting.\n"); + done(0); + } else + ptail = NULL; /* This serves as a signal that there is no more input */ + if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid); + pthread_mutex_unlock(&queue_mutex); + + return NULL; +} + +void queue_panic() { + struct line *next; + while (head && head->status == STATUS_FINISHED) { + /* Write out finished sentences */ + if (head->status == STATUS_FINISHED) { + fputs(head->s, stdout); + fflush(stdout); + } + /* Write out blank line for unfinished sentences */ + if (head->status == STATUS_ABORTED) { + fputs("\n", stdout); + fflush(stdout); + } + /* By defition, there cannot be any RUNNING sentences, since + function is only called when n_clients == 0 */ + free(head->s); + next = head->next; + free(head); + head = next; + n_flushed++; + } + fclose(stdout); + fprintf(stderr, "All clients died. Panicking, flushing completed sentences and exiting.\n"); + done(1); +} + +void queue_abort(struct line *node, int fid) { + if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid); + pthread_mutex_lock(&queue_mutex); + node->status = STATUS_ABORTED; + if (n_clients == 0) { + if (stay_alive) { + fprintf(stderr, "Warning! No live clients detected! Staying alive, will retry soon.\n"); + } else { + queue_panic(); + } + } + if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid); + pthread_mutex_unlock(&queue_mutex); +} + + +void queue_print() { + struct line *cur; + + fprintf(stderr, " Queue\n"); + + for (cur = head; cur != NULL; cur = cur->next) { + switch(cur->status) { + case STATUS_RUNNING: + fprintf(stderr, " %d running ", cur->id); break; + case STATUS_ABORTED: + fprintf(stderr, " %d aborted ", cur->id); break; + case STATUS_FINISHED: + fprintf(stderr, " %d finished ", cur->id); break; + + } + fprintf(stderr, "\n"); + //fprintf(stderr, cur->s); + } +} + +void queue_finish(struct line *node, char *s, int fid) { + struct line *next; + if (log_mutex) fprintf(stderr, "Locking queue mutex (%d)\n", fid); + pthread_mutex_lock(&queue_mutex); + + free(node->s); + node->s = s; + node->status = STATUS_FINISHED; + n_received++; + + /* Flush out finished nodes */ + while (head && head->status == STATUS_FINISHED) { + + if (log_mutex) fprintf(stderr, " Flushing finished node %d\n", head->id); + + fputs(head->s, stdout); + fflush(stdout); + if (log_mutex) fprintf(stderr, " Flushed node %d\n", head->id); + free(head->s); + + next = head->next; + free(head); + + head = next; + + n_flushed++; + + if (head == NULL) { /* empty queue */ + if (ptail == NULL) { /* This can only happen if set in queue_get as signal that there is no more input. */ + fprintf(stderr, "All sentences finished. Exiting.\n"); + done(0); + } else /* ptail pointed at something which was just popped off the stack -- reset to head*/ + ptail = &head; + } + } + + if (log_mutex) fprintf(stderr, " Flushing output %d\n", head->id); + fflush(stdout); + fprintf(stderr, "%d sentences sent, %d sentences finished, %d sentences flushed\n", n_sent, n_received, n_flushed); + + if (log_mutex) fprintf(stderr, "Unlocking queue mutex (%d)\n", fid); + pthread_mutex_unlock(&queue_mutex); + +} + +char * read_line(int fd, int multiline) { + int size = 80; + char errorbuf[100]; + char *s = malloc(size+2); + int result, errors=0; + int i = 0; + + result = read(fd, s+i, 1); + + while (1) { + if (result < 0) { + perror("read()"); + sprintf(errorbuf, "Error code: %d\n", errno); + fprintf(stderr, errorbuf); + errors++; + if (errors > 5) { + free(s); + return NULL; + } else { + sleep(1); /* retry after delay */ + } + } else if (result == 0) { + break; + } else if (multiline==0 && s[i] == '\n') { + break; + } else { + if (s[i] == '\n'){ + /* if we've reached this point, + then multiline must be 1, and we're + going to poll the fd for an additional + line of data. The basic design is to + run a select on the filedescriptor fd. + Select will return under two conditions: + if there is data on the fd, or if a + timeout is reached. We'll select on this + fd. If select returns because there's data + ready, keep going; else assume there's no + more and return the data we already have. + */ + + fd_set set; + FD_ZERO(&set); + FD_SET(fd, &set); + + struct timeval timeout; + timeout.tv_sec = 3; // number of seconds for timeout + timeout.tv_usec = 0; + + int ready = select(FD_SETSIZE, &set, NULL, NULL, &timeout); + if (ready<1){ + break; // no more data, stop looping + } + } + i++; + + if (i == size) { + size = size*2; + s = realloc(s, size+2); + } + } + + result = read(fd, s+i, 1); + } + + if (result == 0 && i == 0) { /* end of file */ + free(s); + return NULL; + } + + s[i] = '\n'; + s[i+1] = '\0'; + + return s; +} + +void * new_client(void *arg) { + struct clientinfo *client = (struct clientinfo *)arg; + struct line *cur; + int result; + char *s; + char errorbuf[100]; + + pthread_mutex_lock(&clients_mutex); + n_clients++; + pthread_mutex_unlock(&clients_mutex); + + fprintf(stderr, "Client connected (%d connected)\n", n_clients); + + for (;;) { + + cur = queue_get(client->s); + + if (cur) { + /* fprintf(stderr, "Sending to client: %s", cur->s); */ + fprintf(stderr, "Sending data %d to client (fid %d)\n", cur->id, client->s); + result = write(client->s, cur->s, strlen(cur->s)); + if (result < strlen(cur->s)){ + perror("write()"); + sprintf(errorbuf, "Error code: %d\n", errno); + fprintf(stderr, errorbuf); + + pthread_mutex_lock(&clients_mutex); + n_clients--; + pthread_mutex_unlock(&clients_mutex); + + fprintf(stderr, "Client died (%d connected)\n", n_clients); + queue_abort(cur, client->s); + + close(client->s); + free(client); + + pthread_exit(NULL); + } + } else { + close(client->s); + pthread_mutex_lock(&clients_mutex); + n_clients--; + pthread_mutex_unlock(&clients_mutex); + fprintf(stderr, "Client dismissed (%d connected)\n", n_clients); + pthread_exit(NULL); + } + + s = read_line(client->s,expect_multiline_output); + if (s) { + /* fprintf(stderr, "Client (fid %d) returned: %s", client->s, s); */ + fprintf(stderr, "Client (fid %d) returned data %d\n", client->s, cur->id); +// queue_print(); + queue_finish(cur, s, client->s); + } else { + pthread_mutex_lock(&clients_mutex); + n_clients--; + pthread_mutex_unlock(&clients_mutex); + + fprintf(stderr, "Client died (%d connected)\n", n_clients); + queue_abort(cur, client->s); + + close(client->s); + free(client); + + pthread_exit(NULL); + } + + } + return 0; +} + +void done (int code) { + close(s); + exit(code); +} + + + +int main (int argc, char *argv[]) { + struct sockaddr_in sin, from; + int g; + socklen_t len; + struct clientinfo *client; + int port; + int opt; + int errors = 0; + int argi; + char *key = NULL, *client_key; + int use_key = 0; + /* the key stuff here doesn't provide any + real measure of security, it's mainly to keep + jobs from bumping into each other. */ + + pthread_t tid; + port = DEFAULT_PORT; + + for (argi=1; argi < argc; argi++){ + if (strcmp(argv[argi], "-m")==0){ + expect_multiline_output = 1; + } else if (strcmp(argv[argi], "-k")==0){ + argi++; + if (argi == argc){ + fprintf(stderr, "Key must be specified after -k\n"); + exit(1); + } + key = argv[argi]; + use_key = 1; + } else if (strcmp(argv[argi], "--stay-alive")==0){ + stay_alive = 1; /* dont panic and die with zero clients */ + } else { + port = atoi(argv[argi]); + } + } + + /* Initialize data structures */ + head = NULL; + ptail = &head; + + /* Set up listener */ + s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + opt = 1; + setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_ANY); + sin.sin_port = htons(port); + while (bind(s, (struct sockaddr *) &sin, sizeof(sin)) < 0) { + perror("bind()"); + sleep(1); + errors++; + if (errors > 100) + exit(1); + } + + len = sizeof(sin); + getsockname(s, (struct sockaddr *) &sin, &len); + + fprintf(stderr, "Listening on port %hu\n", ntohs(sin.sin_port)); + + while (listen(s, MAX_CLIENTS) < 0) { + perror("listen()"); + sleep(1); + errors++; + if (errors > 100) + exit(1); + } + + for (;;) { + len = sizeof(from); + g = accept(s, (struct sockaddr *)&from, &len); + if (g < 0) { + perror("accept()"); + sleep(1); + continue; + } + client = malloc(sizeof(struct clientinfo)); + client->s = g; + bcopy(&from, &client->sin, len); + + if (use_key){ + fd_set set; + FD_ZERO(&set); + FD_SET(client->s, &set); + + struct timeval timeout; + timeout.tv_sec = 3; // number of seconds for timeout + timeout.tv_usec = 0; + + int ready = select(FD_SETSIZE, &set, NULL, NULL, &timeout); + if (ready<1){ + fprintf(stderr, "Prospective client failed to respond with correct key.\n"); + close(client->s); + free(client); + } else { + client_key = read_line(client->s,0); + client_key[strlen(client_key)-1]='\0'; /* chop trailing newline */ + if (strcmp(key, client_key)==0){ + pthread_create(&tid, NULL, new_client, client); + } else { + fprintf(stderr, "Prospective client failed to respond with correct key.\n"); + close(client->s); + free(client); + } + free(client_key); + } + } else { + pthread_create(&tid, NULL, new_client, client); + } + } + +} + + + diff --git a/training/utils/sentserver.h b/training/utils/sentserver.h new file mode 100644 index 00000000..cd17a546 --- /dev/null +++ b/training/utils/sentserver.h @@ -0,0 +1,6 @@ +#ifndef SENTSERVER_H +#define SENTSERVER_H + +#define DEFAULT_PORT 50000 + +#endif diff --git a/word-aligner/Makefile.am b/word-aligner/Makefile.am new file mode 100644 index 00000000..280d3ae7 --- /dev/null +++ b/word-aligner/Makefile.am @@ -0,0 +1,6 @@ +bin_PROGRAMS = fast_align + +fast_align_SOURCES = fast_align.cc ttables.cc +fast_align_LDADD = $(top_srcdir)/utils/libutils.a -lz + +AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/training diff --git a/word-aligner/fast_align.cc b/word-aligner/fast_align.cc new file mode 100644 index 00000000..7492d26f --- /dev/null +++ b/word-aligner/fast_align.cc @@ -0,0 +1,281 @@ +#include +#include + +#include +#include + +#include "m.h" +#include "corpus_tools.h" +#include "stringlib.h" +#include "filelib.h" +#include "ttables.h" +#include "tdict.h" + +namespace po = boost::program_options; +using namespace std; + +bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("input,i",po::value(),"Parallel corpus input file") + ("reverse,r","Reverse estimation (swap source and target during training)") + ("iterations,I",po::value()->default_value(5),"Number of iterations of EM training") + //("bidir,b", "Run bidirectional alignment") + ("favor_diagonal,d", "Use a static alignment distribution that assigns higher probabilities to alignments near the diagonal") + ("prob_align_null", po::value()->default_value(0.08), "When --favor_diagonal is set, what's the probability of a null alignment?") + ("diagonal_tension,T", po::value()->default_value(4.0), "How sharp or flat around the diagonal is the alignment distribution (<1 = flat >1 = sharp)") + ("variational_bayes,v","Infer VB estimate of parameters under a symmetric Dirichlet prior") + ("alpha,a", po::value()->default_value(0.01), "Hyperparameter for optional Dirichlet prior") + ("no_null_word,N","Do not generate from a null token") + ("output_parameters,p", "Write model parameters instead of alignments") + ("beam_threshold,t",po::value()->default_value(-4),"When writing parameters, log_10 of beam threshold for writing parameter (-10000 to include everything, 0 max parameter only)") + ("hide_training_alignments,H", "Hide training alignments (only useful if you want to use -x option and just compute testset statistics)") + ("testset,x", po::value(), "After training completes, compute the log likelihood of this set of sentence pairs under the learned model") + ("no_add_viterbi,V","When writing model parameters, do not add Viterbi alignment points (may generate a grammar where some training sentence pairs are unreachable)"); + po::options_description clo("Command line options"); + clo.add_options() + ("config", po::value(), "Configuration file") + ("help,h", "Print this help message and exit"); + po::options_description dconfig_options, dcmdline_options; + dconfig_options.add(opts); + dcmdline_options.add(opts).add(clo); + + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (conf->count("config")) { + ifstream config((*conf)["config"].as().c_str()); + po::store(po::parse_config_file(config, dconfig_options), *conf); + } + po::notify(*conf); + + if (conf->count("help") || conf->count("input") == 0) { + cerr << "Usage " << argv[0] << " [OPTIONS] -i corpus.fr-en\n"; + cerr << dcmdline_options << endl; + return false; + } + return true; +} + +int main(int argc, char** argv) { + po::variables_map conf; + if (!InitCommandLine(argc, argv, &conf)) return 1; + const string fname = conf["input"].as(); + const bool reverse = conf.count("reverse") > 0; + const int ITERATIONS = conf["iterations"].as(); + const double BEAM_THRESHOLD = pow(10.0, conf["beam_threshold"].as()); + const bool use_null = (conf.count("no_null_word") == 0); + const WordID kNULL = TD::Convert(""); + const bool add_viterbi = (conf.count("no_add_viterbi") == 0); + const bool variational_bayes = (conf.count("variational_bayes") > 0); + const bool write_alignments = (conf.count("output_parameters") == 0); + const double diagonal_tension = conf["diagonal_tension"].as(); + const double prob_align_null = conf["prob_align_null"].as(); + const bool hide_training_alignments = (conf.count("hide_training_alignments") > 0); + string testset; + if (conf.count("testset")) testset = conf["testset"].as(); + const double prob_align_not_null = 1.0 - prob_align_null; + const double alpha = conf["alpha"].as(); + const bool favor_diagonal = conf.count("favor_diagonal"); + if (variational_bayes && alpha <= 0.0) { + cerr << "--alpha must be > 0\n"; + return 1; + } + + TTable s2t, t2s; + TTable::Word2Word2Double s2t_viterbi; + double tot_len_ratio = 0; + double mean_srclen_multiplier = 0; + vector unnormed_a_i; + for (int iter = 0; iter < ITERATIONS; ++iter) { + const bool final_iteration = (iter == (ITERATIONS - 1)); + cerr << "ITERATION " << (iter + 1) << (final_iteration ? " (FINAL)" : "") << endl; + ReadFile rf(fname); + istream& in = *rf.stream(); + double likelihood = 0; + double denom = 0.0; + int lc = 0; + bool flag = false; + string line; + string ssrc, strg; + vector src, trg; + while(true) { + getline(in, line); + if (!in) break; + ++lc; + if (lc % 1000 == 0) { cerr << '.'; flag = true; } + if (lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; } + src.clear(); trg.clear(); + CorpusTools::ReadLine(line, &src, &trg); + if (reverse) swap(src, trg); + if (src.size() == 0 || trg.size() == 0) { + cerr << "Error: " << lc << "\n" << line << endl; + return 1; + } + if (src.size() > unnormed_a_i.size()) + unnormed_a_i.resize(src.size()); + if (iter == 0) + tot_len_ratio += static_cast(trg.size()) / static_cast(src.size()); + denom += trg.size(); + vector probs(src.size() + 1); + bool first_al = true; // used for write_alignments + for (int j = 0; j < trg.size(); ++j) { + const WordID& f_j = trg[j]; + double sum = 0; + const double j_over_ts = double(j) / trg.size(); + double prob_a_i = 1.0 / (src.size() + use_null); // uniform (model 1) + if (use_null) { + if (favor_diagonal) prob_a_i = prob_align_null; + probs[0] = s2t.prob(kNULL, f_j) * prob_a_i; + sum += probs[0]; + } + double az = 0; + if (favor_diagonal) { + for (int ta = 0; ta < src.size(); ++ta) { + unnormed_a_i[ta] = exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension); + az += unnormed_a_i[ta]; + } + az /= prob_align_not_null; + } + for (int i = 1; i <= src.size(); ++i) { + if (favor_diagonal) + prob_a_i = unnormed_a_i[i-1] / az; + probs[i] = s2t.prob(src[i-1], f_j) * prob_a_i; + sum += probs[i]; + } + if (final_iteration) { + if (add_viterbi || write_alignments) { + WordID max_i = 0; + double max_p = -1; + int max_index = -1; + if (use_null) { + max_i = kNULL; + max_index = 0; + max_p = probs[0]; + } + for (int i = 1; i <= src.size(); ++i) { + if (probs[i] > max_p) { + max_index = i; + max_p = probs[i]; + max_i = src[i-1]; + } + } + if (!hide_training_alignments && write_alignments) { + if (max_index > 0) { + if (first_al) first_al = false; else cout << ' '; + if (reverse) + cout << j << '-' << (max_index - 1); + else + cout << (max_index - 1) << '-' << j; + } + } + s2t_viterbi[max_i][f_j] = 1.0; + } + } else { + if (use_null) + s2t.Increment(kNULL, f_j, probs[0] / sum); + for (int i = 1; i <= src.size(); ++i) + s2t.Increment(src[i-1], f_j, probs[i] / sum); + } + likelihood += log(sum); + } + if (write_alignments && final_iteration && !hide_training_alignments) cout << endl; + } + + // log(e) = 1.0 + double base2_likelihood = likelihood / log(2); + + if (flag) { cerr << endl; } + if (iter == 0) { + mean_srclen_multiplier = tot_len_ratio / lc; + cerr << "expected target length = source length * " << mean_srclen_multiplier << endl; + } + cerr << " log_e likelihood: " << likelihood << endl; + cerr << " log_2 likelihood: " << base2_likelihood << endl; + cerr << " cross entropy: " << (-base2_likelihood / denom) << endl; + cerr << " perplexity: " << pow(2.0, -base2_likelihood / denom) << endl; + if (!final_iteration) { + if (variational_bayes) + s2t.NormalizeVB(alpha); + else + s2t.Normalize(); + } + } + if (testset.size()) { + ReadFile rf(testset); + istream& in = *rf.stream(); + int lc = 0; + double tlp = 0; + string line; + while (getline(in, line)) { + ++lc; + vector src, trg; + CorpusTools::ReadLine(line, &src, &trg); + cout << TD::GetString(src) << " ||| " << TD::GetString(trg) << " |||"; + if (reverse) swap(src, trg); + double log_prob = Md::log_poisson(trg.size(), 0.05 + src.size() * mean_srclen_multiplier); + if (src.size() > unnormed_a_i.size()) + unnormed_a_i.resize(src.size()); + + // compute likelihood + for (int j = 0; j < trg.size(); ++j) { + const WordID& f_j = trg[j]; + double sum = 0; + int a_j = 0; + double max_pat = 0; + const double j_over_ts = double(j) / trg.size(); + double prob_a_i = 1.0 / (src.size() + use_null); // uniform (model 1) + if (use_null) { + if (favor_diagonal) prob_a_i = prob_align_null; + max_pat = s2t.prob(kNULL, f_j) * prob_a_i; + sum += max_pat; + } + double az = 0; + if (favor_diagonal) { + for (int ta = 0; ta < src.size(); ++ta) { + unnormed_a_i[ta] = exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension); + az += unnormed_a_i[ta]; + } + az /= prob_align_not_null; + } + for (int i = 1; i <= src.size(); ++i) { + if (favor_diagonal) + prob_a_i = unnormed_a_i[i-1] / az; + double pat = s2t.prob(src[i-1], f_j) * prob_a_i; + if (pat > max_pat) { max_pat = pat; a_j = i; } + sum += pat; + } + log_prob += log(sum); + if (write_alignments) { + if (a_j > 0) { + cout << ' '; + if (reverse) + cout << j << '-' << (a_j - 1); + else + cout << (a_j - 1) << '-' << j; + } + } + } + tlp += log_prob; + cout << " ||| " << log_prob << endl << flush; + } // loop over test set sentences + cerr << "TOTAL LOG PROB " << tlp << endl; + } + + if (write_alignments) return 0; + + for (TTable::Word2Word2Double::iterator ei = s2t.ttable.begin(); ei != s2t.ttable.end(); ++ei) { + const TTable::Word2Double& cpd = ei->second; + const TTable::Word2Double& vit = s2t_viterbi[ei->first]; + const string& esym = TD::Convert(ei->first); + double max_p = -1; + for (TTable::Word2Double::const_iterator fi = cpd.begin(); fi != cpd.end(); ++fi) + if (fi->second > max_p) max_p = fi->second; + const double threshold = max_p * BEAM_THRESHOLD; + for (TTable::Word2Double::const_iterator fi = cpd.begin(); fi != cpd.end(); ++fi) { + if (fi->second > threshold || (vit.find(fi->first) != vit.end())) { + cout << esym << ' ' << TD::Convert(fi->first) << ' ' << log(fi->second) << endl; + } + } + } + return 0; +} + diff --git a/word-aligner/makefiles/makefile.grammars b/word-aligner/makefiles/makefile.grammars index 08ff33e1..ce3e1638 100644 --- a/word-aligner/makefiles/makefile.grammars +++ b/word-aligner/makefiles/makefile.grammars @@ -16,7 +16,7 @@ STEM_E = $(SCRIPT_DIR)/stemmers/$(E_LANG).pl CLASSIFY = $(SUPPORT_DIR)/classify.pl MAKE_LEX_GRAMMAR = $(SUPPORT_DIR)/make_lex_grammar.pl -MODEL1 = $(TRAINING_DIR)/fast_align +MODEL1 = $(SCRIPT_DIR)/fast_align MERGE_CORPUS = $(SUPPORT_DIR)/merge_corpus.pl e.voc: corpus.e diff --git a/word-aligner/paste-parallel-files.pl b/word-aligner/paste-parallel-files.pl deleted file mode 100755 index ce53b325..00000000 --- a/word-aligner/paste-parallel-files.pl +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my @fs = (); -for my $file (@ARGV) { - my $fh; - open $fh, "<$file" or die "Can't open $file for reading: $!"; - push @fs, $fh; -} -my $num = scalar @fs; -die "Usage: $0 file1.txt file2.txt [...]\n" unless $num > 1; - -my $first = $fs[0]; -while(<$first>) { - chomp; - my @out = (); - push @out, $_; - for (my $i=1; $i < $num; $i++) { - my $f = $fs[$i]; - my $line = <$f>; - die "Mismatched number of lines!" unless defined $line; - chomp $line; - push @out, $line; - } - print join(' ||| ', @out) . "\n"; -} - -for my $fh (@fs) { - my $x=<$fh>; - die "Mismatched number of lines!" if defined $x; - close $fh; -} - -exit 0; - diff --git a/word-aligner/ttables.cc b/word-aligner/ttables.cc new file mode 100644 index 00000000..45bf14c5 --- /dev/null +++ b/word-aligner/ttables.cc @@ -0,0 +1,31 @@ +#include "ttables.h" + +#include + +#include "dict.h" + +using namespace std; +using namespace std::tr1; + +void TTable::DeserializeProbsFromText(std::istream* in) { + int c = 0; + while(*in) { + string e; + string f; + double p; + (*in) >> e >> f >> p; + if (e.empty()) break; + ++c; + ttable[TD::Convert(e)][TD::Convert(f)] = p; + } + cerr << "Loaded " << c << " translation parameters.\n"; +} + +void TTable::SerializeHelper(string* out, const Word2Word2Double& o) { + assert(!"not implemented"); +} + +void TTable::DeserializeHelper(const string& in, Word2Word2Double* o) { + assert(!"not implemented"); +} + diff --git a/word-aligner/ttables.h b/word-aligner/ttables.h new file mode 100644 index 00000000..9baa13ca --- /dev/null +++ b/word-aligner/ttables.h @@ -0,0 +1,101 @@ +#ifndef _TTABLES_H_ +#define _TTABLES_H_ + +#include +#include + +#include "sparse_vector.h" +#include "m.h" +#include "wordid.h" +#include "tdict.h" + +class TTable { + public: + TTable() {} + typedef std::tr1::unordered_map Word2Double; + typedef std::tr1::unordered_map Word2Word2Double; + inline double prob(const int& e, const int& f) const { + const Word2Word2Double::const_iterator cit = ttable.find(e); + if (cit != ttable.end()) { + const Word2Double& cpd = cit->second; + const Word2Double::const_iterator it = cpd.find(f); + if (it == cpd.end()) return 1e-9; + return it->second; + } else { + return 1e-9; + } + } + inline void Increment(const int& e, const int& f) { + counts[e][f] += 1.0; + } + inline void Increment(const int& e, const int& f, double x) { + counts[e][f] += x; + } + void NormalizeVB(const double alpha) { + ttable.swap(counts); + for (Word2Word2Double::iterator cit = ttable.begin(); + cit != ttable.end(); ++cit) { + double tot = 0; + Word2Double& cpd = cit->second; + for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it) + tot += it->second + alpha; + for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it) + it->second = exp(Md::digamma(it->second + alpha) - Md::digamma(tot)); + } + counts.clear(); + } + void Normalize() { + ttable.swap(counts); + for (Word2Word2Double::iterator cit = ttable.begin(); + cit != ttable.end(); ++cit) { + double tot = 0; + Word2Double& cpd = cit->second; + for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it) + tot += it->second; + for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it) + it->second /= tot; + } + counts.clear(); + } + // adds counts from another TTable - probabilities remain unchanged + TTable& operator+=(const TTable& rhs) { + for (Word2Word2Double::const_iterator it = rhs.counts.begin(); + it != rhs.counts.end(); ++it) { + const Word2Double& cpd = it->second; + Word2Double& tgt = counts[it->first]; + for (Word2Double::const_iterator j = cpd.begin(); j != cpd.end(); ++j) { + tgt[j->first] += j->second; + } + } + return *this; + } + void ShowTTable() const { + for (Word2Word2Double::const_iterator it = ttable.begin(); it != ttable.end(); ++it) { + const Word2Double& cpd = it->second; + for (Word2Double::const_iterator j = cpd.begin(); j != cpd.end(); ++j) { + std::cerr << "P(" << TD::Convert(j->first) << '|' << TD::Convert(it->first) << ") = " << j->second << std::endl; + } + } + } + void ShowCounts() const { + for (Word2Word2Double::const_iterator it = counts.begin(); it != counts.end(); ++it) { + const Word2Double& cpd = it->second; + for (Word2Double::const_iterator j = cpd.begin(); j != cpd.end(); ++j) { + std::cerr << "c(" << TD::Convert(j->first) << '|' << TD::Convert(it->first) << ") = " << j->second << std::endl; + } + } + } + void DeserializeProbsFromText(std::istream* in); + void SerializeCounts(std::string* out) const { SerializeHelper(out, counts); } + void DeserializeCounts(const std::string& in) { DeserializeHelper(in, &counts); } + void SerializeProbs(std::string* out) const { SerializeHelper(out, ttable); } + void DeserializeProbs(const std::string& in) { DeserializeHelper(in, &ttable); } + private: + static void SerializeHelper(std::string*, const Word2Word2Double& o); + static void DeserializeHelper(const std::string&, Word2Word2Double* o); + public: + Word2Word2Double ttable; + Word2Word2Double counts; +}; + +#endif -- cgit v1.2.3 From fbe3415e35f7c16177314422a4f4e146bb4d808f Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 18 Nov 2012 13:44:44 -0500 Subject: forgotten makefile --- .gitignore | 1 - training/utils/Makefile.am | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 training/utils/Makefile.am diff --git a/.gitignore b/.gitignore index c6023822..56372ad4 100644 --- a/.gitignore +++ b/.gitignore @@ -197,7 +197,6 @@ training/mira/kbest_mira training/pro/mr_pro_map training/pro/mr_pro_reduce training/rampion/rampion_cccp -training/utils/Makefile.am training/utils/lbfgs_test training/utils/optimize_test training/utils/sentclient diff --git a/training/utils/Makefile.am b/training/utils/Makefile.am new file mode 100644 index 00000000..189d9a76 --- /dev/null +++ b/training/utils/Makefile.am @@ -0,0 +1,33 @@ +noinst_LIBRARIES = libtraining_utils.a + +bin_PROGRAMS = \ + sentserver \ + sentclient + +noinst_PROGRAMS = \ + lbfgs_test \ + optimize_test + +sentserver_SOURCES = sentserver.c +sentserver_LDFLAGS = -pthread + +sentclient_SOURCES = sentclient.c +sentclient_LDFLAGS = -pthread + +TESTS = lbfgs_test optimize_test + +libtraining_utils_a_SOURCES = \ + candidate_set.cc \ + entropy.cc \ + optimize.cc \ + online_optimizer.cc \ + risk.cc + +optimize_test_SOURCES = optimize_test.cc +optimize_test_LDADD = libtraining_utils.a $(top_srcdir)/utils/libutils.a -lz + +lbfgs_test_SOURCES = lbfgs_test.cc +lbfgs_test_LDADD = $(top_srcdir)/utils/libutils.a -lz + +AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/decoder -I$(top_srcdir)/utils -I$(top_srcdir)/mteval -I$(top_srcdir)/klm + -- cgit v1.2.3 From 6f36c1e0c7cab60a38c5905bc279dfdf9ef2c1fa Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 18 Nov 2012 14:02:17 -0500 Subject: add package dependency in auto build --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 43d32b92..cbfd8fb9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,6 @@ before_script: - sudo apt-get install libboost-program-options-dev + - sudo apt-get install libboost-serialization-dev - sudo apt-get install libboost-regex-dev - sudo apt-get install libboost-test-dev - sudo apt-get install libboost-system-dev -- cgit v1.2.3 From c634ab440869d77769ce6d5e0f7da7bc8c36650e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 18 Nov 2012 14:26:10 -0500 Subject: fix python pointers into training lib --- python/setup.py.in | 4 ++-- python/src/_cdec.cpp | 4 ++-- python/src/mteval.pxd | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/setup.py.in b/python/setup.py.in index f191ef8b..dac72903 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -3,7 +3,7 @@ from distutils.extension import Extension import re INC = ['..', 'src/', '../decoder', '../utils', '../mteval'] -LIB = ['../decoder', '../utils', '../mteval', '../training', '../klm/lm', '../klm/util', '../klm/search'] +LIB = ['../decoder', '../utils', '../mteval', '../training/utils', '../klm/lm', '../klm/util', '../klm/search'] # Set automatically by configure LIBS = re.findall('-l([^\s]+)', '@LIBS@') @@ -17,7 +17,7 @@ ext_modules = [ sources=['src/_cdec.cpp'], include_dirs=INC, library_dirs=LIB, - libraries=LIBS + ['z', 'cdec', 'utils', 'mteval', 'training', 'klm', 'klm_util', 'ksearch'], + libraries=LIBS + ['z', 'cdec', 'utils', 'mteval', 'training_utils', 'klm', 'klm_util', 'ksearch'], extra_compile_args=CPPFLAGS, extra_link_args=LDFLAGS), Extension(name='cdec.sa._sa', diff --git a/python/src/_cdec.cpp b/python/src/_cdec.cpp index c8d50a4f..6f341f41 100644 --- a/python/src/_cdec.cpp +++ b/python/src/_cdec.cpp @@ -1,4 +1,4 @@ -/* Generated by Cython 0.17.1 on Tue Oct 16 01:04:11 2012 */ +/* Generated by Cython 0.17.1 on Sun Nov 18 14:23:01 2012 */ #define PY_SSIZE_T_CLEAN #include "Python.h" @@ -286,7 +286,7 @@ #include "decoder/kbest.h" #include "mteval/ns.h" #include "py_scorer.h" -#include "training/candidate_set.h" +#include "training/utils/candidate_set.h" #ifdef _OPENMP #include #endif /* _OPENMP */ diff --git a/python/src/mteval.pxd b/python/src/mteval.pxd index c97c4b34..33a6b7a5 100644 --- a/python/src/mteval.pxd +++ b/python/src/mteval.pxd @@ -37,7 +37,7 @@ cdef extern from "py_scorer.h": cdef EvaluationMetric* PyMetricInstance "PythonEvaluationMetric::Instance"( string& metric_id, void*, MetricStatsCallback, MetricScoreCallback) -cdef extern from "training/candidate_set.h" namespace "training": +cdef extern from "training/utils/candidate_set.h" namespace "training": cdef cppclass Candidate: vector[WordID] ewords FastSparseVector[weight_t] fmap -- cgit v1.2.3 From e9f80543bf7ff4070a1b5cdc1c3f5648addc70ce Mon Sep 17 00:00:00 2001 From: Victor Chahuneau Date: Sun, 18 Nov 2012 15:36:16 -0500 Subject: Add Python tests --- .travis.yml | 3 +++ python/tests/test_decoder.py | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 python/tests/test_decoder.py diff --git a/.travis.yml b/.travis.yml index cbfd8fb9..9e784717 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,3 +11,6 @@ script: make after_script: - make check - ./tests/run-system-tests.pl + - cd python + - python setup.py install + - nosetests tests diff --git a/python/tests/test_decoder.py b/python/tests/test_decoder.py new file mode 100644 index 00000000..a74e6268 --- /dev/null +++ b/python/tests/test_decoder.py @@ -0,0 +1,41 @@ +#coding:utf8 +import os +import gzip +import cdec +import unittest +from nose.tools import assert_almost_equals, assert_equal + +weights = os.path.dirname(__file__)+'/../../tests/system_tests/australia/weights' +ref_weights = {'WordPenalty': -2.844814, 'LanguageModel': 1.0, 'PhraseModel_0': -1.066893, 'PhraseModel_1': -0.752247, 'PhraseModel_2': -0.589793, 'PassThrough': -20.0, 'Glue': 0} + +grammar_file = os.path.dirname(__file__)+'/../../tests/system_tests/australia/australia.scfg.gz' + +input_sentence = u'澳洲 是 与 北韩 有 邦交 的 少数 国家 之一 。' +ref_output_sentence = u'australia is have diplomatic relations with north korea one of the few countries .' +ref_f_tree = u'(S (S (S (S (X 澳洲 是)) (X (X 与 北韩) 有 邦交)) (X 的 少数 国家 之一)) (X 。))' +ref_e_tree = u'(S (S (S (S (X australia is)) (X have diplomatic relations (X with north korea))) (X one of the few countries)) (X .))' +ref_fvector = {'PhraseModel_2': 7.082652, 'Glue': 3.0, 'PhraseModel_0': 2.014353, 'PhraseModel_1': 8.591477} + +def assert_fvector_equal(vec, ref): + vecd = dict(vec) + assert_equal(set(vecd.keys()), set(ref.keys())) + for k, v in ref.items(): + assert_almost_equals(vec[k], v, 6) + +class TestDecoder(unittest.TestCase): + def setUp(self): + self.decoder = cdec.Decoder(formalism='scfg') + self.decoder.read_weights(weights) + with gzip.open(grammar_file) as f: + self.grammar = f.read() + + def test_weights(self): + assert_fvector_equal(self.decoder.weights, ref_weights) + + def test_translate(self): + forest = self.decoder.translate(input_sentence, grammar=self.grammar) + assert_equal(forest.viterbi(), ref_output_sentence) + f_tree, e_tree = forest.viterbi_trees() + assert_equal(f_tree, ref_f_tree) + assert_equal(e_tree, ref_e_tree) + assert_fvector_equal(forest.viterbi_features(), ref_fvector) -- cgit v1.2.3 From 61ede617e3eaa15e0005965d1da2e462659b0f7d Mon Sep 17 00:00:00 2001 From: Victor Chahuneau Date: Sun, 18 Nov 2012 16:06:14 -0500 Subject: Switch .travis.yml language to python to get virtualenv --- .travis.yml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9e784717..f3c418b5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,6 @@ +language: python +python: + - "2.7" before_script: - sudo apt-get install libboost-program-options-dev - sudo apt-get install libboost-serialization-dev @@ -7,10 +10,12 @@ before_script: - sudo apt-get install flex - autoreconf -ifv - ./configure -script: make +script: + - make + - cd python + - python setup.py install + - cd .. after_script: - make check - ./tests/run-system-tests.pl - - cd python - - python setup.py install - - nosetests tests + - nosetests python/tests -- cgit v1.2.3 From c3a0f085f678b0186b977c8e97dbc9175676f9bd Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 18 Nov 2012 17:41:27 -0500 Subject: add another interface --- python/src/_cdec.cpp | 989 ++++++++++++++++++++++++++-------------------- python/src/hypergraph.pxd | 3 + python/src/hypergraph.pxi | 9 + 3 files changed, 582 insertions(+), 419 deletions(-) diff --git a/python/src/_cdec.cpp b/python/src/_cdec.cpp index 6f341f41..00cb0641 100644 --- a/python/src/_cdec.cpp +++ b/python/src/_cdec.cpp @@ -1,4 +1,4 @@ -/* Generated by Cython 0.17.1 on Sun Nov 18 14:23:01 2012 */ +/* Generated by Cython 0.17.1 on Sun Nov 18 15:08:18 2012 */ #define PY_SSIZE_T_CLEAN #include "Python.h" @@ -588,7 +588,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_20_lines { }; -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":103 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":108 * del hypos * * def sample_trees(self, unsigned n): # <<<<<<<<<<<<<< @@ -606,7 +606,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_12_sample_trees { }; -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":156 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":161 * * property edges: * def __get__(self): # <<<<<<<<<<<<<< @@ -650,7 +650,7 @@ struct __pyx_obj_5_cdec_CandidateSet { }; -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":162 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":167 * * property nodes: * def __get__(self): # <<<<<<<<<<<<<< @@ -815,7 +815,7 @@ struct __pyx_obj_4cdec_2sa_3_sa_FloatList { }; -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":191 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":196 * return vector * * cdef class HypergraphEdge: # <<<<<<<<<<<<<< @@ -848,7 +848,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_1___iter__ { }; -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":247 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":256 * * property in_edges: * def __get__(self): # <<<<<<<<<<<<<< @@ -911,7 +911,7 @@ struct __pyx_obj_5_cdec_Decoder { }; -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":237 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":246 * raise NotImplemented('comparison not implemented for HypergraphEdge') * * cdef class HypergraphNode: # <<<<<<<<<<<<<< @@ -939,7 +939,7 @@ struct __pyx_obj_5_cdec_SparseVector { }; -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":253 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":262 * * property out_edges: * def __get__(self): # <<<<<<<<<<<<<< @@ -1048,7 +1048,7 @@ struct __pyx_obj_5_cdec_SufficientStats { }; -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":44 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":49 * return unicode(hypergraph.JoshuaVisualizationString(self.hg[0]).c_str(), 'utf8') * * def kbest(self, size): # <<<<<<<<<<<<<< @@ -1067,7 +1067,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_8_kbest { }; -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":76 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":81 * del e_derivations * * def kbest_features(self, size): # <<<<<<<<<<<<<< @@ -1087,7 +1087,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_10_kbest_features { }; -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":211 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":216 * * property tail_nodes: * def __get__(self): # <<<<<<<<<<<<<< @@ -1133,7 +1133,7 @@ struct __pyx_obj_5_cdec___pyx_scope_struct_3_genexpr { }; -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":57 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":62 * del derivations * * def kbest_trees(self, size): # <<<<<<<<<<<<<< @@ -1184,7 +1184,7 @@ struct __pyx_obj_5_cdec_Lattice { }; -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":92 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":97 * del derivations * * def sample(self, unsigned n): # <<<<<<<<<<<<<< @@ -1252,7 +1252,7 @@ struct __pyx_vtabstruct_5_cdec_Hypergraph { static struct __pyx_vtabstruct_5_cdec_Hypergraph *__pyx_vtabptr_5_cdec_Hypergraph; -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":191 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":196 * return vector * * cdef class HypergraphEdge: # <<<<<<<<<<<<<< @@ -1317,7 +1317,7 @@ struct __pyx_vtabstruct_4cdec_2sa_3_sa_Phrase { static struct __pyx_vtabstruct_4cdec_2sa_3_sa_Phrase *__pyx_vtabptr_4cdec_2sa_3_sa_Phrase; -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":237 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":246 * raise NotImplemented('comparison not implemented for HypergraphEdge') * * cdef class HypergraphNode: # <<<<<<<<<<<<<< @@ -1917,26 +1917,28 @@ static void __pyx_pf_5_cdec_10Hypergraph___dealloc__(struct __pyx_obj_5_cdec_Hyp static PyObject *__pyx_pf_5_cdec_10Hypergraph_2viterbi(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_5_cdec_10Hypergraph_4viterbi_trees(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_5_cdec_10Hypergraph_6viterbi_features(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_8viterbi_joshua(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_10kbest(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_size); /* proto */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_13kbest_trees(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_size); /* proto */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_16kbest_features(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_size); /* proto */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_19sample(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, unsigned int __pyx_v_n); /* proto */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_22sample_trees(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, unsigned int __pyx_v_n); /* proto */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_25intersect(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_inp); /* proto */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_27prune(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_beam_alpha, PyObject *__pyx_v_density, PyObject *__pyx_v_kwargs); /* proto */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_29lattice(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_31plf(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_33reweight(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_weights); /* proto */ +static PyObject *__pyx_pf_5_cdec_10Hypergraph_8viterbi_forest(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_5_cdec_10Hypergraph_10viterbi_joshua(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_5_cdec_10Hypergraph_12kbest(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_size); /* proto */ +static PyObject *__pyx_pf_5_cdec_10Hypergraph_15kbest_trees(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_size); /* proto */ +static PyObject *__pyx_pf_5_cdec_10Hypergraph_18kbest_features(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_size); /* proto */ +static PyObject *__pyx_pf_5_cdec_10Hypergraph_21sample(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, unsigned int __pyx_v_n); /* proto */ +static PyObject *__pyx_pf_5_cdec_10Hypergraph_24sample_trees(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, unsigned int __pyx_v_n); /* proto */ +static PyObject *__pyx_pf_5_cdec_10Hypergraph_27intersect(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_inp); /* proto */ +static PyObject *__pyx_pf_5_cdec_10Hypergraph_29prune(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_beam_alpha, PyObject *__pyx_v_density, PyObject *__pyx_v_kwargs); /* proto */ +static PyObject *__pyx_pf_5_cdec_10Hypergraph_31lattice(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_5_cdec_10Hypergraph_33plf(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_5_cdec_10Hypergraph_35reweight(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_weights); /* proto */ static PyObject *__pyx_pf_5_cdec_10Hypergraph_5edges___get__(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_5_cdec_10Hypergraph_5nodes___get__(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_5_cdec_10Hypergraph_4goal___get__(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_5_cdec_10Hypergraph_6npaths___get__(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_35inside_outside(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_5_cdec_10Hypergraph_37inside_outside(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self); /* proto */ static Py_ssize_t __pyx_pf_5_cdec_14HypergraphEdge___len__(struct __pyx_obj_5_cdec_HypergraphEdge *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_9head_node___get__(struct __pyx_obj_5_cdec_HypergraphEdge *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_10tail_nodes___get__(struct __pyx_obj_5_cdec_HypergraphEdge *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_4span___get__(struct __pyx_obj_5_cdec_HypergraphEdge *__pyx_v_self); /* proto */ +static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_8src_span___get__(struct __pyx_obj_5_cdec_HypergraphEdge *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_14feature_values___get__(struct __pyx_obj_5_cdec_HypergraphEdge *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_4prob___get__(struct __pyx_obj_5_cdec_HypergraphEdge *__pyx_v_self); /* proto */ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_2__richcmp__(struct __pyx_obj_5_cdec_HypergraphEdge *__pyx_v_x, struct __pyx_obj_5_cdec_HypergraphEdge *__pyx_v_y, int __pyx_v_op); /* proto */ @@ -9078,7 +9080,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_6viterbi_features(struct __pyx_obj * fmap.vector = new FastSparseVector[weight_t](hypergraph.ViterbiFeatures(self.hg[0])) * return fmap # <<<<<<<<<<<<<< * - * def viterbi_joshua(self): + * def viterbi_forest(self): */ __Pyx_XDECREF(__pyx_r); __Pyx_INCREF(((PyObject *)__pyx_v_fmap)); @@ -9099,13 +9101,12 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_6viterbi_features(struct __pyx_obj } /* Python wrapper */ -static PyObject *__pyx_pw_5_cdec_10Hypergraph_9viterbi_joshua(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/ -static char __pyx_doc_5_cdec_10Hypergraph_8viterbi_joshua[] = "hg.viterbi_joshua() -> Joshua representation of the best derivation."; -static PyObject *__pyx_pw_5_cdec_10Hypergraph_9viterbi_joshua(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) { +static PyObject *__pyx_pw_5_cdec_10Hypergraph_9viterbi_forest(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/ +static PyObject *__pyx_pw_5_cdec_10Hypergraph_9viterbi_forest(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("viterbi_joshua (wrapper)", 0); - __pyx_r = __pyx_pf_5_cdec_10Hypergraph_8viterbi_joshua(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self)); + __Pyx_RefNannySetupContext("viterbi_forest (wrapper)", 0); + __pyx_r = __pyx_pf_5_cdec_10Hypergraph_8viterbi_forest(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self)); __Pyx_RefNannyFinishContext(); return __pyx_r; } @@ -9113,12 +9114,88 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_9viterbi_joshua(PyObject *__pyx_v_ /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":40 * return fmap * + * def viterbi_forest(self): # <<<<<<<<<<<<<< + * cdef Hypergraph hg = Hypergraph() + * hg.hg = new hypergraph.Hypergraph(self.hg[0].CreateViterbiHypergraph(NULL).get()[0]) + */ + +static PyObject *__pyx_pf_5_cdec_10Hypergraph_8viterbi_forest(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self) { + struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_hg = 0; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("viterbi_forest", 0); + + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":41 + * + * def viterbi_forest(self): + * cdef Hypergraph hg = Hypergraph() # <<<<<<<<<<<<<< + * hg.hg = new hypergraph.Hypergraph(self.hg[0].CreateViterbiHypergraph(NULL).get()[0]) + * return hg + */ + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Hypergraph)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_v_hg = ((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_t_1); + __pyx_t_1 = 0; + + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":42 + * def viterbi_forest(self): + * cdef Hypergraph hg = Hypergraph() + * hg.hg = new hypergraph.Hypergraph(self.hg[0].CreateViterbiHypergraph(NULL).get()[0]) # <<<<<<<<<<<<<< + * return hg + * + */ + __pyx_v_hg->hg = new Hypergraph(((__pyx_v_self->hg[0]).CreateViterbiHypergraph(NULL).get()[0])); + + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":43 + * cdef Hypergraph hg = Hypergraph() + * hg.hg = new hypergraph.Hypergraph(self.hg[0].CreateViterbiHypergraph(NULL).get()[0]) + * return hg # <<<<<<<<<<<<<< + * + * def viterbi_joshua(self): + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(((PyObject *)__pyx_v_hg)); + __pyx_r = ((PyObject *)__pyx_v_hg); + goto __pyx_L0; + + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("_cdec.Hypergraph.viterbi_forest", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XDECREF((PyObject *)__pyx_v_hg); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* Python wrapper */ +static PyObject *__pyx_pw_5_cdec_10Hypergraph_11viterbi_joshua(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/ +static char __pyx_doc_5_cdec_10Hypergraph_10viterbi_joshua[] = "hg.viterbi_joshua() -> Joshua representation of the best derivation."; +static PyObject *__pyx_pw_5_cdec_10Hypergraph_11viterbi_joshua(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) { + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("viterbi_joshua (wrapper)", 0); + __pyx_r = __pyx_pf_5_cdec_10Hypergraph_10viterbi_joshua(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self)); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":45 + * return hg + * * def viterbi_joshua(self): # <<<<<<<<<<<<<< * """hg.viterbi_joshua() -> Joshua representation of the best derivation.""" * return unicode(hypergraph.JoshuaVisualizationString(self.hg[0]).c_str(), 'utf8') */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_8viterbi_joshua(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self) { +static PyObject *__pyx_pf_5_cdec_10Hypergraph_10viterbi_joshua(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; @@ -9128,7 +9205,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_8viterbi_joshua(struct __pyx_obj_5 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("viterbi_joshua", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":42 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":47 * def viterbi_joshua(self): * """hg.viterbi_joshua() -> Joshua representation of the best derivation.""" * return unicode(hypergraph.JoshuaVisualizationString(self.hg[0]).c_str(), 'utf8') # <<<<<<<<<<<<<< @@ -9136,9 +9213,9 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_8viterbi_joshua(struct __pyx_obj_5 * def kbest(self, size): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyBytes_FromString(JoshuaVisualizationString((__pyx_v_self->hg[0])).c_str()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyBytes_FromString(JoshuaVisualizationString((__pyx_v_self->hg[0])).c_str()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_t_1)); __Pyx_GIVEREF(((PyObject *)__pyx_t_1)); @@ -9146,7 +9223,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_8viterbi_joshua(struct __pyx_obj_5 PyTuple_SET_ITEM(__pyx_t_2, 1, ((PyObject *)__pyx_n_s__utf8)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__utf8)); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)(&PyUnicode_Type))), ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)(&PyUnicode_Type))), ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; __pyx_r = __pyx_t_1; @@ -9165,21 +9242,21 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_8viterbi_joshua(struct __pyx_obj_5 __Pyx_RefNannyFinishContext(); return __pyx_r; } -static PyObject *__pyx_gb_5_cdec_10Hypergraph_12generator4(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ +static PyObject *__pyx_gb_5_cdec_10Hypergraph_14generator4(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ /* Python wrapper */ -static PyObject *__pyx_pw_5_cdec_10Hypergraph_11kbest(PyObject *__pyx_v_self, PyObject *__pyx_v_size); /*proto*/ -static char __pyx_doc_5_cdec_10Hypergraph_10kbest[] = "hg.kbest(size) -> List of k-best hypotheses in the hypergraph."; -static PyObject *__pyx_pw_5_cdec_10Hypergraph_11kbest(PyObject *__pyx_v_self, PyObject *__pyx_v_size) { +static PyObject *__pyx_pw_5_cdec_10Hypergraph_13kbest(PyObject *__pyx_v_self, PyObject *__pyx_v_size); /*proto*/ +static char __pyx_doc_5_cdec_10Hypergraph_12kbest[] = "hg.kbest(size) -> List of k-best hypotheses in the hypergraph."; +static PyObject *__pyx_pw_5_cdec_10Hypergraph_13kbest(PyObject *__pyx_v_self, PyObject *__pyx_v_size) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("kbest (wrapper)", 0); - __pyx_r = __pyx_pf_5_cdec_10Hypergraph_10kbest(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self), ((PyObject *)__pyx_v_size)); + __pyx_r = __pyx_pf_5_cdec_10Hypergraph_12kbest(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self), ((PyObject *)__pyx_v_size)); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":44 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":49 * return unicode(hypergraph.JoshuaVisualizationString(self.hg[0]).c_str(), 'utf8') * * def kbest(self, size): # <<<<<<<<<<<<<< @@ -9187,7 +9264,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_11kbest(PyObject *__pyx_v_self, Py * cdef kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal]* derivations = new kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal](self.hg[0], size) */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_10kbest(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_size) { +static PyObject *__pyx_pf_5_cdec_10Hypergraph_12kbest(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_size) { struct __pyx_obj_5_cdec___pyx_scope_struct_8_kbest *__pyx_cur_scope; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations @@ -9208,7 +9285,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_10kbest(struct __pyx_obj_5_cdec_Hy __Pyx_INCREF(__pyx_cur_scope->__pyx_v_size); __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_size); { - __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_10Hypergraph_12generator4, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_10Hypergraph_14generator4, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_cur_scope); __Pyx_RefNannyFinishContext(); return (PyObject *) gen; @@ -9226,7 +9303,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_10kbest(struct __pyx_obj_5_cdec_Hy return __pyx_r; } -static PyObject *__pyx_gb_5_cdec_10Hypergraph_12generator4(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */ +static PyObject *__pyx_gb_5_cdec_10Hypergraph_14generator4(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */ { struct __pyx_obj_5_cdec___pyx_scope_struct_8_kbest *__pyx_cur_scope = ((struct __pyx_obj_5_cdec___pyx_scope_struct_8_kbest *)__pyx_generator->closure); PyObject *__pyx_r = NULL; @@ -9245,19 +9322,19 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_12generator4(__pyx_GeneratorObject return NULL; } __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":46 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":51 * def kbest(self, size): * """hg.kbest(size) -> List of k-best hypotheses in the hypergraph.""" * cdef kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal]* derivations = new kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal](self.hg[0], size) # <<<<<<<<<<<<<< * cdef kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal].Derivation* derivation * cdef unsigned k */ - __pyx_t_1 = __Pyx_PyInt_AsUnsignedInt(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_AsUnsignedInt(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_cur_scope->__pyx_v_derivations = new KBest::KBestDerivations,ESentenceTraversal>((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_t_1); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":49 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":54 * cdef kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal].Derivation* derivation * cdef unsigned k * try: # <<<<<<<<<<<<<< @@ -9266,18 +9343,18 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_12generator4(__pyx_GeneratorObject */ /*try:*/ { - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":50 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":55 * cdef unsigned k * try: * for k in range(size): # <<<<<<<<<<<<<< * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not derivation: break */ - __pyx_t_2 = __Pyx_PyInt_AsLong(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_2 == (long)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_2 = __Pyx_PyInt_AsLong(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_2 == (long)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L5;} for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_2; __pyx_t_1+=1) { __pyx_cur_scope->__pyx_v_k = __pyx_t_1; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":51 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":56 * try: * for k in range(size): * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) # <<<<<<<<<<<<<< @@ -9286,7 +9363,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_12generator4(__pyx_GeneratorObject */ __pyx_cur_scope->__pyx_v_derivation = __pyx_cur_scope->__pyx_v_derivations->LazyKthBest((__pyx_cur_scope->__pyx_v_self->hg->nodes_.size() - 1), __pyx_cur_scope->__pyx_v_k); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":52 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":57 * for k in range(size): * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not derivation: break # <<<<<<<<<<<<<< @@ -9300,16 +9377,16 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_12generator4(__pyx_GeneratorObject } __pyx_L9:; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":53 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":58 * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not derivation: break * yield unicode(GetString(derivation._yield).c_str(), 'utf8') # <<<<<<<<<<<<<< * finally: * del derivations */ - __pyx_t_4 = PyBytes_FromString(TD::GetString(__pyx_cur_scope->__pyx_v_derivation->yield).c_str()); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_4 = PyBytes_FromString(TD::GetString(__pyx_cur_scope->__pyx_v_derivation->yield).c_str()); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(((PyObject *)__pyx_t_4)); - __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_t_4)); __Pyx_GIVEREF(((PyObject *)__pyx_t_4)); @@ -9317,7 +9394,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_12generator4(__pyx_GeneratorObject PyTuple_SET_ITEM(__pyx_t_5, 1, ((PyObject *)__pyx_n_s__utf8)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__utf8)); __pyx_t_4 = 0; - __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)(&PyUnicode_Type))), ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)(&PyUnicode_Type))), ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; __pyx_r = __pyx_t_4; @@ -9332,12 +9409,12 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_12generator4(__pyx_GeneratorObject __pyx_L10_resume_from_yield:; __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L5;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L5;} } __pyx_L8_break:; } - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":55 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":60 * yield unicode(GetString(derivation._yield).c_str(), 'utf8') * finally: * del derivations # <<<<<<<<<<<<<< @@ -9384,21 +9461,21 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_12generator4(__pyx_GeneratorObject __Pyx_RefNannyFinishContext(); return NULL; } -static PyObject *__pyx_gb_5_cdec_10Hypergraph_15generator5(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ +static PyObject *__pyx_gb_5_cdec_10Hypergraph_17generator5(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ /* Python wrapper */ -static PyObject *__pyx_pw_5_cdec_10Hypergraph_14kbest_trees(PyObject *__pyx_v_self, PyObject *__pyx_v_size); /*proto*/ -static char __pyx_doc_5_cdec_10Hypergraph_13kbest_trees[] = "hg.kbest_trees(size) -> List of k-best trees in the hypergraph."; -static PyObject *__pyx_pw_5_cdec_10Hypergraph_14kbest_trees(PyObject *__pyx_v_self, PyObject *__pyx_v_size) { +static PyObject *__pyx_pw_5_cdec_10Hypergraph_16kbest_trees(PyObject *__pyx_v_self, PyObject *__pyx_v_size); /*proto*/ +static char __pyx_doc_5_cdec_10Hypergraph_15kbest_trees[] = "hg.kbest_trees(size) -> List of k-best trees in the hypergraph."; +static PyObject *__pyx_pw_5_cdec_10Hypergraph_16kbest_trees(PyObject *__pyx_v_self, PyObject *__pyx_v_size) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("kbest_trees (wrapper)", 0); - __pyx_r = __pyx_pf_5_cdec_10Hypergraph_13kbest_trees(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self), ((PyObject *)__pyx_v_size)); + __pyx_r = __pyx_pf_5_cdec_10Hypergraph_15kbest_trees(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self), ((PyObject *)__pyx_v_size)); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":57 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":62 * del derivations * * def kbest_trees(self, size): # <<<<<<<<<<<<<< @@ -9406,7 +9483,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_14kbest_trees(PyObject *__pyx_v_se * cdef kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal]* f_derivations = new kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal](self.hg[0], size) */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_13kbest_trees(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_size) { +static PyObject *__pyx_pf_5_cdec_10Hypergraph_15kbest_trees(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_size) { struct __pyx_obj_5_cdec___pyx_scope_struct_9_kbest_trees *__pyx_cur_scope; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations @@ -9427,7 +9504,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_13kbest_trees(struct __pyx_obj_5_c __Pyx_INCREF(__pyx_cur_scope->__pyx_v_size); __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_size); { - __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_10Hypergraph_15generator5, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_10Hypergraph_17generator5, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_cur_scope); __Pyx_RefNannyFinishContext(); return (PyObject *) gen; @@ -9445,7 +9522,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_13kbest_trees(struct __pyx_obj_5_c return __pyx_r; } -static PyObject *__pyx_gb_5_cdec_10Hypergraph_15generator5(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */ +static PyObject *__pyx_gb_5_cdec_10Hypergraph_17generator5(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */ { struct __pyx_obj_5_cdec___pyx_scope_struct_9_kbest_trees *__pyx_cur_scope = ((struct __pyx_obj_5_cdec___pyx_scope_struct_9_kbest_trees *)__pyx_generator->closure); PyObject *__pyx_r = NULL; @@ -9466,29 +9543,29 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_15generator5(__pyx_GeneratorObject return NULL; } __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":59 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":64 * def kbest_trees(self, size): * """hg.kbest_trees(size) -> List of k-best trees in the hypergraph.""" * cdef kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal]* f_derivations = new kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal](self.hg[0], size) # <<<<<<<<<<<<<< * cdef kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal].Derivation* f_derivation * cdef kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal]* e_derivations = new kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal](self.hg[0], size) */ - __pyx_t_1 = __Pyx_PyInt_AsUnsignedInt(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_AsUnsignedInt(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_cur_scope->__pyx_v_f_derivations = new KBest::KBestDerivations,FTreeTraversal>((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_t_1); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":61 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":66 * cdef kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal]* f_derivations = new kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal](self.hg[0], size) * cdef kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal].Derivation* f_derivation * cdef kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal]* e_derivations = new kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal](self.hg[0], size) # <<<<<<<<<<<<<< * cdef kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal].Derivation* e_derivation * cdef unsigned k */ - __pyx_t_1 = __Pyx_PyInt_AsUnsignedInt(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_AsUnsignedInt(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_cur_scope->__pyx_v_e_derivations = new KBest::KBestDerivations,ETreeTraversal>((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_t_1); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":64 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":69 * cdef kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal].Derivation* e_derivation * cdef unsigned k * try: # <<<<<<<<<<<<<< @@ -9497,18 +9574,18 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_15generator5(__pyx_GeneratorObject */ /*try:*/ { - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":65 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":70 * cdef unsigned k * try: * for k in range(size): # <<<<<<<<<<<<<< * f_derivation = f_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * e_derivation = e_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) */ - __pyx_t_2 = __Pyx_PyInt_AsLong(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_2 == (long)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_2 = __Pyx_PyInt_AsLong(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_2 == (long)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L5;} for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_2; __pyx_t_1+=1) { __pyx_cur_scope->__pyx_v_k = __pyx_t_1; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":66 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":71 * try: * for k in range(size): * f_derivation = f_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) # <<<<<<<<<<<<<< @@ -9517,7 +9594,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_15generator5(__pyx_GeneratorObject */ __pyx_cur_scope->__pyx_v_f_derivation = __pyx_cur_scope->__pyx_v_f_derivations->LazyKthBest((__pyx_cur_scope->__pyx_v_self->hg->nodes_.size() - 1), __pyx_cur_scope->__pyx_v_k); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":67 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":72 * for k in range(size): * f_derivation = f_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * e_derivation = e_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) # <<<<<<<<<<<<<< @@ -9526,7 +9603,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_15generator5(__pyx_GeneratorObject */ __pyx_cur_scope->__pyx_v_e_derivation = __pyx_cur_scope->__pyx_v_e_derivations->LazyKthBest((__pyx_cur_scope->__pyx_v_self->hg->nodes_.size() - 1), __pyx_cur_scope->__pyx_v_k); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":68 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":73 * f_derivation = f_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * e_derivation = e_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not f_derivation or not e_derivation: break # <<<<<<<<<<<<<< @@ -9546,16 +9623,16 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_15generator5(__pyx_GeneratorObject } __pyx_L9:; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":69 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":74 * e_derivation = e_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not f_derivation or not e_derivation: break * f_tree = unicode(GetString(f_derivation._yield).c_str(), 'utf8') # <<<<<<<<<<<<<< * e_tree = unicode(GetString(e_derivation._yield).c_str(), 'utf8') * yield (f_tree, e_tree) */ - __pyx_t_6 = PyBytes_FromString(TD::GetString(__pyx_cur_scope->__pyx_v_f_derivation->yield).c_str()); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_6 = PyBytes_FromString(TD::GetString(__pyx_cur_scope->__pyx_v_f_derivation->yield).c_str()); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(((PyObject *)__pyx_t_6)); - __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(__pyx_t_7); PyTuple_SET_ITEM(__pyx_t_7, 0, ((PyObject *)__pyx_t_6)); __Pyx_GIVEREF(((PyObject *)__pyx_t_6)); @@ -9563,7 +9640,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_15generator5(__pyx_GeneratorObject PyTuple_SET_ITEM(__pyx_t_7, 1, ((PyObject *)__pyx_n_s__utf8)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__utf8)); __pyx_t_6 = 0; - __pyx_t_6 = PyObject_Call(((PyObject *)((PyObject*)(&PyUnicode_Type))), ((PyObject *)__pyx_t_7), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_6 = PyObject_Call(((PyObject *)((PyObject*)(&PyUnicode_Type))), ((PyObject *)__pyx_t_7), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(((PyObject *)__pyx_t_7)); __pyx_t_7 = 0; __Pyx_XGOTREF(((PyObject *)__pyx_cur_scope->__pyx_v_f_tree)); @@ -9572,16 +9649,16 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_15generator5(__pyx_GeneratorObject __pyx_cur_scope->__pyx_v_f_tree = ((PyObject*)__pyx_t_6); __pyx_t_6 = 0; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":70 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":75 * if not f_derivation or not e_derivation: break * f_tree = unicode(GetString(f_derivation._yield).c_str(), 'utf8') * e_tree = unicode(GetString(e_derivation._yield).c_str(), 'utf8') # <<<<<<<<<<<<<< * yield (f_tree, e_tree) * finally: */ - __pyx_t_6 = PyBytes_FromString(TD::GetString(__pyx_cur_scope->__pyx_v_e_derivation->yield).c_str()); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_6 = PyBytes_FromString(TD::GetString(__pyx_cur_scope->__pyx_v_e_derivation->yield).c_str()); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(((PyObject *)__pyx_t_6)); - __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_7 = PyTuple_New(2); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(__pyx_t_7); PyTuple_SET_ITEM(__pyx_t_7, 0, ((PyObject *)__pyx_t_6)); __Pyx_GIVEREF(((PyObject *)__pyx_t_6)); @@ -9589,7 +9666,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_15generator5(__pyx_GeneratorObject PyTuple_SET_ITEM(__pyx_t_7, 1, ((PyObject *)__pyx_n_s__utf8)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__utf8)); __pyx_t_6 = 0; - __pyx_t_6 = PyObject_Call(((PyObject *)((PyObject*)(&PyUnicode_Type))), ((PyObject *)__pyx_t_7), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_6 = PyObject_Call(((PyObject *)((PyObject*)(&PyUnicode_Type))), ((PyObject *)__pyx_t_7), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(((PyObject *)__pyx_t_7)); __pyx_t_7 = 0; __Pyx_XGOTREF(((PyObject *)__pyx_cur_scope->__pyx_v_e_tree)); @@ -9598,14 +9675,14 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_15generator5(__pyx_GeneratorObject __pyx_cur_scope->__pyx_v_e_tree = ((PyObject*)__pyx_t_6); __pyx_t_6 = 0; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":71 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":76 * f_tree = unicode(GetString(f_derivation._yield).c_str(), 'utf8') * e_tree = unicode(GetString(e_derivation._yield).c_str(), 'utf8') * yield (f_tree, e_tree) # <<<<<<<<<<<<<< * finally: * del f_derivations */ - __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(__pyx_t_6); __Pyx_INCREF(((PyObject *)__pyx_cur_scope->__pyx_v_f_tree)); PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_cur_scope->__pyx_v_f_tree)); @@ -9625,12 +9702,12 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_15generator5(__pyx_GeneratorObject __pyx_L10_resume_from_yield:; __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L5;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L5;} } __pyx_L8_break:; } - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":73 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":78 * yield (f_tree, e_tree) * finally: * del f_derivations # <<<<<<<<<<<<<< @@ -9654,7 +9731,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_15generator5(__pyx_GeneratorObject __pyx_L6:; delete __pyx_cur_scope->__pyx_v_f_derivations; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":74 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":79 * finally: * del f_derivations * del e_derivations # <<<<<<<<<<<<<< @@ -9686,21 +9763,21 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_15generator5(__pyx_GeneratorObject __Pyx_RefNannyFinishContext(); return NULL; } -static PyObject *__pyx_gb_5_cdec_10Hypergraph_18generator6(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ +static PyObject *__pyx_gb_5_cdec_10Hypergraph_20generator6(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ /* Python wrapper */ -static PyObject *__pyx_pw_5_cdec_10Hypergraph_17kbest_features(PyObject *__pyx_v_self, PyObject *__pyx_v_size); /*proto*/ -static char __pyx_doc_5_cdec_10Hypergraph_16kbest_features[] = "hg.kbest_trees(size) -> List of k-best feature vectors in the hypergraph."; -static PyObject *__pyx_pw_5_cdec_10Hypergraph_17kbest_features(PyObject *__pyx_v_self, PyObject *__pyx_v_size) { +static PyObject *__pyx_pw_5_cdec_10Hypergraph_19kbest_features(PyObject *__pyx_v_self, PyObject *__pyx_v_size); /*proto*/ +static char __pyx_doc_5_cdec_10Hypergraph_18kbest_features[] = "hg.kbest_trees(size) -> List of k-best feature vectors in the hypergraph."; +static PyObject *__pyx_pw_5_cdec_10Hypergraph_19kbest_features(PyObject *__pyx_v_self, PyObject *__pyx_v_size) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("kbest_features (wrapper)", 0); - __pyx_r = __pyx_pf_5_cdec_10Hypergraph_16kbest_features(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self), ((PyObject *)__pyx_v_size)); + __pyx_r = __pyx_pf_5_cdec_10Hypergraph_18kbest_features(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self), ((PyObject *)__pyx_v_size)); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":76 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":81 * del e_derivations * * def kbest_features(self, size): # <<<<<<<<<<<<<< @@ -9708,7 +9785,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_17kbest_features(PyObject *__pyx_v * cdef kbest.KBestDerivations[FastSparseVector[weight_t], kbest.FeatureVectorTraversal]* derivations = new kbest.KBestDerivations[FastSparseVector[weight_t], kbest.FeatureVectorTraversal](self.hg[0], size) */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_16kbest_features(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_size) { +static PyObject *__pyx_pf_5_cdec_10Hypergraph_18kbest_features(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_size) { struct __pyx_obj_5_cdec___pyx_scope_struct_10_kbest_features *__pyx_cur_scope; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations @@ -9729,7 +9806,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_16kbest_features(struct __pyx_obj_ __Pyx_INCREF(__pyx_cur_scope->__pyx_v_size); __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_size); { - __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_10Hypergraph_18generator6, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_10Hypergraph_20generator6, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_cur_scope); __Pyx_RefNannyFinishContext(); return (PyObject *) gen; @@ -9747,7 +9824,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_16kbest_features(struct __pyx_obj_ return __pyx_r; } -static PyObject *__pyx_gb_5_cdec_10Hypergraph_18generator6(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */ +static PyObject *__pyx_gb_5_cdec_10Hypergraph_20generator6(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */ { struct __pyx_obj_5_cdec___pyx_scope_struct_10_kbest_features *__pyx_cur_scope = ((struct __pyx_obj_5_cdec___pyx_scope_struct_10_kbest_features *)__pyx_generator->closure); PyObject *__pyx_r = NULL; @@ -9765,19 +9842,19 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_18generator6(__pyx_GeneratorObject return NULL; } __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":78 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":83 * def kbest_features(self, size): * """hg.kbest_trees(size) -> List of k-best feature vectors in the hypergraph.""" * cdef kbest.KBestDerivations[FastSparseVector[weight_t], kbest.FeatureVectorTraversal]* derivations = new kbest.KBestDerivations[FastSparseVector[weight_t], kbest.FeatureVectorTraversal](self.hg[0], size) # <<<<<<<<<<<<<< * cdef kbest.KBestDerivations[FastSparseVector[weight_t], kbest.FeatureVectorTraversal].Derivation* derivation * cdef SparseVector fmap */ - __pyx_t_1 = __Pyx_PyInt_AsUnsignedInt(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_AsUnsignedInt(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_1 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 83; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_cur_scope->__pyx_v_derivations = new KBest::KBestDerivations,FeatureVectorTraversal>((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_t_1); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":82 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":87 * cdef SparseVector fmap * cdef unsigned k * try: # <<<<<<<<<<<<<< @@ -9786,18 +9863,18 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_18generator6(__pyx_GeneratorObject */ /*try:*/ { - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":83 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":88 * cdef unsigned k * try: * for k in range(size): # <<<<<<<<<<<<<< * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not derivation: break */ - __pyx_t_2 = __Pyx_PyInt_AsLong(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_2 == (long)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 83; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_2 = __Pyx_PyInt_AsLong(__pyx_cur_scope->__pyx_v_size); if (unlikely((__pyx_t_2 == (long)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L5;} for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_2; __pyx_t_1+=1) { __pyx_cur_scope->__pyx_v_k = __pyx_t_1; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":84 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":89 * try: * for k in range(size): * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) # <<<<<<<<<<<<<< @@ -9806,7 +9883,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_18generator6(__pyx_GeneratorObject */ __pyx_cur_scope->__pyx_v_derivation = __pyx_cur_scope->__pyx_v_derivations->LazyKthBest((__pyx_cur_scope->__pyx_v_self->hg->nodes_.size() - 1), __pyx_cur_scope->__pyx_v_k); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":85 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":90 * for k in range(size): * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not derivation: break # <<<<<<<<<<<<<< @@ -9820,23 +9897,23 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_18generator6(__pyx_GeneratorObject } __pyx_L9:; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":86 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":91 * derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k) * if not derivation: break * fmap = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< * fmap.vector = new FastSparseVector[weight_t](derivation._yield) * yield fmap */ - __pyx_t_4 = __Pyx_tp_new(((PyObject*)__pyx_ptype_5_cdec_SparseVector)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_4 = __Pyx_tp_new(((PyObject*)__pyx_ptype_5_cdec_SparseVector)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(__pyx_t_4); - if (!(likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5_cdec_SparseVector)))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L5;} + if (!(likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5_cdec_SparseVector)))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_XGOTREF(((PyObject *)__pyx_cur_scope->__pyx_v_fmap)); __Pyx_XDECREF(((PyObject *)__pyx_cur_scope->__pyx_v_fmap)); __Pyx_GIVEREF(__pyx_t_4); __pyx_cur_scope->__pyx_v_fmap = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_4); __pyx_t_4 = 0; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":87 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":92 * if not derivation: break * fmap = SparseVector.__new__(SparseVector) * fmap.vector = new FastSparseVector[weight_t](derivation._yield) # <<<<<<<<<<<<<< @@ -9845,7 +9922,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_18generator6(__pyx_GeneratorObject */ __pyx_cur_scope->__pyx_v_fmap->vector = new FastSparseVector(__pyx_cur_scope->__pyx_v_derivation->yield); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":88 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":93 * fmap = SparseVector.__new__(SparseVector) * fmap.vector = new FastSparseVector[weight_t](derivation._yield) * yield fmap # <<<<<<<<<<<<<< @@ -9864,12 +9941,12 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_18generator6(__pyx_GeneratorObject __pyx_L10_resume_from_yield:; __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L5;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 93; __pyx_clineno = __LINE__; goto __pyx_L5;} } __pyx_L8_break:; } - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":90 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":95 * yield fmap * finally: * del derivations # <<<<<<<<<<<<<< @@ -9914,18 +9991,18 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_18generator6(__pyx_GeneratorObject __Pyx_RefNannyFinishContext(); return NULL; } -static PyObject *__pyx_gb_5_cdec_10Hypergraph_21generator7(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ +static PyObject *__pyx_gb_5_cdec_10Hypergraph_23generator7(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ /* Python wrapper */ -static PyObject *__pyx_pw_5_cdec_10Hypergraph_20sample(PyObject *__pyx_v_self, PyObject *__pyx_arg_n); /*proto*/ -static char __pyx_doc_5_cdec_10Hypergraph_19sample[] = "hg.sample(n) -> Sample of n hypotheses from the hypergraph."; -static PyObject *__pyx_pw_5_cdec_10Hypergraph_20sample(PyObject *__pyx_v_self, PyObject *__pyx_arg_n) { +static PyObject *__pyx_pw_5_cdec_10Hypergraph_22sample(PyObject *__pyx_v_self, PyObject *__pyx_arg_n); /*proto*/ +static char __pyx_doc_5_cdec_10Hypergraph_21sample[] = "hg.sample(n) -> Sample of n hypotheses from the hypergraph."; +static PyObject *__pyx_pw_5_cdec_10Hypergraph_22sample(PyObject *__pyx_v_self, PyObject *__pyx_arg_n) { unsigned int __pyx_v_n; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("sample (wrapper)", 0); assert(__pyx_arg_n); { - __pyx_v_n = __Pyx_PyInt_AsUnsignedInt(__pyx_arg_n); if (unlikely((__pyx_v_n == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_n = __Pyx_PyInt_AsUnsignedInt(__pyx_arg_n); if (unlikely((__pyx_v_n == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L3_error:; @@ -9933,12 +10010,12 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_20sample(PyObject *__pyx_v_self, P __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5_cdec_10Hypergraph_19sample(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self), ((unsigned int)__pyx_v_n)); + __pyx_r = __pyx_pf_5_cdec_10Hypergraph_21sample(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self), ((unsigned int)__pyx_v_n)); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":92 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":97 * del derivations * * def sample(self, unsigned n): # <<<<<<<<<<<<<< @@ -9946,7 +10023,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_20sample(PyObject *__pyx_v_self, P * cdef vector[hypergraph.Hypothesis]* hypos = new vector[hypergraph.Hypothesis]() */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_19sample(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, unsigned int __pyx_v_n) { +static PyObject *__pyx_pf_5_cdec_10Hypergraph_21sample(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, unsigned int __pyx_v_n) { struct __pyx_obj_5_cdec___pyx_scope_struct_11_sample *__pyx_cur_scope; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations @@ -9965,7 +10042,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_19sample(struct __pyx_obj_5_cdec_H __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); __pyx_cur_scope->__pyx_v_n = __pyx_v_n; { - __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_10Hypergraph_21generator7, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_10Hypergraph_23generator7, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_cur_scope); __Pyx_RefNannyFinishContext(); return (PyObject *) gen; @@ -9983,7 +10060,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_19sample(struct __pyx_obj_5_cdec_H return __pyx_r; } -static PyObject *__pyx_gb_5_cdec_10Hypergraph_21generator7(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */ +static PyObject *__pyx_gb_5_cdec_10Hypergraph_23generator7(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */ { struct __pyx_obj_5_cdec___pyx_scope_struct_11_sample *__pyx_cur_scope = ((struct __pyx_obj_5_cdec___pyx_scope_struct_11_sample *)__pyx_generator->closure); PyObject *__pyx_r = NULL; @@ -10002,19 +10079,19 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_21generator7(__pyx_GeneratorObject return NULL; } __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":94 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":99 * def sample(self, unsigned n): * """hg.sample(n) -> Sample of n hypotheses from the hypergraph.""" * cdef vector[hypergraph.Hypothesis]* hypos = new vector[hypergraph.Hypothesis]() # <<<<<<<<<<<<<< * hypergraph.sample_hypotheses(self.hg[0], n, self._rng(), hypos) * cdef unsigned k */ - try {__pyx_t_1 = new std::vector();} catch(...) {__Pyx_CppExn2PyErr(); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 94; __pyx_clineno = __LINE__; goto __pyx_L1_error;}} + try {__pyx_t_1 = new std::vector();} catch(...) {__Pyx_CppExn2PyErr(); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 99; __pyx_clineno = __LINE__; goto __pyx_L1_error;}} __pyx_cur_scope->__pyx_v_hypos = __pyx_t_1; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":95 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":100 * """hg.sample(n) -> Sample of n hypotheses from the hypergraph.""" * cdef vector[hypergraph.Hypothesis]* hypos = new vector[hypergraph.Hypothesis]() * hypergraph.sample_hypotheses(self.hg[0], n, self._rng(), hypos) # <<<<<<<<<<<<<< @@ -10023,7 +10100,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_21generator7(__pyx_GeneratorObject */ HypergraphSampler::sample_hypotheses((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_cur_scope->__pyx_v_n, ((struct __pyx_vtabstruct_5_cdec_Hypergraph *)__pyx_cur_scope->__pyx_v_self->__pyx_vtab)->_rng(__pyx_cur_scope->__pyx_v_self), __pyx_cur_scope->__pyx_v_hypos); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":97 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":102 * hypergraph.sample_hypotheses(self.hg[0], n, self._rng(), hypos) * cdef unsigned k * try: # <<<<<<<<<<<<<< @@ -10032,7 +10109,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_21generator7(__pyx_GeneratorObject */ /*try:*/ { - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":98 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":103 * cdef unsigned k * try: * for k in range(hypos.size()): # <<<<<<<<<<<<<< @@ -10043,16 +10120,16 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_21generator7(__pyx_GeneratorObject for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_cur_scope->__pyx_v_k = __pyx_t_3; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":99 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":104 * try: * for k in range(hypos.size()): * yield unicode(GetString(hypos[0][k].words).c_str(), 'utf8') # <<<<<<<<<<<<<< * finally: * del hypos */ - __pyx_t_4 = PyBytes_FromString(TD::GetString(((__pyx_cur_scope->__pyx_v_hypos[0])[__pyx_cur_scope->__pyx_v_k]).words).c_str()); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 99; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_4 = PyBytes_FromString(TD::GetString(((__pyx_cur_scope->__pyx_v_hypos[0])[__pyx_cur_scope->__pyx_v_k]).words).c_str()); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(((PyObject *)__pyx_t_4)); - __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 99; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_t_4)); __Pyx_GIVEREF(((PyObject *)__pyx_t_4)); @@ -10060,7 +10137,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_21generator7(__pyx_GeneratorObject PyTuple_SET_ITEM(__pyx_t_5, 1, ((PyObject *)__pyx_n_s__utf8)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__utf8)); __pyx_t_4 = 0; - __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)(&PyUnicode_Type))), ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 99; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)(&PyUnicode_Type))), ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; __pyx_r = __pyx_t_4; @@ -10075,11 +10152,11 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_21generator7(__pyx_GeneratorObject __pyx_L9_resume_from_yield:; __pyx_t_2 = __pyx_cur_scope->__pyx_t_0; __pyx_t_3 = __pyx_cur_scope->__pyx_t_1; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 99; __pyx_clineno = __LINE__; goto __pyx_L5;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 104; __pyx_clineno = __LINE__; goto __pyx_L5;} } } - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":101 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":106 * yield unicode(GetString(hypos[0][k].words).c_str(), 'utf8') * finally: * del hypos # <<<<<<<<<<<<<< @@ -10126,18 +10203,18 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_21generator7(__pyx_GeneratorObject __Pyx_RefNannyFinishContext(); return NULL; } -static PyObject *__pyx_gb_5_cdec_10Hypergraph_24generator8(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ +static PyObject *__pyx_gb_5_cdec_10Hypergraph_26generator8(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ /* Python wrapper */ -static PyObject *__pyx_pw_5_cdec_10Hypergraph_23sample_trees(PyObject *__pyx_v_self, PyObject *__pyx_arg_n); /*proto*/ -static char __pyx_doc_5_cdec_10Hypergraph_22sample_trees[] = "hg.sample_trees(n) -> Sample of n trees from the hypergraph."; -static PyObject *__pyx_pw_5_cdec_10Hypergraph_23sample_trees(PyObject *__pyx_v_self, PyObject *__pyx_arg_n) { +static PyObject *__pyx_pw_5_cdec_10Hypergraph_25sample_trees(PyObject *__pyx_v_self, PyObject *__pyx_arg_n); /*proto*/ +static char __pyx_doc_5_cdec_10Hypergraph_24sample_trees[] = "hg.sample_trees(n) -> Sample of n trees from the hypergraph."; +static PyObject *__pyx_pw_5_cdec_10Hypergraph_25sample_trees(PyObject *__pyx_v_self, PyObject *__pyx_arg_n) { unsigned int __pyx_v_n; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("sample_trees (wrapper)", 0); assert(__pyx_arg_n); { - __pyx_v_n = __Pyx_PyInt_AsUnsignedInt(__pyx_arg_n); if (unlikely((__pyx_v_n == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_v_n = __Pyx_PyInt_AsUnsignedInt(__pyx_arg_n); if (unlikely((__pyx_v_n == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } goto __pyx_L4_argument_unpacking_done; __pyx_L3_error:; @@ -10145,12 +10222,12 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_23sample_trees(PyObject *__pyx_v_s __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5_cdec_10Hypergraph_22sample_trees(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self), ((unsigned int)__pyx_v_n)); + __pyx_r = __pyx_pf_5_cdec_10Hypergraph_24sample_trees(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self), ((unsigned int)__pyx_v_n)); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":103 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":108 * del hypos * * def sample_trees(self, unsigned n): # <<<<<<<<<<<<<< @@ -10158,7 +10235,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_23sample_trees(PyObject *__pyx_v_s * cdef vector[string]* trees = new vector[string]() */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_22sample_trees(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, unsigned int __pyx_v_n) { +static PyObject *__pyx_pf_5_cdec_10Hypergraph_24sample_trees(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, unsigned int __pyx_v_n) { struct __pyx_obj_5_cdec___pyx_scope_struct_12_sample_trees *__pyx_cur_scope; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations @@ -10177,7 +10254,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_22sample_trees(struct __pyx_obj_5_ __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); __pyx_cur_scope->__pyx_v_n = __pyx_v_n; { - __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_10Hypergraph_24generator8, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_10Hypergraph_26generator8, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_cur_scope); __Pyx_RefNannyFinishContext(); return (PyObject *) gen; @@ -10195,7 +10272,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_22sample_trees(struct __pyx_obj_5_ return __pyx_r; } -static PyObject *__pyx_gb_5_cdec_10Hypergraph_24generator8(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */ +static PyObject *__pyx_gb_5_cdec_10Hypergraph_26generator8(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value) /* generator body */ { struct __pyx_obj_5_cdec___pyx_scope_struct_12_sample_trees *__pyx_cur_scope = ((struct __pyx_obj_5_cdec___pyx_scope_struct_12_sample_trees *)__pyx_generator->closure); PyObject *__pyx_r = NULL; @@ -10214,19 +10291,19 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_24generator8(__pyx_GeneratorObject return NULL; } __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":105 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":110 * def sample_trees(self, unsigned n): * """hg.sample_trees(n) -> Sample of n trees from the hypergraph.""" * cdef vector[string]* trees = new vector[string]() # <<<<<<<<<<<<<< * hypergraph.sample_trees(self.hg[0], n, self._rng(), trees) * cdef unsigned k */ - try {__pyx_t_1 = new std::vector();} catch(...) {__Pyx_CppExn2PyErr(); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 105; __pyx_clineno = __LINE__; goto __pyx_L1_error;}} + try {__pyx_t_1 = new std::vector();} catch(...) {__Pyx_CppExn2PyErr(); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 110; __pyx_clineno = __LINE__; goto __pyx_L1_error;}} __pyx_cur_scope->__pyx_v_trees = __pyx_t_1; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":106 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":111 * """hg.sample_trees(n) -> Sample of n trees from the hypergraph.""" * cdef vector[string]* trees = new vector[string]() * hypergraph.sample_trees(self.hg[0], n, self._rng(), trees) # <<<<<<<<<<<<<< @@ -10235,7 +10312,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_24generator8(__pyx_GeneratorObject */ HypergraphSampler::sample_trees((__pyx_cur_scope->__pyx_v_self->hg[0]), __pyx_cur_scope->__pyx_v_n, ((struct __pyx_vtabstruct_5_cdec_Hypergraph *)__pyx_cur_scope->__pyx_v_self->__pyx_vtab)->_rng(__pyx_cur_scope->__pyx_v_self), __pyx_cur_scope->__pyx_v_trees); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":108 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":113 * hypergraph.sample_trees(self.hg[0], n, self._rng(), trees) * cdef unsigned k * try: # <<<<<<<<<<<<<< @@ -10244,7 +10321,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_24generator8(__pyx_GeneratorObject */ /*try:*/ { - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":109 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":114 * cdef unsigned k * try: * for k in range(trees.size()): # <<<<<<<<<<<<<< @@ -10255,16 +10332,16 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_24generator8(__pyx_GeneratorObject for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_cur_scope->__pyx_v_k = __pyx_t_3; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":110 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":115 * try: * for k in range(trees.size()): * yield unicode(trees[0][k].c_str(), 'utf8') # <<<<<<<<<<<<<< * finally: * del trees */ - __pyx_t_4 = PyBytes_FromString(((__pyx_cur_scope->__pyx_v_trees[0])[__pyx_cur_scope->__pyx_v_k]).c_str()); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 110; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_4 = PyBytes_FromString(((__pyx_cur_scope->__pyx_v_trees[0])[__pyx_cur_scope->__pyx_v_k]).c_str()); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(((PyObject *)__pyx_t_4)); - __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 110; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_t_4)); __Pyx_GIVEREF(((PyObject *)__pyx_t_4)); @@ -10272,7 +10349,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_24generator8(__pyx_GeneratorObject PyTuple_SET_ITEM(__pyx_t_5, 1, ((PyObject *)__pyx_n_s__utf8)); __Pyx_GIVEREF(((PyObject *)__pyx_n_s__utf8)); __pyx_t_4 = 0; - __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)(&PyUnicode_Type))), ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 110; __pyx_clineno = __LINE__; goto __pyx_L5;} + __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)(&PyUnicode_Type))), ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L5;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; __pyx_r = __pyx_t_4; @@ -10287,11 +10364,11 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_24generator8(__pyx_GeneratorObject __pyx_L9_resume_from_yield:; __pyx_t_2 = __pyx_cur_scope->__pyx_t_0; __pyx_t_3 = __pyx_cur_scope->__pyx_t_1; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 110; __pyx_clineno = __LINE__; goto __pyx_L5;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L5;} } } - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":112 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":117 * yield unicode(trees[0][k].c_str(), 'utf8') * finally: * del trees # <<<<<<<<<<<<<< @@ -10340,18 +10417,18 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_24generator8(__pyx_GeneratorObject } /* Python wrapper */ -static PyObject *__pyx_pw_5_cdec_10Hypergraph_26intersect(PyObject *__pyx_v_self, PyObject *__pyx_v_inp); /*proto*/ -static char __pyx_doc_5_cdec_10Hypergraph_25intersect[] = "hg.intersect(Lattice/string): Intersect the hypergraph with the provided reference."; -static PyObject *__pyx_pw_5_cdec_10Hypergraph_26intersect(PyObject *__pyx_v_self, PyObject *__pyx_v_inp) { +static PyObject *__pyx_pw_5_cdec_10Hypergraph_28intersect(PyObject *__pyx_v_self, PyObject *__pyx_v_inp); /*proto*/ +static char __pyx_doc_5_cdec_10Hypergraph_27intersect[] = "hg.intersect(Lattice/string): Intersect the hypergraph with the provided reference."; +static PyObject *__pyx_pw_5_cdec_10Hypergraph_28intersect(PyObject *__pyx_v_self, PyObject *__pyx_v_inp) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("intersect (wrapper)", 0); - __pyx_r = __pyx_pf_5_cdec_10Hypergraph_25intersect(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self), ((PyObject *)__pyx_v_inp)); + __pyx_r = __pyx_pf_5_cdec_10Hypergraph_27intersect(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self), ((PyObject *)__pyx_v_inp)); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":114 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":119 * del trees * * def intersect(self, inp): # <<<<<<<<<<<<<< @@ -10359,7 +10436,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_26intersect(PyObject *__pyx_v_self * cdef Lattice lat */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_25intersect(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_inp) { +static PyObject *__pyx_pf_5_cdec_10Hypergraph_27intersect(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_inp) { struct __pyx_obj_5_cdec_Lattice *__pyx_v_lat = 0; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations @@ -10371,7 +10448,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_25intersect(struct __pyx_obj_5_cde int __pyx_clineno = 0; __Pyx_RefNannySetupContext("intersect", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":117 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":122 * """hg.intersect(Lattice/string): Intersect the hypergraph with the provided reference.""" * cdef Lattice lat * if isinstance(inp, Lattice): # <<<<<<<<<<<<<< @@ -10384,7 +10461,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_25intersect(struct __pyx_obj_5_cde __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_2) { - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":118 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":123 * cdef Lattice lat * if isinstance(inp, Lattice): * lat = inp # <<<<<<<<<<<<<< @@ -10396,7 +10473,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_25intersect(struct __pyx_obj_5_cde goto __pyx_L3; } - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":119 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":124 * if isinstance(inp, Lattice): * lat = inp * elif isinstance(inp, basestring): # <<<<<<<<<<<<<< @@ -10405,23 +10482,23 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_25intersect(struct __pyx_obj_5_cde */ __pyx_t_1 = __pyx_builtin_basestring; __Pyx_INCREF(__pyx_t_1); - __pyx_t_2 = PyObject_IsInstance(__pyx_v_inp, __pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 119; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_IsInstance(__pyx_v_inp, __pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_2) { - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":120 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":125 * lat = inp * elif isinstance(inp, basestring): * lat = Lattice(inp) # <<<<<<<<<<<<<< * else: * raise TypeError('cannot intersect hypergraph with %s' % type(inp)) */ - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 120; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(__pyx_v_inp); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_inp); __Pyx_GIVEREF(__pyx_v_inp); - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Lattice)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 120; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Lattice)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; __pyx_v_lat = ((struct __pyx_obj_5_cdec_Lattice *)__pyx_t_3); @@ -10430,30 +10507,30 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_25intersect(struct __pyx_obj_5_cde } /*else*/ { - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":122 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":127 * lat = Lattice(inp) * else: * raise TypeError('cannot intersect hypergraph with %s' % type(inp)) # <<<<<<<<<<<<<< * return hypergraph.Intersect(lat.lattice[0], self.hg) * */ - __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_15), ((PyObject *)Py_TYPE(__pyx_v_inp))); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_15), ((PyObject *)Py_TYPE(__pyx_v_inp))); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_t_3)); __Pyx_GIVEREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - {__pyx_filename = __pyx_f[3]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[3]; __pyx_lineno = 127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_L3:; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":123 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":128 * else: * raise TypeError('cannot intersect hypergraph with %s' % type(inp)) * return hypergraph.Intersect(lat.lattice[0], self.hg) # <<<<<<<<<<<<<< @@ -10461,7 +10538,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_25intersect(struct __pyx_obj_5_cde * def prune(self, beam_alpha=0, density=0, **kwargs): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = __Pyx_PyBool_FromLong(HG::Intersect((__pyx_v_lat->lattice[0]), __pyx_v_self->hg)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 123; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyBool_FromLong(HG::Intersect((__pyx_v_lat->lattice[0]), __pyx_v_self->hg)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 128; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __pyx_r = __pyx_t_3; __pyx_t_3 = 0; @@ -10482,9 +10559,9 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_25intersect(struct __pyx_obj_5_cde } /* Python wrapper */ -static PyObject *__pyx_pw_5_cdec_10Hypergraph_28prune(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_5_cdec_10Hypergraph_27prune[] = "hg.prune(beam_alpha=0, density=0): Prune the hypergraph.\n beam_alpha: use beam pruning\n density: use density pruning"; -static PyObject *__pyx_pw_5_cdec_10Hypergraph_28prune(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { +static PyObject *__pyx_pw_5_cdec_10Hypergraph_30prune(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static char __pyx_doc_5_cdec_10Hypergraph_29prune[] = "hg.prune(beam_alpha=0, density=0): Prune the hypergraph.\n beam_alpha: use beam pruning\n density: use density pruning"; +static PyObject *__pyx_pw_5_cdec_10Hypergraph_30prune(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_beam_alpha = 0; PyObject *__pyx_v_density = 0; PyObject *__pyx_v_kwargs = 0; @@ -10521,7 +10598,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_28prune(PyObject *__pyx_v_self, Py } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, __pyx_v_kwargs, values, pos_args, "prune") < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, __pyx_v_kwargs, values, pos_args, "prune") < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -10536,20 +10613,20 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_28prune(PyObject *__pyx_v_self, Py } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("prune", 0, 0, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("prune", 0, 0, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[3]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_DECREF(__pyx_v_kwargs); __pyx_v_kwargs = 0; __Pyx_AddTraceback("_cdec.Hypergraph.prune", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5_cdec_10Hypergraph_27prune(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self), __pyx_v_beam_alpha, __pyx_v_density, __pyx_v_kwargs); + __pyx_r = __pyx_pf_5_cdec_10Hypergraph_29prune(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self), __pyx_v_beam_alpha, __pyx_v_density, __pyx_v_kwargs); __Pyx_XDECREF(__pyx_v_kwargs); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":125 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":130 * return hypergraph.Intersect(lat.lattice[0], self.hg) * * def prune(self, beam_alpha=0, density=0, **kwargs): # <<<<<<<<<<<<<< @@ -10557,7 +10634,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_28prune(PyObject *__pyx_v_self, Py * beam_alpha: use beam pruning */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_27prune(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_beam_alpha, PyObject *__pyx_v_density, PyObject *__pyx_v_kwargs) { +static PyObject *__pyx_pf_5_cdec_10Hypergraph_29prune(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_beam_alpha, PyObject *__pyx_v_density, PyObject *__pyx_v_kwargs) { std::vector *__pyx_v_preserve_mask; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations @@ -10569,7 +10646,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_27prune(struct __pyx_obj_5_cdec_Hy int __pyx_clineno = 0; __Pyx_RefNannySetupContext("prune", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":129 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":134 * beam_alpha: use beam pruning * density: use density pruning""" * cdef hypergraph.EdgeMask* preserve_mask = NULL # <<<<<<<<<<<<<< @@ -10578,17 +10655,17 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_27prune(struct __pyx_obj_5_cdec_Hy */ __pyx_v_preserve_mask = NULL; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":130 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":135 * density: use density pruning""" * cdef hypergraph.EdgeMask* preserve_mask = NULL * if 'csplit_preserve_full_word' in kwargs: # <<<<<<<<<<<<<< * preserve_mask = new hypergraph.EdgeMask(self.hg.edges_.size()) * preserve_mask[0][hypergraph.GetFullWordEdgeIndex(self.hg[0])] = True */ - __pyx_t_1 = (__Pyx_PyDict_Contains(((PyObject *)__pyx_n_s_16), ((PyObject *)__pyx_v_kwargs), Py_EQ)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = (__Pyx_PyDict_Contains(((PyObject *)__pyx_n_s_16), ((PyObject *)__pyx_v_kwargs), Py_EQ)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 135; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_1) { - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":131 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":136 * cdef hypergraph.EdgeMask* preserve_mask = NULL * if 'csplit_preserve_full_word' in kwargs: * preserve_mask = new hypergraph.EdgeMask(self.hg.edges_.size()) # <<<<<<<<<<<<<< @@ -10597,7 +10674,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_27prune(struct __pyx_obj_5_cdec_Hy */ __pyx_v_preserve_mask = new std::vector(__pyx_v_self->hg->edges_.size()); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":132 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":137 * if 'csplit_preserve_full_word' in kwargs: * preserve_mask = new hypergraph.EdgeMask(self.hg.edges_.size()) * preserve_mask[0][hypergraph.GetFullWordEdgeIndex(self.hg[0])] = True # <<<<<<<<<<<<<< @@ -10609,18 +10686,18 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_27prune(struct __pyx_obj_5_cdec_Hy } __pyx_L3:; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":133 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":138 * preserve_mask = new hypergraph.EdgeMask(self.hg.edges_.size()) * preserve_mask[0][hypergraph.GetFullWordEdgeIndex(self.hg[0])] = True * self.hg.PruneInsideOutside(beam_alpha, density, preserve_mask, False, 1, False) # <<<<<<<<<<<<<< * if preserve_mask: * del preserve_mask */ - __pyx_t_2 = __pyx_PyFloat_AsDouble(__pyx_v_beam_alpha); if (unlikely((__pyx_t_2 == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_3 = __pyx_PyFloat_AsDouble(__pyx_v_density); if (unlikely((__pyx_t_3 == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __pyx_PyFloat_AsDouble(__pyx_v_beam_alpha); if (unlikely((__pyx_t_2 == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsDouble(__pyx_v_density); if (unlikely((__pyx_t_3 == (double)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_self->hg->PruneInsideOutside(__pyx_t_2, __pyx_t_3, __pyx_v_preserve_mask, 0, 1.0, 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":134 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":139 * preserve_mask[0][hypergraph.GetFullWordEdgeIndex(self.hg[0])] = True * self.hg.PruneInsideOutside(beam_alpha, density, preserve_mask, False, 1, False) * if preserve_mask: # <<<<<<<<<<<<<< @@ -10630,7 +10707,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_27prune(struct __pyx_obj_5_cdec_Hy __pyx_t_1 = (__pyx_v_preserve_mask != 0); if (__pyx_t_1) { - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":135 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":140 * self.hg.PruneInsideOutside(beam_alpha, density, preserve_mask, False, 1, False) * if preserve_mask: * del preserve_mask # <<<<<<<<<<<<<< @@ -10654,18 +10731,18 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_27prune(struct __pyx_obj_5_cdec_Hy } /* Python wrapper */ -static PyObject *__pyx_pw_5_cdec_10Hypergraph_30lattice(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/ -static char __pyx_doc_5_cdec_10Hypergraph_29lattice[] = "hg.lattice() -> Lattice corresponding to the hypergraph."; -static PyObject *__pyx_pw_5_cdec_10Hypergraph_30lattice(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) { +static PyObject *__pyx_pw_5_cdec_10Hypergraph_32lattice(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/ +static char __pyx_doc_5_cdec_10Hypergraph_31lattice[] = "hg.lattice() -> Lattice corresponding to the hypergraph."; +static PyObject *__pyx_pw_5_cdec_10Hypergraph_32lattice(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("lattice (wrapper)", 0); - __pyx_r = __pyx_pf_5_cdec_10Hypergraph_29lattice(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self)); + __pyx_r = __pyx_pf_5_cdec_10Hypergraph_31lattice(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self)); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":137 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":142 * del preserve_mask * * def lattice(self): # TODO direct hg -> lattice conversion in cdec # <<<<<<<<<<<<<< @@ -10673,7 +10750,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_30lattice(PyObject *__pyx_v_self, * cdef bytes plf = hypergraph.AsPLF(self.hg[0], True).c_str() */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_29lattice(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self) { +static PyObject *__pyx_pf_5_cdec_10Hypergraph_31lattice(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self) { PyObject *__pyx_v_plf = 0; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations @@ -10685,19 +10762,19 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_29lattice(struct __pyx_obj_5_cdec_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("lattice", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":139 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":144 * def lattice(self): # TODO direct hg -> lattice conversion in cdec * """hg.lattice() -> Lattice corresponding to the hypergraph.""" * cdef bytes plf = hypergraph.AsPLF(self.hg[0], True).c_str() # <<<<<<<<<<<<<< * return Lattice(eval(plf)) * */ - __pyx_t_1 = PyBytes_FromString(HypergraphIO::AsPLF((__pyx_v_self->hg[0]), 1).c_str()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 139; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyBytes_FromString(HypergraphIO::AsPLF((__pyx_v_self->hg[0]), 1).c_str()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); __pyx_v_plf = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":140 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":145 * """hg.lattice() -> Lattice corresponding to the hypergraph.""" * cdef bytes plf = hypergraph.AsPLF(self.hg[0], True).c_str() * return Lattice(eval(plf)) # <<<<<<<<<<<<<< @@ -10705,17 +10782,17 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_29lattice(struct __pyx_obj_5_cdec_ * def plf(self): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_Globals(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_Globals(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); if (((PyObject *)__pyx_v_plf)) { - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__plf), ((PyObject *)__pyx_v_plf)) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__plf), ((PyObject *)__pyx_v_plf)) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } if (((PyObject *)__pyx_v_self)) { - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__self), ((PyObject *)__pyx_v_self)) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__self), ((PyObject *)__pyx_v_self)) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(((PyObject *)__pyx_v_plf)); PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_v_plf)); @@ -10726,15 +10803,15 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_29lattice(struct __pyx_obj_5_cdec_ __Pyx_GIVEREF(((PyObject *)__pyx_t_2)); __pyx_t_1 = 0; __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_builtin_eval, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(__pyx_builtin_eval, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Lattice)), ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_Lattice)), ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; __pyx_r = __pyx_t_2; @@ -10757,18 +10834,18 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_29lattice(struct __pyx_obj_5_cdec_ } /* Python wrapper */ -static PyObject *__pyx_pw_5_cdec_10Hypergraph_32plf(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/ -static char __pyx_doc_5_cdec_10Hypergraph_31plf[] = "hg.plf() -> Lattice PLF representation corresponding to the hypergraph."; -static PyObject *__pyx_pw_5_cdec_10Hypergraph_32plf(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) { +static PyObject *__pyx_pw_5_cdec_10Hypergraph_34plf(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/ +static char __pyx_doc_5_cdec_10Hypergraph_33plf[] = "hg.plf() -> Lattice PLF representation corresponding to the hypergraph."; +static PyObject *__pyx_pw_5_cdec_10Hypergraph_34plf(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("plf (wrapper)", 0); - __pyx_r = __pyx_pf_5_cdec_10Hypergraph_31plf(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self)); + __pyx_r = __pyx_pf_5_cdec_10Hypergraph_33plf(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self)); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":142 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":147 * return Lattice(eval(plf)) * * def plf(self): # <<<<<<<<<<<<<< @@ -10776,7 +10853,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_32plf(PyObject *__pyx_v_self, CYTH * return bytes(hypergraph.AsPLF(self.hg[0], True).c_str()) */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_31plf(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self) { +static PyObject *__pyx_pf_5_cdec_10Hypergraph_33plf(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; @@ -10786,7 +10863,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_31plf(struct __pyx_obj_5_cdec_Hype int __pyx_clineno = 0; __Pyx_RefNannySetupContext("plf", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":144 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":149 * def plf(self): * """hg.plf() -> Lattice PLF representation corresponding to the hypergraph.""" * return bytes(hypergraph.AsPLF(self.hg[0], True).c_str()) # <<<<<<<<<<<<<< @@ -10794,14 +10871,14 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_31plf(struct __pyx_obj_5_cdec_Hype * def reweight(self, weights): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyBytes_FromString(HypergraphIO::AsPLF((__pyx_v_self->hg[0]), 1).c_str()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyBytes_FromString(HypergraphIO::AsPLF((__pyx_v_self->hg[0]), 1).c_str()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_t_1)); __Pyx_GIVEREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)(&PyBytes_Type))), ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 144; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)(&PyBytes_Type))), ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 149; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; __pyx_r = __pyx_t_1; @@ -10822,18 +10899,18 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_31plf(struct __pyx_obj_5_cdec_Hype } /* Python wrapper */ -static PyObject *__pyx_pw_5_cdec_10Hypergraph_34reweight(PyObject *__pyx_v_self, PyObject *__pyx_v_weights); /*proto*/ -static char __pyx_doc_5_cdec_10Hypergraph_33reweight[] = "hg.reweight(SparseVector/DenseVector): Reweight the hypergraph with a new vector."; -static PyObject *__pyx_pw_5_cdec_10Hypergraph_34reweight(PyObject *__pyx_v_self, PyObject *__pyx_v_weights) { +static PyObject *__pyx_pw_5_cdec_10Hypergraph_36reweight(PyObject *__pyx_v_self, PyObject *__pyx_v_weights); /*proto*/ +static char __pyx_doc_5_cdec_10Hypergraph_35reweight[] = "hg.reweight(SparseVector/DenseVector): Reweight the hypergraph with a new vector."; +static PyObject *__pyx_pw_5_cdec_10Hypergraph_36reweight(PyObject *__pyx_v_self, PyObject *__pyx_v_weights) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("reweight (wrapper)", 0); - __pyx_r = __pyx_pf_5_cdec_10Hypergraph_33reweight(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self), ((PyObject *)__pyx_v_weights)); + __pyx_r = __pyx_pf_5_cdec_10Hypergraph_35reweight(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self), ((PyObject *)__pyx_v_weights)); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":146 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":151 * return bytes(hypergraph.AsPLF(self.hg[0], True).c_str()) * * def reweight(self, weights): # <<<<<<<<<<<<<< @@ -10841,7 +10918,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_34reweight(PyObject *__pyx_v_self, * if isinstance(weights, SparseVector): */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_33reweight(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_weights) { +static PyObject *__pyx_pf_5_cdec_10Hypergraph_35reweight(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self, PyObject *__pyx_v_weights) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; @@ -10852,7 +10929,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_33reweight(struct __pyx_obj_5_cdec int __pyx_clineno = 0; __Pyx_RefNannySetupContext("reweight", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":148 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":153 * def reweight(self, weights): * """hg.reweight(SparseVector/DenseVector): Reweight the hypergraph with a new vector.""" * if isinstance(weights, SparseVector): # <<<<<<<<<<<<<< @@ -10865,7 +10942,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_33reweight(struct __pyx_obj_5_cdec __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_2) { - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":149 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":154 * """hg.reweight(SparseVector/DenseVector): Reweight the hypergraph with a new vector.""" * if isinstance(weights, SparseVector): * self.hg.Reweight(( weights).vector[0]) # <<<<<<<<<<<<<< @@ -10876,7 +10953,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_33reweight(struct __pyx_obj_5_cdec goto __pyx_L3; } - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":150 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":155 * if isinstance(weights, SparseVector): * self.hg.Reweight(( weights).vector[0]) * elif isinstance(weights, DenseVector): # <<<<<<<<<<<<<< @@ -10889,7 +10966,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_33reweight(struct __pyx_obj_5_cdec __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_2) { - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":151 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":156 * self.hg.Reweight(( weights).vector[0]) * elif isinstance(weights, DenseVector): * self.hg.Reweight(( weights).vector[0]) # <<<<<<<<<<<<<< @@ -10901,26 +10978,26 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_33reweight(struct __pyx_obj_5_cdec } /*else*/ { - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":153 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":158 * self.hg.Reweight(( weights).vector[0]) * else: * raise TypeError('cannot reweight hypergraph with %s' % type(weights)) # <<<<<<<<<<<<<< * * property edges: */ - __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_17), ((PyObject *)Py_TYPE(__pyx_v_weights))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_17), ((PyObject *)Py_TYPE(__pyx_v_weights))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_t_1)); __Pyx_GIVEREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; __Pyx_Raise(__pyx_t_1, 0, 0, 0); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[3]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[3]; __pyx_lineno = 158; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_L3:; @@ -10949,7 +11026,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_5edges_1__get__(PyObject *__pyx_v_ return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":156 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":161 * * property edges: * def __get__(self): # <<<<<<<<<<<<<< @@ -10975,7 +11052,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_5edges___get__(struct __pyx_obj_5_ __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); { - __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_10Hypergraph_5edges_2generator9, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 156; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_10Hypergraph_5edges_2generator9, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 161; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_cur_scope); __Pyx_RefNannyFinishContext(); return (PyObject *) gen; @@ -11011,9 +11088,9 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_5edges_2generator9(__pyx_Generator return NULL; } __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 156; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 161; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":158 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":163 * def __get__(self): * cdef unsigned i * for i in range(self.hg.edges_.size()): # <<<<<<<<<<<<<< @@ -11024,16 +11101,16 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_5edges_2generator9(__pyx_Generator for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":159 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":164 * cdef unsigned i * for i in range(self.hg.edges_.size()): * yield HypergraphEdge().init(self.hg, i) # <<<<<<<<<<<<<< * * property nodes: */ - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_HypergraphEdge)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_HypergraphEdge)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 164; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = ((struct __pyx_vtabstruct_5_cdec_HypergraphEdge *)((struct __pyx_obj_5_cdec_HypergraphEdge *)__pyx_t_3)->__pyx_vtab)->init(((struct __pyx_obj_5_cdec_HypergraphEdge *)__pyx_t_3), __pyx_cur_scope->__pyx_v_self->hg, __pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = ((struct __pyx_vtabstruct_5_cdec_HypergraphEdge *)((struct __pyx_obj_5_cdec_HypergraphEdge *)__pyx_t_3)->__pyx_vtab)->init(((struct __pyx_obj_5_cdec_HypergraphEdge *)__pyx_t_3), __pyx_cur_scope->__pyx_v_self->hg, __pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 164; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_r = __pyx_t_4; @@ -11048,7 +11125,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_5edges_2generator9(__pyx_Generator __pyx_L6_resume_from_yield:; __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 159; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 164; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } PyErr_SetNone(PyExc_StopIteration); goto __pyx_L0; @@ -11076,7 +11153,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_5nodes_1__get__(PyObject *__pyx_v_ return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":162 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":167 * * property nodes: * def __get__(self): # <<<<<<<<<<<<<< @@ -11102,7 +11179,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_5nodes___get__(struct __pyx_obj_5_ __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); { - __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_10Hypergraph_5nodes_2generator10, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 162; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_10Hypergraph_5nodes_2generator10, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_cur_scope); __Pyx_RefNannyFinishContext(); return (PyObject *) gen; @@ -11138,9 +11215,9 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_5nodes_2generator10(__pyx_Generato return NULL; } __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 162; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":164 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":169 * def __get__(self): * cdef unsigned i * for i in range(self.hg.nodes_.size()): # <<<<<<<<<<<<<< @@ -11151,16 +11228,16 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_5nodes_2generator10(__pyx_Generato for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":165 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":170 * cdef unsigned i * for i in range(self.hg.nodes_.size()): * yield HypergraphNode().init(self.hg, i) # <<<<<<<<<<<<<< * * property goal: */ - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_HypergraphNode)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 165; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_HypergraphNode)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = ((struct __pyx_vtabstruct_5_cdec_HypergraphNode *)((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_t_3)->__pyx_vtab)->init(((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_t_3), __pyx_cur_scope->__pyx_v_self->hg, __pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 165; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = ((struct __pyx_vtabstruct_5_cdec_HypergraphNode *)((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_t_3)->__pyx_vtab)->init(((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_t_3), __pyx_cur_scope->__pyx_v_self->hg, __pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_r = __pyx_t_4; @@ -11175,7 +11252,7 @@ static PyObject *__pyx_gb_5_cdec_10Hypergraph_5nodes_2generator10(__pyx_Generato __pyx_L6_resume_from_yield:; __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 165; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } PyErr_SetNone(PyExc_StopIteration); goto __pyx_L0; @@ -11202,7 +11279,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_4goal_1__get__(PyObject *__pyx_v_s return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":168 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":173 * * property goal: * def __get__(self): # <<<<<<<<<<<<<< @@ -11220,7 +11297,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_4goal___get__(struct __pyx_obj_5_c int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":169 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":174 * property goal: * def __get__(self): * return HypergraphNode().init(self.hg, self.hg.GoalNode()) # <<<<<<<<<<<<<< @@ -11228,9 +11305,9 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_4goal___get__(struct __pyx_obj_5_c * property npaths: */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_HypergraphNode)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 169; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_HypergraphNode)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = ((struct __pyx_vtabstruct_5_cdec_HypergraphNode *)((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_t_1)->__pyx_vtab)->init(((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_t_1), __pyx_v_self->hg, __pyx_v_self->hg->GoalNode()); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 169; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = ((struct __pyx_vtabstruct_5_cdec_HypergraphNode *)((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_t_1)->__pyx_vtab)->init(((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_t_1), __pyx_v_self->hg, __pyx_v_self->hg->GoalNode()); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_r = __pyx_t_2; @@ -11261,7 +11338,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_6npaths_1__get__(PyObject *__pyx_v return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":172 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":177 * * property npaths: * def __get__(self): # <<<<<<<<<<<<<< @@ -11278,7 +11355,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_6npaths___get__(struct __pyx_obj_5 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":173 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":178 * property npaths: * def __get__(self): * return self.hg.NumberOfPaths() # <<<<<<<<<<<<<< @@ -11286,7 +11363,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_6npaths___get__(struct __pyx_obj_5 * def inside_outside(self): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->hg->NumberOfPaths()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->hg->NumberOfPaths()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 178; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; @@ -11305,18 +11382,18 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_6npaths___get__(struct __pyx_obj_5 } /* Python wrapper */ -static PyObject *__pyx_pw_5_cdec_10Hypergraph_36inside_outside(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/ -static char __pyx_doc_5_cdec_10Hypergraph_35inside_outside[] = "hg.inside_outside() -> SparseVector with inside-outside scores for each feature."; -static PyObject *__pyx_pw_5_cdec_10Hypergraph_36inside_outside(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) { +static PyObject *__pyx_pw_5_cdec_10Hypergraph_38inside_outside(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/ +static char __pyx_doc_5_cdec_10Hypergraph_37inside_outside[] = "hg.inside_outside() -> SparseVector with inside-outside scores for each feature."; +static PyObject *__pyx_pw_5_cdec_10Hypergraph_38inside_outside(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("inside_outside (wrapper)", 0); - __pyx_r = __pyx_pf_5_cdec_10Hypergraph_35inside_outside(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self)); + __pyx_r = __pyx_pf_5_cdec_10Hypergraph_37inside_outside(((struct __pyx_obj_5_cdec_Hypergraph *)__pyx_v_self)); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":175 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":180 * return self.hg.NumberOfPaths() * * def inside_outside(self): # <<<<<<<<<<<<<< @@ -11324,7 +11401,7 @@ static PyObject *__pyx_pw_5_cdec_10Hypergraph_36inside_outside(PyObject *__pyx_v * cdef FastSparseVector[prob_t]* result = new FastSparseVector[prob_t]() */ -static PyObject *__pyx_pf_5_cdec_10Hypergraph_35inside_outside(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self) { +static PyObject *__pyx_pf_5_cdec_10Hypergraph_37inside_outside(struct __pyx_obj_5_cdec_Hypergraph *__pyx_v_self) { FastSparseVector *__pyx_v_result; prob_t __pyx_v_z; struct __pyx_obj_5_cdec_SparseVector *__pyx_v_vector = 0; @@ -11340,7 +11417,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_35inside_outside(struct __pyx_obj_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("inside_outside", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":177 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":182 * def inside_outside(self): * """hg.inside_outside() -> SparseVector with inside-outside scores for each feature.""" * cdef FastSparseVector[prob_t]* result = new FastSparseVector[prob_t]() # <<<<<<<<<<<<<< @@ -11349,7 +11426,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_35inside_outside(struct __pyx_obj_ */ __pyx_v_result = new FastSparseVector(); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":178 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":183 * """hg.inside_outside() -> SparseVector with inside-outside scores for each feature.""" * cdef FastSparseVector[prob_t]* result = new FastSparseVector[prob_t]() * cdef prob_t z = hypergraph.InsideOutside(self.hg[0], result) # <<<<<<<<<<<<<< @@ -11358,7 +11435,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_35inside_outside(struct __pyx_obj_ */ __pyx_v_z = InsideOutside, EdgeFeaturesAndProbWeightFunction>((__pyx_v_self->hg[0]), __pyx_v_result); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":179 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":184 * cdef FastSparseVector[prob_t]* result = new FastSparseVector[prob_t]() * cdef prob_t z = hypergraph.InsideOutside(self.hg[0], result) * result[0] /= z # <<<<<<<<<<<<<< @@ -11367,20 +11444,20 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_35inside_outside(struct __pyx_obj_ */ (__pyx_v_result[0]) /= __pyx_v_z; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":180 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":185 * cdef prob_t z = hypergraph.InsideOutside(self.hg[0], result) * result[0] /= z * cdef SparseVector vector = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< * vector.vector = new FastSparseVector[double]() * cdef FastSparseVector[prob_t].const_iterator* it = new FastSparseVector[prob_t].const_iterator(result[0], False) */ - __pyx_t_1 = __Pyx_tp_new(((PyObject*)__pyx_ptype_5_cdec_SparseVector)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 180; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_tp_new(((PyObject*)__pyx_ptype_5_cdec_SparseVector)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5_cdec_SparseVector)))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 180; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5_cdec_SparseVector)))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_vector = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":181 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":186 * result[0] /= z * cdef SparseVector vector = SparseVector.__new__(SparseVector) * vector.vector = new FastSparseVector[double]() # <<<<<<<<<<<<<< @@ -11389,7 +11466,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_35inside_outside(struct __pyx_obj_ */ __pyx_v_vector->vector = new FastSparseVector(); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":182 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":187 * cdef SparseVector vector = SparseVector.__new__(SparseVector) * vector.vector = new FastSparseVector[double]() * cdef FastSparseVector[prob_t].const_iterator* it = new FastSparseVector[prob_t].const_iterator(result[0], False) # <<<<<<<<<<<<<< @@ -11398,7 +11475,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_35inside_outside(struct __pyx_obj_ */ __pyx_v_it = new FastSparseVector::const_iterator((__pyx_v_result[0]), 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":184 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":189 * cdef FastSparseVector[prob_t].const_iterator* it = new FastSparseVector[prob_t].const_iterator(result[0], False) * cdef unsigned i * for i in range(result.size()): # <<<<<<<<<<<<<< @@ -11409,7 +11486,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_35inside_outside(struct __pyx_obj_ for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":185 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":190 * cdef unsigned i * for i in range(result.size()): * vector.vector.set_value(it[0].ptr().first, log(it[0].ptr().second)) # <<<<<<<<<<<<<< @@ -11418,7 +11495,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_35inside_outside(struct __pyx_obj_ */ __pyx_v_vector->vector->set_value((__pyx_v_it[0]).operator->()->first, log((__pyx_v_it[0]).operator->()->second)); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":186 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":191 * for i in range(result.size()): * vector.vector.set_value(it[0].ptr().first, log(it[0].ptr().second)) * pinc(it[0]) # ++it # <<<<<<<<<<<<<< @@ -11428,7 +11505,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_35inside_outside(struct __pyx_obj_ (++(__pyx_v_it[0])); } - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":187 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":192 * vector.vector.set_value(it[0].ptr().first, log(it[0].ptr().second)) * pinc(it[0]) # ++it * del it # <<<<<<<<<<<<<< @@ -11437,7 +11514,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_35inside_outside(struct __pyx_obj_ */ delete __pyx_v_it; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":188 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":193 * pinc(it[0]) # ++it * del it * del result # <<<<<<<<<<<<<< @@ -11446,7 +11523,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_35inside_outside(struct __pyx_obj_ */ delete __pyx_v_result; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":189 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":194 * del it * del result * return vector # <<<<<<<<<<<<<< @@ -11471,7 +11548,7 @@ static PyObject *__pyx_pf_5_cdec_10Hypergraph_35inside_outside(struct __pyx_obj_ return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":196 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":201 * cdef public TRule trule * * cdef init(self, hypergraph.Hypergraph* hg, unsigned i): # <<<<<<<<<<<<<< @@ -11488,7 +11565,7 @@ static PyObject *__pyx_f_5_cdec_14HypergraphEdge_init(struct __pyx_obj_5_cdec_Hy int __pyx_clineno = 0; __Pyx_RefNannySetupContext("init", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":197 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":202 * * cdef init(self, hypergraph.Hypergraph* hg, unsigned i): * self.hg = hg # <<<<<<<<<<<<<< @@ -11497,7 +11574,7 @@ static PyObject *__pyx_f_5_cdec_14HypergraphEdge_init(struct __pyx_obj_5_cdec_Hy */ __pyx_v_self->hg = __pyx_v_hg; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":198 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":203 * cdef init(self, hypergraph.Hypergraph* hg, unsigned i): * self.hg = hg * self.edge = &hg.edges_[i] # <<<<<<<<<<<<<< @@ -11506,23 +11583,23 @@ static PyObject *__pyx_f_5_cdec_14HypergraphEdge_init(struct __pyx_obj_5_cdec_Hy */ __pyx_v_self->edge = (&(__pyx_v_hg->edges_[__pyx_v_i])); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":199 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":204 * self.hg = hg * self.edge = &hg.edges_[i] * self.trule = TRule.__new__(TRule) # <<<<<<<<<<<<<< * self.trule.rule = new shared_ptr[grammar.TRule](self.edge.rule_) * return self */ - __pyx_t_1 = __Pyx_tp_new(((PyObject*)__pyx_ptype_5_cdec_TRule)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 199; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_tp_new(((PyObject*)__pyx_ptype_5_cdec_TRule)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 204; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5_cdec_TRule)))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 199; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5_cdec_TRule)))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 204; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GIVEREF(__pyx_t_1); __Pyx_GOTREF(__pyx_v_self->trule); __Pyx_DECREF(((PyObject *)__pyx_v_self->trule)); __pyx_v_self->trule = ((struct __pyx_obj_5_cdec_TRule *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":200 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":205 * self.edge = &hg.edges_[i] * self.trule = TRule.__new__(TRule) * self.trule.rule = new shared_ptr[grammar.TRule](self.edge.rule_) # <<<<<<<<<<<<<< @@ -11531,7 +11608,7 @@ static PyObject *__pyx_f_5_cdec_14HypergraphEdge_init(struct __pyx_obj_5_cdec_Hy */ __pyx_v_self->trule->rule = new boost::shared_ptr(__pyx_v_self->edge->rule_); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":201 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":206 * self.trule = TRule.__new__(TRule) * self.trule.rule = new shared_ptr[grammar.TRule](self.edge.rule_) * return self # <<<<<<<<<<<<<< @@ -11566,7 +11643,7 @@ static Py_ssize_t __pyx_pw_5_cdec_14HypergraphEdge_1__len__(PyObject *__pyx_v_se return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":203 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":208 * return self * * def __len__(self): # <<<<<<<<<<<<<< @@ -11579,7 +11656,7 @@ static Py_ssize_t __pyx_pf_5_cdec_14HypergraphEdge___len__(struct __pyx_obj_5_cd __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__len__", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":204 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":209 * * def __len__(self): * return self.edge.tail_nodes_.size() # <<<<<<<<<<<<<< @@ -11606,7 +11683,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_9head_node_1__get__(PyObject * return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":207 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":212 * * property head_node: * def __get__(self): # <<<<<<<<<<<<<< @@ -11624,7 +11701,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_9head_node___get__(struct __py int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":208 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":213 * property head_node: * def __get__(self): * return HypergraphNode().init(self.hg, self.edge.head_node_) # <<<<<<<<<<<<<< @@ -11632,9 +11709,9 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_9head_node___get__(struct __py * property tail_nodes: */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_HypergraphNode)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_HypergraphNode)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 213; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = ((struct __pyx_vtabstruct_5_cdec_HypergraphNode *)((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_t_1)->__pyx_vtab)->init(((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_t_1), __pyx_v_self->hg, __pyx_v_self->edge->head_node_); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = ((struct __pyx_vtabstruct_5_cdec_HypergraphNode *)((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_t_1)->__pyx_vtab)->init(((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_t_1), __pyx_v_self->hg, __pyx_v_self->edge->head_node_); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 213; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_r = __pyx_t_2; @@ -11666,7 +11743,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_10tail_nodes_1__get__(PyObject return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":211 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":216 * * property tail_nodes: * def __get__(self): # <<<<<<<<<<<<<< @@ -11692,7 +11769,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_10tail_nodes___get__(struct __ __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); { - __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_14HypergraphEdge_10tail_nodes_2generator11, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 211; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_14HypergraphEdge_10tail_nodes_2generator11, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 216; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_cur_scope); __Pyx_RefNannyFinishContext(); return (PyObject *) gen; @@ -11728,9 +11805,9 @@ static PyObject *__pyx_gb_5_cdec_14HypergraphEdge_10tail_nodes_2generator11(__py return NULL; } __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 211; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 216; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":213 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":218 * def __get__(self): * cdef unsigned i * for i in range(self.edge.tail_nodes_.size()): # <<<<<<<<<<<<<< @@ -11741,16 +11818,16 @@ static PyObject *__pyx_gb_5_cdec_14HypergraphEdge_10tail_nodes_2generator11(__py for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":214 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":219 * cdef unsigned i * for i in range(self.edge.tail_nodes_.size()): * yield HypergraphNode().init(self.hg, self.edge.tail_nodes_[i]) # <<<<<<<<<<<<<< * * property span: */ - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_HypergraphNode)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 214; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_HypergraphNode)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 219; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = ((struct __pyx_vtabstruct_5_cdec_HypergraphNode *)((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_t_3)->__pyx_vtab)->init(((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_t_3), __pyx_cur_scope->__pyx_v_self->hg, (__pyx_cur_scope->__pyx_v_self->edge->tail_nodes_[__pyx_cur_scope->__pyx_v_i])); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 214; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = ((struct __pyx_vtabstruct_5_cdec_HypergraphNode *)((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_t_3)->__pyx_vtab)->init(((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_t_3), __pyx_cur_scope->__pyx_v_self->hg, (__pyx_cur_scope->__pyx_v_self->edge->tail_nodes_[__pyx_cur_scope->__pyx_v_i])); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 219; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_r = __pyx_t_4; @@ -11765,7 +11842,7 @@ static PyObject *__pyx_gb_5_cdec_14HypergraphEdge_10tail_nodes_2generator11(__py __pyx_L6_resume_from_yield:; __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 214; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 219; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } PyErr_SetNone(PyExc_StopIteration); goto __pyx_L0; @@ -11792,7 +11869,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_4span_1__get__(PyObject *__pyx return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":217 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":222 * * property span: * def __get__(self): # <<<<<<<<<<<<<< @@ -11811,19 +11888,19 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_4span___get__(struct __pyx_obj int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":218 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":223 * property span: * def __get__(self): * return (self.edge.i_, self.edge.j_) # <<<<<<<<<<<<<< * - * property feature_values: + * property src_span: */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyInt_FromLong(__pyx_v_self->edge->i_); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromLong(__pyx_v_self->edge->i_); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 223; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyInt_FromLong(__pyx_v_self->edge->j_); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_v_self->edge->j_); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 223; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 223; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); @@ -11849,6 +11926,74 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_4span___get__(struct __pyx_obj return __pyx_r; } +/* Python wrapper */ +static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_8src_span_1__get__(PyObject *__pyx_v_self); /*proto*/ +static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_8src_span_1__get__(PyObject *__pyx_v_self) { + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); + __pyx_r = __pyx_pf_5_cdec_14HypergraphEdge_8src_span___get__(((struct __pyx_obj_5_cdec_HypergraphEdge *)__pyx_v_self)); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":226 + * + * property src_span: + * def __get__(self): # <<<<<<<<<<<<<< + * return (self.edge.prev_i_, self.edge.prev_j_) + * + */ + +static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_8src_span___get__(struct __pyx_obj_5_cdec_HypergraphEdge *__pyx_v_self) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__get__", 0); + + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":227 + * property src_span: + * def __get__(self): + * return (self.edge.prev_i_, self.edge.prev_j_) # <<<<<<<<<<<<<< + * + * property feature_values: + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyInt_FromLong(__pyx_v_self->edge->prev_i_); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = PyInt_FromLong(__pyx_v_self->edge->prev_j_); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1); + __Pyx_GIVEREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_2); + __Pyx_GIVEREF(__pyx_t_2); + __pyx_t_1 = 0; + __pyx_t_2 = 0; + __pyx_r = ((PyObject *)__pyx_t_3); + __pyx_t_3 = 0; + goto __pyx_L0; + + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_AddTraceback("_cdec.HypergraphEdge.src_span.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + /* Python wrapper */ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_14feature_values_1__get__(PyObject *__pyx_v_self); /*proto*/ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_14feature_values_1__get__(PyObject *__pyx_v_self) { @@ -11860,7 +12005,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_14feature_values_1__get__(PyOb return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":221 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":230 * * property feature_values: * def __get__(self): # <<<<<<<<<<<<<< @@ -11878,20 +12023,20 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_14feature_values___get__(struc int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":222 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":231 * property feature_values: * def __get__(self): * cdef SparseVector vector = SparseVector.__new__(SparseVector) # <<<<<<<<<<<<<< * vector.vector = new FastSparseVector[double](self.edge.feature_values_) * return vector */ - __pyx_t_1 = __Pyx_tp_new(((PyObject*)__pyx_ptype_5_cdec_SparseVector)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_tp_new(((PyObject*)__pyx_ptype_5_cdec_SparseVector)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5_cdec_SparseVector)))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5_cdec_SparseVector)))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_vector = ((struct __pyx_obj_5_cdec_SparseVector *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":223 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":232 * def __get__(self): * cdef SparseVector vector = SparseVector.__new__(SparseVector) * vector.vector = new FastSparseVector[double](self.edge.feature_values_) # <<<<<<<<<<<<<< @@ -11900,7 +12045,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_14feature_values___get__(struc */ __pyx_v_vector->vector = new FastSparseVector(__pyx_v_self->edge->feature_values_); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":224 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":233 * cdef SparseVector vector = SparseVector.__new__(SparseVector) * vector.vector = new FastSparseVector[double](self.edge.feature_values_) * return vector # <<<<<<<<<<<<<< @@ -11936,7 +12081,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_4prob_1__get__(PyObject *__pyx return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":227 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":236 * * property prob: * def __get__(self): # <<<<<<<<<<<<<< @@ -11953,7 +12098,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_4prob___get__(struct __pyx_obj int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":228 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":237 * property prob: * def __get__(self): * return self.edge.edge_prob_.as_float() # <<<<<<<<<<<<<< @@ -11961,7 +12106,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_4prob___get__(struct __pyx_obj * def __richcmp__(HypergraphEdge x, HypergraphEdge y, int op): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->edge->edge_prob_.as_float()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 228; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyFloat_FromDouble(__pyx_v_self->edge->edge_prob_.as_float()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 237; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; @@ -11985,8 +12130,8 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_3__richcmp__(PyObject *__pyx_v PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__richcmp__ (wrapper)", 0); - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_x), __pyx_ptype_5_cdec_HypergraphEdge, 1, "x", 0))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 230; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_y), __pyx_ptype_5_cdec_HypergraphEdge, 1, "y", 0))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 230; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_x), __pyx_ptype_5_cdec_HypergraphEdge, 1, "x", 0))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 239; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_y), __pyx_ptype_5_cdec_HypergraphEdge, 1, "y", 0))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 239; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = __pyx_pf_5_cdec_14HypergraphEdge_2__richcmp__(((struct __pyx_obj_5_cdec_HypergraphEdge *)__pyx_v_x), ((struct __pyx_obj_5_cdec_HypergraphEdge *)__pyx_v_y), ((int)__pyx_v_op)); goto __pyx_L0; __pyx_L1_error:; @@ -11996,7 +12141,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_3__richcmp__(PyObject *__pyx_v return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":230 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":239 * return self.edge.edge_prob_.as_float() * * def __richcmp__(HypergraphEdge x, HypergraphEdge y, int op): # <<<<<<<<<<<<<< @@ -12014,7 +12159,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_2__richcmp__(struct __pyx_obj_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__richcmp__", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":233 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":242 * if op == 2: # == * return x.edge == y.edge * elif op == 3: # != # <<<<<<<<<<<<<< @@ -12023,7 +12168,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_2__richcmp__(struct __pyx_obj_ */ switch (__pyx_v_op) { - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":231 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":240 * * def __richcmp__(HypergraphEdge x, HypergraphEdge y, int op): * if op == 2: # == # <<<<<<<<<<<<<< @@ -12032,7 +12177,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_2__richcmp__(struct __pyx_obj_ */ case 2: - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":232 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":241 * def __richcmp__(HypergraphEdge x, HypergraphEdge y, int op): * if op == 2: # == * return x.edge == y.edge # <<<<<<<<<<<<<< @@ -12040,14 +12185,14 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_2__richcmp__(struct __pyx_obj_ * return not (x == y) */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyBool_FromLong((__pyx_v_x->edge == __pyx_v_y->edge)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 232; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyBool_FromLong((__pyx_v_x->edge == __pyx_v_y->edge)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 241; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; break; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":233 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":242 * if op == 2: # == * return x.edge == y.edge * elif op == 3: # != # <<<<<<<<<<<<<< @@ -12056,7 +12201,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_2__richcmp__(struct __pyx_obj_ */ case 3: - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":234 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":243 * return x.edge == y.edge * elif op == 3: # != * return not (x == y) # <<<<<<<<<<<<<< @@ -12064,10 +12209,10 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_2__richcmp__(struct __pyx_obj_ * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyObject_RichCompare(((PyObject *)__pyx_v_x), ((PyObject *)__pyx_v_y), Py_EQ); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 234; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 234; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_RichCompare(((PyObject *)__pyx_v_x), ((PyObject *)__pyx_v_y), Py_EQ); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 243; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 243; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyBool_FromLong((!__pyx_t_2)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 234; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyBool_FromLong((!__pyx_t_2)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 243; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; @@ -12075,18 +12220,18 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphEdge_2__richcmp__(struct __pyx_obj_ break; } - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":235 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":244 * elif op == 3: # != * return not (x == y) * raise NotImplemented('comparison not implemented for HypergraphEdge') # <<<<<<<<<<<<<< * * cdef class HypergraphNode: */ - __pyx_t_1 = PyObject_Call(__pyx_builtin_NotImplemented, ((PyObject *)__pyx_k_tuple_19), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 235; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(__pyx_builtin_NotImplemented, ((PyObject *)__pyx_k_tuple_19), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_Raise(__pyx_t_1, 0, 0, 0); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[3]; __pyx_lineno = 235; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[3]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = Py_None; __Pyx_INCREF(Py_None); goto __pyx_L0; @@ -12111,7 +12256,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphEdge_5trule_1__get__(PyObject *__py return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":194 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":199 * cdef hypergraph.Hypergraph* hg * cdef hypergraph.HypergraphEdge* edge * cdef public TRule trule # <<<<<<<<<<<<<< @@ -12153,7 +12298,7 @@ static int __pyx_pf_5_cdec_14HypergraphEdge_5trule_2__set__(struct __pyx_obj_5_c const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__set__", 0); - if (!(likely(((__pyx_v_value) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_value, __pyx_ptype_5_cdec_TRule))))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 194; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_value) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_value, __pyx_ptype_5_cdec_TRule))))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 199; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_INCREF(__pyx_v_value); __Pyx_GIVEREF(__pyx_v_value); __Pyx_GOTREF(__pyx_v_self->trule); @@ -12196,7 +12341,7 @@ static int __pyx_pf_5_cdec_14HypergraphEdge_5trule_4__del__(struct __pyx_obj_5_c return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":241 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":250 * cdef hypergraph.HypergraphNode* node * * cdef init(self, hypergraph.Hypergraph* hg, unsigned i): # <<<<<<<<<<<<<< @@ -12209,7 +12354,7 @@ static PyObject *__pyx_f_5_cdec_14HypergraphNode_init(struct __pyx_obj_5_cdec_Hy __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("init", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":242 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":251 * * cdef init(self, hypergraph.Hypergraph* hg, unsigned i): * self.hg = hg # <<<<<<<<<<<<<< @@ -12218,7 +12363,7 @@ static PyObject *__pyx_f_5_cdec_14HypergraphNode_init(struct __pyx_obj_5_cdec_Hy */ __pyx_v_self->hg = __pyx_v_hg; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":243 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":252 * cdef init(self, hypergraph.Hypergraph* hg, unsigned i): * self.hg = hg * self.node = &hg.nodes_[i] # <<<<<<<<<<<<<< @@ -12227,7 +12372,7 @@ static PyObject *__pyx_f_5_cdec_14HypergraphNode_init(struct __pyx_obj_5_cdec_Hy */ __pyx_v_self->node = (&(__pyx_v_hg->nodes_[__pyx_v_i])); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":244 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":253 * self.hg = hg * self.node = &hg.nodes_[i] * return self # <<<<<<<<<<<<<< @@ -12258,7 +12403,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphNode_8in_edges_1__get__(PyObject *_ return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":247 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":256 * * property in_edges: * def __get__(self): # <<<<<<<<<<<<<< @@ -12284,7 +12429,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode_8in_edges___get__(struct __pyx __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); { - __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_14HypergraphNode_8in_edges_2generator12, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 247; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_14HypergraphNode_8in_edges_2generator12, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 256; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_cur_scope); __Pyx_RefNannyFinishContext(); return (PyObject *) gen; @@ -12320,9 +12465,9 @@ static PyObject *__pyx_gb_5_cdec_14HypergraphNode_8in_edges_2generator12(__pyx_G return NULL; } __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 247; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 256; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":249 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":258 * def __get__(self): * cdef unsigned i * for i in range(self.node.in_edges_.size()): # <<<<<<<<<<<<<< @@ -12333,16 +12478,16 @@ static PyObject *__pyx_gb_5_cdec_14HypergraphNode_8in_edges_2generator12(__pyx_G for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":250 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":259 * cdef unsigned i * for i in range(self.node.in_edges_.size()): * yield HypergraphEdge().init(self.hg, self.node.in_edges_[i]) # <<<<<<<<<<<<<< * * property out_edges: */ - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_HypergraphEdge)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_HypergraphEdge)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 259; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = ((struct __pyx_vtabstruct_5_cdec_HypergraphEdge *)((struct __pyx_obj_5_cdec_HypergraphEdge *)__pyx_t_3)->__pyx_vtab)->init(((struct __pyx_obj_5_cdec_HypergraphEdge *)__pyx_t_3), __pyx_cur_scope->__pyx_v_self->hg, (__pyx_cur_scope->__pyx_v_self->node->in_edges_[__pyx_cur_scope->__pyx_v_i])); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = ((struct __pyx_vtabstruct_5_cdec_HypergraphEdge *)((struct __pyx_obj_5_cdec_HypergraphEdge *)__pyx_t_3)->__pyx_vtab)->init(((struct __pyx_obj_5_cdec_HypergraphEdge *)__pyx_t_3), __pyx_cur_scope->__pyx_v_self->hg, (__pyx_cur_scope->__pyx_v_self->node->in_edges_[__pyx_cur_scope->__pyx_v_i])); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 259; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_r = __pyx_t_4; @@ -12357,7 +12502,7 @@ static PyObject *__pyx_gb_5_cdec_14HypergraphNode_8in_edges_2generator12(__pyx_G __pyx_L6_resume_from_yield:; __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 259; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } PyErr_SetNone(PyExc_StopIteration); goto __pyx_L0; @@ -12385,7 +12530,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphNode_9out_edges_1__get__(PyObject * return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":253 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":262 * * property out_edges: * def __get__(self): # <<<<<<<<<<<<<< @@ -12411,7 +12556,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode_9out_edges___get__(struct __py __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); { - __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_14HypergraphNode_9out_edges_2generator13, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_5_cdec_14HypergraphNode_9out_edges_2generator13, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_cur_scope); __Pyx_RefNannyFinishContext(); return (PyObject *) gen; @@ -12447,9 +12592,9 @@ static PyObject *__pyx_gb_5_cdec_14HypergraphNode_9out_edges_2generator13(__pyx_ return NULL; } __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":255 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":264 * def __get__(self): * cdef unsigned i * for i in range(self.node.out_edges_.size()): # <<<<<<<<<<<<<< @@ -12460,16 +12605,16 @@ static PyObject *__pyx_gb_5_cdec_14HypergraphNode_9out_edges_2generator13(__pyx_ for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":256 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":265 * cdef unsigned i * for i in range(self.node.out_edges_.size()): * yield HypergraphEdge().init(self.hg, self.node.out_edges_[i]) # <<<<<<<<<<<<<< * * property span: */ - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_HypergraphEdge)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 256; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5_cdec_HypergraphEdge)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 265; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = ((struct __pyx_vtabstruct_5_cdec_HypergraphEdge *)((struct __pyx_obj_5_cdec_HypergraphEdge *)__pyx_t_3)->__pyx_vtab)->init(((struct __pyx_obj_5_cdec_HypergraphEdge *)__pyx_t_3), __pyx_cur_scope->__pyx_v_self->hg, (__pyx_cur_scope->__pyx_v_self->node->out_edges_[__pyx_cur_scope->__pyx_v_i])); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 256; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = ((struct __pyx_vtabstruct_5_cdec_HypergraphEdge *)((struct __pyx_obj_5_cdec_HypergraphEdge *)__pyx_t_3)->__pyx_vtab)->init(((struct __pyx_obj_5_cdec_HypergraphEdge *)__pyx_t_3), __pyx_cur_scope->__pyx_v_self->hg, (__pyx_cur_scope->__pyx_v_self->node->out_edges_[__pyx_cur_scope->__pyx_v_i])); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 265; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_r = __pyx_t_4; @@ -12484,7 +12629,7 @@ static PyObject *__pyx_gb_5_cdec_14HypergraphNode_9out_edges_2generator13(__pyx_ __pyx_L6_resume_from_yield:; __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 256; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 265; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } PyErr_SetNone(PyExc_StopIteration); goto __pyx_L0; @@ -12511,7 +12656,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphNode_4span_1__get__(PyObject *__pyx return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":259 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":268 * * property span: * def __get__(self): # <<<<<<<<<<<<<< @@ -12529,7 +12674,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode_4span___get__(struct __pyx_obj int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":260 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":269 * property span: * def __get__(self): * return next(self.in_edges).span # <<<<<<<<<<<<<< @@ -12537,12 +12682,12 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode_4span___get__(struct __pyx_obj * property cat: */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__in_edges); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 260; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__in_edges); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyIter_Next(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 260; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyIter_Next(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__span); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 260; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__span); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_r = __pyx_t_1; @@ -12573,7 +12718,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphNode_3cat_1__get__(PyObject *__pyx_ return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":263 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":272 * * property cat: * def __get__(self): # <<<<<<<<<<<<<< @@ -12591,7 +12736,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode_3cat___get__(struct __pyx_obj_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":264 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":273 * property cat: * def __get__(self): * if self.node.cat_: # <<<<<<<<<<<<<< @@ -12600,7 +12745,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode_3cat___get__(struct __pyx_obj_ */ if (__pyx_v_self->node->cat_) { - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":265 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":274 * def __get__(self): * if self.node.cat_: * return str(TDConvert(-self.node.cat_).c_str()) # <<<<<<<<<<<<<< @@ -12608,14 +12753,14 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode_3cat___get__(struct __pyx_obj_ * def __richcmp__(HypergraphNode x, HypergraphNode y, int op): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyBytes_FromString(TD::Convert((-__pyx_v_self->node->cat_)).c_str()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 265; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyBytes_FromString(TD::Convert((-__pyx_v_self->node->cat_)).c_str()); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 274; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 265; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 274; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_t_1)); __Pyx_GIVEREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 265; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 274; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; __pyx_r = __pyx_t_1; @@ -12644,8 +12789,8 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphNode_1__richcmp__(PyObject *__pyx_v PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__richcmp__ (wrapper)", 0); - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_x), __pyx_ptype_5_cdec_HypergraphNode, 1, "x", 0))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_y), __pyx_ptype_5_cdec_HypergraphNode, 1, "y", 0))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_x), __pyx_ptype_5_cdec_HypergraphNode, 1, "x", 0))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_y), __pyx_ptype_5_cdec_HypergraphNode, 1, "y", 0))) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = __pyx_pf_5_cdec_14HypergraphNode___richcmp__(((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_v_x), ((struct __pyx_obj_5_cdec_HypergraphNode *)__pyx_v_y), ((int)__pyx_v_op)); goto __pyx_L0; __pyx_L1_error:; @@ -12655,7 +12800,7 @@ static PyObject *__pyx_pw_5_cdec_14HypergraphNode_1__richcmp__(PyObject *__pyx_v return __pyx_r; } -/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":267 +/* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":276 * return str(TDConvert(-self.node.cat_).c_str()) * * def __richcmp__(HypergraphNode x, HypergraphNode y, int op): # <<<<<<<<<<<<<< @@ -12673,7 +12818,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode___richcmp__(struct __pyx_obj_5 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__richcmp__", 0); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":270 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":279 * if op == 2: # == * return x.node == y.node * elif op == 3: # != # <<<<<<<<<<<<<< @@ -12682,7 +12827,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode___richcmp__(struct __pyx_obj_5 */ switch (__pyx_v_op) { - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":268 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":277 * * def __richcmp__(HypergraphNode x, HypergraphNode y, int op): * if op == 2: # == # <<<<<<<<<<<<<< @@ -12691,7 +12836,7 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode___richcmp__(struct __pyx_obj_5 */ case 2: - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":269 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":278 * def __richcmp__(HypergraphNode x, HypergraphNode y, int op): * if op == 2: # == * return x.node == y.node # <<<<<<<<<<<<<< @@ -12699,14 +12844,14 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode___richcmp__(struct __pyx_obj_5 * return not (x == y) */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyBool_FromLong((__pyx_v_x->node == __pyx_v_y->node)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyBool_FromLong((__pyx_v_x->node == __pyx_v_y->node)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; break; - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":270 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":279 * if op == 2: # == * return x.node == y.node * elif op == 3: # != # <<<<<<<<<<<<<< @@ -12715,17 +12860,17 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode___richcmp__(struct __pyx_obj_5 */ case 3: - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":271 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":280 * return x.node == y.node * elif op == 3: # != * return not (x == y) # <<<<<<<<<<<<<< * raise NotImplemented('comparison not implemented for HypergraphNode') */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyObject_RichCompare(((PyObject *)__pyx_v_x), ((PyObject *)__pyx_v_y), Py_EQ); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 271; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 271; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_RichCompare(((PyObject *)__pyx_v_x), ((PyObject *)__pyx_v_y), Py_EQ); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 280; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 280; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyBool_FromLong((!__pyx_t_2)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 271; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyBool_FromLong((!__pyx_t_2)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 280; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; @@ -12733,16 +12878,16 @@ static PyObject *__pyx_pf_5_cdec_14HypergraphNode___richcmp__(struct __pyx_obj_5 break; } - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":272 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":281 * elif op == 3: # != * return not (x == y) * raise NotImplemented('comparison not implemented for HypergraphNode') # <<<<<<<<<<<<<< */ - __pyx_t_1 = PyObject_Call(__pyx_builtin_NotImplemented, ((PyObject *)__pyx_k_tuple_21), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(__pyx_builtin_NotImplemented, ((PyObject *)__pyx_k_tuple_21), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_Raise(__pyx_t_1, 0, 0, 0); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[3]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[3]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = Py_None; __Pyx_INCREF(Py_None); goto __pyx_L0; @@ -21157,18 +21302,19 @@ static PyMethodDef __pyx_methods_5_cdec_Hypergraph[] = { {__Pyx_NAMESTR("viterbi"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_3viterbi, METH_NOARGS, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_2viterbi)}, {__Pyx_NAMESTR("viterbi_trees"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_5viterbi_trees, METH_NOARGS, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_4viterbi_trees)}, {__Pyx_NAMESTR("viterbi_features"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_7viterbi_features, METH_NOARGS, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_6viterbi_features)}, - {__Pyx_NAMESTR("viterbi_joshua"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_9viterbi_joshua, METH_NOARGS, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_8viterbi_joshua)}, - {__Pyx_NAMESTR("kbest"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_11kbest, METH_O, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_10kbest)}, - {__Pyx_NAMESTR("kbest_trees"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_14kbest_trees, METH_O, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_13kbest_trees)}, - {__Pyx_NAMESTR("kbest_features"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_17kbest_features, METH_O, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_16kbest_features)}, - {__Pyx_NAMESTR("sample"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_20sample, METH_O, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_19sample)}, - {__Pyx_NAMESTR("sample_trees"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_23sample_trees, METH_O, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_22sample_trees)}, - {__Pyx_NAMESTR("intersect"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_26intersect, METH_O, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_25intersect)}, - {__Pyx_NAMESTR("prune"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_28prune, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_27prune)}, - {__Pyx_NAMESTR("lattice"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_30lattice, METH_NOARGS, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_29lattice)}, - {__Pyx_NAMESTR("plf"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_32plf, METH_NOARGS, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_31plf)}, - {__Pyx_NAMESTR("reweight"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_34reweight, METH_O, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_33reweight)}, - {__Pyx_NAMESTR("inside_outside"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_36inside_outside, METH_NOARGS, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_35inside_outside)}, + {__Pyx_NAMESTR("viterbi_forest"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_9viterbi_forest, METH_NOARGS, __Pyx_DOCSTR(0)}, + {__Pyx_NAMESTR("viterbi_joshua"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_11viterbi_joshua, METH_NOARGS, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_10viterbi_joshua)}, + {__Pyx_NAMESTR("kbest"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_13kbest, METH_O, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_12kbest)}, + {__Pyx_NAMESTR("kbest_trees"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_16kbest_trees, METH_O, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_15kbest_trees)}, + {__Pyx_NAMESTR("kbest_features"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_19kbest_features, METH_O, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_18kbest_features)}, + {__Pyx_NAMESTR("sample"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_22sample, METH_O, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_21sample)}, + {__Pyx_NAMESTR("sample_trees"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_25sample_trees, METH_O, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_24sample_trees)}, + {__Pyx_NAMESTR("intersect"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_28intersect, METH_O, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_27intersect)}, + {__Pyx_NAMESTR("prune"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_30prune, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_29prune)}, + {__Pyx_NAMESTR("lattice"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_32lattice, METH_NOARGS, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_31lattice)}, + {__Pyx_NAMESTR("plf"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_34plf, METH_NOARGS, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_33plf)}, + {__Pyx_NAMESTR("reweight"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_36reweight, METH_O, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_35reweight)}, + {__Pyx_NAMESTR("inside_outside"), (PyCFunction)__pyx_pw_5_cdec_10Hypergraph_38inside_outside, METH_NOARGS, __Pyx_DOCSTR(__pyx_doc_5_cdec_10Hypergraph_37inside_outside)}, {0, 0, 0, 0} }; @@ -21381,6 +21527,10 @@ static PyObject *__pyx_getprop_5_cdec_14HypergraphEdge_span(PyObject *o, CYTHON_ return __pyx_pw_5_cdec_14HypergraphEdge_4span_1__get__(o); } +static PyObject *__pyx_getprop_5_cdec_14HypergraphEdge_src_span(PyObject *o, CYTHON_UNUSED void *x) { + return __pyx_pw_5_cdec_14HypergraphEdge_8src_span_1__get__(o); +} + static PyObject *__pyx_getprop_5_cdec_14HypergraphEdge_feature_values(PyObject *o, CYTHON_UNUSED void *x) { return __pyx_pw_5_cdec_14HypergraphEdge_14feature_values_1__get__(o); } @@ -21410,6 +21560,7 @@ static struct PyGetSetDef __pyx_getsets_5_cdec_HypergraphEdge[] = { {(char *)"head_node", __pyx_getprop_5_cdec_14HypergraphEdge_head_node, 0, 0, 0}, {(char *)"tail_nodes", __pyx_getprop_5_cdec_14HypergraphEdge_tail_nodes, 0, 0, 0}, {(char *)"span", __pyx_getprop_5_cdec_14HypergraphEdge_span, 0, 0, 0}, + {(char *)"src_span", __pyx_getprop_5_cdec_14HypergraphEdge_src_span, 0, 0, 0}, {(char *)"feature_values", __pyx_getprop_5_cdec_14HypergraphEdge_feature_values, 0, 0, 0}, {(char *)"prob", __pyx_getprop_5_cdec_14HypergraphEdge_prob, 0, 0, 0}, {(char *)"trule", __pyx_getprop_5_cdec_14HypergraphEdge_trule, __pyx_setprop_5_cdec_14HypergraphEdge_trule, 0, 0}, @@ -28670,8 +28821,8 @@ static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_NotImplemented = __Pyx_GetName(__pyx_b, __pyx_n_s__NotImplemented); if (!__pyx_builtin_NotImplemented) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_super = __Pyx_GetName(__pyx_b, __pyx_n_s__super); if (!__pyx_builtin_super) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_ValueError = __Pyx_GetName(__pyx_b, __pyx_n_s__ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 226; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_basestring = __Pyx_GetName(__pyx_b, __pyx_n_s__basestring); if (!__pyx_builtin_basestring) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 119; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_eval = __Pyx_GetName(__pyx_b, __pyx_n_s__eval); if (!__pyx_builtin_eval) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_basestring = __Pyx_GetName(__pyx_b, __pyx_n_s__basestring); if (!__pyx_builtin_basestring) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 124; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_eval = __Pyx_GetName(__pyx_b, __pyx_n_s__eval); if (!__pyx_builtin_eval) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 145; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_enumerate = __Pyx_GetName(__pyx_b, __pyx_n_s__enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_IndexError = __Pyx_GetName(__pyx_b, __pyx_n_s__IndexError); if (!__pyx_builtin_IndexError) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_open = __Pyx_GetName(__pyx_b, __pyx_n_s__open); if (!__pyx_builtin_open) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -28739,26 +28890,26 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_13)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_14)); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":235 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":244 * elif op == 3: # != * return not (x == y) * raise NotImplemented('comparison not implemented for HypergraphEdge') # <<<<<<<<<<<<<< * * cdef class HypergraphNode: */ - __pyx_k_tuple_19 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_19)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 235; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_k_tuple_19 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_19)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_k_tuple_19); __Pyx_INCREF(((PyObject *)__pyx_kp_s_18)); PyTuple_SET_ITEM(__pyx_k_tuple_19, 0, ((PyObject *)__pyx_kp_s_18)); __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_18)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_19)); - /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":272 + /* "/home/vchahune/tools/cdec/python/src/hypergraph.pxi":281 * elif op == 3: # != * return not (x == y) * raise NotImplemented('comparison not implemented for HypergraphNode') # <<<<<<<<<<<<<< */ - __pyx_k_tuple_21 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_21)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_k_tuple_21 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_21)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_k_tuple_21); __Pyx_INCREF(((PyObject *)__pyx_kp_s_20)); PyTuple_SET_ITEM(__pyx_k_tuple_21, 0, ((PyObject *)__pyx_kp_s_20)); @@ -29215,15 +29366,15 @@ PyMODINIT_FUNC PyInit__cdec(void) __pyx_ptype_5_cdec_Hypergraph = &__pyx_type_5_cdec_Hypergraph; __pyx_vtabptr_5_cdec_HypergraphEdge = &__pyx_vtable_5_cdec_HypergraphEdge; __pyx_vtable_5_cdec_HypergraphEdge.init = (PyObject *(*)(struct __pyx_obj_5_cdec_HypergraphEdge *, Hypergraph *, unsigned int))__pyx_f_5_cdec_14HypergraphEdge_init; - if (PyType_Ready(&__pyx_type_5_cdec_HypergraphEdge) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__Pyx_SetVtable(__pyx_type_5_cdec_HypergraphEdge.tp_dict, __pyx_vtabptr_5_cdec_HypergraphEdge) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__Pyx_SetAttrString(__pyx_m, "HypergraphEdge", (PyObject *)&__pyx_type_5_cdec_HypergraphEdge) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec_HypergraphEdge) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 196; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_SetVtable(__pyx_type_5_cdec_HypergraphEdge.tp_dict, __pyx_vtabptr_5_cdec_HypergraphEdge) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 196; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_SetAttrString(__pyx_m, "HypergraphEdge", (PyObject *)&__pyx_type_5_cdec_HypergraphEdge) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 196; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec_HypergraphEdge = &__pyx_type_5_cdec_HypergraphEdge; __pyx_vtabptr_5_cdec_HypergraphNode = &__pyx_vtable_5_cdec_HypergraphNode; __pyx_vtable_5_cdec_HypergraphNode.init = (PyObject *(*)(struct __pyx_obj_5_cdec_HypergraphNode *, Hypergraph *, unsigned int))__pyx_f_5_cdec_14HypergraphNode_init; - if (PyType_Ready(&__pyx_type_5_cdec_HypergraphNode) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 237; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__Pyx_SetVtable(__pyx_type_5_cdec_HypergraphNode.tp_dict, __pyx_vtabptr_5_cdec_HypergraphNode) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 237; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (__Pyx_SetAttrString(__pyx_m, "HypergraphNode", (PyObject *)&__pyx_type_5_cdec_HypergraphNode) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 237; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec_HypergraphNode) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 246; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_SetVtable(__pyx_type_5_cdec_HypergraphNode.tp_dict, __pyx_vtabptr_5_cdec_HypergraphNode) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 246; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_SetAttrString(__pyx_m, "HypergraphNode", (PyObject *)&__pyx_type_5_cdec_HypergraphNode) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 246; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec_HypergraphNode = &__pyx_type_5_cdec_HypergraphNode; if (PyType_Ready(&__pyx_type_5_cdec_Lattice) < 0) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #if CYTHON_COMPILING_IN_CPYTHON @@ -29285,25 +29436,25 @@ PyMODINIT_FUNC PyInit__cdec(void) __pyx_ptype_5_cdec___pyx_scope_struct_6_genexpr = &__pyx_type_5_cdec___pyx_scope_struct_6_genexpr; if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_7___iter__) < 0) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 199; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_7___iter__ = &__pyx_type_5_cdec___pyx_scope_struct_7___iter__; - if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_8_kbest) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_8_kbest) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_8_kbest = &__pyx_type_5_cdec___pyx_scope_struct_8_kbest; - if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_9_kbest_trees) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_9_kbest_trees) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_9_kbest_trees = &__pyx_type_5_cdec___pyx_scope_struct_9_kbest_trees; - if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_10_kbest_features) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_10_kbest_features) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_10_kbest_features = &__pyx_type_5_cdec___pyx_scope_struct_10_kbest_features; - if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_11_sample) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_11_sample) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_11_sample = &__pyx_type_5_cdec___pyx_scope_struct_11_sample; - if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_12_sample_trees) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_12_sample_trees) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_12_sample_trees = &__pyx_type_5_cdec___pyx_scope_struct_12_sample_trees; - if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_13___get__) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 156; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_13___get__) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 161; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_13___get__ = &__pyx_type_5_cdec___pyx_scope_struct_13___get__; - if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_14___get__) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 162; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_14___get__) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 167; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_14___get__ = &__pyx_type_5_cdec___pyx_scope_struct_14___get__; - if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_15___get__) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 211; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_15___get__) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 216; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_15___get__ = &__pyx_type_5_cdec___pyx_scope_struct_15___get__; - if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_16___get__) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 247; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_16___get__) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 256; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_16___get__ = &__pyx_type_5_cdec___pyx_scope_struct_16___get__; - if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_17___get__) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_17___get__) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_17___get__ = &__pyx_type_5_cdec___pyx_scope_struct_17___get__; if (PyType_Ready(&__pyx_type_5_cdec___pyx_scope_struct_18___iter__) < 0) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_5_cdec___pyx_scope_struct_18___iter__ = &__pyx_type_5_cdec___pyx_scope_struct_18___iter__; diff --git a/python/src/hypergraph.pxd b/python/src/hypergraph.pxd index dd3d39cc..1e150bbc 100644 --- a/python/src/hypergraph.pxd +++ b/python/src/hypergraph.pxd @@ -19,6 +19,8 @@ cdef extern from "decoder/hg.h": # typically source span: short int i_ short int j_ + short int prev_i_ + short int prev_j_ ctypedef HypergraphEdge const_HypergraphEdge "const Hypergraph::Edge" @@ -45,6 +47,7 @@ cdef extern from "decoder/hg.h": bint use_sum_prod_semiring, double scale, bint safe_inside) nogil + shared_ptr[Hypergraph] CreateViterbiHypergraph(EdgeMask* preserve_mask) nogil cdef extern from "decoder/viterbi.h": prob_t ViterbiESentence(Hypergraph& hg, vector[WordID]* trans) nogil diff --git a/python/src/hypergraph.pxi b/python/src/hypergraph.pxi index 5b675531..4a709d32 100644 --- a/python/src/hypergraph.pxi +++ b/python/src/hypergraph.pxi @@ -37,6 +37,11 @@ cdef class Hypergraph: fmap.vector = new FastSparseVector[weight_t](hypergraph.ViterbiFeatures(self.hg[0])) return fmap + def viterbi_forest(self): + cdef Hypergraph hg = Hypergraph() + hg.hg = new hypergraph.Hypergraph(self.hg[0].CreateViterbiHypergraph(NULL).get()[0]) + return hg + def viterbi_joshua(self): """hg.viterbi_joshua() -> Joshua representation of the best derivation.""" return unicode(hypergraph.JoshuaVisualizationString(self.hg[0]).c_str(), 'utf8') @@ -217,6 +222,10 @@ cdef class HypergraphEdge: def __get__(self): return (self.edge.i_, self.edge.j_) + property src_span: + def __get__(self): + return (self.edge.prev_i_, self.edge.prev_j_) + property feature_values: def __get__(self): cdef SparseVector vector = SparseVector.__new__(SparseVector) -- cgit v1.2.3 From 128563db1ed56e2b94a28d0a6670cc7faf6dce7b Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Mon, 19 Nov 2012 11:38:45 +0100 Subject: merge upstream/master --- training/dtrain/parallelize/test/cdec.ini | 22 ---------------------- training/dtrain/parallelize/test/dtrain.ini | 15 --------------- training/dtrain/parallelize/test/in | 10 ---------- training/dtrain/parallelize/test/refs | 10 ---------- training/dtrain/test/parallelize/test/cdec.ini | 22 ++++++++++++++++++++++ training/dtrain/test/parallelize/test/dtrain.ini | 15 +++++++++++++++ training/dtrain/test/parallelize/test/in | 10 ++++++++++ training/dtrain/test/parallelize/test/refs | 10 ++++++++++ 8 files changed, 57 insertions(+), 57 deletions(-) delete mode 100644 training/dtrain/parallelize/test/cdec.ini delete mode 100644 training/dtrain/parallelize/test/dtrain.ini delete mode 100644 training/dtrain/parallelize/test/in delete mode 100644 training/dtrain/parallelize/test/refs create mode 100644 training/dtrain/test/parallelize/test/cdec.ini create mode 100644 training/dtrain/test/parallelize/test/dtrain.ini create mode 100644 training/dtrain/test/parallelize/test/in create mode 100644 training/dtrain/test/parallelize/test/refs diff --git a/training/dtrain/parallelize/test/cdec.ini b/training/dtrain/parallelize/test/cdec.ini deleted file mode 100644 index 72e99dc5..00000000 --- a/training/dtrain/parallelize/test/cdec.ini +++ /dev/null @@ -1,22 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -intersection_strategy=cube_pruning -cubepruning_pop_limit=200 -scfg_max_span_limit=15 -feature_function=WordPenalty -feature_function=KLanguageModel /stor/dat/wmt12/en/news_only/m/wmt12.news.en.3.kenv5 -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -#feature_function=RuleIdentityFeatures -#feature_function=RuleNgramFeatures -#feature_function=RuleShape -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures diff --git a/training/dtrain/parallelize/test/dtrain.ini b/training/dtrain/parallelize/test/dtrain.ini deleted file mode 100644 index 03f9d240..00000000 --- a/training/dtrain/parallelize/test/dtrain.ini +++ /dev/null @@ -1,15 +0,0 @@ -k=100 -N=4 -learning_rate=0.0001 -gamma=0 -loss_margin=0 -epochs=1 -scorer=stupid_bleu -sample_from=kbest -filter=uniq -pair_sampling=XYX -hi_lo=0.1 -select_weights=last -print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough -tmp=/tmp -decoder_config=cdec.ini diff --git a/training/dtrain/parallelize/test/in b/training/dtrain/parallelize/test/in deleted file mode 100644 index a312809f..00000000 --- a/training/dtrain/parallelize/test/in +++ /dev/null @@ -1,10 +0,0 @@ -barack obama erhält als vierter us @-@ präsident den frieden nobelpreis -der amerikanische präsident barack obama kommt für 26 stunden nach oslo , norwegen , um hier als vierter us @-@ präsident in der geschichte den frieden nobelpreis entgegen zunehmen . -darüber hinaus erhält er das diplom sowie die medaille und einen scheck über 1,4 mio. dollar für seine außer gewöhnlichen bestrebungen um die intensivierung der welt diplomatie und zusammen arbeit unter den völkern . -der chef des weißen hauses kommt morgen zusammen mit seiner frau michelle in der nordwegischen metropole an und wird die ganze zeit beschäftigt sein . -zunächst stattet er dem nobel @-@ institut einen besuch ab , wo er überhaupt zum ersten mal mit den fünf ausschuss mitglieder zusammen trifft , die ihn im oktober aus 172 leuten und 33 organisationen gewählt haben . -das präsidenten paar hat danach ein treffen mit dem norwegischen könig harald v. und königin sonja eingeplant . -nachmittags erreicht dann der besuch seinen höhepunkt mit der zeremonie , bei der obama den prestige preis übernimmt . -diesen erhält er als der vierte us @-@ präsident , aber erst als der dritte , der den preis direkt im amt entgegen nimmt . -das weiße haus avisierte schon , dass obama bei der übernahme des preises über den afghanistan krieg sprechen wird . -der präsident will diesem thema nicht ausweichen , weil er weiß , dass er den preis als ein präsident übernimmt , der zur zeit krieg in zwei ländern führt . diff --git a/training/dtrain/parallelize/test/refs b/training/dtrain/parallelize/test/refs deleted file mode 100644 index 4d3128cb..00000000 --- a/training/dtrain/parallelize/test/refs +++ /dev/null @@ -1,10 +0,0 @@ -barack obama becomes the fourth american president to receive the nobel peace prize -the american president barack obama will fly into oslo , norway for 26 hours to receive the nobel peace prize , the fourth american president in history to do so . -he will receive a diploma , medal and cheque for 1.4 million dollars for his exceptional efforts to improve global diplomacy and encourage international cooperation , amongst other things . -the head of the white house will be flying into the norwegian city in the morning with his wife michelle and will have a busy schedule . -first , he will visit the nobel institute , where he will have his first meeting with the five committee members who selected him from 172 people and 33 organisations . -the presidential couple then has a meeting scheduled with king harald v and queen sonja of norway . -then , in the afternoon , the visit will culminate in a grand ceremony , at which obama will receive the prestigious award . -he will be the fourth american president to be awarded the prize , and only the third to have received it while actually in office . -the white house has stated that , when he accepts the prize , obama will speak about the war in afghanistan . -the president does not want to skirt around this topic , as he realises that he is accepting the prize as a president whose country is currently at war in two countries . diff --git a/training/dtrain/test/parallelize/test/cdec.ini b/training/dtrain/test/parallelize/test/cdec.ini new file mode 100644 index 00000000..72e99dc5 --- /dev/null +++ b/training/dtrain/test/parallelize/test/cdec.ini @@ -0,0 +1,22 @@ +formalism=scfg +add_pass_through_rules=true +intersection_strategy=cube_pruning +cubepruning_pop_limit=200 +scfg_max_span_limit=15 +feature_function=WordPenalty +feature_function=KLanguageModel /stor/dat/wmt12/en/news_only/m/wmt12.news.en.3.kenv5 +#feature_function=ArityPenalty +#feature_function=CMR2008ReorderingFeatures +#feature_function=Dwarf +#feature_function=InputIndicator +#feature_function=LexNullJump +#feature_function=NewJump +#feature_function=NgramFeatures +#feature_function=NonLatinCount +#feature_function=OutputIndicator +#feature_function=RuleIdentityFeatures +#feature_function=RuleNgramFeatures +#feature_function=RuleShape +#feature_function=SourceSpanSizeFeatures +#feature_function=SourceWordPenalty +#feature_function=SpanFeatures diff --git a/training/dtrain/test/parallelize/test/dtrain.ini b/training/dtrain/test/parallelize/test/dtrain.ini new file mode 100644 index 00000000..03f9d240 --- /dev/null +++ b/training/dtrain/test/parallelize/test/dtrain.ini @@ -0,0 +1,15 @@ +k=100 +N=4 +learning_rate=0.0001 +gamma=0 +loss_margin=0 +epochs=1 +scorer=stupid_bleu +sample_from=kbest +filter=uniq +pair_sampling=XYX +hi_lo=0.1 +select_weights=last +print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +tmp=/tmp +decoder_config=cdec.ini diff --git a/training/dtrain/test/parallelize/test/in b/training/dtrain/test/parallelize/test/in new file mode 100644 index 00000000..a312809f --- /dev/null +++ b/training/dtrain/test/parallelize/test/in @@ -0,0 +1,10 @@ +barack obama erhält als vierter us @-@ präsident den frieden nobelpreis +der amerikanische präsident barack obama kommt für 26 stunden nach oslo , norwegen , um hier als vierter us @-@ präsident in der geschichte den frieden nobelpreis entgegen zunehmen . +darüber hinaus erhält er das diplom sowie die medaille und einen scheck über 1,4 mio. dollar für seine außer gewöhnlichen bestrebungen um die intensivierung der welt diplomatie und zusammen arbeit unter den völkern . +der chef des weißen hauses kommt morgen zusammen mit seiner frau michelle in der nordwegischen metropole an und wird die ganze zeit beschäftigt sein . +zunächst stattet er dem nobel @-@ institut einen besuch ab , wo er überhaupt zum ersten mal mit den fünf ausschuss mitglieder zusammen trifft , die ihn im oktober aus 172 leuten und 33 organisationen gewählt haben . +das präsidenten paar hat danach ein treffen mit dem norwegischen könig harald v. und königin sonja eingeplant . +nachmittags erreicht dann der besuch seinen höhepunkt mit der zeremonie , bei der obama den prestige preis übernimmt . +diesen erhält er als der vierte us @-@ präsident , aber erst als der dritte , der den preis direkt im amt entgegen nimmt . +das weiße haus avisierte schon , dass obama bei der übernahme des preises über den afghanistan krieg sprechen wird . +der präsident will diesem thema nicht ausweichen , weil er weiß , dass er den preis als ein präsident übernimmt , der zur zeit krieg in zwei ländern führt . diff --git a/training/dtrain/test/parallelize/test/refs b/training/dtrain/test/parallelize/test/refs new file mode 100644 index 00000000..4d3128cb --- /dev/null +++ b/training/dtrain/test/parallelize/test/refs @@ -0,0 +1,10 @@ +barack obama becomes the fourth american president to receive the nobel peace prize +the american president barack obama will fly into oslo , norway for 26 hours to receive the nobel peace prize , the fourth american president in history to do so . +he will receive a diploma , medal and cheque for 1.4 million dollars for his exceptional efforts to improve global diplomacy and encourage international cooperation , amongst other things . +the head of the white house will be flying into the norwegian city in the morning with his wife michelle and will have a busy schedule . +first , he will visit the nobel institute , where he will have his first meeting with the five committee members who selected him from 172 people and 33 organisations . +the presidential couple then has a meeting scheduled with king harald v and queen sonja of norway . +then , in the afternoon , the visit will culminate in a grand ceremony , at which obama will receive the prestigious award . +he will be the fourth american president to be awarded the prize , and only the third to have received it while actually in office . +the white house has stated that , when he accepts the prize , obama will speak about the war in afghanistan . +the president does not want to skirt around this topic , as he realises that he is accepting the prize as a president whose country is currently at war in two countries . -- cgit v1.2.3 From 655aac48c50d6c1da4a7b2f15034ec9f9bbbc0d3 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Mon, 19 Nov 2012 11:38:58 +0100 Subject: merge upstream/master --- training/dtrain/parallelize.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index 1d277ff6..eb4148f5 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -7,9 +7,10 @@ if ARGV.size != 5 exit end -dtrain_bin = '/home/pks/bin/dtrain_local' +cdec_dir = '/path/to/cdec_dir' +dtrain_bin = "#{cdec_dir}/training/dtrain/dtrain_local" ruby = '/usr/bin/ruby' -lplp_rb = '/home/pks/mt/cdec-dtrain/dtrain/hstreaming/lplp.rb' +lplp_rb = "#{cdec_dir}/training/dtrain/hstreaming/lplp.rb" lplp_args = 'l2 select_k 100000' gzip = '/bin/gzip' -- cgit v1.2.3 From 6802ac200ef614b4935d597ed4cfc3857c1f6c06 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 20 Nov 2012 13:56:08 -0500 Subject: fixes for 2011 optimizer --- training/crf/mpi_online_optimize.cc | 12 +++++++++++- word-aligner/aligner.pl | 19 ++++++++++++++++++- word-aligner/makefiles/makefile.grammars | 2 +- word-aligner/makefiles/makefile.model.f-e | 14 ++++++++++++++ 4 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 word-aligner/makefiles/makefile.model.f-e diff --git a/training/crf/mpi_online_optimize.cc b/training/crf/mpi_online_optimize.cc index d6968848..9e1ae34c 100644 --- a/training/crf/mpi_online_optimize.cc +++ b/training/crf/mpi_online_optimize.cc @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -41,6 +42,7 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { ("training_agenda,a",po::value(), "Text file listing a series of configuration files and the number of iterations to train using each configuration successively") ("minibatch_size_per_proc,s", po::value()->default_value(5), "Number of training instances evaluated per processor in each minibatch") ("optimization_method,m", po::value()->default_value("sgd"), "Optimization method (sgd)") + ("max_walltime", po::value(), "Maximum walltime to run (in minutes)") ("random_seed,S", po::value(), "Random seed (if not specified, /dev/random will be used)") ("eta_0,e", po::value()->default_value(0.2), "Initial learning rate for SGD (eta_0)") ("L1,1","Use L1 regularization") @@ -304,6 +306,9 @@ int main(int argc, char** argv) { int write_weights_every_ith = 100; // TODO configure int titer = -1; + unsigned timeout = 0; + if (conf.count("max_walltime")) timeout = 60 * conf["max_walltime"].as(); + const time_t start_time = time(NULL); for (int ai = 0; ai < agenda.size(); ++ai) { const string& cur_config = agenda[ai].first; const unsigned max_iteration = agenda[ai].second; @@ -336,9 +341,14 @@ int main(int argc, char** argv) { ostringstream o; o << "weights.epoch_" << (ai+1) << '.' << iter << ".gz"; fname = o.str(); } + const time_t cur_time = time(NULL); + if (timeout) { + if ((cur_time - start_time) > timeout) converged = true; + } if (converged && ((ai+1)==agenda.size())) { fname = "weights.final.gz"; } ostringstream vv; - vv << "total iter=" << titer << " (of current config iter=" << iter << ") minibatch=" << size_per_proc << " sentences/proc x " << size << " procs. num_feats=" << x.size() << '/' << FD::NumFeats() << " passes_thru_data=" << (titer * size_per_proc / static_cast(corpus.size())) << " eta=" << lr->eta(titer); + double minutes = (cur_time - start_time) / 60.0; + vv << "total walltime=" << minutes << "min iter=" << titer << " (of current config iter=" << iter << ") minibatch=" << size_per_proc << " sentences/proc x " << size << " procs. num_feats=" << x.size() << '/' << FD::NumFeats() << " passes_thru_data=" << (titer * size_per_proc / static_cast(corpus.size())) << " eta=" << lr->eta(titer); const string svv = vv.str(); cerr << svv << endl; Weights::WriteToFile(fname, lambdas, true, &svv); diff --git a/word-aligner/aligner.pl b/word-aligner/aligner.pl index c5078645..cbccb94a 100755 --- a/word-aligner/aligner.pl +++ b/word-aligner/aligner.pl @@ -51,6 +51,8 @@ while() { chomp; my ($f, $e) = split / \|\|\| /; die "Bad format, excepted ||| separated line" unless defined $f && defined $e; + $f =~ s/\[/(/g; + $e =~ s/\]/)/g; print F "$f\n"; print E "$e\n"; } @@ -80,6 +82,11 @@ NCLASSES = $num_classes TARGETS = @targets PTRAIN = \$(TRAINING_DIR)/cluster-ptrain.pl --restart_if_necessary PTRAIN_PARAMS = --gaussian_prior --sigma_squared 1.0 --max_iteration 15 +#MPIJOBS = 4 +#MPIRUN = mpirun -np $(MPIJOBS) +MPIRUN= + +WALLTIME=90 export @@ -99,7 +106,15 @@ clean: EOT close TOPLEVEL; -print STDERR "Created alignment task. chdir to talign/ then type make.\n\n"; +print STDERR <$stage_dir/agenda.txt" or die "Can't write $stage_dir/agenda.txt: $!"; print AGENDA "cdec.ini $TRAINING_ITERATIONS\n"; close AGENDA; + `cp $SCRIPT_DIR/makefiles/makefile.model.$direction $stage_dir/Makefile`; + die unless $? == 0; } sub usage { diff --git a/word-aligner/makefiles/makefile.grammars b/word-aligner/makefiles/makefile.grammars index ce3e1638..8d3ea8cb 100644 --- a/word-aligner/makefiles/makefile.grammars +++ b/word-aligner/makefiles/makefile.grammars @@ -4,7 +4,7 @@ clean: $(RM) orthonorm-dict.* voc2class* corpus.class.* corpus.e-f corpus.f-e corpus.f-e.lex-grammar* *.model1 *voc corpus.e-f.lex-grammar* *stem* freq* wordpairs* SUPPORT_DIR = $(SCRIPT_DIR)/support -GZIP = /usr/bin/gzip +GZIP = gzip ZCAT = zcat EXTRACT_GRAMMAR = $(SUPPORT_DIR)/extract_grammar.pl EXTRACT_VOCAB = $(SUPPORT_DIR)/extract_vocab.pl diff --git a/word-aligner/makefiles/makefile.model.f-e b/word-aligner/makefiles/makefile.model.f-e new file mode 100644 index 00000000..404f5b30 --- /dev/null +++ b/word-aligner/makefiles/makefile.model.f-e @@ -0,0 +1,14 @@ +all: output.f-e.aligned + +clean: + $(RM) output.f-e.a weights.cur.gz + +CDEC = $(SCRIPT_DIR)/../decoder/cdec +OPTIMIZE = $(SCRIPT_DIR)/../training/crf/mpi_online_optimize + +weights.cur.gz: ../grammars/wordpairs.f-e.features.gz + $(MPIRUN) $(OPTIMIZE) -a agenda.txt -1 -C 1.0 -t ../grammars/corpus.f-e --max_walltime 90 + +output.f-e.aligned: weights.cur.gz + $(CDEC) -c cdec.ini -w $< --lextrans_align_only -i ../grammars/corpus.f-e -a > $@ + -- cgit v1.2.3 From 147238b755eeeb4623ce74aad79d62c378b54ea5 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 24 Nov 2012 20:22:45 -0500 Subject: victor's geometric series trick --- word-aligner/da.h | 75 ++++++++++++++++++++++++++++++++++++++++++++++ word-aligner/fast_align.cc | 64 ++++++++++++++++++--------------------- 2 files changed, 104 insertions(+), 35 deletions(-) create mode 100644 word-aligner/da.h diff --git a/word-aligner/da.h b/word-aligner/da.h new file mode 100644 index 00000000..add64a5b --- /dev/null +++ b/word-aligner/da.h @@ -0,0 +1,75 @@ +#ifndef _DA_H_ +#define _DA_H_ + +#include +#include + +// m = trg len +// n = src len +// i = trg index +// j = src index +struct DiagonalAlignment { + + static double UnnormalizedProb(const unsigned i, const unsigned j, const unsigned m, const unsigned n, const double alpha) { + assert(i > 0); + assert(n > 0); + assert(m >= i); + assert(n >= j); + return exp(feat(i, j, m, n) * alpha); + } + + static double ComputeZ(const unsigned i, const unsigned m, const unsigned n, const double alpha) { + assert(i > 0); + assert(n > 0); + assert(m >= i); + const double split = double(i) * n / m; + const unsigned floor = split; + unsigned ceil = floor + 1; + const double ratio = exp(-alpha / n); + const unsigned num_top = n - floor; + double ezt = 0; + double ezb = 0; + if (num_top) + ezt = UnnormalizedProb(i, ceil, m, n, alpha) * (1.0 - pow(ratio, num_top)) / (1.0 - ratio); + if (floor) + ezb = UnnormalizedProb(i, floor, m, n, alpha) * (1.0 - pow(ratio, floor)) / (1.0 - ratio); + return ezb + ezt; + } + + static double ComputeDLogZ(const unsigned i, const unsigned m, const unsigned n, const double alpha) { + const double z = ComputeZ(i, n, m, alpha); + const double split = double(i) * n / m; + const unsigned floor = split; + const unsigned ceil = floor + 1; + const double ratio = exp(-alpha / n); + const double d = -1.0 / n; + const unsigned num_top = n - floor; + double pct = 0; + double pcb = 0; + if (num_top) { + pct = arithmetico_geometric_series(feat(i, ceil, m, n), UnnormalizedProb(i, ceil, m, n, alpha), ratio, d, num_top); + //cerr << "PCT = " << pct << endl; + } + if (floor) { + pcb = arithmetico_geometric_series(feat(i, floor, m, n), UnnormalizedProb(i, floor, m, n, alpha), ratio, d, floor); + //cerr << "PCB = " << pcb << endl; + } + return (pct + pcb) / z; + } + + private: + inline static double feat(const unsigned i, const unsigned j, const unsigned m, const unsigned n) { + return -fabs(double(j) / n - double(i) / m); + } + + inline static double arithmetico_geometric_series(const double a_1, const double g_1, const double r, const double d, const unsigned n) { + const double g_np1 = g_1 * pow(r, n); + const double a_n = d * (n - 1) + a_1; + const double x_1 = a_1 * g_1; + const double g_2 = g_1 * r; + const double rm1 = r - 1; + return (a_n * g_np1 - x_1) / rm1 - d*(g_np1 - g_2) / (rm1 * rm1); + } +}; + +#endif diff --git a/word-aligner/fast_align.cc b/word-aligner/fast_align.cc index 7492d26f..14f7cac8 100644 --- a/word-aligner/fast_align.cc +++ b/word-aligner/fast_align.cc @@ -10,6 +10,7 @@ #include "filelib.h" #include "ttables.h" #include "tdict.h" +#include "da.h" namespace po = boost::program_options; using namespace std; @@ -68,11 +69,11 @@ int main(int argc, char** argv) { const bool variational_bayes = (conf.count("variational_bayes") > 0); const bool write_alignments = (conf.count("output_parameters") == 0); const double diagonal_tension = conf["diagonal_tension"].as(); - const double prob_align_null = conf["prob_align_null"].as(); const bool hide_training_alignments = (conf.count("hide_training_alignments") > 0); string testset; if (conf.count("testset")) testset = conf["testset"].as(); - const double prob_align_not_null = 1.0 - prob_align_null; + double prob_align_null = conf["prob_align_null"].as(); + double prob_align_not_null = 1.0 - prob_align_null; const double alpha = conf["alpha"].as(); const bool favor_diagonal = conf.count("favor_diagonal"); if (variational_bayes && alpha <= 0.0) { @@ -84,7 +85,6 @@ int main(int argc, char** argv) { TTable::Word2Word2Double s2t_viterbi; double tot_len_ratio = 0; double mean_srclen_multiplier = 0; - vector unnormed_a_i; for (int iter = 0; iter < ITERATIONS; ++iter) { const bool final_iteration = (iter == (ITERATIONS - 1)); cerr << "ITERATION " << (iter + 1) << (final_iteration ? " (FINAL)" : "") << endl; @@ -97,6 +97,8 @@ int main(int argc, char** argv) { string line; string ssrc, strg; vector src, trg; + double c0 = 0; + double toks = 0; while(true) { getline(in, line); if (!in) break; @@ -110,17 +112,15 @@ int main(int argc, char** argv) { cerr << "Error: " << lc << "\n" << line << endl; return 1; } - if (src.size() > unnormed_a_i.size()) - unnormed_a_i.resize(src.size()); if (iter == 0) tot_len_ratio += static_cast(trg.size()) / static_cast(src.size()); denom += trg.size(); vector probs(src.size() + 1); bool first_al = true; // used for write_alignments - for (int j = 0; j < trg.size(); ++j) { + toks += trg.size(); + for (unsigned j = 0; j < trg.size(); ++j) { const WordID& f_j = trg[j]; double sum = 0; - const double j_over_ts = double(j) / trg.size(); double prob_a_i = 1.0 / (src.size() + use_null); // uniform (model 1) if (use_null) { if (favor_diagonal) prob_a_i = prob_align_null; @@ -128,16 +128,11 @@ int main(int argc, char** argv) { sum += probs[0]; } double az = 0; - if (favor_diagonal) { - for (int ta = 0; ta < src.size(); ++ta) { - unnormed_a_i[ta] = exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension); - az += unnormed_a_i[ta]; - } - az /= prob_align_not_null; - } - for (int i = 1; i <= src.size(); ++i) { + if (favor_diagonal) + az = DiagonalAlignment::ComputeZ(j+1, trg.size(), src.size(), diagonal_tension) / prob_align_not_null; + for (unsigned i = 1; i <= src.size(); ++i) { if (favor_diagonal) - prob_a_i = unnormed_a_i[i-1] / az; + prob_a_i = DiagonalAlignment::UnnormalizedProb(j + 1, i, trg.size(), src.size(), diagonal_tension) / az; probs[i] = s2t.prob(src[i-1], f_j) * prob_a_i; sum += probs[i]; } @@ -151,7 +146,7 @@ int main(int argc, char** argv) { max_index = 0; max_p = probs[0]; } - for (int i = 1; i <= src.size(); ++i) { + for (unsigned i = 1; i <= src.size(); ++i) { if (probs[i] > max_p) { max_index = i; max_p = probs[i]; @@ -170,9 +165,12 @@ int main(int argc, char** argv) { s2t_viterbi[max_i][f_j] = 1.0; } } else { - if (use_null) - s2t.Increment(kNULL, f_j, probs[0] / sum); - for (int i = 1; i <= src.size(); ++i) + if (use_null) { + double count = probs[0] / sum; + c0 += count; + s2t.Increment(kNULL, f_j, count); + } + for (unsigned i = 1; i <= src.size(); ++i) s2t.Increment(src[i-1], f_j, probs[i] / sum); } likelihood += log(sum); @@ -190,13 +188,17 @@ int main(int argc, char** argv) { } cerr << " log_e likelihood: " << likelihood << endl; cerr << " log_2 likelihood: " << base2_likelihood << endl; - cerr << " cross entropy: " << (-base2_likelihood / denom) << endl; - cerr << " perplexity: " << pow(2.0, -base2_likelihood / denom) << endl; + cerr << " cross entropy: " << (-base2_likelihood / denom) << endl; + cerr << " perplexity: " << pow(2.0, -base2_likelihood / denom) << endl; if (!final_iteration) { if (variational_bayes) s2t.NormalizeVB(alpha); else s2t.Normalize(); + cerr << " p0: " << c0 / toks << endl; + //prob_align_null *= 0.8; + //prob_align_null += (c0 / toks) * 0.2; + prob_align_not_null = 1.0 - prob_align_null; } } if (testset.size()) { @@ -212,16 +214,13 @@ int main(int argc, char** argv) { cout << TD::GetString(src) << " ||| " << TD::GetString(trg) << " |||"; if (reverse) swap(src, trg); double log_prob = Md::log_poisson(trg.size(), 0.05 + src.size() * mean_srclen_multiplier); - if (src.size() > unnormed_a_i.size()) - unnormed_a_i.resize(src.size()); // compute likelihood - for (int j = 0; j < trg.size(); ++j) { + for (unsigned j = 0; j < trg.size(); ++j) { const WordID& f_j = trg[j]; double sum = 0; int a_j = 0; double max_pat = 0; - const double j_over_ts = double(j) / trg.size(); double prob_a_i = 1.0 / (src.size() + use_null); // uniform (model 1) if (use_null) { if (favor_diagonal) prob_a_i = prob_align_null; @@ -229,16 +228,11 @@ int main(int argc, char** argv) { sum += max_pat; } double az = 0; - if (favor_diagonal) { - for (int ta = 0; ta < src.size(); ++ta) { - unnormed_a_i[ta] = exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension); - az += unnormed_a_i[ta]; - } - az /= prob_align_not_null; - } - for (int i = 1; i <= src.size(); ++i) { + if (favor_diagonal) + az = DiagonalAlignment::ComputeZ(j+1, trg.size(), src.size(), diagonal_tension) / prob_align_not_null; + for (unsigned i = 1; i <= src.size(); ++i) { if (favor_diagonal) - prob_a_i = unnormed_a_i[i-1] / az; + prob_a_i = DiagonalAlignment::UnnormalizedProb(j + 1, i, trg.size(), src.size(), diagonal_tension) / az; double pat = s2t.prob(src[i-1], f_j) * prob_a_i; if (pat > max_pat) { max_pat = pat; a_j = i; } sum += pat; -- cgit v1.2.3 From efa888a623913e72c5dd0e527ba7da014d52597e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 24 Nov 2012 20:25:51 -0500 Subject: comment out asserts --- word-aligner/da.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/word-aligner/da.h b/word-aligner/da.h index add64a5b..5e6dfa92 100644 --- a/word-aligner/da.h +++ b/word-aligner/da.h @@ -11,17 +11,21 @@ struct DiagonalAlignment { static double UnnormalizedProb(const unsigned i, const unsigned j, const unsigned m, const unsigned n, const double alpha) { +#if 0 assert(i > 0); assert(n > 0); assert(m >= i); assert(n >= j); +#endif return exp(feat(i, j, m, n) * alpha); } static double ComputeZ(const unsigned i, const unsigned m, const unsigned n, const double alpha) { +#if 0 assert(i > 0); assert(n > 0); assert(m >= i); +#endif const double split = double(i) * n / m; const unsigned floor = split; unsigned ceil = floor + 1; -- cgit v1.2.3 From 9b3306332a27f23a36e96a93b5ff97caee1b6e3c Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 25 Nov 2012 00:28:07 -0500 Subject: optionally optimize diagonal tension --- word-aligner/da.h | 10 +++++----- word-aligner/fast_align.cc | 47 ++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 46 insertions(+), 11 deletions(-) diff --git a/word-aligner/da.h b/word-aligner/da.h index 5e6dfa92..c979b641 100644 --- a/word-aligner/da.h +++ b/word-aligner/da.h @@ -17,7 +17,7 @@ struct DiagonalAlignment { assert(m >= i); assert(n >= j); #endif - return exp(feat(i, j, m, n) * alpha); + return exp(Feature(i, j, m, n) * alpha); } static double ComputeZ(const unsigned i, const unsigned m, const unsigned n, const double alpha) { @@ -51,21 +51,21 @@ struct DiagonalAlignment { double pct = 0; double pcb = 0; if (num_top) { - pct = arithmetico_geometric_series(feat(i, ceil, m, n), UnnormalizedProb(i, ceil, m, n, alpha), ratio, d, num_top); + pct = arithmetico_geometric_series(Feature(i, ceil, m, n), UnnormalizedProb(i, ceil, m, n, alpha), ratio, d, num_top); //cerr << "PCT = " << pct << endl; } if (floor) { - pcb = arithmetico_geometric_series(feat(i, floor, m, n), UnnormalizedProb(i, floor, m, n, alpha), ratio, d, floor); + pcb = arithmetico_geometric_series(Feature(i, floor, m, n), UnnormalizedProb(i, floor, m, n, alpha), ratio, d, floor); //cerr << "PCB = " << pcb << endl; } return (pct + pcb) / z; } - private: - inline static double feat(const unsigned i, const unsigned j, const unsigned m, const unsigned n) { + inline static double Feature(const unsigned i, const unsigned j, const unsigned m, const unsigned n) { return -fabs(double(j) / n - double(i) / m); } + private: inline static double arithmetico_geometric_series(const double a_1, const double g_1, const double r, const double d, const unsigned n) { const double g_np1 = g_1 * pow(r, n); const double a_n = d * (n - 1) + a_1; diff --git a/word-aligner/fast_align.cc b/word-aligner/fast_align.cc index 14f7cac8..9d698074 100644 --- a/word-aligner/fast_align.cc +++ b/word-aligner/fast_align.cc @@ -1,6 +1,9 @@ #include #include +#include +#include +#include #include #include @@ -14,6 +17,7 @@ namespace po = boost::program_options; using namespace std; +using namespace std::tr1; bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); @@ -25,6 +29,7 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { ("favor_diagonal,d", "Use a static alignment distribution that assigns higher probabilities to alignments near the diagonal") ("prob_align_null", po::value()->default_value(0.08), "When --favor_diagonal is set, what's the probability of a null alignment?") ("diagonal_tension,T", po::value()->default_value(4.0), "How sharp or flat around the diagonal is the alignment distribution (<1 = flat >1 = sharp)") + ("optimize_tension,o", "Optimize diagonal tension during EM") ("variational_bayes,v","Infer VB estimate of parameters under a symmetric Dirichlet prior") ("alpha,a", po::value()->default_value(0.01), "Hyperparameter for optional Dirichlet prior") ("no_null_word,N","Do not generate from a null token") @@ -68,7 +73,8 @@ int main(int argc, char** argv) { const bool add_viterbi = (conf.count("no_add_viterbi") == 0); const bool variational_bayes = (conf.count("variational_bayes") > 0); const bool write_alignments = (conf.count("output_parameters") == 0); - const double diagonal_tension = conf["diagonal_tension"].as(); + double diagonal_tension = conf["diagonal_tension"].as(); + bool optimize_tension = conf.count("optimize_tension"); const bool hide_training_alignments = (conf.count("hide_training_alignments") > 0); string testset; if (conf.count("testset")) testset = conf["testset"].as(); @@ -83,8 +89,10 @@ int main(int argc, char** argv) { TTable s2t, t2s; TTable::Word2Word2Double s2t_viterbi; + unordered_map, unsigned, boost::hash > > size_counts; double tot_len_ratio = 0; double mean_srclen_multiplier = 0; + vector probs; for (int iter = 0; iter < ITERATIONS; ++iter) { const bool final_iteration = (iter == (ITERATIONS - 1)); cerr << "ITERATION " << (iter + 1) << (final_iteration ? " (FINAL)" : "") << endl; @@ -98,6 +106,7 @@ int main(int argc, char** argv) { string ssrc, strg; vector src, trg; double c0 = 0; + double emp_feat = 0; double toks = 0; while(true) { getline(in, line); @@ -115,7 +124,9 @@ int main(int argc, char** argv) { if (iter == 0) tot_len_ratio += static_cast(trg.size()) / static_cast(src.size()); denom += trg.size(); - vector probs(src.size() + 1); + probs.resize(src.size() + 1); + if (iter == 0) + ++size_counts[make_pair(trg.size(), src.size())]; bool first_al = true; // used for write_alignments toks += trg.size(); for (unsigned j = 0; j < trg.size(); ++j) { @@ -170,8 +181,11 @@ int main(int argc, char** argv) { c0 += count; s2t.Increment(kNULL, f_j, count); } - for (unsigned i = 1; i <= src.size(); ++i) - s2t.Increment(src[i-1], f_j, probs[i] / sum); + for (unsigned i = 1; i <= src.size(); ++i) { + const double p = probs[i] / sum; + s2t.Increment(src[i-1], f_j, p); + emp_feat += DiagonalAlignment::Feature(j, i, trg.size(), src.size()) * p; + } } likelihood += log(sum); } @@ -186,17 +200,38 @@ int main(int argc, char** argv) { mean_srclen_multiplier = tot_len_ratio / lc; cerr << "expected target length = source length * " << mean_srclen_multiplier << endl; } + emp_feat /= toks; cerr << " log_e likelihood: " << likelihood << endl; cerr << " log_2 likelihood: " << base2_likelihood << endl; cerr << " cross entropy: " << (-base2_likelihood / denom) << endl; cerr << " perplexity: " << pow(2.0, -base2_likelihood / denom) << endl; + cerr << " posterior p0: " << c0 / toks << endl; + cerr << " posterior al-feat: " << emp_feat << endl; + //cerr << " model tension: " << mod_feat / toks << endl; + cerr << " size counts: " << size_counts.size() << endl; if (!final_iteration) { + if (favor_diagonal && optimize_tension && iter > 0) { + for (int ii = 0; ii < 8; ++ii) { + double mod_feat = 0; + unordered_map,unsigned>::iterator it = size_counts.begin(); + for(; it != size_counts.end(); ++it) { + const pair& p = it->first; + for (short j = 1; j <= p.first; ++j) + mod_feat += it->second * DiagonalAlignment::ComputeDLogZ(j, p.first, p.second, diagonal_tension); + } + mod_feat /= toks; + cerr << " " << ii + 1 << " model al-feat: " << mod_feat << " (tension=" << diagonal_tension << ")\n"; + diagonal_tension += (emp_feat - mod_feat) * 20.0; + if (diagonal_tension <= 0.1) diagonal_tension = 0.1; + if (diagonal_tension > 14) diagonal_tension = 14; + } + cerr << " final tension: " << diagonal_tension << endl; + } if (variational_bayes) s2t.NormalizeVB(alpha); else s2t.Normalize(); - cerr << " p0: " << c0 / toks << endl; - //prob_align_null *= 0.8; + //prob_align_null *= 0.8; // XXX //prob_align_null += (c0 / toks) * 0.2; prob_align_not_null = 1.0 - prob_align_null; } -- cgit v1.2.3 From c81b0dc240f2233c3e5ecccd8982218115476f9a Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 4 Dec 2012 22:01:32 -0500 Subject: more flexible corpus cutting --- corpus/cut-corpus.pl | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/corpus/cut-corpus.pl b/corpus/cut-corpus.pl index fc9cce3b..7daac0e2 100755 --- a/corpus/cut-corpus.pl +++ b/corpus/cut-corpus.pl @@ -3,14 +3,33 @@ use strict; die "Usage: $0 N\nSplits a corpus separated by ||| symbols and returns the Nth field\n" unless scalar @ARGV > 0; my $x = shift @ARGV; -die "N must be numeric" unless $x =~ /^\d+$/; -$x--; +my @ind = split /,/, $x; +my @o = (); +for my $ff (@ind) { + if ($ff =~ /^\d+$/) { + push @o, $ff - 1; + } elsif ($ff =~ /^(\d+)-(\d+)$/) { + my $a = $1; + my $b = $2; + die "$a-$b is a bad range in input: $x\n" unless $b > $a; + for (my $i=$a; $i <= $b; $i++) { + push @o, $i - 1; + } + } else { + die "Bad input: $x\n"; + } +} while(<>) { chomp; my @fields = split / \|\|\| /; - my $y = $fields[$x]; - if (!defined $y) { $y= ''; } - print "$y\n"; + my @sf; + for my $i (@o) { + my $y = $fields[$i]; + if (!defined $y) { $y= ''; } + push @sf, $y; + } + print join(' ||| ', @sf) . "\n"; } + -- cgit v1.2.3 From a86a37cbe2fb6ffdcf4374f180010a118fed1063 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 5 Dec 2012 20:27:30 -0500 Subject: remove logging, you should be using pv --- corpus/support/quote-norm.pl | 7 ++++++- corpus/support/tokenizer.pl | 9 --------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/corpus/support/quote-norm.pl b/corpus/support/quote-norm.pl index 0c5b9c26..72b0064d 100755 --- a/corpus/support/quote-norm.pl +++ b/corpus/support/quote-norm.pl @@ -18,13 +18,18 @@ while() { s/(\W)(euro?)(\d*\.\d+|\d+)/$1EUR $3/gi; s/&\s*#45\s*;\s*&\s*#45\s*;/--/g; s/&\s*#45\s*;/--/g; + s/�c/--/g; s/ ,,/ "/g; s/``/"/g; s/''/"/g; + s/[「」]/"/g; s/〃/"/g; s/¨/"/g; s/¡/ ¡ /g; s/¿/ ¿ /g; + # â + s/â(\x{80}\x{99}|\x{80}\x{98})/'/g; + s/â(\x{80}\x{9c}|\x{80}\x{9d})/"/g; s/ˇ/'/g; s/´/'/g; s/`/'/g; @@ -39,7 +44,7 @@ while() { s/»/"/g; tr/!-~/!-~/; s/、/,/g; - s/。/./g; + # s/。/./g; s/…/.../g; s/―/--/g; s/–/--/g; diff --git a/corpus/support/tokenizer.pl b/corpus/support/tokenizer.pl index 23be00a5..e9c3a37d 100755 --- a/corpus/support/tokenizer.pl +++ b/corpus/support/tokenizer.pl @@ -107,24 +107,15 @@ my $orig_token_total = 0; my $deep_proc_token_total = 0; my $new_token_total = 0; -my $line_total = 0; -my $content_line_total = 0; - while(){ chomp(); - $line_total ++; - if ($line_total % 100000 == 0) { print STDERR " [$line_total]\n"; } - elsif ($line_total % 2500 == 0) { print STDERR "."; } - if(/^(\[b\s+|\]b|\]f|\[f\s+)/ || (/^\[[bf]$/) || (/^\s*$/) || /^ Date: Wed, 5 Dec 2012 20:57:20 -0500 Subject: slight tokenization bug fix --- corpus/support/token_patterns | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corpus/support/token_patterns b/corpus/support/token_patterns index c0e6fe1a..8e69432b 100644 --- a/corpus/support/token_patterns +++ b/corpus/support/token_patterns @@ -1,3 +1,3 @@ /^(al|el|ul|e)\-[a-z]+$/ -/^(\d+)\.$/ +/^(\d|\d\d|\d\d\d)\.$/ -- cgit v1.2.3 From 85fb3e7fa679101b30c6d1d5e3347b019a6c73d2 Mon Sep 17 00:00:00 2001 From: Victor Chahuneau Date: Thu, 13 Dec 2012 00:19:45 -0500 Subject: Enable loose phrase extraction parameter (default is still tight) use --loose when compiling corpus or tight_phrases = False in config --- python/pkg/cdec/sa/compile.py | 8 +- python/pkg/cdec/sa/extractor.py | 6 +- python/src/sa/_sa.c | 8686 +++++++++++++++++++-------------------- python/src/sa/rulefactory.pxi | 30 +- 4 files changed, 4345 insertions(+), 4385 deletions(-) diff --git a/python/pkg/cdec/sa/compile.py b/python/pkg/cdec/sa/compile.py index 393c72a4..c0402761 100644 --- a/python/pkg/cdec/sa/compile.py +++ b/python/pkg/cdec/sa/compile.py @@ -35,6 +35,8 @@ def main(): help='Number of pre-computed frequent patterns') parser.add_argument('--rank2', '-r2', type=int, default=10, help='Number of pre-computed super-frequent patterns)') + parser.add_argument('--loose', action='store_true', + help='Enable loose phrase extraction (default: tight)') parser.add_argument('-c', '--config', default='/dev/stdout', help='Output configuration') parser.add_argument('-f', '--source', @@ -53,8 +55,10 @@ def main(): parser.error('a parallel corpus is required\n' '\tuse -f (source) with -e (target) or -b (bitext)') - param_names = ("max_len", "max_nt", "max_size", "min_gap", "rank1", "rank2") - params = (args.maxlen, args.maxnt, args.maxsize, args.mingap, args.rank1, args.rank2) + param_names = ('max_len', 'max_nt', 'max_size', 'min_gap', + 'rank1', 'rank2', 'tight_phrases') + params = (args.maxlen, args.maxnt, args.maxsize, args.mingap, + args.rank1, args.rank2, not args.loose) if not os.path.exists(args.output): os.mkdir(args.output) diff --git a/python/pkg/cdec/sa/extractor.py b/python/pkg/cdec/sa/extractor.py index a5ce8a68..e09f79ea 100644 --- a/python/pkg/cdec/sa/extractor.py +++ b/python/pkg/cdec/sa/extractor.py @@ -10,7 +10,7 @@ MAX_INITIAL_SIZE = 15 class GrammarExtractor: def __init__(self, config, features=None): - if isinstance(config, str) or isinstance(config, unicode): + if isinstance(config, basestring): if not os.path.exists(config): raise IOError('cannot read configuration from {0}'.format(config)) config = cdec.configobj.ConfigObj(config, unrepr=True) @@ -50,8 +50,8 @@ class GrammarExtractor: train_max_initial_size=config['max_size'], # minimum span of an RHS nonterminal in a rule extracted from TRAINING DATA train_min_gap_size=config['min_gap'], - # True if phrases should be tight, False otherwise (better but slower) - tight_phrases=True, + # False if phrases should be loose (better but slower), True otherwise + tight_phrases=config.get('tight_phrases', True), ) # lexical weighting tables diff --git a/python/src/sa/_sa.c b/python/src/sa/_sa.c index 5b47fe8a..9c8dc2cd 100644 --- a/python/src/sa/_sa.c +++ b/python/src/sa/_sa.c @@ -1,4 +1,4 @@ -/* Generated by Cython 0.17 on Thu Sep 6 14:56:12 2012 */ +/* Generated by Cython 0.17.1 on Thu Dec 13 00:17:27 2012 */ #define PY_SSIZE_T_CLEAN #include "Python.h" @@ -409,7 +409,7 @@ struct __pyx_t_3_sa__Trie_Node; struct __pyx_t_3_sa_match_node; struct __pyx_t_3_sa_Matching; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":9 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":9 * from libc.string cimport memset, strcpy * * cdef struct _node: # <<<<<<<<<<<<<< @@ -423,7 +423,7 @@ struct __pyx_t_3_sa__node { int val; }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":30 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":30 * _init_lower_mask() * * cdef struct _BitSet: # <<<<<<<<<<<<<< @@ -437,7 +437,7 @@ struct __pyx_t_3_sa__BitSet { int size; }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":168 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":168 * return result * * cdef struct _VEB: # <<<<<<<<<<<<<< @@ -454,7 +454,7 @@ struct __pyx_t_3_sa__VEB { void **bottom; }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":10 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":10 * cdef struct _Trie_Node # forward decl * * cdef struct _Trie_Edge: # <<<<<<<<<<<<<< @@ -468,7 +468,7 @@ struct __pyx_t_3_sa__Trie_Edge { struct __pyx_t_3_sa__Trie_Edge *smaller; }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":8 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":8 * from libc.string cimport memset, memcpy * * cdef struct _Trie_Node # forward decl # <<<<<<<<<<<<<< @@ -481,7 +481,7 @@ struct __pyx_t_3_sa__Trie_Node { int arr_len; }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":66 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":66 * * # linked list structure for storing matches in BaselineRuleFactory * cdef struct match_node: # <<<<<<<<<<<<<< @@ -493,7 +493,7 @@ struct __pyx_t_3_sa_match_node { struct __pyx_t_3_sa_match_node *next; }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":162 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":162 * * # struct used to encapsulate a single matching * cdef struct Matching: # <<<<<<<<<<<<<< @@ -508,7 +508,7 @@ struct __pyx_t_3_sa_Matching { int size; }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":218 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":218 * * * cdef class HieroCachingRuleFactory: # <<<<<<<<<<<<<< @@ -557,7 +557,7 @@ struct __pyx_obj_3_sa_HieroCachingRuleFactory { }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":115 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":115 * for arc in node for node in lattice) * * def decode_sentence(lattice): # <<<<<<<<<<<<<< @@ -569,7 +569,7 @@ struct __pyx_obj_3_sa___pyx_scope_struct_9_decode_sentence { }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":36 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":36 * logger.info("LCP array completed") * * def compute_stats(self, int max_n): # <<<<<<<<<<<<<< @@ -600,7 +600,7 @@ struct __pyx_obj_3_sa___pyx_scope_struct_3_compute_stats { }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":21 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":21 * * def __str__(self): * return ' '.join('%s=%s' % feat for feat in self) # <<<<<<<<<<<<<< @@ -634,7 +634,7 @@ struct __pyx_obj_3_sa_IntList { }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":340 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":340 * * * cdef class VEBIterator: # <<<<<<<<<<<<<< @@ -648,7 +648,7 @@ struct __pyx_obj_3_sa_VEBIterator { }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":47 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":47 * * * cdef class BiLex: # <<<<<<<<<<<<<< @@ -669,7 +669,7 @@ struct __pyx_obj_3_sa_BiLex { }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":354 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":354 * * * cdef class VEB: # <<<<<<<<<<<<<< @@ -683,7 +683,7 @@ struct __pyx_obj_3_sa_VEB { }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":5 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":5 * as k most frequent n-grams""" * * cdef class LCP: # <<<<<<<<<<<<<< @@ -697,7 +697,7 @@ struct __pyx_obj_3_sa_LCP { }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":9 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":9 * from libc.string cimport memset, strcpy * * cdef class DataArray: # <<<<<<<<<<<<<< @@ -716,7 +716,7 @@ struct __pyx_obj_3_sa_DataArray { }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":100 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":100 * * * cdef class BitSetIterator: # <<<<<<<<<<<<<< @@ -730,7 +730,7 @@ struct __pyx_obj_3_sa_BitSetIterator { }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":190 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":190 * return ' ||| '.join(fields) * * def alignments(self): # <<<<<<<<<<<<<< @@ -747,7 +747,7 @@ struct __pyx_obj_3_sa___pyx_scope_struct_14_alignments { }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":188 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":188 * * * cdef class Precomputation: # <<<<<<<<<<<<<< @@ -768,7 +768,7 @@ struct __pyx_obj_3_sa_Precomputation { }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":6 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":6 * from libc.stdio cimport FILE, fclose, fopen * * cdef class SuffixArray: # <<<<<<<<<<<<<< @@ -784,7 +784,7 @@ struct __pyx_obj_3_sa_SuffixArray { }; -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":7 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":7 * cdef int INDEX_MASK = (1< size: # <<<<<<<<<<<<<< @@ -3438,7 +3438,7 @@ static int __pyx_pf_3_sa_9FloatList___cinit__(struct __pyx_obj_3_sa_FloatList *_ __pyx_t_1 = (__pyx_v_initial_len > __pyx_v_size); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":13 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":13 * def __cinit__(self, int size=0, int increment=1, int initial_len=0): * if initial_len > size: * size = initial_len # <<<<<<<<<<<<<< @@ -3450,7 +3450,7 @@ static int __pyx_pf_3_sa_9FloatList___cinit__(struct __pyx_obj_3_sa_FloatList *_ } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":14 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":14 * if initial_len > size: * size = initial_len * self.size = size # <<<<<<<<<<<<<< @@ -3459,7 +3459,7 @@ static int __pyx_pf_3_sa_9FloatList___cinit__(struct __pyx_obj_3_sa_FloatList *_ */ __pyx_v_self->size = __pyx_v_size; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":15 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":15 * size = initial_len * self.size = size * self.increment = increment # <<<<<<<<<<<<<< @@ -3468,7 +3468,7 @@ static int __pyx_pf_3_sa_9FloatList___cinit__(struct __pyx_obj_3_sa_FloatList *_ */ __pyx_v_self->increment = __pyx_v_increment; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":16 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":16 * self.size = size * self.increment = increment * self.len = initial_len # <<<<<<<<<<<<<< @@ -3477,7 +3477,7 @@ static int __pyx_pf_3_sa_9FloatList___cinit__(struct __pyx_obj_3_sa_FloatList *_ */ __pyx_v_self->len = __pyx_v_initial_len; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":17 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":17 * self.increment = increment * self.len = initial_len * self.arr = malloc(size*sizeof(float)) # <<<<<<<<<<<<<< @@ -3486,7 +3486,7 @@ static int __pyx_pf_3_sa_9FloatList___cinit__(struct __pyx_obj_3_sa_FloatList *_ */ __pyx_v_self->arr = ((float *)malloc((__pyx_v_size * (sizeof(float))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":18 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":18 * self.len = initial_len * self.arr = malloc(size*sizeof(float)) * memset(self.arr, 0, initial_len*sizeof(float)) # <<<<<<<<<<<<<< @@ -3509,7 +3509,7 @@ static void __pyx_pw_3_sa_9FloatList_3__dealloc__(PyObject *__pyx_v_self) { __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":20 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":20 * memset(self.arr, 0, initial_len*sizeof(float)) * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -3521,7 +3521,7 @@ static void __pyx_pf_3_sa_9FloatList_2__dealloc__(struct __pyx_obj_3_sa_FloatLis __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":21 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":21 * * def __dealloc__(self): * free(self.arr) # <<<<<<<<<<<<<< @@ -3544,7 +3544,7 @@ static PyObject *__pyx_pw_3_sa_9FloatList_5__getitem__(PyObject *__pyx_v_self, P return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":23 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":23 * free(self.arr) * * def __getitem__(self, i): # <<<<<<<<<<<<<< @@ -3567,7 +3567,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_4__getitem__(struct __pyx_obj_3_sa_Flo int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__getitem__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":24 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":24 * * def __getitem__(self, i): * j = i # <<<<<<<<<<<<<< @@ -3577,7 +3577,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_4__getitem__(struct __pyx_obj_3_sa_Flo __Pyx_INCREF(__pyx_v_i); __pyx_v_j = __pyx_v_i; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":25 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":25 * def __getitem__(self, i): * j = i * if i<0: # <<<<<<<<<<<<<< @@ -3589,7 +3589,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_4__getitem__(struct __pyx_obj_3_sa_Flo __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":26 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":26 * j = i * if i<0: * j = self.len + i # <<<<<<<<<<<<<< @@ -3608,7 +3608,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_4__getitem__(struct __pyx_obj_3_sa_Flo } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":27 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":27 * if i<0: * j = self.len + i * if j<0 or j>=self.len: # <<<<<<<<<<<<<< @@ -3631,7 +3631,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_4__getitem__(struct __pyx_obj_3_sa_Flo } if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":28 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":28 * j = self.len + i * if j<0 or j>=self.len: * raise IndexError("Requested index %d of %d-length FloatList" % (i, self.len)) # <<<<<<<<<<<<<< @@ -3666,7 +3666,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_4__getitem__(struct __pyx_obj_3_sa_Flo } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":29 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":29 * if j<0 or j>=self.len: * raise IndexError("Requested index %d of %d-length FloatList" % (i, self.len)) * return self.arr[j] # <<<<<<<<<<<<<< @@ -3695,7 +3695,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_4__getitem__(struct __pyx_obj_3_sa_Flo return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":31 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":31 * return self.arr[j] * * cdef void set(self, int i, float v): # <<<<<<<<<<<<<< @@ -3717,7 +3717,7 @@ static void __pyx_f_3_sa_9FloatList_set(struct __pyx_obj_3_sa_FloatList *__pyx_v int __pyx_clineno = 0; __Pyx_RefNannySetupContext("set", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":32 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":32 * * cdef void set(self, int i, float v): * j = i # <<<<<<<<<<<<<< @@ -3726,7 +3726,7 @@ static void __pyx_f_3_sa_9FloatList_set(struct __pyx_obj_3_sa_FloatList *__pyx_v */ __pyx_v_j = __pyx_v_i; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":33 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":33 * cdef void set(self, int i, float v): * j = i * if i<0: # <<<<<<<<<<<<<< @@ -3736,7 +3736,7 @@ static void __pyx_f_3_sa_9FloatList_set(struct __pyx_obj_3_sa_FloatList *__pyx_v __pyx_t_1 = (__pyx_v_i < 0); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":34 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":34 * j = i * if i<0: * j = self.len + i # <<<<<<<<<<<<<< @@ -3748,7 +3748,7 @@ static void __pyx_f_3_sa_9FloatList_set(struct __pyx_obj_3_sa_FloatList *__pyx_v } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":35 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":35 * if i<0: * j = self.len + i * if j<0 or j>=self.len: # <<<<<<<<<<<<<< @@ -3764,7 +3764,7 @@ static void __pyx_f_3_sa_9FloatList_set(struct __pyx_obj_3_sa_FloatList *__pyx_v } if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":36 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":36 * j = self.len + i * if j<0 or j>=self.len: * raise IndexError("Requested index %d of %d-length FloatList" % (i, self.len)) # <<<<<<<<<<<<<< @@ -3801,7 +3801,7 @@ static void __pyx_f_3_sa_9FloatList_set(struct __pyx_obj_3_sa_FloatList *__pyx_v } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":37 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":37 * if j<0 or j>=self.len: * raise IndexError("Requested index %d of %d-length FloatList" % (i, self.len)) * self.arr[j] = v # <<<<<<<<<<<<<< @@ -3831,7 +3831,7 @@ static int __pyx_pw_3_sa_9FloatList_7__setitem__(PyObject *__pyx_v_self, PyObjec return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":39 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":39 * self.arr[j] = v * * def __setitem__(self, i, val): # <<<<<<<<<<<<<< @@ -3849,7 +3849,7 @@ static int __pyx_pf_3_sa_9FloatList_6__setitem__(struct __pyx_obj_3_sa_FloatList int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__setitem__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":40 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":40 * * def __setitem__(self, i, val): * self.set(i, val) # <<<<<<<<<<<<<< @@ -3881,7 +3881,7 @@ static Py_ssize_t __pyx_pw_3_sa_9FloatList_9__len__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":42 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":42 * self.set(i, val) * * def __len__(self): # <<<<<<<<<<<<<< @@ -3894,7 +3894,7 @@ static Py_ssize_t __pyx_pf_3_sa_9FloatList_8__len__(struct __pyx_obj_3_sa_FloatL __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__len__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":43 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":43 * * def __len__(self): * return self.len # <<<<<<<<<<<<<< @@ -3931,7 +3931,7 @@ static PyObject *__pyx_pw_3_sa_9FloatList_11append(PyObject *__pyx_v_self, PyObj return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":45 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":45 * return self.len * * def append(self, float val): # <<<<<<<<<<<<<< @@ -3945,7 +3945,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_10append(struct __pyx_obj_3_sa_FloatLi int __pyx_t_1; __Pyx_RefNannySetupContext("append", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":46 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":46 * * def append(self, float val): * if self.len == self.size: # <<<<<<<<<<<<<< @@ -3955,7 +3955,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_10append(struct __pyx_obj_3_sa_FloatLi __pyx_t_1 = (__pyx_v_self->len == __pyx_v_self->size); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":47 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":47 * def append(self, float val): * if self.len == self.size: * self.size = self.size + self.increment # <<<<<<<<<<<<<< @@ -3964,7 +3964,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_10append(struct __pyx_obj_3_sa_FloatLi */ __pyx_v_self->size = (__pyx_v_self->size + __pyx_v_self->increment); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":48 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":48 * if self.len == self.size: * self.size = self.size + self.increment * self.arr = realloc(self.arr, self.size*sizeof(float)) # <<<<<<<<<<<<<< @@ -3976,7 +3976,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_10append(struct __pyx_obj_3_sa_FloatLi } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":49 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":49 * self.size = self.size + self.increment * self.arr = realloc(self.arr, self.size*sizeof(float)) * self.arr[self.len] = val # <<<<<<<<<<<<<< @@ -3985,7 +3985,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_10append(struct __pyx_obj_3_sa_FloatLi */ (__pyx_v_self->arr[__pyx_v_self->len]) = __pyx_v_val; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":50 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":50 * self.arr = realloc(self.arr, self.size*sizeof(float)) * self.arr[self.len] = val * self.len = self.len + 1 # <<<<<<<<<<<<<< @@ -4000,7 +4000,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_10append(struct __pyx_obj_3_sa_FloatLi return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":52 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":52 * self.len = self.len + 1 * * cdef void write_handle(self, FILE* f): # <<<<<<<<<<<<<< @@ -4012,7 +4012,7 @@ static void __pyx_f_3_sa_9FloatList_write_handle(struct __pyx_obj_3_sa_FloatList __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("write_handle", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":53 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":53 * * cdef void write_handle(self, FILE* f): * fwrite(&(self.len), sizeof(float), 1, f) # <<<<<<<<<<<<<< @@ -4021,7 +4021,7 @@ static void __pyx_f_3_sa_9FloatList_write_handle(struct __pyx_obj_3_sa_FloatList */ fwrite((&__pyx_v_self->len), (sizeof(float)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":54 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":54 * cdef void write_handle(self, FILE* f): * fwrite(&(self.len), sizeof(float), 1, f) * fwrite(self.arr, sizeof(float), self.len, f) # <<<<<<<<<<<<<< @@ -4054,7 +4054,7 @@ static PyObject *__pyx_pw_3_sa_9FloatList_13write(PyObject *__pyx_v_self, PyObje return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":56 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":56 * fwrite(self.arr, sizeof(float), self.len, f) * * def write(self, char* filename): # <<<<<<<<<<<<<< @@ -4068,7 +4068,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_12write(struct __pyx_obj_3_sa_FloatLis __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("write", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":58 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":58 * def write(self, char* filename): * cdef FILE* f * f = fopen(filename, "w") # <<<<<<<<<<<<<< @@ -4077,7 +4077,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_12write(struct __pyx_obj_3_sa_FloatLis */ __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__w); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":59 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":59 * cdef FILE* f * f = fopen(filename, "w") * self.write_handle(f) # <<<<<<<<<<<<<< @@ -4086,7 +4086,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_12write(struct __pyx_obj_3_sa_FloatLis */ ((struct __pyx_vtabstruct_3_sa_FloatList *)__pyx_v_self->__pyx_vtab)->write_handle(__pyx_v_self, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":60 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":60 * f = fopen(filename, "w") * self.write_handle(f) * fclose(f) # <<<<<<<<<<<<<< @@ -4101,7 +4101,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_12write(struct __pyx_obj_3_sa_FloatLis return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":62 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":62 * fclose(f) * * cdef void read_handle(self, FILE* f): # <<<<<<<<<<<<<< @@ -4113,7 +4113,7 @@ static void __pyx_f_3_sa_9FloatList_read_handle(struct __pyx_obj_3_sa_FloatList __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("read_handle", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":63 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":63 * * cdef void read_handle(self, FILE* f): * free(self.arr) # <<<<<<<<<<<<<< @@ -4122,7 +4122,7 @@ static void __pyx_f_3_sa_9FloatList_read_handle(struct __pyx_obj_3_sa_FloatList */ free(__pyx_v_self->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":64 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":64 * cdef void read_handle(self, FILE* f): * free(self.arr) * fread(&(self.len), sizeof(float), 1, f) # <<<<<<<<<<<<<< @@ -4131,7 +4131,7 @@ static void __pyx_f_3_sa_9FloatList_read_handle(struct __pyx_obj_3_sa_FloatList */ fread((&__pyx_v_self->len), (sizeof(float)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":65 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":65 * free(self.arr) * fread(&(self.len), sizeof(float), 1, f) * self.arr = malloc(self.len * sizeof(float)) # <<<<<<<<<<<<<< @@ -4140,7 +4140,7 @@ static void __pyx_f_3_sa_9FloatList_read_handle(struct __pyx_obj_3_sa_FloatList */ __pyx_v_self->arr = ((float *)malloc((__pyx_v_self->len * (sizeof(float))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":66 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":66 * fread(&(self.len), sizeof(float), 1, f) * self.arr = malloc(self.len * sizeof(float)) * self.size = self.len # <<<<<<<<<<<<<< @@ -4149,7 +4149,7 @@ static void __pyx_f_3_sa_9FloatList_read_handle(struct __pyx_obj_3_sa_FloatList */ __pyx_v_self->size = __pyx_v_self->len; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":67 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":67 * self.arr = malloc(self.len * sizeof(float)) * self.size = self.len * fread(self.arr, sizeof(float), self.len, f) # <<<<<<<<<<<<<< @@ -4182,7 +4182,7 @@ static PyObject *__pyx_pw_3_sa_9FloatList_15read(PyObject *__pyx_v_self, PyObjec return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":69 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":69 * fread(self.arr, sizeof(float), self.len, f) * * def read(self, char* filename): # <<<<<<<<<<<<<< @@ -4196,7 +4196,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_14read(struct __pyx_obj_3_sa_FloatList __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("read", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":71 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":71 * def read(self, char* filename): * cdef FILE* f * f = fopen(filename, "r") # <<<<<<<<<<<<<< @@ -4205,7 +4205,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_14read(struct __pyx_obj_3_sa_FloatList */ __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__r); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":72 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":72 * cdef FILE* f * f = fopen(filename, "r") * self.read_handle(f) # <<<<<<<<<<<<<< @@ -4213,7 +4213,7 @@ static PyObject *__pyx_pf_3_sa_9FloatList_14read(struct __pyx_obj_3_sa_FloatList */ ((struct __pyx_vtabstruct_3_sa_FloatList *)__pyx_v_self->__pyx_vtab)->read_handle(__pyx_v_self, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/float_list.pxi":73 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/float_list.pxi":73 * f = fopen(filename, "r") * self.read_handle(f) * fclose(f) # <<<<<<<<<<<<<< @@ -4307,7 +4307,7 @@ static int __pyx_pw_3_sa_7IntList_1__cinit__(PyObject *__pyx_v_self, PyObject *_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":11 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":11 * cdef class IntList: * * def __cinit__(self, int size=0, int increment=1, int initial_len=0): # <<<<<<<<<<<<<< @@ -4321,7 +4321,7 @@ static int __pyx_pf_3_sa_7IntList___cinit__(struct __pyx_obj_3_sa_IntList *__pyx int __pyx_t_1; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":12 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":12 * * def __cinit__(self, int size=0, int increment=1, int initial_len=0): * if initial_len > size: # <<<<<<<<<<<<<< @@ -4331,7 +4331,7 @@ static int __pyx_pf_3_sa_7IntList___cinit__(struct __pyx_obj_3_sa_IntList *__pyx __pyx_t_1 = (__pyx_v_initial_len > __pyx_v_size); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":13 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":13 * def __cinit__(self, int size=0, int increment=1, int initial_len=0): * if initial_len > size: * size = initial_len # <<<<<<<<<<<<<< @@ -4343,7 +4343,7 @@ static int __pyx_pf_3_sa_7IntList___cinit__(struct __pyx_obj_3_sa_IntList *__pyx } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":14 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":14 * if initial_len > size: * size = initial_len * self.size = size # <<<<<<<<<<<<<< @@ -4352,7 +4352,7 @@ static int __pyx_pf_3_sa_7IntList___cinit__(struct __pyx_obj_3_sa_IntList *__pyx */ __pyx_v_self->size = __pyx_v_size; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":15 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":15 * size = initial_len * self.size = size * self.increment = increment # <<<<<<<<<<<<<< @@ -4361,7 +4361,7 @@ static int __pyx_pf_3_sa_7IntList___cinit__(struct __pyx_obj_3_sa_IntList *__pyx */ __pyx_v_self->increment = __pyx_v_increment; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":16 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":16 * self.size = size * self.increment = increment * self.len = initial_len # <<<<<<<<<<<<<< @@ -4370,7 +4370,7 @@ static int __pyx_pf_3_sa_7IntList___cinit__(struct __pyx_obj_3_sa_IntList *__pyx */ __pyx_v_self->len = __pyx_v_initial_len; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":17 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":17 * self.increment = increment * self.len = initial_len * self.arr = malloc(size*sizeof(int)) # <<<<<<<<<<<<<< @@ -4379,7 +4379,7 @@ static int __pyx_pf_3_sa_7IntList___cinit__(struct __pyx_obj_3_sa_IntList *__pyx */ __pyx_v_self->arr = ((int *)malloc((__pyx_v_size * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":18 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":18 * self.len = initial_len * self.arr = malloc(size*sizeof(int)) * memset(self.arr, 0, initial_len*sizeof(int)) # <<<<<<<<<<<<<< @@ -4404,7 +4404,7 @@ static PyObject *__pyx_pw_3_sa_7IntList_3__str__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":20 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":20 * memset(self.arr, 0, initial_len*sizeof(int)) * * def __str__(self): # <<<<<<<<<<<<<< @@ -4427,7 +4427,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_2__str__(struct __pyx_obj_3_sa_IntList * int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__str__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":22 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":22 * def __str__(self): * cdef unsigned i * ret = "IntList[" # <<<<<<<<<<<<<< @@ -4437,7 +4437,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_2__str__(struct __pyx_obj_3_sa_IntList * __Pyx_INCREF(((PyObject *)__pyx_kp_s_3)); __pyx_v_ret = ((PyObject *)__pyx_kp_s_3); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":23 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":23 * cdef unsigned i * ret = "IntList[" * for idx in range(self.size): # <<<<<<<<<<<<<< @@ -4448,7 +4448,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_2__str__(struct __pyx_obj_3_sa_IntList * for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_idx = __pyx_t_2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":24 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":24 * ret = "IntList[" * for idx in range(self.size): * if idx>0: # <<<<<<<<<<<<<< @@ -4458,7 +4458,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_2__str__(struct __pyx_obj_3_sa_IntList * __pyx_t_3 = (__pyx_v_idx > 0); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":25 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":25 * for idx in range(self.size): * if idx>0: * ret += "," # <<<<<<<<<<<<<< @@ -4474,7 +4474,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_2__str__(struct __pyx_obj_3_sa_IntList * } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":26 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":26 * if idx>0: * ret += "," * ret += str(self.arr[idx]) # <<<<<<<<<<<<<< @@ -4499,7 +4499,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_2__str__(struct __pyx_obj_3_sa_IntList * __pyx_t_5 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":27 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":27 * ret += "," * ret += str(self.arr[idx]) * ret += "]" # <<<<<<<<<<<<<< @@ -4512,7 +4512,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_2__str__(struct __pyx_obj_3_sa_IntList * __pyx_v_ret = __pyx_t_5; __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":28 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":28 * ret += str(self.arr[idx]) * ret += "]" * ret += "len=" # <<<<<<<<<<<<<< @@ -4525,7 +4525,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_2__str__(struct __pyx_obj_3_sa_IntList * __pyx_v_ret = __pyx_t_5; __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":29 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":29 * ret += "]" * ret += "len=" * ret += self.len # <<<<<<<<<<<<<< @@ -4541,7 +4541,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_2__str__(struct __pyx_obj_3_sa_IntList * __pyx_v_ret = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":30 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":30 * ret += "len=" * ret += self.len * return ret # <<<<<<<<<<<<<< @@ -4578,7 +4578,7 @@ static PyObject *__pyx_pw_3_sa_7IntList_5index(PyObject *__pyx_v_self, PyObject return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":32 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":32 * return ret * * def index(self, val): # <<<<<<<<<<<<<< @@ -4600,7 +4600,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_4index(struct __pyx_obj_3_sa_IntList *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("index", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":34 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":34 * def index(self, val): * cdef unsigned i * for i in range(self.len): # <<<<<<<<<<<<<< @@ -4611,7 +4611,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_4index(struct __pyx_obj_3_sa_IntList *__ for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_i = __pyx_t_2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":35 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":35 * cdef unsigned i * for i in range(self.len): * if self.arr[i] == val: # <<<<<<<<<<<<<< @@ -4626,7 +4626,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_4index(struct __pyx_obj_3_sa_IntList *__ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":36 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":36 * for i in range(self.len): * if self.arr[i] == val: * return i # <<<<<<<<<<<<<< @@ -4644,7 +4644,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_4index(struct __pyx_obj_3_sa_IntList *__ __pyx_L5:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":37 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":37 * if self.arr[i] == val: * return i * return IndexError # <<<<<<<<<<<<<< @@ -4725,7 +4725,7 @@ static PyObject *__pyx_pw_3_sa_7IntList_7partition(PyObject *__pyx_v_self, PyObj return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":39 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":39 * return IndexError * * def partition(self,start,end): # <<<<<<<<<<<<<< @@ -4751,7 +4751,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList int __pyx_clineno = 0; __Pyx_RefNannySetupContext("partition", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":40 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":40 * * def partition(self,start,end): * pivot = self.arr[end] # <<<<<<<<<<<<<< @@ -4764,7 +4764,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList __pyx_v_pivot = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":41 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":41 * def partition(self,start,end): * pivot = self.arr[end] * bottom = start-1 # <<<<<<<<<<<<<< @@ -4776,7 +4776,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList __pyx_v_bottom = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":42 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":42 * pivot = self.arr[end] * bottom = start-1 * top = end # <<<<<<<<<<<<<< @@ -4786,7 +4786,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList __Pyx_INCREF(__pyx_v_end); __pyx_v_top = __pyx_v_end; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":43 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":43 * bottom = start-1 * top = end * done = 0 # <<<<<<<<<<<<<< @@ -4795,7 +4795,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList */ __pyx_v_done = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":44 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":44 * top = end * done = 0 * while not done: # <<<<<<<<<<<<<< @@ -4806,7 +4806,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList __pyx_t_3 = (!__pyx_v_done); if (!__pyx_t_3) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":45 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":45 * done = 0 * while not done: * while not done: # <<<<<<<<<<<<<< @@ -4817,7 +4817,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList __pyx_t_3 = (!__pyx_v_done); if (!__pyx_t_3) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":46 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":46 * while not done: * while not done: * bottom += 1 # <<<<<<<<<<<<<< @@ -4830,7 +4830,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList __pyx_v_bottom = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":47 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":47 * while not done: * bottom += 1 * if bottom == top: # <<<<<<<<<<<<<< @@ -4842,7 +4842,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":48 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":48 * bottom += 1 * if bottom == top: * done = 1 # <<<<<<<<<<<<<< @@ -4851,7 +4851,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList */ __pyx_v_done = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":49 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":49 * if bottom == top: * done = 1 * break # <<<<<<<<<<<<<< @@ -4863,7 +4863,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList } __pyx_L7:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":50 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":50 * done = 1 * break * if self.arr[bottom] > pivot: # <<<<<<<<<<<<<< @@ -4879,7 +4879,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":51 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":51 * break * if self.arr[bottom] > pivot: * self.arr[top] = self.arr[bottom] # <<<<<<<<<<<<<< @@ -4890,7 +4890,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList __pyx_t_5 = __Pyx_PyIndex_AsSsize_t(__pyx_v_top); if (unlikely((__pyx_t_5 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_self->arr[__pyx_t_5]) = (__pyx_v_self->arr[__pyx_t_1]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":52 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":52 * if self.arr[bottom] > pivot: * self.arr[top] = self.arr[bottom] * break # <<<<<<<<<<<<<< @@ -4904,7 +4904,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList } __pyx_L6_break:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":53 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":53 * self.arr[top] = self.arr[bottom] * break * while not done: # <<<<<<<<<<<<<< @@ -4915,7 +4915,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList __pyx_t_3 = (!__pyx_v_done); if (!__pyx_t_3) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":54 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":54 * break * while not done: * top -= 1 # <<<<<<<<<<<<<< @@ -4928,7 +4928,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList __pyx_v_top = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":55 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":55 * while not done: * top -= 1 * if top == bottom: # <<<<<<<<<<<<<< @@ -4940,7 +4940,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":56 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":56 * top -= 1 * if top == bottom: * done = 1 # <<<<<<<<<<<<<< @@ -4949,7 +4949,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList */ __pyx_v_done = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":57 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":57 * if top == bottom: * done = 1 * break # <<<<<<<<<<<<<< @@ -4961,7 +4961,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList } __pyx_L11:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":58 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":58 * done = 1 * break * if self.arr[top] < pivot: # <<<<<<<<<<<<<< @@ -4977,7 +4977,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":59 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":59 * break * if self.arr[top] < pivot: * self.arr[bottom] = self.arr[top] # <<<<<<<<<<<<<< @@ -4988,7 +4988,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList __pyx_t_5 = __Pyx_PyIndex_AsSsize_t(__pyx_v_bottom); if (unlikely((__pyx_t_5 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_self->arr[__pyx_t_5]) = (__pyx_v_self->arr[__pyx_t_1]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":60 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":60 * if self.arr[top] < pivot: * self.arr[bottom] = self.arr[top] * break # <<<<<<<<<<<<<< @@ -5003,7 +5003,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList __pyx_L10_break:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":61 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":61 * self.arr[bottom] = self.arr[top] * break * self.arr[top] = pivot # <<<<<<<<<<<<<< @@ -5014,7 +5014,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_6partition(struct __pyx_obj_3_sa_IntList __pyx_t_1 = __Pyx_PyIndex_AsSsize_t(__pyx_v_top); if (unlikely((__pyx_t_1 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_self->arr[__pyx_t_1]) = __pyx_t_6; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":62 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":62 * break * self.arr[top] = pivot * return top # <<<<<<<<<<<<<< @@ -5098,7 +5098,7 @@ static PyObject *__pyx_pw_3_sa_7IntList_9_doquicksort(PyObject *__pyx_v_self, Py return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":64 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":64 * return top * * def _doquicksort(self,start,end): # <<<<<<<<<<<<<< @@ -5119,7 +5119,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_8_doquicksort(struct __pyx_obj_3_sa_IntL int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_doquicksort", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":65 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":65 * * def _doquicksort(self,start,end): * if start < end: # <<<<<<<<<<<<<< @@ -5131,7 +5131,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_8_doquicksort(struct __pyx_obj_3_sa_IntL __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":66 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":66 * def _doquicksort(self,start,end): * if start < end: * split = self.partition(start,end) # <<<<<<<<<<<<<< @@ -5155,7 +5155,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_8_doquicksort(struct __pyx_obj_3_sa_IntL __pyx_v_split = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":67 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":67 * if start < end: * split = self.partition(start,end) * self._doquicksort(start,split-1) # <<<<<<<<<<<<<< @@ -5180,7 +5180,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_8_doquicksort(struct __pyx_obj_3_sa_IntL __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":68 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":68 * split = self.partition(start,end) * self._doquicksort(start,split-1) * self._doquicksort(split+1,end) # <<<<<<<<<<<<<< @@ -5208,7 +5208,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_8_doquicksort(struct __pyx_obj_3_sa_IntL } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":70 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":70 * self._doquicksort(split+1,end) * else: * return # <<<<<<<<<<<<<< @@ -5247,7 +5247,7 @@ static PyObject *__pyx_pw_3_sa_7IntList_11sort(PyObject *__pyx_v_self, CYTHON_UN return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":72 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":72 * return * * def sort(self): # <<<<<<<<<<<<<< @@ -5266,7 +5266,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_10sort(struct __pyx_obj_3_sa_IntList *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("sort", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":73 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":73 * * def sort(self): * self._doquicksort(0,self.len-1) # <<<<<<<<<<<<<< @@ -5316,7 +5316,7 @@ static PyObject *__pyx_pw_3_sa_7IntList_13reset(PyObject *__pyx_v_self, CYTHON_U return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":75 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":75 * self._doquicksort(0,self.len-1) * * def reset(self): # <<<<<<<<<<<<<< @@ -5329,7 +5329,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_12reset(struct __pyx_obj_3_sa_IntList *_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("reset", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":76 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":76 * * def reset(self): * self.len = 0 # <<<<<<<<<<<<<< @@ -5353,7 +5353,7 @@ static void __pyx_pw_3_sa_7IntList_15__dealloc__(PyObject *__pyx_v_self) { __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":78 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":78 * self.len = 0 * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -5365,7 +5365,7 @@ static void __pyx_pf_3_sa_7IntList_14__dealloc__(struct __pyx_obj_3_sa_IntList * __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":79 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":79 * * def __dealloc__(self): * free(self.arr) # <<<<<<<<<<<<<< @@ -5389,7 +5389,7 @@ static PyObject *__pyx_pw_3_sa_7IntList_17__iter__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":81 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":81 * free(self.arr) * * def __iter__(self): # <<<<<<<<<<<<<< @@ -5452,7 +5452,7 @@ static PyObject *__pyx_gb_3_sa_7IntList_18generator(__pyx_GeneratorObject *__pyx __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 81; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":83 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":83 * def __iter__(self): * cdef int i * for i in range(self.len): # <<<<<<<<<<<<<< @@ -5463,7 +5463,7 @@ static PyObject *__pyx_gb_3_sa_7IntList_18generator(__pyx_GeneratorObject *__pyx for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":84 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":84 * cdef int i * for i in range(self.len): * yield self.arr[i] # <<<<<<<<<<<<<< @@ -5510,7 +5510,7 @@ static PyObject *__pyx_pw_3_sa_7IntList_20__getitem__(PyObject *__pyx_v_self, Py return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":86 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":86 * yield self.arr[i] * * def __getitem__(self, index): # <<<<<<<<<<<<<< @@ -5540,7 +5540,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__getitem__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":88 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":88 * def __getitem__(self, index): * cdef int i, j, k * if isinstance(index, int): # <<<<<<<<<<<<<< @@ -5553,7 +5553,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":89 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":89 * cdef int i, j, k * if isinstance(index, int): * j = index # <<<<<<<<<<<<<< @@ -5563,7 +5563,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL __pyx_t_3 = __Pyx_PyInt_AsInt(__pyx_v_index); if (unlikely((__pyx_t_3 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 89; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_j = __pyx_t_3; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":90 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":90 * if isinstance(index, int): * j = index * if j < 0: # <<<<<<<<<<<<<< @@ -5573,7 +5573,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL __pyx_t_2 = (__pyx_v_j < 0); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":91 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":91 * j = index * if j < 0: * j = self.len + j # <<<<<<<<<<<<<< @@ -5585,7 +5585,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":92 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":92 * if j < 0: * j = self.len + j * if j<0 or j>=self.len: # <<<<<<<<<<<<<< @@ -5601,7 +5601,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL } if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":93 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":93 * j = self.len + j * if j<0 or j>=self.len: * raise IndexError("Requested index %d of %d-length IntList" % (index, self.len)) # <<<<<<<<<<<<<< @@ -5636,7 +5636,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":94 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":94 * if j<0 or j>=self.len: * raise IndexError("Requested index %d of %d-length IntList" % (index, self.len)) * return self.arr[j] # <<<<<<<<<<<<<< @@ -5652,7 +5652,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL goto __pyx_L3; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":95 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":95 * raise IndexError("Requested index %d of %d-length IntList" % (index, self.len)) * return self.arr[j] * elif isinstance(index, slice): # <<<<<<<<<<<<<< @@ -5665,7 +5665,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":96 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":96 * return self.arr[j] * elif isinstance(index, slice): * i = index.start # <<<<<<<<<<<<<< @@ -5678,7 +5678,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_i = __pyx_t_3; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":97 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":97 * elif isinstance(index, slice): * i = index.start * j = index.stop # <<<<<<<<<<<<<< @@ -5691,7 +5691,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_j = __pyx_t_3; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":98 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":98 * i = index.start * j = index.stop * if i < 0: # <<<<<<<<<<<<<< @@ -5701,7 +5701,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL __pyx_t_5 = (__pyx_v_i < 0); if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":99 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":99 * j = index.stop * if i < 0: * i = self.len + i # <<<<<<<<<<<<<< @@ -5713,7 +5713,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL } __pyx_L6:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":100 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":100 * if i < 0: * i = self.len + i * if j < 0: # <<<<<<<<<<<<<< @@ -5723,7 +5723,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL __pyx_t_5 = (__pyx_v_j < 0); if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":101 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":101 * i = self.len + i * if j < 0: * j = self.len + j # <<<<<<<<<<<<<< @@ -5735,7 +5735,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL } __pyx_L7:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":102 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":102 * if j < 0: * j = self.len + j * if i < 0 or i >= self.len or j < 0 or j > self.len: # <<<<<<<<<<<<<< @@ -5763,7 +5763,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL } if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":103 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":103 * j = self.len + j * if i < 0 or i >= self.len or j < 0 or j > self.len: * raise IndexError("Requested index %d:%d of %d-length IntList" % (index.start, index.stop, self.len)) # <<<<<<<<<<<<<< @@ -5805,7 +5805,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL } __pyx_L8:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":104 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":104 * if i < 0 or i >= self.len or j < 0 or j > self.len: * raise IndexError("Requested index %d:%d of %d-length IntList" % (index.start, index.stop, self.len)) * result = () # <<<<<<<<<<<<<< @@ -5815,7 +5815,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL __Pyx_INCREF(((PyObject *)__pyx_empty_tuple)); __pyx_v_result = __pyx_empty_tuple; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":105 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":105 * raise IndexError("Requested index %d:%d of %d-length IntList" % (index.start, index.stop, self.len)) * result = () * for k from i <= k < j: # <<<<<<<<<<<<<< @@ -5825,7 +5825,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL __pyx_t_3 = __pyx_v_j; for (__pyx_v_k = __pyx_v_i; __pyx_v_k < __pyx_t_3; __pyx_v_k++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":106 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":106 * result = () * for k from i <= k < j: * result = result + (self.arr[k],) # <<<<<<<<<<<<<< @@ -5847,7 +5847,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL __pyx_t_9 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":107 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":107 * for k from i <= k < j: * result = result + (self.arr[k],) * return result # <<<<<<<<<<<<<< @@ -5862,7 +5862,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":109 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":109 * return result * else: * raise TypeError("Illegal key type %s for IntList" % type(index)) # <<<<<<<<<<<<<< @@ -5901,7 +5901,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_19__getitem__(struct __pyx_obj_3_sa_IntL return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":111 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":111 * raise TypeError("Illegal key type %s for IntList" % type(index)) * * cdef void set(self, int i, int val): # <<<<<<<<<<<<<< @@ -5923,7 +5923,7 @@ static void __pyx_f_3_sa_7IntList_set(struct __pyx_obj_3_sa_IntList *__pyx_v_sel int __pyx_clineno = 0; __Pyx_RefNannySetupContext("set", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":112 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":112 * * cdef void set(self, int i, int val): * j = i # <<<<<<<<<<<<<< @@ -5932,7 +5932,7 @@ static void __pyx_f_3_sa_7IntList_set(struct __pyx_obj_3_sa_IntList *__pyx_v_sel */ __pyx_v_j = __pyx_v_i; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":113 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":113 * cdef void set(self, int i, int val): * j = i * if i<0: # <<<<<<<<<<<<<< @@ -5942,7 +5942,7 @@ static void __pyx_f_3_sa_7IntList_set(struct __pyx_obj_3_sa_IntList *__pyx_v_sel __pyx_t_1 = (__pyx_v_i < 0); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":114 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":114 * j = i * if i<0: * j = self.len + i # <<<<<<<<<<<<<< @@ -5954,7 +5954,7 @@ static void __pyx_f_3_sa_7IntList_set(struct __pyx_obj_3_sa_IntList *__pyx_v_sel } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":115 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":115 * if i<0: * j = self.len + i * if j<0 or j>=self.len: # <<<<<<<<<<<<<< @@ -5970,7 +5970,7 @@ static void __pyx_f_3_sa_7IntList_set(struct __pyx_obj_3_sa_IntList *__pyx_v_sel } if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":116 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":116 * j = self.len + i * if j<0 or j>=self.len: * raise IndexError("Requested index %d of %d-length IntList" % (i, self.len)) # <<<<<<<<<<<<<< @@ -6007,7 +6007,7 @@ static void __pyx_f_3_sa_7IntList_set(struct __pyx_obj_3_sa_IntList *__pyx_v_sel } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":117 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":117 * if j<0 or j>=self.len: * raise IndexError("Requested index %d of %d-length IntList" % (i, self.len)) * self.arr[j] = val # <<<<<<<<<<<<<< @@ -6037,7 +6037,7 @@ static int __pyx_pw_3_sa_7IntList_22__setitem__(PyObject *__pyx_v_self, PyObject return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":119 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":119 * self.arr[j] = val * * def __setitem__(self, i, val): # <<<<<<<<<<<<<< @@ -6055,7 +6055,7 @@ static int __pyx_pf_3_sa_7IntList_21__setitem__(struct __pyx_obj_3_sa_IntList *_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__setitem__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":120 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":120 * * def __setitem__(self, i, val): * self.set(i, val) # <<<<<<<<<<<<<< @@ -6087,7 +6087,7 @@ static Py_ssize_t __pyx_pw_3_sa_7IntList_24__len__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":122 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":122 * self.set(i, val) * * def __len__(self): # <<<<<<<<<<<<<< @@ -6100,7 +6100,7 @@ static Py_ssize_t __pyx_pf_3_sa_7IntList_23__len__(struct __pyx_obj_3_sa_IntList __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__len__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":123 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":123 * * def __len__(self): * return self.len # <<<<<<<<<<<<<< @@ -6127,7 +6127,7 @@ static PyObject *__pyx_pw_3_sa_7IntList_26get_size(PyObject *__pyx_v_self, CYTHO return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":125 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":125 * return self.len * * def get_size(self): # <<<<<<<<<<<<<< @@ -6144,7 +6144,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_25get_size(struct __pyx_obj_3_sa_IntList int __pyx_clineno = 0; __Pyx_RefNannySetupContext("get_size", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":126 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":126 * * def get_size(self): * return self.size # <<<<<<<<<<<<<< @@ -6191,7 +6191,7 @@ static PyObject *__pyx_pw_3_sa_7IntList_28append(PyObject *__pyx_v_self, PyObjec return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":128 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":128 * return self.size * * def append(self, int val): # <<<<<<<<<<<<<< @@ -6204,7 +6204,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_27append(struct __pyx_obj_3_sa_IntList * __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("append", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":129 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":129 * * def append(self, int val): * self._append(val) # <<<<<<<<<<<<<< @@ -6219,7 +6219,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_27append(struct __pyx_obj_3_sa_IntList * return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":131 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":131 * self._append(val) * * cdef void _append(self, int val): # <<<<<<<<<<<<<< @@ -6232,7 +6232,7 @@ static void __pyx_f_3_sa_7IntList__append(struct __pyx_obj_3_sa_IntList *__pyx_v int __pyx_t_1; __Pyx_RefNannySetupContext("_append", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":132 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":132 * * cdef void _append(self, int val): * if self.len == self.size: # <<<<<<<<<<<<<< @@ -6242,7 +6242,7 @@ static void __pyx_f_3_sa_7IntList__append(struct __pyx_obj_3_sa_IntList *__pyx_v __pyx_t_1 = (__pyx_v_self->len == __pyx_v_self->size); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":133 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":133 * cdef void _append(self, int val): * if self.len == self.size: * self.size = self.size + self.increment # <<<<<<<<<<<<<< @@ -6251,7 +6251,7 @@ static void __pyx_f_3_sa_7IntList__append(struct __pyx_obj_3_sa_IntList *__pyx_v */ __pyx_v_self->size = (__pyx_v_self->size + __pyx_v_self->increment); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":134 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":134 * if self.len == self.size: * self.size = self.size + self.increment * self.arr = realloc(self.arr, self.size*sizeof(int)) # <<<<<<<<<<<<<< @@ -6263,7 +6263,7 @@ static void __pyx_f_3_sa_7IntList__append(struct __pyx_obj_3_sa_IntList *__pyx_v } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":135 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":135 * self.size = self.size + self.increment * self.arr = realloc(self.arr, self.size*sizeof(int)) * self.arr[self.len] = val # <<<<<<<<<<<<<< @@ -6272,7 +6272,7 @@ static void __pyx_f_3_sa_7IntList__append(struct __pyx_obj_3_sa_IntList *__pyx_v */ (__pyx_v_self->arr[__pyx_v_self->len]) = __pyx_v_val; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":136 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":136 * self.arr = realloc(self.arr, self.size*sizeof(int)) * self.arr[self.len] = val * self.len = self.len + 1 # <<<<<<<<<<<<<< @@ -6295,7 +6295,7 @@ static PyObject *__pyx_pw_3_sa_7IntList_30extend(PyObject *__pyx_v_self, PyObjec return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":138 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":138 * self.len = self.len + 1 * * def extend(self, other): # <<<<<<<<<<<<<< @@ -6312,7 +6312,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_29extend(struct __pyx_obj_3_sa_IntList * int __pyx_clineno = 0; __Pyx_RefNannySetupContext("extend", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":139 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":139 * * def extend(self, other): * self._extend(other) # <<<<<<<<<<<<<< @@ -6337,7 +6337,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_29extend(struct __pyx_obj_3_sa_IntList * return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":141 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":141 * self._extend(other) * * cdef void _extend(self, IntList other): # <<<<<<<<<<<<<< @@ -6349,7 +6349,7 @@ static void __pyx_f_3_sa_7IntList__extend(struct __pyx_obj_3_sa_IntList *__pyx_v __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("_extend", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":142 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":142 * * cdef void _extend(self, IntList other): * self._extend_arr(other.arr, other.len) # <<<<<<<<<<<<<< @@ -6361,7 +6361,7 @@ static void __pyx_f_3_sa_7IntList__extend(struct __pyx_obj_3_sa_IntList *__pyx_v __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":144 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":144 * self._extend_arr(other.arr, other.len) * * cdef void _extend_arr(self, int* other, int other_len): # <<<<<<<<<<<<<< @@ -6374,7 +6374,7 @@ static void __pyx_f_3_sa_7IntList__extend_arr(struct __pyx_obj_3_sa_IntList *__p int __pyx_t_1; __Pyx_RefNannySetupContext("_extend_arr", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":145 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":145 * * cdef void _extend_arr(self, int* other, int other_len): * if self.size < self.len + other_len: # <<<<<<<<<<<<<< @@ -6384,7 +6384,7 @@ static void __pyx_f_3_sa_7IntList__extend_arr(struct __pyx_obj_3_sa_IntList *__p __pyx_t_1 = (__pyx_v_self->size < (__pyx_v_self->len + __pyx_v_other_len)); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":146 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":146 * cdef void _extend_arr(self, int* other, int other_len): * if self.size < self.len + other_len: * self.size = self.len + other_len # <<<<<<<<<<<<<< @@ -6393,7 +6393,7 @@ static void __pyx_f_3_sa_7IntList__extend_arr(struct __pyx_obj_3_sa_IntList *__p */ __pyx_v_self->size = (__pyx_v_self->len + __pyx_v_other_len); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":147 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":147 * if self.size < self.len + other_len: * self.size = self.len + other_len * self.arr = realloc(self.arr, self.size*sizeof(int)) # <<<<<<<<<<<<<< @@ -6405,7 +6405,7 @@ static void __pyx_f_3_sa_7IntList__extend_arr(struct __pyx_obj_3_sa_IntList *__p } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":148 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":148 * self.size = self.len + other_len * self.arr = realloc(self.arr, self.size*sizeof(int)) * memcpy(self.arr+self.len, other, other_len*sizeof(int)) # <<<<<<<<<<<<<< @@ -6414,7 +6414,7 @@ static void __pyx_f_3_sa_7IntList__extend_arr(struct __pyx_obj_3_sa_IntList *__p */ memcpy((__pyx_v_self->arr + __pyx_v_self->len), __pyx_v_other, (__pyx_v_other_len * (sizeof(int)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":149 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":149 * self.arr = realloc(self.arr, self.size*sizeof(int)) * memcpy(self.arr+self.len, other, other_len*sizeof(int)) * self.len = self.len + other_len # <<<<<<<<<<<<<< @@ -6426,7 +6426,7 @@ static void __pyx_f_3_sa_7IntList__extend_arr(struct __pyx_obj_3_sa_IntList *__p __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":151 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":151 * self.len = self.len + other_len * * cdef void _clear(self): # <<<<<<<<<<<<<< @@ -6438,7 +6438,7 @@ static void __pyx_f_3_sa_7IntList__clear(struct __pyx_obj_3_sa_IntList *__pyx_v_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("_clear", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":152 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":152 * * cdef void _clear(self): * free(self.arr) # <<<<<<<<<<<<<< @@ -6447,7 +6447,7 @@ static void __pyx_f_3_sa_7IntList__clear(struct __pyx_obj_3_sa_IntList *__pyx_v_ */ free(__pyx_v_self->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":153 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":153 * cdef void _clear(self): * free(self.arr) * self.len = 0 # <<<<<<<<<<<<<< @@ -6456,7 +6456,7 @@ static void __pyx_f_3_sa_7IntList__clear(struct __pyx_obj_3_sa_IntList *__pyx_v_ */ __pyx_v_self->len = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":154 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":154 * free(self.arr) * self.len = 0 * self.size = 0 # <<<<<<<<<<<<<< @@ -6465,7 +6465,7 @@ static void __pyx_f_3_sa_7IntList__clear(struct __pyx_obj_3_sa_IntList *__pyx_v_ */ __pyx_v_self->size = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":155 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":155 * self.len = 0 * self.size = 0 * self.arr = malloc(0) # <<<<<<<<<<<<<< @@ -6477,7 +6477,7 @@ static void __pyx_f_3_sa_7IntList__clear(struct __pyx_obj_3_sa_IntList *__pyx_v_ __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":157 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":157 * self.arr = malloc(0) * * cdef void write_handle(self, FILE* f): # <<<<<<<<<<<<<< @@ -6489,7 +6489,7 @@ static void __pyx_f_3_sa_7IntList_write_handle(struct __pyx_obj_3_sa_IntList *__ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("write_handle", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":158 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":158 * * cdef void write_handle(self, FILE* f): * fwrite(&(self.len), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -6498,7 +6498,7 @@ static void __pyx_f_3_sa_7IntList_write_handle(struct __pyx_obj_3_sa_IntList *__ */ fwrite((&__pyx_v_self->len), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":159 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":159 * cdef void write_handle(self, FILE* f): * fwrite(&(self.len), sizeof(int), 1, f) * fwrite(self.arr, sizeof(int), self.len, f) # <<<<<<<<<<<<<< @@ -6531,7 +6531,7 @@ static PyObject *__pyx_pw_3_sa_7IntList_32write(PyObject *__pyx_v_self, PyObject return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":161 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":161 * fwrite(self.arr, sizeof(int), self.len, f) * * def write(self, char* filename): # <<<<<<<<<<<<<< @@ -6545,7 +6545,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_31write(struct __pyx_obj_3_sa_IntList *_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("write", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":163 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":163 * def write(self, char* filename): * cdef FILE* f * f = fopen(filename, "w") # <<<<<<<<<<<<<< @@ -6554,7 +6554,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_31write(struct __pyx_obj_3_sa_IntList *_ */ __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__w); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":164 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":164 * cdef FILE* f * f = fopen(filename, "w") * self.write_handle(f) # <<<<<<<<<<<<<< @@ -6563,7 +6563,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_31write(struct __pyx_obj_3_sa_IntList *_ */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->__pyx_vtab)->write_handle(__pyx_v_self, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":165 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":165 * f = fopen(filename, "w") * self.write_handle(f) * fclose(f) # <<<<<<<<<<<<<< @@ -6578,7 +6578,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_31write(struct __pyx_obj_3_sa_IntList *_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":167 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":167 * fclose(f) * * cdef void read_handle(self, FILE* f): # <<<<<<<<<<<<<< @@ -6590,7 +6590,7 @@ static void __pyx_f_3_sa_7IntList_read_handle(struct __pyx_obj_3_sa_IntList *__p __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("read_handle", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":168 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":168 * * cdef void read_handle(self, FILE* f): * (self.arr) # <<<<<<<<<<<<<< @@ -6599,7 +6599,7 @@ static void __pyx_f_3_sa_7IntList_read_handle(struct __pyx_obj_3_sa_IntList *__p */ __pyx_v_self->arr; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":169 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":169 * cdef void read_handle(self, FILE* f): * (self.arr) * fread(&(self.len), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -6608,7 +6608,7 @@ static void __pyx_f_3_sa_7IntList_read_handle(struct __pyx_obj_3_sa_IntList *__p */ fread((&__pyx_v_self->len), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":170 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":170 * (self.arr) * fread(&(self.len), sizeof(int), 1, f) * self.arr = malloc(self.len * sizeof(int)) # <<<<<<<<<<<<<< @@ -6617,7 +6617,7 @@ static void __pyx_f_3_sa_7IntList_read_handle(struct __pyx_obj_3_sa_IntList *__p */ __pyx_v_self->arr = ((int *)malloc((__pyx_v_self->len * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":171 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":171 * fread(&(self.len), sizeof(int), 1, f) * self.arr = malloc(self.len * sizeof(int)) * self.size = self.len # <<<<<<<<<<<<<< @@ -6626,7 +6626,7 @@ static void __pyx_f_3_sa_7IntList_read_handle(struct __pyx_obj_3_sa_IntList *__p */ __pyx_v_self->size = __pyx_v_self->len; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":172 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":172 * self.arr = malloc(self.len * sizeof(int)) * self.size = self.len * fread(self.arr, sizeof(int), self.len, f) # <<<<<<<<<<<<<< @@ -6659,7 +6659,7 @@ static PyObject *__pyx_pw_3_sa_7IntList_34read(PyObject *__pyx_v_self, PyObject return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":174 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":174 * fread(self.arr, sizeof(int), self.len, f) * * def read(self, char* filename): # <<<<<<<<<<<<<< @@ -6673,7 +6673,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_33read(struct __pyx_obj_3_sa_IntList *__ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("read", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":176 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":176 * def read(self, char* filename): * cdef FILE* f * f = fopen(filename, "r") # <<<<<<<<<<<<<< @@ -6682,7 +6682,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_33read(struct __pyx_obj_3_sa_IntList *__ */ __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__r); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":177 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":177 * cdef FILE* f * f = fopen(filename, "r") * self.read_handle(f) # <<<<<<<<<<<<<< @@ -6690,7 +6690,7 @@ static PyObject *__pyx_pf_3_sa_7IntList_33read(struct __pyx_obj_3_sa_IntList *__ */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->__pyx_vtab)->read_handle(__pyx_v_self, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/int_list.pxi":178 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/int_list.pxi":178 * f = fopen(filename, "r") * self.read_handle(f) * fclose(f) # <<<<<<<<<<<<<< @@ -6717,7 +6717,7 @@ static int __pyx_pw_3_sa_9StringMap_1__cinit__(PyObject *__pyx_v_self, PyObject return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/str_map.pxi":13 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/str_map.pxi":13 * cdef int index(self, char *s) * * def __cinit__(self): # <<<<<<<<<<<<<< @@ -6730,7 +6730,7 @@ static int __pyx_pf_3_sa_9StringMap___cinit__(struct __pyx_obj_3_sa_StringMap *_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/str_map.pxi":14 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/str_map.pxi":14 * * def __cinit__(self): * self.vocab = stringmap_new() # <<<<<<<<<<<<<< @@ -6753,7 +6753,7 @@ static void __pyx_pw_3_sa_9StringMap_3__dealloc__(PyObject *__pyx_v_self) { __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/str_map.pxi":16 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/str_map.pxi":16 * self.vocab = stringmap_new() * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -6765,7 +6765,7 @@ static void __pyx_pf_3_sa_9StringMap_2__dealloc__(struct __pyx_obj_3_sa_StringMa __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/str_map.pxi":17 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/str_map.pxi":17 * * def __dealloc__(self): * stringmap_delete(self.vocab) # <<<<<<<<<<<<<< @@ -6777,7 +6777,7 @@ static void __pyx_pf_3_sa_9StringMap_2__dealloc__(struct __pyx_obj_3_sa_StringMa __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/str_map.pxi":19 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/str_map.pxi":19 * stringmap_delete(self.vocab) * * cdef char *word(self, int i): # <<<<<<<<<<<<<< @@ -6790,7 +6790,7 @@ static char *__pyx_f_3_sa_9StringMap_word(struct __pyx_obj_3_sa_StringMap *__pyx __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("word", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/str_map.pxi":20 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/str_map.pxi":20 * * cdef char *word(self, int i): * return stringmap_word(self.vocab, i) # <<<<<<<<<<<<<< @@ -6806,7 +6806,7 @@ static char *__pyx_f_3_sa_9StringMap_word(struct __pyx_obj_3_sa_StringMap *__pyx return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/str_map.pxi":22 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/str_map.pxi":22 * return stringmap_word(self.vocab, i) * * cdef int index(self, char *s): # <<<<<<<<<<<<<< @@ -6818,7 +6818,7 @@ static int __pyx_f_3_sa_9StringMap_index(struct __pyx_obj_3_sa_StringMap *__pyx_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("index", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/str_map.pxi":23 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/str_map.pxi":23 * * cdef int index(self, char *s): * return stringmap_index(self.vocab, s) # <<<<<<<<<<<<<< @@ -6846,7 +6846,7 @@ static int __pyx_pw_3_sa_9DataArray_1__cinit__(PyObject *__pyx_v_self, PyObject static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__from_binary,&__pyx_n_s__from_text,&__pyx_n_s__side,&__pyx_n_s__use_sent_id,0}; PyObject* values[4] = {0,0,0,0}; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":17 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":17 * cdef bint use_sent_id * * def __cinit__(self, from_binary=None, from_text=None, side=None, bint use_sent_id=False): # <<<<<<<<<<<<<< @@ -6938,7 +6938,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":18 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":18 * * def __cinit__(self, from_binary=None, from_text=None, side=None, bint use_sent_id=False): * self.word2id = {"END_OF_FILE":0, "END_OF_LINE":1} # <<<<<<<<<<<<<< @@ -6955,7 +6955,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ __pyx_v_self->word2id = ((PyObject *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":19 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":19 * def __cinit__(self, from_binary=None, from_text=None, side=None, bint use_sent_id=False): * self.word2id = {"END_OF_FILE":0, "END_OF_LINE":1} * self.id2word = ["END_OF_FILE", "END_OF_LINE"] # <<<<<<<<<<<<<< @@ -6976,7 +6976,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ __pyx_v_self->id2word = ((PyObject *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":20 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":20 * self.word2id = {"END_OF_FILE":0, "END_OF_LINE":1} * self.id2word = ["END_OF_FILE", "END_OF_LINE"] * self.data = IntList(1000,1000) # <<<<<<<<<<<<<< @@ -6991,7 +6991,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ __pyx_v_self->data = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":21 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":21 * self.id2word = ["END_OF_FILE", "END_OF_LINE"] * self.data = IntList(1000,1000) * self.sent_id = IntList(1000,1000) # <<<<<<<<<<<<<< @@ -7006,7 +7006,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ __pyx_v_self->sent_id = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":22 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":22 * self.data = IntList(1000,1000) * self.sent_id = IntList(1000,1000) * self.sent_index = IntList(1000,1000) # <<<<<<<<<<<<<< @@ -7021,7 +7021,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ __pyx_v_self->sent_index = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":23 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":23 * self.sent_id = IntList(1000,1000) * self.sent_index = IntList(1000,1000) * self.use_sent_id = use_sent_id # <<<<<<<<<<<<<< @@ -7030,7 +7030,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ */ __pyx_v_self->use_sent_id = __pyx_v_use_sent_id; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":24 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":24 * self.sent_index = IntList(1000,1000) * self.use_sent_id = use_sent_id * if from_binary: # <<<<<<<<<<<<<< @@ -7040,7 +7040,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_from_binary); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":25 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":25 * self.use_sent_id = use_sent_id * if from_binary: * self.read_binary(from_binary) # <<<<<<<<<<<<<< @@ -7062,7 +7062,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ goto __pyx_L3; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":26 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":26 * if from_binary: * self.read_binary(from_binary) * elif from_text: # <<<<<<<<<<<<<< @@ -7072,7 +7072,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_from_text); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 26; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":27 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":27 * self.read_binary(from_binary) * elif from_text: * if side: # <<<<<<<<<<<<<< @@ -7082,7 +7082,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_side); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":28 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":28 * elif from_text: * if side: * self.read_bitext(from_text, (0 if side == 'source' else 1)) # <<<<<<<<<<<<<< @@ -7118,7 +7118,7 @@ static int __pyx_pf_3_sa_9DataArray___cinit__(struct __pyx_obj_3_sa_DataArray *_ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":30 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":30 * self.read_bitext(from_text, (0 if side == 'source' else 1)) * else: * self.read_text(from_text) # <<<<<<<<<<<<<< @@ -7167,7 +7167,7 @@ static Py_ssize_t __pyx_pw_3_sa_9DataArray_3__len__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":32 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":32 * self.read_text(from_text) * * def __len__(self): # <<<<<<<<<<<<<< @@ -7185,7 +7185,7 @@ static Py_ssize_t __pyx_pf_3_sa_9DataArray_2__len__(struct __pyx_obj_3_sa_DataAr int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__len__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":33 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":33 * * def __len__(self): * return len(self.data) # <<<<<<<<<<<<<< @@ -7221,7 +7221,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_5get_sentence_id(PyObject *__pyx_v_sel return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":35 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":35 * return len(self.data) * * def get_sentence_id(self, i): # <<<<<<<<<<<<<< @@ -7239,7 +7239,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_4get_sentence_id(struct __pyx_obj_3_sa int __pyx_clineno = 0; __Pyx_RefNannySetupContext("get_sentence_id", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":36 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":36 * * def get_sentence_id(self, i): * return self.sent_id.arr[i] # <<<<<<<<<<<<<< @@ -7277,7 +7277,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_7get_sentence(PyObject *__pyx_v_self, return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":38 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":38 * return self.sent_id.arr[i] * * def get_sentence(self, i): # <<<<<<<<<<<<<< @@ -7302,7 +7302,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_6get_sentence(struct __pyx_obj_3_sa_Da __Pyx_RefNannySetupContext("get_sentence", 0); __Pyx_INCREF(__pyx_v_i); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":40 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":40 * def get_sentence(self, i): * cdef int j, start, stop * sent = [] # <<<<<<<<<<<<<< @@ -7314,7 +7314,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_6get_sentence(struct __pyx_obj_3_sa_Da __pyx_v_sent = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":41 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":41 * cdef int j, start, stop * sent = [] * start = self.sent_index.arr[i] # <<<<<<<<<<<<<< @@ -7324,7 +7324,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_6get_sentence(struct __pyx_obj_3_sa_Da __pyx_t_2 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_2 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_start = (__pyx_v_self->sent_index->arr[__pyx_t_2]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":42 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":42 * sent = [] * start = self.sent_index.arr[i] * stop = self.sent_index.arr[i+1] # <<<<<<<<<<<<<< @@ -7337,7 +7337,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_6get_sentence(struct __pyx_obj_3_sa_Da __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_stop = (__pyx_v_self->sent_index->arr[__pyx_t_2]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":43 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":43 * start = self.sent_index.arr[i] * stop = self.sent_index.arr[i+1] * for i from start <= i < stop: # <<<<<<<<<<<<<< @@ -7352,7 +7352,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_6get_sentence(struct __pyx_obj_3_sa_Da __pyx_v_i = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":44 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":44 * stop = self.sent_index.arr[i+1] * for i from start <= i < stop: * sent.append(self.id2word[self.data.arr[i]]) # <<<<<<<<<<<<<< @@ -7367,7 +7367,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_6get_sentence(struct __pyx_obj_3_sa_Da __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_v_i); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":43 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":43 * start = self.sent_index.arr[i] * stop = self.sent_index.arr[i+1] * for i from start <= i < stop: # <<<<<<<<<<<<<< @@ -7380,7 +7380,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_6get_sentence(struct __pyx_obj_3_sa_Da __pyx_v_i = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":45 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":45 * for i from start <= i < stop: * sent.append(self.id2word[self.data.arr[i]]) * return sent # <<<<<<<<<<<<<< @@ -7417,7 +7417,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_9get_id(PyObject *__pyx_v_self, PyObje return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":47 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":47 * return sent * * def get_id(self, word): # <<<<<<<<<<<<<< @@ -7437,7 +7437,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_8get_id(struct __pyx_obj_3_sa_DataArra int __pyx_clineno = 0; __Pyx_RefNannySetupContext("get_id", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":48 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":48 * * def get_id(self, word): * if not word in self.word2id: # <<<<<<<<<<<<<< @@ -7448,7 +7448,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_8get_id(struct __pyx_obj_3_sa_DataArra __pyx_t_2 = (!__pyx_t_1); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":49 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":49 * def get_id(self, word): * if not word in self.word2id: * self.word2id[word] = len(self.id2word) # <<<<<<<<<<<<<< @@ -7464,7 +7464,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_8get_id(struct __pyx_obj_3_sa_DataArra if (PyObject_SetItem(__pyx_v_self->word2id, __pyx_v_word, __pyx_t_3) < 0) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":50 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":50 * if not word in self.word2id: * self.word2id[word] = len(self.id2word) * self.id2word.append(word) # <<<<<<<<<<<<<< @@ -7478,7 +7478,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_8get_id(struct __pyx_obj_3_sa_DataArra } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":51 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":51 * self.word2id[word] = len(self.id2word) * self.id2word.append(word) * return self.word2id[word] # <<<<<<<<<<<<<< @@ -7515,7 +7515,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_11__getitem__(PyObject *__pyx_v_self, return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":53 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":53 * return self.word2id[word] * * def __getitem__(self, loc): # <<<<<<<<<<<<<< @@ -7533,7 +7533,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_10__getitem__(struct __pyx_obj_3_sa_Da int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__getitem__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":54 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":54 * * def __getitem__(self, loc): * return self.id2word[self.data.arr[loc]] # <<<<<<<<<<<<<< @@ -7571,7 +7571,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_13get_sentence_bounds(PyObject *__pyx_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":56 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":56 * return self.id2word[self.data.arr[loc]] * * def get_sentence_bounds(self, loc): # <<<<<<<<<<<<<< @@ -7592,7 +7592,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_12get_sentence_bounds(struct __pyx_obj int __pyx_clineno = 0; __Pyx_RefNannySetupContext("get_sentence_bounds", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":57 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":57 * * def get_sentence_bounds(self, loc): * cdef int sid = self.sent_id.arr[loc] # <<<<<<<<<<<<<< @@ -7602,7 +7602,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_12get_sentence_bounds(struct __pyx_obj __pyx_t_1 = __Pyx_PyIndex_AsSsize_t(__pyx_v_loc); if (unlikely((__pyx_t_1 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_sid = (__pyx_v_self->sent_id->arr[__pyx_t_1]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":58 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":58 * def get_sentence_bounds(self, loc): * cdef int sid = self.sent_id.arr[loc] * return (self.sent_index.arr[sid], self.sent_index.arr[sid+1]) # <<<<<<<<<<<<<< @@ -7661,7 +7661,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_15write_text(PyObject *__pyx_v_self, P return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":60 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":60 * return (self.sent_index.arr[sid], self.sent_index.arr[sid+1]) * * def write_text(self, char* filename): # <<<<<<<<<<<<<< @@ -7693,7 +7693,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_text", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":61 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":61 * * def write_text(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -7733,7 +7733,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat __pyx_v_f = __pyx_t_4; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":62 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":62 * def write_text(self, char* filename): * with open(filename, "w") as f: * for w_id in self.data: # <<<<<<<<<<<<<< @@ -7778,7 +7778,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat __pyx_v_w_id = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":63 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":63 * with open(filename, "w") as f: * for w_id in self.data: * if w_id > 1: # <<<<<<<<<<<<<< @@ -7790,7 +7790,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_10) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":64 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":64 * for w_id in self.data: * if w_id > 1: * f.write("%s " % self.get_word(w_id)) # <<<<<<<<<<<<<< @@ -7827,7 +7827,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat } __pyx_L18:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":65 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":65 * if w_id > 1: * f.write("%s " % self.get_word(w_id)) * if w_id == 1: # <<<<<<<<<<<<<< @@ -7839,7 +7839,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; if (__pyx_t_10) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":66 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":66 * f.write("%s " % self.get_word(w_id)) * if w_id == 1: * f.write("\n") # <<<<<<<<<<<<<< @@ -7869,7 +7869,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_14write_text(struct __pyx_obj_3_sa_Dat __Pyx_XDECREF(__pyx_t_12); __pyx_t_12 = 0; __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":61 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":61 * * def write_text(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -7988,7 +7988,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_17read_text(PyObject *__pyx_v_self, Py return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":68 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":68 * f.write("\n") * * def read_text(self, char* filename): # <<<<<<<<<<<<<< @@ -8016,7 +8016,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_Data int __pyx_clineno = 0; __Pyx_RefNannySetupContext("read_text", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":69 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":69 * * def read_text(self, char* filename): * with gzip_or_text(filename) as fp: # <<<<<<<<<<<<<< @@ -8056,7 +8056,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_Data __pyx_v_fp = __pyx_t_1; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":70 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":70 * def read_text(self, char* filename): * with gzip_or_text(filename) as fp: * self.read_text_data(fp) # <<<<<<<<<<<<<< @@ -8085,7 +8085,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_16read_text(struct __pyx_obj_3_sa_Data __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":69 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":69 * * def read_text(self, char* filename): * with gzip_or_text(filename) as fp: # <<<<<<<<<<<<<< @@ -8238,7 +8238,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_19read_bitext(PyObject *__pyx_v_self, } static PyObject *__pyx_gb_3_sa_9DataArray_11read_bitext_2generator6(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":74 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":74 * def read_bitext(self, char* filename, int side): * with gzip_or_text(filename) as fp: * data = (line.split(' ||| ')[side] for line in fp) # <<<<<<<<<<<<<< @@ -8385,7 +8385,7 @@ static PyObject *__pyx_gb_3_sa_9DataArray_11read_bitext_2generator6(__pyx_Genera return NULL; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":72 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":72 * self.read_text_data(fp) * * def read_bitext(self, char* filename, int side): # <<<<<<<<<<<<<< @@ -8421,7 +8421,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_18read_bitext(struct __pyx_obj_3_sa_Da __Pyx_GOTREF(__pyx_cur_scope); __pyx_cur_scope->__pyx_v_side = __pyx_v_side; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":73 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":73 * * def read_bitext(self, char* filename, int side): * with gzip_or_text(filename) as fp: # <<<<<<<<<<<<<< @@ -8462,7 +8462,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_18read_bitext(struct __pyx_obj_3_sa_Da __pyx_cur_scope->__pyx_v_fp = __pyx_t_1; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":74 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":74 * def read_bitext(self, char* filename, int side): * with gzip_or_text(filename) as fp: * data = (line.split(' ||| ')[side] for line in fp) # <<<<<<<<<<<<<< @@ -8474,7 +8474,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_18read_bitext(struct __pyx_obj_3_sa_Da __pyx_v_data = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":75 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":75 * with gzip_or_text(filename) as fp: * data = (line.split(' ||| ')[side] for line in fp) * self.read_text_data(data) # <<<<<<<<<<<<<< @@ -8503,7 +8503,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_18read_bitext(struct __pyx_obj_3_sa_Da __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":73 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":73 * * def read_bitext(self, char* filename, int side): * with gzip_or_text(filename) as fp: # <<<<<<<<<<<<<< @@ -8611,7 +8611,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_21read_text_data(PyObject *__pyx_v_sel return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":77 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":77 * self.read_text_data(data) * * def read_text_data(self, data): # <<<<<<<<<<<<<< @@ -8641,7 +8641,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa int __pyx_clineno = 0; __Pyx_RefNannySetupContext("read_text_data", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":78 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":78 * * def read_text_data(self, data): * cdef int word_count = 0 # <<<<<<<<<<<<<< @@ -8650,7 +8650,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa */ __pyx_v_word_count = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":79 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":79 * def read_text_data(self, data): * cdef int word_count = 0 * for line_num, line in enumerate(data): # <<<<<<<<<<<<<< @@ -8705,7 +8705,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa __pyx_t_1 = __pyx_t_5; __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":80 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":80 * cdef int word_count = 0 * for line_num, line in enumerate(data): * self.sent_index.append(word_count) # <<<<<<<<<<<<<< @@ -8719,7 +8719,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":81 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":81 * for line_num, line in enumerate(data): * self.sent_index.append(word_count) * for word in line.split(): # <<<<<<<<<<<<<< @@ -8770,7 +8770,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa __pyx_v_word = __pyx_t_5; __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":82 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":82 * self.sent_index.append(word_count) * for word in line.split(): * self.data.append(self.get_id(word)) # <<<<<<<<<<<<<< @@ -8793,7 +8793,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":83 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":83 * for word in line.split(): * self.data.append(self.get_id(word)) * if self.use_sent_id: # <<<<<<<<<<<<<< @@ -8802,7 +8802,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa */ if (__pyx_v_self->use_sent_id) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":84 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":84 * self.data.append(self.get_id(word)) * if self.use_sent_id: * self.sent_id.append(line_num) # <<<<<<<<<<<<<< @@ -8816,7 +8816,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa } __pyx_L7:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":85 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":85 * if self.use_sent_id: * self.sent_id.append(line_num) * word_count = word_count + 1 # <<<<<<<<<<<<<< @@ -8827,7 +8827,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":86 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":86 * self.sent_id.append(line_num) * word_count = word_count + 1 * self.data.append(1) # <<<<<<<<<<<<<< @@ -8838,7 +8838,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":87 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":87 * word_count = word_count + 1 * self.data.append(1) * if self.use_sent_id: # <<<<<<<<<<<<<< @@ -8847,7 +8847,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa */ if (__pyx_v_self->use_sent_id) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":88 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":88 * self.data.append(1) * if self.use_sent_id: * self.sent_id.append(line_num) # <<<<<<<<<<<<<< @@ -8861,7 +8861,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa } __pyx_L8:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":89 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":89 * if self.use_sent_id: * self.sent_id.append(line_num) * word_count = word_count + 1 # <<<<<<<<<<<<<< @@ -8873,7 +8873,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":90 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":90 * self.sent_id.append(line_num) * word_count = word_count + 1 * self.data.append(0) # <<<<<<<<<<<<<< @@ -8884,7 +8884,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_20read_text_data(struct __pyx_obj_3_sa __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":91 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":91 * word_count = word_count + 1 * self.data.append(0) * self.sent_index.append(word_count) # <<<<<<<<<<<<<< @@ -8939,7 +8939,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_23read_binary(PyObject *__pyx_v_self, return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":94 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":94 * * * def read_binary(self, char* filename): # <<<<<<<<<<<<<< @@ -8953,7 +8953,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22read_binary(struct __pyx_obj_3_sa_Da __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("read_binary", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":96 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":96 * def read_binary(self, char* filename): * cdef FILE* f * f = fopen(filename, "r") # <<<<<<<<<<<<<< @@ -8962,7 +8962,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22read_binary(struct __pyx_obj_3_sa_Da */ __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__r); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":97 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":97 * cdef FILE* f * f = fopen(filename, "r") * self.read_handle(f) # <<<<<<<<<<<<<< @@ -8971,7 +8971,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22read_binary(struct __pyx_obj_3_sa_Da */ ((struct __pyx_vtabstruct_3_sa_DataArray *)__pyx_v_self->__pyx_vtab)->read_handle(__pyx_v_self, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":98 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":98 * f = fopen(filename, "r") * self.read_handle(f) * fclose(f) # <<<<<<<<<<<<<< @@ -8986,7 +8986,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_22read_binary(struct __pyx_obj_3_sa_Da return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":100 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":100 * fclose(f) * * cdef void read_handle(self, FILE* f): # <<<<<<<<<<<<<< @@ -9011,7 +9011,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray int __pyx_clineno = 0; __Pyx_RefNannySetupContext("read_handle", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":105 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":105 * cdef char* word * * self.data.read_handle(f) # <<<<<<<<<<<<<< @@ -9020,7 +9020,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->data->__pyx_vtab)->read_handle(__pyx_v_self->data, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":106 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":106 * * self.data.read_handle(f) * self.sent_index.read_handle(f) # <<<<<<<<<<<<<< @@ -9029,7 +9029,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->sent_index->__pyx_vtab)->read_handle(__pyx_v_self->sent_index, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":107 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":107 * self.data.read_handle(f) * self.sent_index.read_handle(f) * self.sent_id.read_handle(f) # <<<<<<<<<<<<<< @@ -9038,7 +9038,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->sent_id->__pyx_vtab)->read_handle(__pyx_v_self->sent_id, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":108 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":108 * self.sent_index.read_handle(f) * self.sent_id.read_handle(f) * fread(&(num_words), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -9047,7 +9047,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ fread((&__pyx_v_num_words), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":109 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":109 * self.sent_id.read_handle(f) * fread(&(num_words), sizeof(int), 1, f) * for i in range(num_words): # <<<<<<<<<<<<<< @@ -9058,7 +9058,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_i = __pyx_t_2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":110 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":110 * fread(&(num_words), sizeof(int), 1, f) * for i in range(num_words): * fread(&(word_len), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -9067,7 +9067,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ fread((&__pyx_v_word_len), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":111 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":111 * for i in range(num_words): * fread(&(word_len), sizeof(int), 1, f) * word = malloc (word_len * sizeof(char)) # <<<<<<<<<<<<<< @@ -9076,7 +9076,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ __pyx_v_word = ((char *)malloc((__pyx_v_word_len * (sizeof(char))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":112 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":112 * fread(&(word_len), sizeof(int), 1, f) * word = malloc (word_len * sizeof(char)) * fread(word, sizeof(char), word_len, f) # <<<<<<<<<<<<<< @@ -9085,7 +9085,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray */ fread(__pyx_v_word, (sizeof(char)), __pyx_v_word_len, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":113 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":113 * word = malloc (word_len * sizeof(char)) * fread(word, sizeof(char), word_len, f) * self.word2id[word] = len(self.id2word) # <<<<<<<<<<<<<< @@ -9104,7 +9104,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":114 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":114 * fread(word, sizeof(char), word_len, f) * self.word2id[word] = len(self.id2word) * self.id2word.append(word) # <<<<<<<<<<<<<< @@ -9118,7 +9118,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":115 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":115 * self.word2id[word] = len(self.id2word) * self.id2word.append(word) * free(word) # <<<<<<<<<<<<<< @@ -9128,7 +9128,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray free(__pyx_v_word); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":116 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":116 * self.id2word.append(word) * free(word) * if len(self.sent_id) == 0: # <<<<<<<<<<<<<< @@ -9142,7 +9142,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray __pyx_t_6 = (__pyx_t_4 == 0); if (__pyx_t_6) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":117 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":117 * free(word) * if len(self.sent_id) == 0: * self.use_sent_id = False # <<<<<<<<<<<<<< @@ -9154,7 +9154,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":119 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":119 * self.use_sent_id = False * else: * self.use_sent_id = True # <<<<<<<<<<<<<< @@ -9174,7 +9174,7 @@ static void __pyx_f_3_sa_9DataArray_read_handle(struct __pyx_obj_3_sa_DataArray __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":121 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":121 * self.use_sent_id = True * * cdef void write_handle(self, FILE* f): # <<<<<<<<<<<<<< @@ -9198,7 +9198,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_handle", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":125 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":125 * cdef int num_words * * self.data.write_handle(f) # <<<<<<<<<<<<<< @@ -9207,7 +9207,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->data->__pyx_vtab)->write_handle(__pyx_v_self->data, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":126 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":126 * * self.data.write_handle(f) * self.sent_index.write_handle(f) # <<<<<<<<<<<<<< @@ -9216,7 +9216,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->sent_index->__pyx_vtab)->write_handle(__pyx_v_self->sent_index, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":127 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":127 * self.data.write_handle(f) * self.sent_index.write_handle(f) * self.sent_id.write_handle(f) # <<<<<<<<<<<<<< @@ -9225,7 +9225,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->sent_id->__pyx_vtab)->write_handle(__pyx_v_self->sent_id, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":128 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":128 * self.sent_index.write_handle(f) * self.sent_id.write_handle(f) * num_words = len(self.id2word) - 2 # <<<<<<<<<<<<<< @@ -9238,7 +9238,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_num_words = (__pyx_t_2 - 2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":129 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":129 * self.sent_id.write_handle(f) * num_words = len(self.id2word) - 2 * fwrite(&(num_words), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -9247,7 +9247,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray */ fwrite((&__pyx_v_num_words), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":130 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":130 * num_words = len(self.id2word) - 2 * fwrite(&(num_words), sizeof(int), 1, f) * for word in self.id2word[2:]: # <<<<<<<<<<<<<< @@ -9295,7 +9295,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray __pyx_v_word = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":131 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":131 * fwrite(&(num_words), sizeof(int), 1, f) * for word in self.id2word[2:]: * word_len = len(word) + 1 # <<<<<<<<<<<<<< @@ -9305,7 +9305,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray __pyx_t_5 = PyObject_Length(__pyx_v_word); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_word_len = (__pyx_t_5 + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":132 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":132 * for word in self.id2word[2:]: * word_len = len(word) + 1 * fwrite(&(word_len), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -9314,7 +9314,7 @@ static void __pyx_f_3_sa_9DataArray_write_handle(struct __pyx_obj_3_sa_DataArray */ fwrite((&__pyx_v_word_len), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":133 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":133 * word_len = len(word) + 1 * fwrite(&(word_len), sizeof(int), 1, f) * fwrite(word, sizeof(char), word_len, f) # <<<<<<<<<<<<<< @@ -9357,7 +9357,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_25write_binary(PyObject *__pyx_v_self, return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":135 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":135 * fwrite(word, sizeof(char), word_len, f) * * def write_binary(self, char* filename): # <<<<<<<<<<<<<< @@ -9371,7 +9371,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_binary(struct __pyx_obj_3_sa_D __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("write_binary", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":137 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":137 * def write_binary(self, char* filename): * cdef FILE* f * f = fopen(filename, "w") # <<<<<<<<<<<<<< @@ -9380,7 +9380,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_binary(struct __pyx_obj_3_sa_D */ __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__w); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":138 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":138 * cdef FILE* f * f = fopen(filename, "w") * self.write_handle(f) # <<<<<<<<<<<<<< @@ -9389,7 +9389,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_24write_binary(struct __pyx_obj_3_sa_D */ ((struct __pyx_vtabstruct_3_sa_DataArray *)__pyx_v_self->__pyx_vtab)->write_handle(__pyx_v_self, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":139 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":139 * f = fopen(filename, "w") * self.write_handle(f) * fclose(f) # <<<<<<<<<<<<<< @@ -9415,7 +9415,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_27write_enhanced_handle(PyObject *__py return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":141 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":141 * fclose(f) * * def write_enhanced_handle(self, f): # <<<<<<<<<<<<<< @@ -9439,7 +9439,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_26write_enhanced_handle(struct __pyx_o int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_enhanced_handle", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":142 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":142 * * def write_enhanced_handle(self, f): * for i in self.data: # <<<<<<<<<<<<<< @@ -9484,7 +9484,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_26write_enhanced_handle(struct __pyx_o __pyx_v_i = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":143 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":143 * def write_enhanced_handle(self, f): * for i in self.data: * f.write("%d " %i) # <<<<<<<<<<<<<< @@ -9508,7 +9508,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_26write_enhanced_handle(struct __pyx_o } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":144 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":144 * for i in self.data: * f.write("%d " %i) * f.write("\n") # <<<<<<<<<<<<<< @@ -9522,7 +9522,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_26write_enhanced_handle(struct __pyx_o __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":145 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":145 * f.write("%d " %i) * f.write("\n") * for i in self.sent_index: # <<<<<<<<<<<<<< @@ -9567,7 +9567,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_26write_enhanced_handle(struct __pyx_o __pyx_v_i = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":146 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":146 * f.write("\n") * for i in self.sent_index: * f.write("%d " %i) # <<<<<<<<<<<<<< @@ -9591,7 +9591,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_26write_enhanced_handle(struct __pyx_o } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":147 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":147 * for i in self.sent_index: * f.write("%d " %i) * f.write("\n") # <<<<<<<<<<<<<< @@ -9605,7 +9605,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_26write_enhanced_handle(struct __pyx_o __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":148 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":148 * f.write("%d " %i) * f.write("\n") * for i in self.sent_id: # <<<<<<<<<<<<<< @@ -9650,7 +9650,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_26write_enhanced_handle(struct __pyx_o __pyx_v_i = __pyx_t_5; __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":149 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":149 * f.write("\n") * for i in self.sent_id: * f.write("%d " %i) # <<<<<<<<<<<<<< @@ -9674,7 +9674,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_26write_enhanced_handle(struct __pyx_o } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":150 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":150 * for i in self.sent_id: * f.write("%d " %i) * f.write("\n") # <<<<<<<<<<<<<< @@ -9688,7 +9688,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_26write_enhanced_handle(struct __pyx_o __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":151 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":151 * f.write("%d " %i) * f.write("\n") * for word in self.id2word: # <<<<<<<<<<<<<< @@ -9733,7 +9733,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_26write_enhanced_handle(struct __pyx_o __pyx_v_word = __pyx_t_6; __pyx_t_6 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":152 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":152 * f.write("\n") * for word in self.id2word: * f.write("%s %d " % (word, self.word2id[word])) # <<<<<<<<<<<<<< @@ -9768,7 +9768,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_26write_enhanced_handle(struct __pyx_o } __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":153 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":153 * for word in self.id2word: * f.write("%s %d " % (word, self.word2id[word])) * f.write("\n") # <<<<<<<<<<<<<< @@ -9820,7 +9820,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_29write_enhanced(PyObject *__pyx_v_sel return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":155 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":155 * f.write("\n") * * def write_enhanced(self, char* filename): # <<<<<<<<<<<<<< @@ -9848,7 +9848,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_28write_enhanced(struct __pyx_obj_3_sa int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_enhanced", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":156 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":156 * * def write_enhanced(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -9887,7 +9887,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_28write_enhanced(struct __pyx_obj_3_sa __pyx_v_f = __pyx_t_4; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":157 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":157 * def write_enhanced(self, char* filename): * with open(filename, "w") as f: * self.write_enhanced_handle(self, f) # <<<<<<<<<<<<<< @@ -9917,7 +9917,7 @@ static PyObject *__pyx_pf_3_sa_9DataArray_28write_enhanced(struct __pyx_obj_3_sa __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":156 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":156 * * def write_enhanced(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -10023,7 +10023,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_7word2id_1__get__(PyObject *__pyx_v_se return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":10 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":10 * * cdef class DataArray: * cdef public word2id # <<<<<<<<<<<<<< @@ -10110,7 +10110,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_7id2word_1__get__(PyObject *__pyx_v_se return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":11 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":11 * cdef class DataArray: * cdef public word2id * cdef public id2word # <<<<<<<<<<<<<< @@ -10197,7 +10197,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_4data_1__get__(PyObject *__pyx_v_self) return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":12 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":12 * cdef public word2id * cdef public id2word * cdef public IntList data # <<<<<<<<<<<<<< @@ -10293,7 +10293,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_7sent_id_1__get__(PyObject *__pyx_v_se return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":13 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":13 * cdef public id2word * cdef public IntList data * cdef public IntList sent_id # <<<<<<<<<<<<<< @@ -10389,7 +10389,7 @@ static PyObject *__pyx_pw_3_sa_9DataArray_10sent_index_1__get__(PyObject *__pyx_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":14 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":14 * cdef public IntList data * cdef public IntList sent_id * cdef public IntList sent_index # <<<<<<<<<<<<<< @@ -10474,7 +10474,7 @@ static int __pyx_pf_3_sa_9DataArray_10sent_index_4__del__(struct __pyx_obj_3_sa_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":12 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":12 * cdef IntList sent_index * * cdef int link(self, int i, int j): # <<<<<<<<<<<<<< @@ -10487,7 +10487,7 @@ static int __pyx_f_3_sa_9Alignment_link(CYTHON_UNUSED struct __pyx_obj_3_sa_Alig __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("link", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":14 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":14 * cdef int link(self, int i, int j): * """Integerizes an alignment link pair""" * return i*65536 + j # <<<<<<<<<<<<<< @@ -10515,7 +10515,7 @@ static PyObject *__pyx_pw_3_sa_9Alignment_1unlink(PyObject *__pyx_v_self, PyObje return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":16 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":16 * return i*65536 + j * * def unlink(self, link): # <<<<<<<<<<<<<< @@ -10534,7 +10534,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_unlink(CYTHON_UNUSED struct __pyx_obj_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("unlink", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":18 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":18 * def unlink(self, link): * """De-integerizes an alignment link pair""" * return (link/65536, link%65536) # <<<<<<<<<<<<<< @@ -10572,7 +10572,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_unlink(CYTHON_UNUSED struct __pyx_obj_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":20 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":20 * return (link/65536, link%65536) * * cdef _unlink(self, int link, int* f, int* e): # <<<<<<<<<<<<<< @@ -10585,7 +10585,7 @@ static PyObject *__pyx_f_3_sa_9Alignment__unlink(CYTHON_UNUSED struct __pyx_obj_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("_unlink", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":21 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":21 * * cdef _unlink(self, int link, int* f, int* e): * f[0] = link/65536 # <<<<<<<<<<<<<< @@ -10594,7 +10594,7 @@ static PyObject *__pyx_f_3_sa_9Alignment__unlink(CYTHON_UNUSED struct __pyx_obj_ */ (__pyx_v_f[0]) = __Pyx_div_long(__pyx_v_link, 65536); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":22 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":22 * cdef _unlink(self, int link, int* f, int* e): * f[0] = link/65536 * e[0] = link%65536 # <<<<<<<<<<<<<< @@ -10630,7 +10630,7 @@ static PyObject *__pyx_pw_3_sa_9Alignment_3get_sent_links(PyObject *__pyx_v_self return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":24 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":24 * e[0] = link%65536 * * def get_sent_links(self, int sent_id): # <<<<<<<<<<<<<< @@ -10650,7 +10650,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_2get_sent_links(struct __pyx_obj_3_sa_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("get_sent_links", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":28 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":28 * cdef int* arr * cdef int arr_len * sent_links = IntList() # <<<<<<<<<<<<<< @@ -10662,7 +10662,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_2get_sent_links(struct __pyx_obj_3_sa_ __pyx_v_sent_links = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":29 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":29 * cdef int arr_len * sent_links = IntList() * arr = self._get_sent_links(sent_id, &arr_len) # <<<<<<<<<<<<<< @@ -10671,7 +10671,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_2get_sent_links(struct __pyx_obj_3_sa_ */ __pyx_v_arr = ((struct __pyx_vtabstruct_3_sa_Alignment *)__pyx_v_self->__pyx_vtab)->_get_sent_links(__pyx_v_self, __pyx_v_sent_id, (&__pyx_v_arr_len)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":30 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":30 * sent_links = IntList() * arr = self._get_sent_links(sent_id, &arr_len) * sent_links._extend_arr(arr, arr_len*2) # <<<<<<<<<<<<<< @@ -10680,7 +10680,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_2get_sent_links(struct __pyx_obj_3_sa_ */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_sent_links->__pyx_vtab)->_extend_arr(__pyx_v_sent_links, __pyx_v_arr, (__pyx_v_arr_len * 2)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":31 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":31 * arr = self._get_sent_links(sent_id, &arr_len) * sent_links._extend_arr(arr, arr_len*2) * free(arr) # <<<<<<<<<<<<<< @@ -10689,7 +10689,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_2get_sent_links(struct __pyx_obj_3_sa_ */ free(__pyx_v_arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":32 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":32 * sent_links._extend_arr(arr, arr_len*2) * free(arr) * return sent_links # <<<<<<<<<<<<<< @@ -10714,7 +10714,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_2get_sent_links(struct __pyx_obj_3_sa_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":34 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":34 * return sent_links * * cdef int* _get_sent_links(self, int sent_id, int* num_links): # <<<<<<<<<<<<<< @@ -10736,7 +10736,7 @@ static int *__pyx_f_3_sa_9Alignment__get_sent_links(struct __pyx_obj_3_sa_Alignm int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_get_sent_links", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":37 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":37 * cdef int* sent_links * cdef int i, start, end * start = self.sent_index.arr[sent_id] # <<<<<<<<<<<<<< @@ -10745,7 +10745,7 @@ static int *__pyx_f_3_sa_9Alignment__get_sent_links(struct __pyx_obj_3_sa_Alignm */ __pyx_v_start = (__pyx_v_self->sent_index->arr[__pyx_v_sent_id]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":38 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":38 * cdef int i, start, end * start = self.sent_index.arr[sent_id] * end = self.sent_index.arr[sent_id+1] # <<<<<<<<<<<<<< @@ -10754,7 +10754,7 @@ static int *__pyx_f_3_sa_9Alignment__get_sent_links(struct __pyx_obj_3_sa_Alignm */ __pyx_v_end = (__pyx_v_self->sent_index->arr[(__pyx_v_sent_id + 1)]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":39 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":39 * start = self.sent_index.arr[sent_id] * end = self.sent_index.arr[sent_id+1] * num_links[0] = end - start # <<<<<<<<<<<<<< @@ -10763,7 +10763,7 @@ static int *__pyx_f_3_sa_9Alignment__get_sent_links(struct __pyx_obj_3_sa_Alignm */ (__pyx_v_num_links[0]) = (__pyx_v_end - __pyx_v_start); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":40 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":40 * end = self.sent_index.arr[sent_id+1] * num_links[0] = end - start * sent_links = malloc(2*num_links[0]*sizeof(int)) # <<<<<<<<<<<<<< @@ -10772,7 +10772,7 @@ static int *__pyx_f_3_sa_9Alignment__get_sent_links(struct __pyx_obj_3_sa_Alignm */ __pyx_v_sent_links = ((int *)malloc(((2 * (__pyx_v_num_links[0])) * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":41 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":41 * num_links[0] = end - start * sent_links = malloc(2*num_links[0]*sizeof(int)) * for i from 0 <= i < num_links[0]: # <<<<<<<<<<<<<< @@ -10782,7 +10782,7 @@ static int *__pyx_f_3_sa_9Alignment__get_sent_links(struct __pyx_obj_3_sa_Alignm __pyx_t_1 = (__pyx_v_num_links[0]); for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_1; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":42 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":42 * sent_links = malloc(2*num_links[0]*sizeof(int)) * for i from 0 <= i < num_links[0]: * self._unlink(self.links.arr[start + i], sent_links + (2*i), sent_links + (2*i) + 1) # <<<<<<<<<<<<<< @@ -10794,7 +10794,7 @@ static int *__pyx_f_3_sa_9Alignment__get_sent_links(struct __pyx_obj_3_sa_Alignm __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":43 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":43 * for i from 0 <= i < num_links[0]: * self._unlink(self.links.arr[start + i], sent_links + (2*i), sent_links + (2*i) + 1) * return sent_links # <<<<<<<<<<<<<< @@ -10827,7 +10827,7 @@ static int __pyx_pw_3_sa_9Alignment_5__cinit__(PyObject *__pyx_v_self, PyObject static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__from_binary,&__pyx_n_s__from_text,0}; PyObject* values[2] = {0,0}; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":45 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":45 * return sent_links * * def __cinit__(self, from_binary=None, from_text=None): # <<<<<<<<<<<<<< @@ -10897,7 +10897,7 @@ static int __pyx_pf_3_sa_9Alignment_4__cinit__(struct __pyx_obj_3_sa_Alignment * int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":46 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":46 * * def __cinit__(self, from_binary=None, from_text=None): * self.links = IntList(1000,1000) # <<<<<<<<<<<<<< @@ -10912,7 +10912,7 @@ static int __pyx_pf_3_sa_9Alignment_4__cinit__(struct __pyx_obj_3_sa_Alignment * __pyx_v_self->links = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":47 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":47 * def __cinit__(self, from_binary=None, from_text=None): * self.links = IntList(1000,1000) * self.sent_index = IntList(1000,1000) # <<<<<<<<<<<<<< @@ -10927,7 +10927,7 @@ static int __pyx_pf_3_sa_9Alignment_4__cinit__(struct __pyx_obj_3_sa_Alignment * __pyx_v_self->sent_index = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":48 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":48 * self.links = IntList(1000,1000) * self.sent_index = IntList(1000,1000) * if from_binary: # <<<<<<<<<<<<<< @@ -10937,7 +10937,7 @@ static int __pyx_pf_3_sa_9Alignment_4__cinit__(struct __pyx_obj_3_sa_Alignment * __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_from_binary); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":49 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":49 * self.sent_index = IntList(1000,1000) * if from_binary: * self.read_binary(from_binary) # <<<<<<<<<<<<<< @@ -10959,7 +10959,7 @@ static int __pyx_pf_3_sa_9Alignment_4__cinit__(struct __pyx_obj_3_sa_Alignment * goto __pyx_L3; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":50 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":50 * if from_binary: * self.read_binary(from_binary) * elif from_text: # <<<<<<<<<<<<<< @@ -10969,7 +10969,7 @@ static int __pyx_pf_3_sa_9Alignment_4__cinit__(struct __pyx_obj_3_sa_Alignment * __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_from_text); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":51 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":51 * self.read_binary(from_binary) * elif from_text: * self.read_text(from_text) # <<<<<<<<<<<<<< @@ -11026,7 +11026,7 @@ static PyObject *__pyx_pw_3_sa_9Alignment_7read_text(PyObject *__pyx_v_self, PyO return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":53 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":53 * self.read_text(from_text) * * def read_text(self, char* filename): # <<<<<<<<<<<<<< @@ -11068,7 +11068,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_6read_text(struct __pyx_obj_3_sa_Align int __pyx_clineno = 0; __Pyx_RefNannySetupContext("read_text", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":54 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":54 * * def read_text(self, char* filename): * with gzip_or_text(filename) as f: # <<<<<<<<<<<<<< @@ -11108,7 +11108,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_6read_text(struct __pyx_obj_3_sa_Align __pyx_v_f = __pyx_t_1; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":55 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":55 * def read_text(self, char* filename): * with gzip_or_text(filename) as f: * for line in f: # <<<<<<<<<<<<<< @@ -11153,7 +11153,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_6read_text(struct __pyx_obj_3_sa_Align __pyx_v_line = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":56 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":56 * with gzip_or_text(filename) as f: * for line in f: * self.sent_index.append(len(self.links)) # <<<<<<<<<<<<<< @@ -11171,7 +11171,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_6read_text(struct __pyx_obj_3_sa_Align __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":57 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":57 * for line in f: * self.sent_index.append(len(self.links)) * pairs = line.split() # <<<<<<<<<<<<<< @@ -11187,7 +11187,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_6read_text(struct __pyx_obj_3_sa_Align __pyx_v_pairs = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":58 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":58 * self.sent_index.append(len(self.links)) * pairs = line.split() * for pair in pairs: # <<<<<<<<<<<<<< @@ -11232,7 +11232,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_6read_text(struct __pyx_obj_3_sa_Align __pyx_v_pair = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":59 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":59 * pairs = line.split() * for pair in pairs: * (i, j) = map(int, pair.split('-')) # <<<<<<<<<<<<<< @@ -11311,7 +11311,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_6read_text(struct __pyx_obj_3_sa_Align __pyx_v_j = __pyx_t_13; __pyx_t_13 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":60 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":60 * for pair in pairs: * (i, j) = map(int, pair.split('-')) * self.links.append(self.link(i, j)) # <<<<<<<<<<<<<< @@ -11331,7 +11331,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_6read_text(struct __pyx_obj_3_sa_Align } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":61 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":61 * (i, j) = map(int, pair.split('-')) * self.links.append(self.link(i, j)) * self.sent_index.append(len(self.links)) # <<<<<<<<<<<<<< @@ -11361,7 +11361,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_6read_text(struct __pyx_obj_3_sa_Align __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":54 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":54 * * def read_text(self, char* filename): * with gzip_or_text(filename) as f: # <<<<<<<<<<<<<< @@ -11485,7 +11485,7 @@ static PyObject *__pyx_pw_3_sa_9Alignment_9read_binary(PyObject *__pyx_v_self, P return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":63 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":63 * self.sent_index.append(len(self.links)) * * def read_binary(self, char* filename): # <<<<<<<<<<<<<< @@ -11499,7 +11499,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_8read_binary(struct __pyx_obj_3_sa_Ali __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("read_binary", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":65 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":65 * def read_binary(self, char* filename): * cdef FILE* f * f = fopen(filename, "r") # <<<<<<<<<<<<<< @@ -11508,7 +11508,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_8read_binary(struct __pyx_obj_3_sa_Ali */ __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__r); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":66 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":66 * cdef FILE* f * f = fopen(filename, "r") * self.links.read_handle(f) # <<<<<<<<<<<<<< @@ -11517,7 +11517,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_8read_binary(struct __pyx_obj_3_sa_Ali */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->links->__pyx_vtab)->read_handle(__pyx_v_self->links, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":67 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":67 * f = fopen(filename, "r") * self.links.read_handle(f) * self.sent_index.read_handle(f) # <<<<<<<<<<<<<< @@ -11526,7 +11526,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_8read_binary(struct __pyx_obj_3_sa_Ali */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->sent_index->__pyx_vtab)->read_handle(__pyx_v_self->sent_index, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":68 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":68 * self.links.read_handle(f) * self.sent_index.read_handle(f) * fclose(f) # <<<<<<<<<<<<<< @@ -11562,7 +11562,7 @@ static PyObject *__pyx_pw_3_sa_9Alignment_11write_text(PyObject *__pyx_v_self, P return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":70 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":70 * fclose(f) * * def write_text(self, char* filename): # <<<<<<<<<<<<<< @@ -11597,7 +11597,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_text", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":71 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":71 * * def write_text(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -11637,7 +11637,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali __pyx_v_f = __pyx_t_4; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":72 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":72 * def write_text(self, char* filename): * with open(filename, "w") as f: * sent_num = 0 # <<<<<<<<<<<<<< @@ -11647,7 +11647,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali __Pyx_INCREF(__pyx_int_0); __pyx_v_sent_num = __pyx_int_0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":73 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":73 * with open(filename, "w") as f: * sent_num = 0 * for i, link in enumerate(self.links): # <<<<<<<<<<<<<< @@ -11702,7 +11702,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali __pyx_t_4 = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":74 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":74 * sent_num = 0 * for i, link in enumerate(self.links): * while i >= self.sent_index[sent_num]: # <<<<<<<<<<<<<< @@ -11718,7 +11718,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; if (!__pyx_t_11) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":75 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":75 * for i, link in enumerate(self.links): * while i >= self.sent_index[sent_num]: * f.write("\n") # <<<<<<<<<<<<<< @@ -11732,7 +11732,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":76 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":76 * while i >= self.sent_index[sent_num]: * f.write("\n") * sent_num = sent_num + 1 # <<<<<<<<<<<<<< @@ -11746,7 +11746,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali __pyx_t_2 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":77 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":77 * f.write("\n") * sent_num = sent_num + 1 * f.write("%d-%d " % self.unlink(link)) # <<<<<<<<<<<<<< @@ -11783,7 +11783,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":78 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":78 * sent_num = sent_num + 1 * f.write("%d-%d " % self.unlink(link)) * f.write("\n") # <<<<<<<<<<<<<< @@ -11809,7 +11809,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_10write_text(struct __pyx_obj_3_sa_Ali __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":71 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":71 * * def write_text(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -11931,7 +11931,7 @@ static PyObject *__pyx_pw_3_sa_9Alignment_13write_binary(PyObject *__pyx_v_self, return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":80 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":80 * f.write("\n") * * def write_binary(self, char* filename): # <<<<<<<<<<<<<< @@ -11945,7 +11945,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_12write_binary(struct __pyx_obj_3_sa_A __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("write_binary", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":82 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":82 * def write_binary(self, char* filename): * cdef FILE* f * f = fopen(filename, "w") # <<<<<<<<<<<<<< @@ -11954,7 +11954,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_12write_binary(struct __pyx_obj_3_sa_A */ __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__w); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":83 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":83 * cdef FILE* f * f = fopen(filename, "w") * self.links.write_handle(f) # <<<<<<<<<<<<<< @@ -11963,7 +11963,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_12write_binary(struct __pyx_obj_3_sa_A */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->links->__pyx_vtab)->write_handle(__pyx_v_self->links, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":84 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":84 * f = fopen(filename, "w") * self.links.write_handle(f) * self.sent_index.write_handle(f) # <<<<<<<<<<<<<< @@ -11972,7 +11972,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_12write_binary(struct __pyx_obj_3_sa_A */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->sent_index->__pyx_vtab)->write_handle(__pyx_v_self->sent_index, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":85 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":85 * self.links.write_handle(f) * self.sent_index.write_handle(f) * fclose(f) # <<<<<<<<<<<<<< @@ -12008,7 +12008,7 @@ static PyObject *__pyx_pw_3_sa_9Alignment_15write_enhanced(PyObject *__pyx_v_sel return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":87 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":87 * fclose(f) * * def write_enhanced(self, char* filename): # <<<<<<<<<<<<<< @@ -12041,7 +12041,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_enhanced", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":88 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":88 * * def write_enhanced(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -12081,7 +12081,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa __pyx_v_f = __pyx_t_4; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":89 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":89 * def write_enhanced(self, char* filename): * with open(filename, "w") as f: * sent_num = 1 # <<<<<<<<<<<<<< @@ -12090,7 +12090,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa */ __pyx_v_sent_num = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":90 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":90 * with open(filename, "w") as f: * sent_num = 1 * for link in self.links: # <<<<<<<<<<<<<< @@ -12135,7 +12135,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa __pyx_v_link = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":91 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":91 * sent_num = 1 * for link in self.links: * f.write("%d " % link) # <<<<<<<<<<<<<< @@ -12159,7 +12159,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa } __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":92 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":92 * for link in self.links: * f.write("%d " % link) * f.write("\n") # <<<<<<<<<<<<<< @@ -12173,7 +12173,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":93 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":93 * f.write("%d " % link) * f.write("\n") * for i in self.sent_index: # <<<<<<<<<<<<<< @@ -12218,7 +12218,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa __pyx_v_i = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":94 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":94 * f.write("\n") * for i in self.sent_index: * f.write("%d " % i) # <<<<<<<<<<<<<< @@ -12242,7 +12242,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":95 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":95 * for i in self.sent_index: * f.write("%d " % i) * f.write("\n") # <<<<<<<<<<<<<< @@ -12266,7 +12266,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_14write_enhanced(struct __pyx_obj_3_sa __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_XDECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":88 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":88 * * def write_enhanced(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -12376,7 +12376,7 @@ static PyObject *__pyx_pw_3_sa_9Alignment_17alignment(PyObject *__pyx_v_self, Py return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":97 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":97 * f.write("\n") * * def alignment(self, i): # <<<<<<<<<<<<<< @@ -12402,7 +12402,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_16alignment(struct __pyx_obj_3_sa_Alig int __pyx_clineno = 0; __Pyx_RefNannySetupContext("alignment", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":100 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":100 * """Return all (e,f) pairs for sentence i""" * cdef int j, start, end * result = [] # <<<<<<<<<<<<<< @@ -12414,7 +12414,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_16alignment(struct __pyx_obj_3_sa_Alig __pyx_v_result = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":101 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":101 * cdef int j, start, end * result = [] * start = self.sent_index.arr[i] # <<<<<<<<<<<<<< @@ -12424,7 +12424,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_16alignment(struct __pyx_obj_3_sa_Alig __pyx_t_2 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_2 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_start = (__pyx_v_self->sent_index->arr[__pyx_t_2]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":102 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":102 * result = [] * start = self.sent_index.arr[i] * end = self.sent_index.arr[i+1] # <<<<<<<<<<<<<< @@ -12437,7 +12437,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_16alignment(struct __pyx_obj_3_sa_Alig __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_end = (__pyx_v_self->sent_index->arr[__pyx_t_2]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":103 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":103 * start = self.sent_index.arr[i] * end = self.sent_index.arr[i+1] * for j from start <= j < end: # <<<<<<<<<<<<<< @@ -12447,7 +12447,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_16alignment(struct __pyx_obj_3_sa_Alig __pyx_t_3 = __pyx_v_end; for (__pyx_v_j = __pyx_v_start; __pyx_v_j < __pyx_t_3; __pyx_v_j++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":104 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":104 * end = self.sent_index.arr[i+1] * for j from start <= j < end: * result.append(self.unlink(self.links.arr[j])) # <<<<<<<<<<<<<< @@ -12470,7 +12470,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_16alignment(struct __pyx_obj_3_sa_Alig __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":105 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":105 * for j from start <= j < end: * result.append(self.unlink(self.links.arr[j])) * return result # <<<<<<<<<<<<<< @@ -12495,7 +12495,7 @@ static PyObject *__pyx_pf_3_sa_9Alignment_16alignment(struct __pyx_obj_3_sa_Alig return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":15 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":15 * int val * * cdef _node* new_node(int key): # <<<<<<<<<<<<<< @@ -12509,7 +12509,7 @@ static struct __pyx_t_3_sa__node *__pyx_f_3_sa_new_node(int __pyx_v_key) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("new_node", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":17 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":17 * cdef _node* new_node(int key): * cdef _node* n * n = <_node*> malloc(sizeof(_node)) # <<<<<<<<<<<<<< @@ -12518,7 +12518,7 @@ static struct __pyx_t_3_sa__node *__pyx_f_3_sa_new_node(int __pyx_v_key) { */ __pyx_v_n = ((struct __pyx_t_3_sa__node *)malloc((sizeof(struct __pyx_t_3_sa__node)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":18 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":18 * cdef _node* n * n = <_node*> malloc(sizeof(_node)) * n.smaller = NULL # <<<<<<<<<<<<<< @@ -12527,7 +12527,7 @@ static struct __pyx_t_3_sa__node *__pyx_f_3_sa_new_node(int __pyx_v_key) { */ __pyx_v_n->smaller = NULL; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":19 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":19 * n = <_node*> malloc(sizeof(_node)) * n.smaller = NULL * n.bigger = NULL # <<<<<<<<<<<<<< @@ -12536,7 +12536,7 @@ static struct __pyx_t_3_sa__node *__pyx_f_3_sa_new_node(int __pyx_v_key) { */ __pyx_v_n->bigger = NULL; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":20 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":20 * n.smaller = NULL * n.bigger = NULL * n.key = key # <<<<<<<<<<<<<< @@ -12545,7 +12545,7 @@ static struct __pyx_t_3_sa__node *__pyx_f_3_sa_new_node(int __pyx_v_key) { */ __pyx_v_n->key = __pyx_v_key; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":21 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":21 * n.bigger = NULL * n.key = key * n.val = 0 # <<<<<<<<<<<<<< @@ -12554,7 +12554,7 @@ static struct __pyx_t_3_sa__node *__pyx_f_3_sa_new_node(int __pyx_v_key) { */ __pyx_v_n->val = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":22 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":22 * n.key = key * n.val = 0 * return n # <<<<<<<<<<<<<< @@ -12570,7 +12570,7 @@ static struct __pyx_t_3_sa__node *__pyx_f_3_sa_new_node(int __pyx_v_key) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":25 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":25 * * * cdef del_node(_node* n): # <<<<<<<<<<<<<< @@ -12588,7 +12588,7 @@ static PyObject *__pyx_f_3_sa_del_node(struct __pyx_t_3_sa__node *__pyx_v_n) { int __pyx_clineno = 0; __Pyx_RefNannySetupContext("del_node", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":26 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":26 * * cdef del_node(_node* n): * if n.smaller != NULL: # <<<<<<<<<<<<<< @@ -12598,7 +12598,7 @@ static PyObject *__pyx_f_3_sa_del_node(struct __pyx_t_3_sa__node *__pyx_v_n) { __pyx_t_1 = (__pyx_v_n->smaller != NULL); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":27 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":27 * cdef del_node(_node* n): * if n.smaller != NULL: * del_node(n.smaller) # <<<<<<<<<<<<<< @@ -12612,7 +12612,7 @@ static PyObject *__pyx_f_3_sa_del_node(struct __pyx_t_3_sa__node *__pyx_v_n) { } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":28 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":28 * if n.smaller != NULL: * del_node(n.smaller) * if n.bigger != NULL: # <<<<<<<<<<<<<< @@ -12622,7 +12622,7 @@ static PyObject *__pyx_f_3_sa_del_node(struct __pyx_t_3_sa__node *__pyx_v_n) { __pyx_t_1 = (__pyx_v_n->bigger != NULL); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":29 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":29 * del_node(n.smaller) * if n.bigger != NULL: * del_node(n.bigger) # <<<<<<<<<<<<<< @@ -12636,7 +12636,7 @@ static PyObject *__pyx_f_3_sa_del_node(struct __pyx_t_3_sa__node *__pyx_v_n) { } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":30 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":30 * if n.bigger != NULL: * del_node(n.bigger) * free(n) # <<<<<<<<<<<<<< @@ -12657,7 +12657,7 @@ static PyObject *__pyx_f_3_sa_del_node(struct __pyx_t_3_sa__node *__pyx_v_n) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":32 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":32 * free(n) * * cdef int* get_val(_node* n, int key): # <<<<<<<<<<<<<< @@ -12671,7 +12671,7 @@ static int *__pyx_f_3_sa_get_val(struct __pyx_t_3_sa__node *__pyx_v_n, int __pyx int __pyx_t_1; __Pyx_RefNannySetupContext("get_val", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":33 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":33 * * cdef int* get_val(_node* n, int key): * if key == n.key: # <<<<<<<<<<<<<< @@ -12681,7 +12681,7 @@ static int *__pyx_f_3_sa_get_val(struct __pyx_t_3_sa__node *__pyx_v_n, int __pyx __pyx_t_1 = (__pyx_v_key == __pyx_v_n->key); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":34 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":34 * cdef int* get_val(_node* n, int key): * if key == n.key: * return &n.val # <<<<<<<<<<<<<< @@ -12693,7 +12693,7 @@ static int *__pyx_f_3_sa_get_val(struct __pyx_t_3_sa__node *__pyx_v_n, int __pyx goto __pyx_L3; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":35 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":35 * if key == n.key: * return &n.val * elif key < n.key: # <<<<<<<<<<<<<< @@ -12703,7 +12703,7 @@ static int *__pyx_f_3_sa_get_val(struct __pyx_t_3_sa__node *__pyx_v_n, int __pyx __pyx_t_1 = (__pyx_v_key < __pyx_v_n->key); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":36 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":36 * return &n.val * elif key < n.key: * if n.smaller == NULL: # <<<<<<<<<<<<<< @@ -12713,7 +12713,7 @@ static int *__pyx_f_3_sa_get_val(struct __pyx_t_3_sa__node *__pyx_v_n, int __pyx __pyx_t_1 = (__pyx_v_n->smaller == NULL); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":37 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":37 * elif key < n.key: * if n.smaller == NULL: * n.smaller = new_node(key) # <<<<<<<<<<<<<< @@ -12722,7 +12722,7 @@ static int *__pyx_f_3_sa_get_val(struct __pyx_t_3_sa__node *__pyx_v_n, int __pyx */ __pyx_v_n->smaller = __pyx_f_3_sa_new_node(__pyx_v_key); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":38 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":38 * if n.smaller == NULL: * n.smaller = new_node(key) * return &(n.smaller.val) # <<<<<<<<<<<<<< @@ -12735,7 +12735,7 @@ static int *__pyx_f_3_sa_get_val(struct __pyx_t_3_sa__node *__pyx_v_n, int __pyx } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":39 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":39 * n.smaller = new_node(key) * return &(n.smaller.val) * return get_val(n.smaller, key) # <<<<<<<<<<<<<< @@ -12748,7 +12748,7 @@ static int *__pyx_f_3_sa_get_val(struct __pyx_t_3_sa__node *__pyx_v_n, int __pyx } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":41 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":41 * return get_val(n.smaller, key) * else: * if n.bigger == NULL: # <<<<<<<<<<<<<< @@ -12758,7 +12758,7 @@ static int *__pyx_f_3_sa_get_val(struct __pyx_t_3_sa__node *__pyx_v_n, int __pyx __pyx_t_1 = (__pyx_v_n->bigger == NULL); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":42 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":42 * else: * if n.bigger == NULL: * n.bigger = new_node(key) # <<<<<<<<<<<<<< @@ -12767,7 +12767,7 @@ static int *__pyx_f_3_sa_get_val(struct __pyx_t_3_sa__node *__pyx_v_n, int __pyx */ __pyx_v_n->bigger = __pyx_f_3_sa_new_node(__pyx_v_key); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":43 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":43 * if n.bigger == NULL: * n.bigger = new_node(key) * return &(n.bigger.val) # <<<<<<<<<<<<<< @@ -12780,7 +12780,7 @@ static int *__pyx_f_3_sa_get_val(struct __pyx_t_3_sa__node *__pyx_v_n, int __pyx } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":44 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":44 * n.bigger = new_node(key) * return &(n.bigger.val) * return get_val(n.bigger, key) # <<<<<<<<<<<<<< @@ -12814,7 +12814,7 @@ static int __pyx_pw_3_sa_5BiLex_1__cinit__(PyObject *__pyx_v_self, PyObject *__p static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__from_text,&__pyx_n_s__from_data,&__pyx_n_s__from_binary,&__pyx_n_s__earray,&__pyx_n_s__fsarray,&__pyx_n_s__alignment,0}; PyObject* values[6] = {0,0,0,0,0,0}; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":54 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":54 * cdef id2eword, id2fword, eword2id, fword2id * * def __cinit__(self, from_text=None, from_data=False, from_binary=None, # <<<<<<<<<<<<<< @@ -12825,7 +12825,7 @@ static int __pyx_pw_3_sa_5BiLex_1__cinit__(PyObject *__pyx_v_self, PyObject *__p values[1] = __pyx_k_41; values[2] = ((PyObject *)Py_None); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":55 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":55 * * def __cinit__(self, from_text=None, from_data=False, from_binary=None, * earray=None, fsarray=None, alignment=None): # <<<<<<<<<<<<<< @@ -12916,7 +12916,7 @@ static int __pyx_pw_3_sa_5BiLex_1__cinit__(PyObject *__pyx_v_self, PyObject *__p return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":54 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":54 * cdef id2eword, id2fword, eword2id, fword2id * * def __cinit__(self, from_text=None, from_data=False, from_binary=None, # <<<<<<<<<<<<<< @@ -12937,7 +12937,7 @@ static int __pyx_pf_3_sa_5BiLex___cinit__(struct __pyx_obj_3_sa_BiLex *__pyx_v_s int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":56 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":56 * def __cinit__(self, from_text=None, from_data=False, from_binary=None, * earray=None, fsarray=None, alignment=None): * self.id2eword = [] # <<<<<<<<<<<<<< @@ -12952,7 +12952,7 @@ static int __pyx_pf_3_sa_5BiLex___cinit__(struct __pyx_obj_3_sa_BiLex *__pyx_v_s __pyx_v_self->id2eword = ((PyObject *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":57 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":57 * earray=None, fsarray=None, alignment=None): * self.id2eword = [] * self.id2fword = [] # <<<<<<<<<<<<<< @@ -12967,7 +12967,7 @@ static int __pyx_pf_3_sa_5BiLex___cinit__(struct __pyx_obj_3_sa_BiLex *__pyx_v_s __pyx_v_self->id2fword = ((PyObject *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":58 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":58 * self.id2eword = [] * self.id2fword = [] * self.eword2id = {} # <<<<<<<<<<<<<< @@ -12982,7 +12982,7 @@ static int __pyx_pf_3_sa_5BiLex___cinit__(struct __pyx_obj_3_sa_BiLex *__pyx_v_s __pyx_v_self->eword2id = ((PyObject *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":59 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":59 * self.id2fword = [] * self.eword2id = {} * self.fword2id = {} # <<<<<<<<<<<<<< @@ -12997,7 +12997,7 @@ static int __pyx_pf_3_sa_5BiLex___cinit__(struct __pyx_obj_3_sa_BiLex *__pyx_v_s __pyx_v_self->fword2id = ((PyObject *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":60 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":60 * self.eword2id = {} * self.fword2id = {} * self.e_index = IntList() # <<<<<<<<<<<<<< @@ -13012,7 +13012,7 @@ static int __pyx_pf_3_sa_5BiLex___cinit__(struct __pyx_obj_3_sa_BiLex *__pyx_v_s __pyx_v_self->e_index = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":61 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":61 * self.fword2id = {} * self.e_index = IntList() * self.f_index = IntList() # <<<<<<<<<<<<<< @@ -13027,7 +13027,7 @@ static int __pyx_pf_3_sa_5BiLex___cinit__(struct __pyx_obj_3_sa_BiLex *__pyx_v_s __pyx_v_self->f_index = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":62 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":62 * self.e_index = IntList() * self.f_index = IntList() * self.col1 = FloatList() # <<<<<<<<<<<<<< @@ -13042,7 +13042,7 @@ static int __pyx_pf_3_sa_5BiLex___cinit__(struct __pyx_obj_3_sa_BiLex *__pyx_v_s __pyx_v_self->col1 = ((struct __pyx_obj_3_sa_FloatList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":63 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":63 * self.f_index = IntList() * self.col1 = FloatList() * self.col2 = FloatList() # <<<<<<<<<<<<<< @@ -13057,7 +13057,7 @@ static int __pyx_pf_3_sa_5BiLex___cinit__(struct __pyx_obj_3_sa_BiLex *__pyx_v_s __pyx_v_self->col2 = ((struct __pyx_obj_3_sa_FloatList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":64 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":64 * self.col1 = FloatList() * self.col2 = FloatList() * if from_binary: # <<<<<<<<<<<<<< @@ -13067,7 +13067,7 @@ static int __pyx_pf_3_sa_5BiLex___cinit__(struct __pyx_obj_3_sa_BiLex *__pyx_v_s __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_from_binary); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":65 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":65 * self.col2 = FloatList() * if from_binary: * self.read_binary(from_binary) # <<<<<<<<<<<<<< @@ -13089,7 +13089,7 @@ static int __pyx_pf_3_sa_5BiLex___cinit__(struct __pyx_obj_3_sa_BiLex *__pyx_v_s goto __pyx_L3; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":66 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":66 * if from_binary: * self.read_binary(from_binary) * elif from_data: # <<<<<<<<<<<<<< @@ -13099,7 +13099,7 @@ static int __pyx_pf_3_sa_5BiLex___cinit__(struct __pyx_obj_3_sa_BiLex *__pyx_v_s __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_from_data); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":67 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":67 * self.read_binary(from_binary) * elif from_data: * self.compute_from_data(fsarray, earray, alignment) # <<<<<<<<<<<<<< @@ -13125,7 +13125,7 @@ static int __pyx_pf_3_sa_5BiLex___cinit__(struct __pyx_obj_3_sa_BiLex *__pyx_v_s } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":69 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":69 * self.compute_from_data(fsarray, earray, alignment) * else: * self.read_text(from_text) # <<<<<<<<<<<<<< @@ -13161,7 +13161,7 @@ static int __pyx_pf_3_sa_5BiLex___cinit__(struct __pyx_obj_3_sa_BiLex *__pyx_v_s return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":72 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":72 * * * cdef compute_from_data(self, SuffixArray fsa, DataArray eda, Alignment aa): # <<<<<<<<<<<<<< @@ -13215,7 +13215,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL int __pyx_clineno = 0; __Pyx_RefNannySetupContext("compute_from_data", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":79 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":79 * cdef int null_word * * null_word = 0 # <<<<<<<<<<<<<< @@ -13224,7 +13224,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_null_word = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":80 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":80 * * null_word = 0 * for word in fsa.darray.id2word: # I miss list comprehensions # <<<<<<<<<<<<<< @@ -13269,7 +13269,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_v_word = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":81 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":81 * null_word = 0 * for word in fsa.darray.id2word: # I miss list comprehensions * self.id2fword.append(word) # <<<<<<<<<<<<<< @@ -13282,7 +13282,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":82 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":82 * for word in fsa.darray.id2word: # I miss list comprehensions * self.id2fword.append(word) * self.id2fword[null_word] = "NULL" # <<<<<<<<<<<<<< @@ -13291,7 +13291,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ if (__Pyx_SetItemInt(__pyx_v_self->id2fword, __pyx_v_null_word, ((PyObject *)__pyx_n_s__NULL), sizeof(int), PyInt_FromLong) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 82; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":83 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":83 * self.id2fword.append(word) * self.id2fword[null_word] = "NULL" * for id, word in enumerate(self.id2fword): # <<<<<<<<<<<<<< @@ -13346,7 +13346,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_t_1 = __pyx_t_5; __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":84 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":84 * self.id2fword[null_word] = "NULL" * for id, word in enumerate(self.id2fword): * self.fword2id[word] = id # <<<<<<<<<<<<<< @@ -13358,7 +13358,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":86 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":86 * self.fword2id[word] = id * * for word in eda.id2word: # <<<<<<<<<<<<<< @@ -13403,7 +13403,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_v_word = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":87 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":87 * * for word in eda.id2word: * self.id2eword.append(word) # <<<<<<<<<<<<<< @@ -13416,7 +13416,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":88 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":88 * for word in eda.id2word: * self.id2eword.append(word) * self.id2eword[null_word] = "NULL" # <<<<<<<<<<<<<< @@ -13425,7 +13425,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ if (__Pyx_SetItemInt(__pyx_v_self->id2eword, __pyx_v_null_word, ((PyObject *)__pyx_n_s__NULL), sizeof(int), PyInt_FromLong) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":89 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":89 * self.id2eword.append(word) * self.id2eword[null_word] = "NULL" * for id, word in enumerate(self.id2eword): # <<<<<<<<<<<<<< @@ -13480,7 +13480,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_t_1 = __pyx_t_5; __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":90 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":90 * self.id2eword[null_word] = "NULL" * for id, word in enumerate(self.id2eword): * self.eword2id[word] = id # <<<<<<<<<<<<<< @@ -13492,7 +13492,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":92 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":92 * self.eword2id[word] = id * * num_pairs = 0 # <<<<<<<<<<<<<< @@ -13501,7 +13501,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_num_pairs = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":94 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":94 * num_pairs = 0 * * V_E = len(eda.id2word) # <<<<<<<<<<<<<< @@ -13514,7 +13514,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_V_E = __pyx_t_2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":95 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":95 * * V_E = len(eda.id2word) * V_F = len(fsa.darray.id2word) # <<<<<<<<<<<<<< @@ -13527,7 +13527,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_V_F = __pyx_t_2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":96 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":96 * V_E = len(eda.id2word) * V_F = len(fsa.darray.id2word) * fmargin = malloc(V_F*sizeof(int)) # <<<<<<<<<<<<<< @@ -13536,7 +13536,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_fmargin = ((int *)malloc((__pyx_v_V_F * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":97 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":97 * V_F = len(fsa.darray.id2word) * fmargin = malloc(V_F*sizeof(int)) * emargin = malloc(V_E*sizeof(int)) # <<<<<<<<<<<<<< @@ -13545,7 +13545,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_emargin = ((int *)malloc((__pyx_v_V_E * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":98 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":98 * fmargin = malloc(V_F*sizeof(int)) * emargin = malloc(V_E*sizeof(int)) * memset(fmargin, 0, V_F*sizeof(int)) # <<<<<<<<<<<<<< @@ -13554,7 +13554,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ memset(__pyx_v_fmargin, 0, (__pyx_v_V_F * (sizeof(int)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":99 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":99 * emargin = malloc(V_E*sizeof(int)) * memset(fmargin, 0, V_F*sizeof(int)) * memset(emargin, 0, V_E*sizeof(int)) # <<<<<<<<<<<<<< @@ -13563,7 +13563,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ memset(__pyx_v_emargin, 0, (__pyx_v_V_E * (sizeof(int)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":101 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":101 * memset(emargin, 0, V_E*sizeof(int)) * * dict = <_node**> malloc(V_F*sizeof(_node*)) # <<<<<<<<<<<<<< @@ -13572,7 +13572,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_dict = ((struct __pyx_t_3_sa__node **)malloc((__pyx_v_V_F * (sizeof(struct __pyx_t_3_sa__node *))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":102 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":102 * * dict = <_node**> malloc(V_F*sizeof(_node*)) * memset(dict, 0, V_F*sizeof(_node*)) # <<<<<<<<<<<<<< @@ -13581,7 +13581,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ memset(__pyx_v_dict, 0, (__pyx_v_V_F * (sizeof(struct __pyx_t_3_sa__node *)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":104 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":104 * memset(dict, 0, V_F*sizeof(_node*)) * * num_sents = len(fsa.darray.sent_index) # <<<<<<<<<<<<<< @@ -13597,7 +13597,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_v_num_sents = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":105 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":105 * * num_sents = len(fsa.darray.sent_index) * for sent_id from 0 <= sent_id < num_sents-1: # <<<<<<<<<<<<<< @@ -13610,7 +13610,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (__pyx_v_sent_id = 0; __pyx_v_sent_id < __pyx_t_6; __pyx_v_sent_id++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":107 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":107 * for sent_id from 0 <= sent_id < num_sents-1: * * fsent = fsa.darray.data.arr + fsa.darray.sent_index.arr[sent_id] # <<<<<<<<<<<<<< @@ -13619,7 +13619,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_fsent = (__pyx_v_fsa->darray->data->arr + (__pyx_v_fsa->darray->sent_index->arr[__pyx_v_sent_id])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":108 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":108 * * fsent = fsa.darray.data.arr + fsa.darray.sent_index.arr[sent_id] * I = fsa.darray.sent_index.arr[sent_id+1] - fsa.darray.sent_index.arr[sent_id] - 1 # <<<<<<<<<<<<<< @@ -13628,7 +13628,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_I = (((__pyx_v_fsa->darray->sent_index->arr[(__pyx_v_sent_id + 1)]) - (__pyx_v_fsa->darray->sent_index->arr[__pyx_v_sent_id])) - 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":109 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":109 * fsent = fsa.darray.data.arr + fsa.darray.sent_index.arr[sent_id] * I = fsa.darray.sent_index.arr[sent_id+1] - fsa.darray.sent_index.arr[sent_id] - 1 * faligned = malloc(I*sizeof(int)) # <<<<<<<<<<<<<< @@ -13637,7 +13637,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_faligned = ((int *)malloc((__pyx_v_I * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":110 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":110 * I = fsa.darray.sent_index.arr[sent_id+1] - fsa.darray.sent_index.arr[sent_id] - 1 * faligned = malloc(I*sizeof(int)) * memset(faligned, 0, I*sizeof(int)) # <<<<<<<<<<<<<< @@ -13646,7 +13646,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ memset(__pyx_v_faligned, 0, (__pyx_v_I * (sizeof(int)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":112 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":112 * memset(faligned, 0, I*sizeof(int)) * * esent = eda.data.arr + eda.sent_index.arr[sent_id] # <<<<<<<<<<<<<< @@ -13655,7 +13655,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_esent = (__pyx_v_eda->data->arr + (__pyx_v_eda->sent_index->arr[__pyx_v_sent_id])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":113 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":113 * * esent = eda.data.arr + eda.sent_index.arr[sent_id] * J = eda.sent_index.arr[sent_id+1] - eda.sent_index.arr[sent_id] - 1 # <<<<<<<<<<<<<< @@ -13664,7 +13664,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_J = (((__pyx_v_eda->sent_index->arr[(__pyx_v_sent_id + 1)]) - (__pyx_v_eda->sent_index->arr[__pyx_v_sent_id])) - 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":114 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":114 * esent = eda.data.arr + eda.sent_index.arr[sent_id] * J = eda.sent_index.arr[sent_id+1] - eda.sent_index.arr[sent_id] - 1 * ealigned = malloc(J*sizeof(int)) # <<<<<<<<<<<<<< @@ -13673,7 +13673,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_ealigned = ((int *)malloc((__pyx_v_J * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":115 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":115 * J = eda.sent_index.arr[sent_id+1] - eda.sent_index.arr[sent_id] - 1 * ealigned = malloc(J*sizeof(int)) * memset(ealigned, 0, J*sizeof(int)) # <<<<<<<<<<<<<< @@ -13682,7 +13682,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ memset(__pyx_v_ealigned, 0, (__pyx_v_J * (sizeof(int)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":117 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":117 * memset(ealigned, 0, J*sizeof(int)) * * links = aa._get_sent_links(sent_id, &num_links) # <<<<<<<<<<<<<< @@ -13691,7 +13691,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_links = ((struct __pyx_vtabstruct_3_sa_Alignment *)__pyx_v_aa->__pyx_vtab)->_get_sent_links(__pyx_v_aa, __pyx_v_sent_id, (&__pyx_v_num_links)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":119 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":119 * links = aa._get_sent_links(sent_id, &num_links) * * for l from 0 <= l < num_links: # <<<<<<<<<<<<<< @@ -13701,7 +13701,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_t_7 = __pyx_v_num_links; for (__pyx_v_l = 0; __pyx_v_l < __pyx_t_7; __pyx_v_l++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":120 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":120 * * for l from 0 <= l < num_links: * i = links[l*2] # <<<<<<<<<<<<<< @@ -13710,7 +13710,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_i = (__pyx_v_links[(__pyx_v_l * 2)]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":121 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":121 * for l from 0 <= l < num_links: * i = links[l*2] * j = links[l*2+1] # <<<<<<<<<<<<<< @@ -13719,7 +13719,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_j = (__pyx_v_links[((__pyx_v_l * 2) + 1)]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":122 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":122 * i = links[l*2] * j = links[l*2+1] * if i >= I or j >= J: # <<<<<<<<<<<<<< @@ -13735,7 +13735,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL } if (__pyx_t_10) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":123 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":123 * j = links[l*2+1] * if i >= I or j >= J: * raise Exception("%d-%d out of bounds (I=%d,J=%d) in line %d\n" % (i,j,I,J,sent_id+1)) # <<<<<<<<<<<<<< @@ -13787,7 +13787,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL } __pyx_L15:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":124 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":124 * if i >= I or j >= J: * raise Exception("%d-%d out of bounds (I=%d,J=%d) in line %d\n" % (i,j,I,J,sent_id+1)) * f_i = fsent[i] # <<<<<<<<<<<<<< @@ -13796,7 +13796,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_f_i = (__pyx_v_fsent[__pyx_v_i]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":125 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":125 * raise Exception("%d-%d out of bounds (I=%d,J=%d) in line %d\n" % (i,j,I,J,sent_id+1)) * f_i = fsent[i] * e_j = esent[j] # <<<<<<<<<<<<<< @@ -13805,7 +13805,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_e_j = (__pyx_v_esent[__pyx_v_j]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":126 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":126 * f_i = fsent[i] * e_j = esent[j] * fmargin[f_i] = fmargin[f_i]+1 # <<<<<<<<<<<<<< @@ -13814,7 +13814,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ (__pyx_v_fmargin[__pyx_v_f_i]) = ((__pyx_v_fmargin[__pyx_v_f_i]) + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":127 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":127 * e_j = esent[j] * fmargin[f_i] = fmargin[f_i]+1 * emargin[e_j] = emargin[e_j]+1 # <<<<<<<<<<<<<< @@ -13823,7 +13823,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ (__pyx_v_emargin[__pyx_v_e_j]) = ((__pyx_v_emargin[__pyx_v_e_j]) + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":128 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":128 * fmargin[f_i] = fmargin[f_i]+1 * emargin[e_j] = emargin[e_j]+1 * if dict[f_i] == NULL: # <<<<<<<<<<<<<< @@ -13833,7 +13833,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_t_10 = ((__pyx_v_dict[__pyx_v_f_i]) == NULL); if (__pyx_t_10) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":129 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":129 * emargin[e_j] = emargin[e_j]+1 * if dict[f_i] == NULL: * dict[f_i] = new_node(e_j) # <<<<<<<<<<<<<< @@ -13842,7 +13842,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ (__pyx_v_dict[__pyx_v_f_i]) = __pyx_f_3_sa_new_node(__pyx_v_e_j); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":130 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":130 * if dict[f_i] == NULL: * dict[f_i] = new_node(e_j) * dict[f_i].val = 1 # <<<<<<<<<<<<<< @@ -13851,7 +13851,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ (__pyx_v_dict[__pyx_v_f_i])->val = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":131 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":131 * dict[f_i] = new_node(e_j) * dict[f_i].val = 1 * num_pairs = num_pairs + 1 # <<<<<<<<<<<<<< @@ -13863,7 +13863,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":133 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":133 * num_pairs = num_pairs + 1 * else: * count = get_val(dict[f_i], e_j) # <<<<<<<<<<<<<< @@ -13872,7 +13872,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_count = __pyx_f_3_sa_get_val((__pyx_v_dict[__pyx_v_f_i]), __pyx_v_e_j); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":134 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":134 * else: * count = get_val(dict[f_i], e_j) * if count[0] == 0: # <<<<<<<<<<<<<< @@ -13882,7 +13882,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_t_10 = ((__pyx_v_count[0]) == 0); if (__pyx_t_10) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":135 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":135 * count = get_val(dict[f_i], e_j) * if count[0] == 0: * num_pairs = num_pairs + 1 # <<<<<<<<<<<<<< @@ -13894,7 +13894,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL } __pyx_L17:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":136 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":136 * if count[0] == 0: * num_pairs = num_pairs + 1 * count[0] = count[0] + 1 # <<<<<<<<<<<<<< @@ -13905,7 +13905,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL } __pyx_L16:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":138 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":138 * count[0] = count[0] + 1 * # add count * faligned[i] = 1 # <<<<<<<<<<<<<< @@ -13914,7 +13914,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ (__pyx_v_faligned[__pyx_v_i]) = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":139 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":139 * # add count * faligned[i] = 1 * ealigned[j] = 1 # <<<<<<<<<<<<<< @@ -13924,7 +13924,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL (__pyx_v_ealigned[__pyx_v_j]) = 1; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":140 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":140 * faligned[i] = 1 * ealigned[j] = 1 * for i from 0 <= i < I: # <<<<<<<<<<<<<< @@ -13934,7 +13934,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_t_7 = __pyx_v_I; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_7; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":141 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":141 * ealigned[j] = 1 * for i from 0 <= i < I: * if faligned[i] == 0: # <<<<<<<<<<<<<< @@ -13944,7 +13944,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_t_10 = ((__pyx_v_faligned[__pyx_v_i]) == 0); if (__pyx_t_10) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":142 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":142 * for i from 0 <= i < I: * if faligned[i] == 0: * f_i = fsent[i] # <<<<<<<<<<<<<< @@ -13953,7 +13953,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_f_i = (__pyx_v_fsent[__pyx_v_i]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":143 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":143 * if faligned[i] == 0: * f_i = fsent[i] * fmargin[f_i] = fmargin[f_i] + 1 # <<<<<<<<<<<<<< @@ -13962,7 +13962,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ (__pyx_v_fmargin[__pyx_v_f_i]) = ((__pyx_v_fmargin[__pyx_v_f_i]) + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":144 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":144 * f_i = fsent[i] * fmargin[f_i] = fmargin[f_i] + 1 * emargin[null_word] = emargin[null_word] + 1 # <<<<<<<<<<<<<< @@ -13971,7 +13971,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ (__pyx_v_emargin[__pyx_v_null_word]) = ((__pyx_v_emargin[__pyx_v_null_word]) + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":145 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":145 * fmargin[f_i] = fmargin[f_i] + 1 * emargin[null_word] = emargin[null_word] + 1 * if dict[f_i] == NULL: # <<<<<<<<<<<<<< @@ -13981,7 +13981,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_t_10 = ((__pyx_v_dict[__pyx_v_f_i]) == NULL); if (__pyx_t_10) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":146 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":146 * emargin[null_word] = emargin[null_word] + 1 * if dict[f_i] == NULL: * dict[f_i] = new_node(null_word) # <<<<<<<<<<<<<< @@ -13990,7 +13990,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ (__pyx_v_dict[__pyx_v_f_i]) = __pyx_f_3_sa_new_node(__pyx_v_null_word); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":147 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":147 * if dict[f_i] == NULL: * dict[f_i] = new_node(null_word) * dict[f_i].val = 1 # <<<<<<<<<<<<<< @@ -13999,7 +13999,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ (__pyx_v_dict[__pyx_v_f_i])->val = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":148 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":148 * dict[f_i] = new_node(null_word) * dict[f_i].val = 1 * num_pairs = num_pairs + 1 # <<<<<<<<<<<<<< @@ -14011,7 +14011,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":150 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":150 * num_pairs = num_pairs + 1 * else: * count = get_val(dict[f_i], null_word) # <<<<<<<<<<<<<< @@ -14020,7 +14020,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_count = __pyx_f_3_sa_get_val((__pyx_v_dict[__pyx_v_f_i]), __pyx_v_null_word); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":151 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":151 * else: * count = get_val(dict[f_i], null_word) * if count[0] == 0: # <<<<<<<<<<<<<< @@ -14030,7 +14030,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_t_10 = ((__pyx_v_count[0]) == 0); if (__pyx_t_10) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":152 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":152 * count = get_val(dict[f_i], null_word) * if count[0] == 0: * num_pairs = num_pairs + 1 # <<<<<<<<<<<<<< @@ -14042,7 +14042,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL } __pyx_L22:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":153 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":153 * if count[0] == 0: * num_pairs = num_pairs + 1 * count[0] = count[0] + 1 # <<<<<<<<<<<<<< @@ -14057,7 +14057,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_L20:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":154 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":154 * num_pairs = num_pairs + 1 * count[0] = count[0] + 1 * for j from 0 <= j < J: # <<<<<<<<<<<<<< @@ -14067,7 +14067,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_t_7 = __pyx_v_J; for (__pyx_v_j = 0; __pyx_v_j < __pyx_t_7; __pyx_v_j++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":155 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":155 * count[0] = count[0] + 1 * for j from 0 <= j < J: * if ealigned[j] == 0: # <<<<<<<<<<<<<< @@ -14077,7 +14077,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_t_10 = ((__pyx_v_ealigned[__pyx_v_j]) == 0); if (__pyx_t_10) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":156 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":156 * for j from 0 <= j < J: * if ealigned[j] == 0: * e_j = esent[j] # <<<<<<<<<<<<<< @@ -14086,7 +14086,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_e_j = (__pyx_v_esent[__pyx_v_j]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":157 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":157 * if ealigned[j] == 0: * e_j = esent[j] * fmargin[null_word] = fmargin[null_word] + 1 # <<<<<<<<<<<<<< @@ -14095,7 +14095,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ (__pyx_v_fmargin[__pyx_v_null_word]) = ((__pyx_v_fmargin[__pyx_v_null_word]) + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":158 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":158 * e_j = esent[j] * fmargin[null_word] = fmargin[null_word] + 1 * emargin[e_j] = emargin[e_j] + 1 # <<<<<<<<<<<<<< @@ -14104,7 +14104,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ (__pyx_v_emargin[__pyx_v_e_j]) = ((__pyx_v_emargin[__pyx_v_e_j]) + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":159 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":159 * fmargin[null_word] = fmargin[null_word] + 1 * emargin[e_j] = emargin[e_j] + 1 * if dict[null_word] == NULL: # <<<<<<<<<<<<<< @@ -14114,7 +14114,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_t_10 = ((__pyx_v_dict[__pyx_v_null_word]) == NULL); if (__pyx_t_10) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":160 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":160 * emargin[e_j] = emargin[e_j] + 1 * if dict[null_word] == NULL: * dict[null_word] = new_node(e_j) # <<<<<<<<<<<<<< @@ -14123,7 +14123,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ (__pyx_v_dict[__pyx_v_null_word]) = __pyx_f_3_sa_new_node(__pyx_v_e_j); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":161 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":161 * if dict[null_word] == NULL: * dict[null_word] = new_node(e_j) * dict[null_word].val = 1 # <<<<<<<<<<<<<< @@ -14132,7 +14132,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ (__pyx_v_dict[__pyx_v_null_word])->val = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":162 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":162 * dict[null_word] = new_node(e_j) * dict[null_word].val = 1 * num_pairs = num_pairs + 1 # <<<<<<<<<<<<<< @@ -14144,7 +14144,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":164 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":164 * num_pairs = num_pairs + 1 * else: * count = get_val(dict[null_word], e_j) # <<<<<<<<<<<<<< @@ -14153,7 +14153,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_count = __pyx_f_3_sa_get_val((__pyx_v_dict[__pyx_v_null_word]), __pyx_v_e_j); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":165 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":165 * else: * count = get_val(dict[null_word], e_j) * if count[0] == 0: # <<<<<<<<<<<<<< @@ -14163,7 +14163,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_t_10 = ((__pyx_v_count[0]) == 0); if (__pyx_t_10) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":166 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":166 * count = get_val(dict[null_word], e_j) * if count[0] == 0: * num_pairs = num_pairs + 1 # <<<<<<<<<<<<<< @@ -14175,7 +14175,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL } __pyx_L27:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":167 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":167 * if count[0] == 0: * num_pairs = num_pairs + 1 * count[0] = count[0] + 1 # <<<<<<<<<<<<<< @@ -14190,7 +14190,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_L25:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":168 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":168 * num_pairs = num_pairs + 1 * count[0] = count[0] + 1 * free(links) # <<<<<<<<<<<<<< @@ -14199,7 +14199,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ free(__pyx_v_links); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":169 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":169 * count[0] = count[0] + 1 * free(links) * free(faligned) # <<<<<<<<<<<<<< @@ -14208,7 +14208,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ free(__pyx_v_faligned); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":170 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":170 * free(links) * free(faligned) * free(ealigned) # <<<<<<<<<<<<<< @@ -14218,7 +14218,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL free(__pyx_v_ealigned); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":171 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":171 * free(faligned) * free(ealigned) * self.f_index = IntList(initial_len=V_F) # <<<<<<<<<<<<<< @@ -14240,7 +14240,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_v_self->f_index = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_13); __pyx_t_13 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":172 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":172 * free(ealigned) * self.f_index = IntList(initial_len=V_F) * self.e_index = IntList(initial_len=num_pairs) # <<<<<<<<<<<<<< @@ -14262,7 +14262,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_v_self->e_index = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_12); __pyx_t_12 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":173 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":173 * self.f_index = IntList(initial_len=V_F) * self.e_index = IntList(initial_len=num_pairs) * self.col1 = FloatList(initial_len=num_pairs) # <<<<<<<<<<<<<< @@ -14284,7 +14284,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_v_self->col1 = ((struct __pyx_obj_3_sa_FloatList *)__pyx_t_13); __pyx_t_13 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":174 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":174 * self.e_index = IntList(initial_len=num_pairs) * self.col1 = FloatList(initial_len=num_pairs) * self.col2 = FloatList(initial_len=num_pairs) # <<<<<<<<<<<<<< @@ -14306,7 +14306,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_v_self->col2 = ((struct __pyx_obj_3_sa_FloatList *)__pyx_t_12); __pyx_t_12 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":176 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":176 * self.col2 = FloatList(initial_len=num_pairs) * * num_pairs = 0 # <<<<<<<<<<<<<< @@ -14315,7 +14315,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ __pyx_v_num_pairs = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":177 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":177 * * num_pairs = 0 * for i from 0 <= i < V_F: # <<<<<<<<<<<<<< @@ -14325,7 +14325,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_t_6 = __pyx_v_V_F; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_6; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":179 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":179 * for i from 0 <= i < V_F: * #self.f_index[i] = num_pairs * self.f_index.set(i, num_pairs) # <<<<<<<<<<<<<< @@ -14334,7 +14334,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->f_index->__pyx_vtab)->set(__pyx_v_self->f_index, __pyx_v_i, __pyx_v_num_pairs); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":180 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":180 * #self.f_index[i] = num_pairs * self.f_index.set(i, num_pairs) * if dict[i] != NULL: # <<<<<<<<<<<<<< @@ -14344,7 +14344,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_t_10 = ((__pyx_v_dict[__pyx_v_i]) != NULL); if (__pyx_t_10) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":181 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":181 * self.f_index.set(i, num_pairs) * if dict[i] != NULL: * self._add_node(dict[i], &num_pairs, float(fmargin[i]), emargin) # <<<<<<<<<<<<<< @@ -14355,7 +14355,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __Pyx_GOTREF(__pyx_t_12); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":182 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":182 * if dict[i] != NULL: * self._add_node(dict[i], &num_pairs, float(fmargin[i]), emargin) * del_node(dict[i]) # <<<<<<<<<<<<<< @@ -14370,7 +14370,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL __pyx_L30:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":183 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":183 * self._add_node(dict[i], &num_pairs, float(fmargin[i]), emargin) * del_node(dict[i]) * free(fmargin) # <<<<<<<<<<<<<< @@ -14379,7 +14379,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ free(__pyx_v_fmargin); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":184 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":184 * del_node(dict[i]) * free(fmargin) * free(emargin) # <<<<<<<<<<<<<< @@ -14388,7 +14388,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ free(__pyx_v_emargin); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":185 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":185 * free(fmargin) * free(emargin) * free(dict) # <<<<<<<<<<<<<< @@ -14397,7 +14397,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL */ free(__pyx_v_dict); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":186 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":186 * free(emargin) * free(dict) * return # <<<<<<<<<<<<<< @@ -14428,7 +14428,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_compute_from_data(struct __pyx_obj_3_sa_BiL return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":189 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":189 * * * cdef _add_node(self, _node* n, int* num_pairs, float fmargin, int* emargin): # <<<<<<<<<<<<<< @@ -14447,7 +14447,7 @@ static PyObject *__pyx_f_3_sa_5BiLex__add_node(struct __pyx_obj_3_sa_BiLex *__py int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_add_node", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":191 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":191 * cdef _add_node(self, _node* n, int* num_pairs, float fmargin, int* emargin): * cdef int loc * if n.smaller != NULL: # <<<<<<<<<<<<<< @@ -14457,7 +14457,7 @@ static PyObject *__pyx_f_3_sa_5BiLex__add_node(struct __pyx_obj_3_sa_BiLex *__py __pyx_t_1 = (__pyx_v_n->smaller != NULL); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":192 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":192 * cdef int loc * if n.smaller != NULL: * self._add_node(n.smaller, num_pairs, fmargin, emargin) # <<<<<<<<<<<<<< @@ -14471,7 +14471,7 @@ static PyObject *__pyx_f_3_sa_5BiLex__add_node(struct __pyx_obj_3_sa_BiLex *__py } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":193 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":193 * if n.smaller != NULL: * self._add_node(n.smaller, num_pairs, fmargin, emargin) * loc = num_pairs[0] # <<<<<<<<<<<<<< @@ -14480,7 +14480,7 @@ static PyObject *__pyx_f_3_sa_5BiLex__add_node(struct __pyx_obj_3_sa_BiLex *__py */ __pyx_v_loc = (__pyx_v_num_pairs[0]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":194 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":194 * self._add_node(n.smaller, num_pairs, fmargin, emargin) * loc = num_pairs[0] * self.e_index.set(loc, n.key) # <<<<<<<<<<<<<< @@ -14489,7 +14489,7 @@ static PyObject *__pyx_f_3_sa_5BiLex__add_node(struct __pyx_obj_3_sa_BiLex *__py */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->e_index->__pyx_vtab)->set(__pyx_v_self->e_index, __pyx_v_loc, __pyx_v_n->key); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":195 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":195 * loc = num_pairs[0] * self.e_index.set(loc, n.key) * self.col1.set(loc, float(n.val)/fmargin) # <<<<<<<<<<<<<< @@ -14502,7 +14502,7 @@ static PyObject *__pyx_f_3_sa_5BiLex__add_node(struct __pyx_obj_3_sa_BiLex *__py } ((struct __pyx_vtabstruct_3_sa_FloatList *)__pyx_v_self->col1->__pyx_vtab)->set(__pyx_v_self->col1, __pyx_v_loc, (((double)__pyx_v_n->val) / __pyx_v_fmargin)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":196 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":196 * self.e_index.set(loc, n.key) * self.col1.set(loc, float(n.val)/fmargin) * self.col2.set(loc, float(n.val)/float(emargin[n.key])) # <<<<<<<<<<<<<< @@ -14515,7 +14515,7 @@ static PyObject *__pyx_f_3_sa_5BiLex__add_node(struct __pyx_obj_3_sa_BiLex *__py } ((struct __pyx_vtabstruct_3_sa_FloatList *)__pyx_v_self->col2->__pyx_vtab)->set(__pyx_v_self->col2, __pyx_v_loc, (((double)__pyx_v_n->val) / ((double)(__pyx_v_emargin[__pyx_v_n->key])))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":197 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":197 * self.col1.set(loc, float(n.val)/fmargin) * self.col2.set(loc, float(n.val)/float(emargin[n.key])) * num_pairs[0] = loc + 1 # <<<<<<<<<<<<<< @@ -14524,7 +14524,7 @@ static PyObject *__pyx_f_3_sa_5BiLex__add_node(struct __pyx_obj_3_sa_BiLex *__py */ (__pyx_v_num_pairs[0]) = (__pyx_v_loc + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":198 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":198 * self.col2.set(loc, float(n.val)/float(emargin[n.key])) * num_pairs[0] = loc + 1 * if n.bigger != NULL: # <<<<<<<<<<<<<< @@ -14534,7 +14534,7 @@ static PyObject *__pyx_f_3_sa_5BiLex__add_node(struct __pyx_obj_3_sa_BiLex *__py __pyx_t_1 = (__pyx_v_n->bigger != NULL); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":199 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":199 * num_pairs[0] = loc + 1 * if n.bigger != NULL: * self._add_node(n.bigger, num_pairs, fmargin, emargin) # <<<<<<<<<<<<<< @@ -14581,7 +14581,7 @@ static PyObject *__pyx_pw_3_sa_5BiLex_3write_binary(PyObject *__pyx_v_self, PyOb return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":202 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":202 * * * def write_binary(self, char* filename): # <<<<<<<<<<<<<< @@ -14600,7 +14600,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_2write_binary(struct __pyx_obj_3_sa_BiLex int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_binary", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":204 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":204 * def write_binary(self, char* filename): * cdef FILE* f * f = fopen(filename, "w") # <<<<<<<<<<<<<< @@ -14609,7 +14609,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_2write_binary(struct __pyx_obj_3_sa_BiLex */ __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__w); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":205 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":205 * cdef FILE* f * f = fopen(filename, "w") * self.f_index.write_handle(f) # <<<<<<<<<<<<<< @@ -14618,7 +14618,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_2write_binary(struct __pyx_obj_3_sa_BiLex */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->f_index->__pyx_vtab)->write_handle(__pyx_v_self->f_index, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":206 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":206 * f = fopen(filename, "w") * self.f_index.write_handle(f) * self.e_index.write_handle(f) # <<<<<<<<<<<<<< @@ -14627,7 +14627,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_2write_binary(struct __pyx_obj_3_sa_BiLex */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->e_index->__pyx_vtab)->write_handle(__pyx_v_self->e_index, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":207 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":207 * self.f_index.write_handle(f) * self.e_index.write_handle(f) * self.col1.write_handle(f) # <<<<<<<<<<<<<< @@ -14636,7 +14636,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_2write_binary(struct __pyx_obj_3_sa_BiLex */ ((struct __pyx_vtabstruct_3_sa_FloatList *)__pyx_v_self->col1->__pyx_vtab)->write_handle(__pyx_v_self->col1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":208 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":208 * self.e_index.write_handle(f) * self.col1.write_handle(f) * self.col2.write_handle(f) # <<<<<<<<<<<<<< @@ -14645,7 +14645,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_2write_binary(struct __pyx_obj_3_sa_BiLex */ ((struct __pyx_vtabstruct_3_sa_FloatList *)__pyx_v_self->col2->__pyx_vtab)->write_handle(__pyx_v_self->col2, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":209 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":209 * self.col1.write_handle(f) * self.col2.write_handle(f) * self.write_wordlist(self.id2fword, f) # <<<<<<<<<<<<<< @@ -14659,7 +14659,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_2write_binary(struct __pyx_obj_3_sa_BiLex __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":210 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":210 * self.col2.write_handle(f) * self.write_wordlist(self.id2fword, f) * self.write_wordlist(self.id2eword, f) # <<<<<<<<<<<<<< @@ -14673,7 +14673,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_2write_binary(struct __pyx_obj_3_sa_BiLex __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":211 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":211 * self.write_wordlist(self.id2fword, f) * self.write_wordlist(self.id2eword, f) * fclose(f) # <<<<<<<<<<<<<< @@ -14695,7 +14695,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_2write_binary(struct __pyx_obj_3_sa_BiLex return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":214 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":214 * * * cdef write_wordlist(self, wordlist, FILE* f): # <<<<<<<<<<<<<< @@ -14720,7 +14720,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_write_wordlist(CYTHON_UNUSED struct __pyx_o int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_wordlist", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":218 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":218 * cdef int num_words * * num_words = len(wordlist) # <<<<<<<<<<<<<< @@ -14730,7 +14730,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_write_wordlist(CYTHON_UNUSED struct __pyx_o __pyx_t_1 = PyObject_Length(__pyx_v_wordlist); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_num_words = __pyx_t_1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":219 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":219 * * num_words = len(wordlist) * fwrite(&(num_words), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -14739,7 +14739,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_write_wordlist(CYTHON_UNUSED struct __pyx_o */ fwrite((&__pyx_v_num_words), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":220 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":220 * num_words = len(wordlist) * fwrite(&(num_words), sizeof(int), 1, f) * for word in wordlist: # <<<<<<<<<<<<<< @@ -14784,7 +14784,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_write_wordlist(CYTHON_UNUSED struct __pyx_o __pyx_v_word = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":221 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":221 * fwrite(&(num_words), sizeof(int), 1, f) * for word in wordlist: * word_len = len(word) + 1 # <<<<<<<<<<<<<< @@ -14794,7 +14794,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_write_wordlist(CYTHON_UNUSED struct __pyx_o __pyx_t_5 = PyObject_Length(__pyx_v_word); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 221; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_word_len = (__pyx_t_5 + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":222 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":222 * for word in wordlist: * word_len = len(word) + 1 * fwrite(&(word_len), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -14803,7 +14803,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_write_wordlist(CYTHON_UNUSED struct __pyx_o */ fwrite((&__pyx_v_word_len), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":223 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":223 * word_len = len(word) + 1 * fwrite(&(word_len), sizeof(int), 1, f) * fwrite(word, sizeof(char), word_len, f) # <<<<<<<<<<<<<< @@ -14829,7 +14829,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_write_wordlist(CYTHON_UNUSED struct __pyx_o return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":226 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":226 * * * cdef read_wordlist(self, word2id, id2word, FILE* f): # <<<<<<<<<<<<<< @@ -14853,7 +14853,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_read_wordlist(CYTHON_UNUSED struct __pyx_ob int __pyx_clineno = 0; __Pyx_RefNannySetupContext("read_wordlist", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":231 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":231 * cdef char* word * * fread(&(num_words), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -14862,7 +14862,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_read_wordlist(CYTHON_UNUSED struct __pyx_ob */ fread((&__pyx_v_num_words), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":232 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":232 * * fread(&(num_words), sizeof(int), 1, f) * for i from 0 <= i < num_words: # <<<<<<<<<<<<<< @@ -14872,7 +14872,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_read_wordlist(CYTHON_UNUSED struct __pyx_ob __pyx_t_1 = __pyx_v_num_words; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_1; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":233 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":233 * fread(&(num_words), sizeof(int), 1, f) * for i from 0 <= i < num_words: * fread(&(word_len), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -14881,7 +14881,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_read_wordlist(CYTHON_UNUSED struct __pyx_ob */ fread((&__pyx_v_word_len), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":234 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":234 * for i from 0 <= i < num_words: * fread(&(word_len), sizeof(int), 1, f) * word = malloc (word_len * sizeof(char)) # <<<<<<<<<<<<<< @@ -14890,7 +14890,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_read_wordlist(CYTHON_UNUSED struct __pyx_ob */ __pyx_v_word = ((char *)malloc((__pyx_v_word_len * (sizeof(char))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":235 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":235 * fread(&(word_len), sizeof(int), 1, f) * word = malloc (word_len * sizeof(char)) * fread(word, sizeof(char), word_len, f) # <<<<<<<<<<<<<< @@ -14899,7 +14899,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_read_wordlist(CYTHON_UNUSED struct __pyx_ob */ fread(__pyx_v_word, (sizeof(char)), __pyx_v_word_len, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":236 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":236 * word = malloc (word_len * sizeof(char)) * fread(word, sizeof(char), word_len, f) * word2id[word] = len(id2word) # <<<<<<<<<<<<<< @@ -14915,7 +14915,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_read_wordlist(CYTHON_UNUSED struct __pyx_ob __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":237 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":237 * fread(word, sizeof(char), word_len, f) * word2id[word] = len(id2word) * id2word.append(word) # <<<<<<<<<<<<<< @@ -14929,7 +14929,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_read_wordlist(CYTHON_UNUSED struct __pyx_ob __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":238 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":238 * word2id[word] = len(id2word) * id2word.append(word) * free(word) # <<<<<<<<<<<<<< @@ -14973,7 +14973,7 @@ static PyObject *__pyx_pw_3_sa_5BiLex_5read_binary(PyObject *__pyx_v_self, PyObj return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":240 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":240 * free(word) * * def read_binary(self, char* filename): # <<<<<<<<<<<<<< @@ -14993,7 +14993,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_4read_binary(struct __pyx_obj_3_sa_BiLex * int __pyx_clineno = 0; __Pyx_RefNannySetupContext("read_binary", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":242 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":242 * def read_binary(self, char* filename): * cdef FILE* f * f = fopen(filename, "r") # <<<<<<<<<<<<<< @@ -15002,7 +15002,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_4read_binary(struct __pyx_obj_3_sa_BiLex * */ __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__r); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":243 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":243 * cdef FILE* f * f = fopen(filename, "r") * self.f_index.read_handle(f) # <<<<<<<<<<<<<< @@ -15011,7 +15011,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_4read_binary(struct __pyx_obj_3_sa_BiLex * */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->f_index->__pyx_vtab)->read_handle(__pyx_v_self->f_index, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":244 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":244 * f = fopen(filename, "r") * self.f_index.read_handle(f) * self.e_index.read_handle(f) # <<<<<<<<<<<<<< @@ -15020,7 +15020,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_4read_binary(struct __pyx_obj_3_sa_BiLex * */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->e_index->__pyx_vtab)->read_handle(__pyx_v_self->e_index, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":245 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":245 * self.f_index.read_handle(f) * self.e_index.read_handle(f) * self.col1.read_handle(f) # <<<<<<<<<<<<<< @@ -15029,7 +15029,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_4read_binary(struct __pyx_obj_3_sa_BiLex * */ ((struct __pyx_vtabstruct_3_sa_FloatList *)__pyx_v_self->col1->__pyx_vtab)->read_handle(__pyx_v_self->col1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":246 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":246 * self.e_index.read_handle(f) * self.col1.read_handle(f) * self.col2.read_handle(f) # <<<<<<<<<<<<<< @@ -15038,7 +15038,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_4read_binary(struct __pyx_obj_3_sa_BiLex * */ ((struct __pyx_vtabstruct_3_sa_FloatList *)__pyx_v_self->col2->__pyx_vtab)->read_handle(__pyx_v_self->col2, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":247 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":247 * self.col1.read_handle(f) * self.col2.read_handle(f) * self.read_wordlist(self.fword2id, self.id2fword, f) # <<<<<<<<<<<<<< @@ -15055,7 +15055,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_4read_binary(struct __pyx_obj_3_sa_BiLex * __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":248 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":248 * self.col2.read_handle(f) * self.read_wordlist(self.fword2id, self.id2fword, f) * self.read_wordlist(self.eword2id, self.id2eword, f) # <<<<<<<<<<<<<< @@ -15072,7 +15072,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_4read_binary(struct __pyx_obj_3_sa_BiLex * __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":249 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":249 * self.read_wordlist(self.fword2id, self.id2fword, f) * self.read_wordlist(self.eword2id, self.id2eword, f) * fclose(f) # <<<<<<<<<<<<<< @@ -15106,7 +15106,7 @@ static PyObject *__pyx_pw_3_sa_5BiLex_7get_e_id(PyObject *__pyx_v_self, PyObject return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":252 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":252 * * * def get_e_id(self, eword): # <<<<<<<<<<<<<< @@ -15126,7 +15126,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_6get_e_id(struct __pyx_obj_3_sa_BiLex *__p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("get_e_id", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":253 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":253 * * def get_e_id(self, eword): * if eword not in self.eword2id: # <<<<<<<<<<<<<< @@ -15136,7 +15136,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_6get_e_id(struct __pyx_obj_3_sa_BiLex *__p __pyx_t_1 = (__Pyx_PySequence_Contains(__pyx_v_eword, __pyx_v_self->eword2id, Py_NE)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":254 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":254 * def get_e_id(self, eword): * if eword not in self.eword2id: * e_id = len(self.id2eword) # <<<<<<<<<<<<<< @@ -15152,7 +15152,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_6get_e_id(struct __pyx_obj_3_sa_BiLex *__p __pyx_v_e_id = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":255 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":255 * if eword not in self.eword2id: * e_id = len(self.id2eword) * self.id2eword.append(eword) # <<<<<<<<<<<<<< @@ -15163,7 +15163,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_6get_e_id(struct __pyx_obj_3_sa_BiLex *__p __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":256 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":256 * e_id = len(self.id2eword) * self.id2eword.append(eword) * self.eword2id[eword] = e_id # <<<<<<<<<<<<<< @@ -15175,7 +15175,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_6get_e_id(struct __pyx_obj_3_sa_BiLex *__p } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":257 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":257 * self.id2eword.append(eword) * self.eword2id[eword] = e_id * return self.eword2id[eword] # <<<<<<<<<<<<<< @@ -15213,7 +15213,7 @@ static PyObject *__pyx_pw_3_sa_5BiLex_9get_f_id(PyObject *__pyx_v_self, PyObject return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":260 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":260 * * * def get_f_id(self, fword): # <<<<<<<<<<<<<< @@ -15233,7 +15233,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_8get_f_id(struct __pyx_obj_3_sa_BiLex *__p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("get_f_id", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":261 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":261 * * def get_f_id(self, fword): * if fword not in self.fword2id: # <<<<<<<<<<<<<< @@ -15243,7 +15243,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_8get_f_id(struct __pyx_obj_3_sa_BiLex *__p __pyx_t_1 = (__Pyx_PySequence_Contains(__pyx_v_fword, __pyx_v_self->fword2id, Py_NE)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 261; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":262 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":262 * def get_f_id(self, fword): * if fword not in self.fword2id: * f_id = len(self.id2fword) # <<<<<<<<<<<<<< @@ -15259,7 +15259,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_8get_f_id(struct __pyx_obj_3_sa_BiLex *__p __pyx_v_f_id = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":263 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":263 * if fword not in self.fword2id: * f_id = len(self.id2fword) * self.id2fword.append(fword) # <<<<<<<<<<<<<< @@ -15270,7 +15270,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_8get_f_id(struct __pyx_obj_3_sa_BiLex *__p __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":264 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":264 * f_id = len(self.id2fword) * self.id2fword.append(fword) * self.fword2id[fword] = f_id # <<<<<<<<<<<<<< @@ -15282,7 +15282,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_8get_f_id(struct __pyx_obj_3_sa_BiLex *__p } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":265 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":265 * self.id2fword.append(fword) * self.fword2id[fword] = f_id * return self.fword2id[fword] # <<<<<<<<<<<<<< @@ -15330,7 +15330,7 @@ static PyObject *__pyx_pw_3_sa_5BiLex_11read_text(PyObject *__pyx_v_self, PyObje return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":268 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":268 * * * def read_text(self, char* filename): # <<<<<<<<<<<<<< @@ -15385,7 +15385,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("read_text", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":272 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":272 * cdef IntList fcount * * fcount = IntList() # <<<<<<<<<<<<<< @@ -15397,7 +15397,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_fcount = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":273 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":273 * * fcount = IntList() * with gzip_or_text(filename) as f: # <<<<<<<<<<<<<< @@ -15437,7 +15437,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_f = __pyx_t_1; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":275 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":275 * with gzip_or_text(filename) as f: * # first loop merely establishes size of array objects * for line in f: # <<<<<<<<<<<<<< @@ -15482,7 +15482,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_line = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":276 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":276 * # first loop merely establishes size of array objects * for line in f: * (fword, eword, score1, score2) = line.split() # <<<<<<<<<<<<<< @@ -15568,7 +15568,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_score2 = __pyx_t_12; __pyx_t_12 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":277 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":277 * for line in f: * (fword, eword, score1, score2) = line.split() * f_id = self.get_f_id(fword) # <<<<<<<<<<<<<< @@ -15590,7 +15590,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_f_id = __pyx_t_11; __pyx_t_11 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":278 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":278 * (fword, eword, score1, score2) = line.split() * f_id = self.get_f_id(fword) * e_id = self.get_e_id(eword) # <<<<<<<<<<<<<< @@ -15612,7 +15612,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_e_id = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":279 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":279 * f_id = self.get_f_id(fword) * e_id = self.get_e_id(eword) * while f_id >= len(fcount): # <<<<<<<<<<<<<< @@ -15629,7 +15629,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; if (!__pyx_t_16) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":280 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":280 * e_id = self.get_e_id(eword) * while f_id >= len(fcount): * fcount.append(0) # <<<<<<<<<<<<<< @@ -15641,7 +15641,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":281 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":281 * while f_id >= len(fcount): * fcount.append(0) * fcount.arr[f_id] = fcount.arr[f_id] + 1 # <<<<<<<<<<<<<< @@ -15654,7 +15654,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":284 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":284 * * # Allocate space for dictionary in arrays * N = 0 # <<<<<<<<<<<<<< @@ -15664,7 +15664,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __Pyx_INCREF(__pyx_int_0); __pyx_v_N = __pyx_int_0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":285 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":285 * # Allocate space for dictionary in arrays * N = 0 * n_f = len(fcount) # <<<<<<<<<<<<<< @@ -15677,7 +15677,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_n_f = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":286 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":286 * N = 0 * n_f = len(fcount) * self.f_index = IntList(initial_len=n_f+1) # <<<<<<<<<<<<<< @@ -15699,7 +15699,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_self->f_index = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_12); __pyx_t_12 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":287 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":287 * n_f = len(fcount) * self.f_index = IntList(initial_len=n_f+1) * for i from 0 <= i < n_f: # <<<<<<<<<<<<<< @@ -15714,7 +15714,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_i = __pyx_t_12; __pyx_t_12 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":288 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":288 * self.f_index = IntList(initial_len=n_f+1) * for i from 0 <= i < n_f: * self.f_index.arr[i] = N # <<<<<<<<<<<<<< @@ -15725,7 +15725,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_t_8 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_8 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 288; __pyx_clineno = __LINE__; goto __pyx_L7_error;} (__pyx_v_self->f_index->arr[__pyx_t_8]) = __pyx_t_20; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":289 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":289 * for i from 0 <= i < n_f: * self.f_index.arr[i] = N * N = N + fcount.arr[i] # <<<<<<<<<<<<<< @@ -15742,7 +15742,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_N = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":290 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":290 * self.f_index.arr[i] = N * N = N + fcount.arr[i] * fcount.arr[i] = 0 # <<<<<<<<<<<<<< @@ -15754,7 +15754,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_t_19 = __Pyx_PyInt_AsLong(__pyx_v_i); if (unlikely((__pyx_t_19 == (long)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L7_error;} } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":287 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":287 * n_f = len(fcount) * self.f_index = IntList(initial_len=n_f+1) * for i from 0 <= i < n_f: # <<<<<<<<<<<<<< @@ -15767,7 +15767,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_i = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":291 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":291 * N = N + fcount.arr[i] * fcount.arr[i] = 0 * self.f_index.arr[n_f] = N # <<<<<<<<<<<<<< @@ -15778,7 +15778,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_t_8 = __Pyx_PyIndex_AsSsize_t(__pyx_v_n_f); if (unlikely((__pyx_t_8 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 291; __pyx_clineno = __LINE__; goto __pyx_L7_error;} (__pyx_v_self->f_index->arr[__pyx_t_8]) = __pyx_t_20; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":292 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":292 * fcount.arr[i] = 0 * self.f_index.arr[n_f] = N * self.e_index = IntList(initial_len=N) # <<<<<<<<<<<<<< @@ -15797,7 +15797,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_self->e_index = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_12); __pyx_t_12 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":293 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":293 * self.f_index.arr[n_f] = N * self.e_index = IntList(initial_len=N) * self.col1 = FloatList(initial_len=N) # <<<<<<<<<<<<<< @@ -15816,7 +15816,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_self->col1 = ((struct __pyx_obj_3_sa_FloatList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":294 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":294 * self.e_index = IntList(initial_len=N) * self.col1 = FloatList(initial_len=N) * self.col2 = FloatList(initial_len=N) # <<<<<<<<<<<<<< @@ -15835,7 +15835,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_self->col2 = ((struct __pyx_obj_3_sa_FloatList *)__pyx_t_12); __pyx_t_12 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":297 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":297 * * # Re-read file, placing words into buckets * f.seek(0) # <<<<<<<<<<<<<< @@ -15849,7 +15849,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":298 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":298 * # Re-read file, placing words into buckets * f.seek(0) * for line in f: # <<<<<<<<<<<<<< @@ -15894,7 +15894,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_line = __pyx_t_12; __pyx_t_12 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":299 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":299 * f.seek(0) * for line in f: * (fword, eword, score1, score2) = line.split() # <<<<<<<<<<<<<< @@ -15980,7 +15980,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_score2 = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":300 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":300 * for line in f: * (fword, eword, score1, score2) = line.split() * f_id = self.get_f_id(fword) # <<<<<<<<<<<<<< @@ -16002,7 +16002,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_f_id = __pyx_t_10; __pyx_t_10 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":301 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":301 * (fword, eword, score1, score2) = line.split() * f_id = self.get_f_id(fword) * e_id = self.get_e_id(eword) # <<<<<<<<<<<<<< @@ -16024,7 +16024,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_e_id = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":302 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":302 * f_id = self.get_f_id(fword) * e_id = self.get_e_id(eword) * index = self.f_index.arr[f_id] + fcount.arr[f_id] # <<<<<<<<<<<<<< @@ -16039,7 +16039,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_index = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":303 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":303 * e_id = self.get_e_id(eword) * index = self.f_index.arr[f_id] + fcount.arr[f_id] * fcount.arr[f_id] = fcount.arr[f_id] + 1 # <<<<<<<<<<<<<< @@ -16050,7 +16050,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_t_15 = __Pyx_PyIndex_AsSsize_t(__pyx_v_f_id); if (unlikely((__pyx_t_15 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 303; __pyx_clineno = __LINE__; goto __pyx_L7_error;} (__pyx_v_fcount->arr[__pyx_t_15]) = ((__pyx_v_fcount->arr[__pyx_t_17]) + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":304 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":304 * index = self.f_index.arr[f_id] + fcount.arr[f_id] * fcount.arr[f_id] = fcount.arr[f_id] + 1 * self.e_index.arr[index] = int(e_id) # <<<<<<<<<<<<<< @@ -16070,7 +16070,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_t_17 = __Pyx_PyIndex_AsSsize_t(__pyx_v_index); if (unlikely((__pyx_t_17 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L7_error;} (__pyx_v_self->e_index->arr[__pyx_t_17]) = __pyx_t_20; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":305 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":305 * fcount.arr[f_id] = fcount.arr[f_id] + 1 * self.e_index.arr[index] = int(e_id) * self.col1[index] = float(score1) # <<<<<<<<<<<<<< @@ -16083,7 +16083,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ if (PyObject_SetItem(((PyObject *)__pyx_v_self->col1), __pyx_v_index, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L7_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":306 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":306 * self.e_index.arr[index] = int(e_id) * self.col1[index] = float(score1) * self.col2[index] = float(score2) # <<<<<<<<<<<<<< @@ -16111,7 +16111,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":273 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":273 * * fcount = IntList() * with gzip_or_text(filename) as f: # <<<<<<<<<<<<<< @@ -16191,7 +16191,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_L31:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":309 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":309 * * # Sort buckets by eword * for b from 0 <= b < n_f: # <<<<<<<<<<<<<< @@ -16207,7 +16207,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_b = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":310 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":310 * # Sort buckets by eword * for b from 0 <= b < n_f: * i = self.f_index.arr[b] # <<<<<<<<<<<<<< @@ -16221,7 +16221,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_i = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":311 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":311 * for b from 0 <= b < n_f: * i = self.f_index.arr[b] * j = self.f_index.arr[b+1] # <<<<<<<<<<<<<< @@ -16238,7 +16238,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_j = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":312 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":312 * i = self.f_index.arr[b] * j = self.f_index.arr[b+1] * self.qsort(i,j, "") # <<<<<<<<<<<<<< @@ -16256,7 +16256,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ __pyx_t_18 = __Pyx_PyInt_AsLong(__pyx_v_b); if (unlikely((__pyx_t_18 == (long)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 309; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":309 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":309 * * # Sort buckets by eword * for b from 0 <= b < n_f: # <<<<<<<<<<<<<< @@ -16302,7 +16302,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_10read_text(struct __pyx_obj_3_sa_BiLex *_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":315 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":315 * * * cdef swap(self, int i, int j): # <<<<<<<<<<<<<< @@ -16318,7 +16318,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_swap(struct __pyx_obj_3_sa_BiLex *__pyx_v_s int __pyx_t_1; __Pyx_RefNannySetupContext("swap", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":319 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":319 * cdef float ftmp * * if i == j: # <<<<<<<<<<<<<< @@ -16328,7 +16328,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_swap(struct __pyx_obj_3_sa_BiLex *__pyx_v_s __pyx_t_1 = (__pyx_v_i == __pyx_v_j); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":320 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":320 * * if i == j: * return # <<<<<<<<<<<<<< @@ -16342,7 +16342,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_swap(struct __pyx_obj_3_sa_BiLex *__pyx_v_s } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":322 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":322 * return * * itmp = self.e_index.arr[i] # <<<<<<<<<<<<<< @@ -16351,7 +16351,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_swap(struct __pyx_obj_3_sa_BiLex *__pyx_v_s */ __pyx_v_itmp = (__pyx_v_self->e_index->arr[__pyx_v_i]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":323 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":323 * * itmp = self.e_index.arr[i] * self.e_index.arr[i] = self.e_index.arr[j] # <<<<<<<<<<<<<< @@ -16360,7 +16360,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_swap(struct __pyx_obj_3_sa_BiLex *__pyx_v_s */ (__pyx_v_self->e_index->arr[__pyx_v_i]) = (__pyx_v_self->e_index->arr[__pyx_v_j]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":324 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":324 * itmp = self.e_index.arr[i] * self.e_index.arr[i] = self.e_index.arr[j] * self.e_index.arr[j] = itmp # <<<<<<<<<<<<<< @@ -16369,7 +16369,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_swap(struct __pyx_obj_3_sa_BiLex *__pyx_v_s */ (__pyx_v_self->e_index->arr[__pyx_v_j]) = __pyx_v_itmp; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":326 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":326 * self.e_index.arr[j] = itmp * * ftmp = self.col1.arr[i] # <<<<<<<<<<<<<< @@ -16378,7 +16378,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_swap(struct __pyx_obj_3_sa_BiLex *__pyx_v_s */ __pyx_v_ftmp = (__pyx_v_self->col1->arr[__pyx_v_i]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":327 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":327 * * ftmp = self.col1.arr[i] * self.col1.arr[i] = self.col1.arr[j] # <<<<<<<<<<<<<< @@ -16387,7 +16387,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_swap(struct __pyx_obj_3_sa_BiLex *__pyx_v_s */ (__pyx_v_self->col1->arr[__pyx_v_i]) = (__pyx_v_self->col1->arr[__pyx_v_j]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":328 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":328 * ftmp = self.col1.arr[i] * self.col1.arr[i] = self.col1.arr[j] * self.col1.arr[j] = ftmp # <<<<<<<<<<<<<< @@ -16396,7 +16396,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_swap(struct __pyx_obj_3_sa_BiLex *__pyx_v_s */ (__pyx_v_self->col1->arr[__pyx_v_j]) = __pyx_v_ftmp; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":330 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":330 * self.col1.arr[j] = ftmp * * ftmp = self.col2.arr[i] # <<<<<<<<<<<<<< @@ -16405,7 +16405,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_swap(struct __pyx_obj_3_sa_BiLex *__pyx_v_s */ __pyx_v_ftmp = (__pyx_v_self->col2->arr[__pyx_v_i]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":331 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":331 * * ftmp = self.col2.arr[i] * self.col2.arr[i] = self.col2.arr[j] # <<<<<<<<<<<<<< @@ -16414,7 +16414,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_swap(struct __pyx_obj_3_sa_BiLex *__pyx_v_s */ (__pyx_v_self->col2->arr[__pyx_v_i]) = (__pyx_v_self->col2->arr[__pyx_v_j]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":332 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":332 * ftmp = self.col2.arr[i] * self.col2.arr[i] = self.col2.arr[j] * self.col2.arr[j] = ftmp # <<<<<<<<<<<<<< @@ -16430,7 +16430,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_swap(struct __pyx_obj_3_sa_BiLex *__pyx_v_s return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":335 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":335 * * * cdef qsort(self, int i, int j, pad): # <<<<<<<<<<<<<< @@ -16453,7 +16453,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("qsort", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":338 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":338 * cdef int pval, p * * if i > j: # <<<<<<<<<<<<<< @@ -16463,7 +16463,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ __pyx_t_1 = (__pyx_v_i > __pyx_v_j); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":339 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":339 * * if i > j: * raise Exception("Sort error in CLex") # <<<<<<<<<<<<<< @@ -16479,7 +16479,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":340 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":340 * if i > j: * raise Exception("Sort error in CLex") * if i == j: #empty interval # <<<<<<<<<<<<<< @@ -16489,7 +16489,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ __pyx_t_1 = (__pyx_v_i == __pyx_v_j); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":341 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":341 * raise Exception("Sort error in CLex") * if i == j: #empty interval * return # <<<<<<<<<<<<<< @@ -16503,7 +16503,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":342 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":342 * if i == j: #empty interval * return * if i == j-1: # singleton interval # <<<<<<<<<<<<<< @@ -16513,7 +16513,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ __pyx_t_1 = (__pyx_v_i == (__pyx_v_j - 1)); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":343 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":343 * return * if i == j-1: # singleton interval * return # <<<<<<<<<<<<<< @@ -16527,7 +16527,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":345 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":345 * return * * p = (i+j)/2 # <<<<<<<<<<<<<< @@ -16536,7 +16536,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ */ __pyx_v_p = __Pyx_div_long((__pyx_v_i + __pyx_v_j), 2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":346 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":346 * * p = (i+j)/2 * pval = self.e_index.arr[p] # <<<<<<<<<<<<<< @@ -16545,7 +16545,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ */ __pyx_v_pval = (__pyx_v_self->e_index->arr[__pyx_v_p]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":347 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":347 * p = (i+j)/2 * pval = self.e_index.arr[p] * self.swap(i, p) # <<<<<<<<<<<<<< @@ -16556,7 +16556,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":348 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":348 * pval = self.e_index.arr[p] * self.swap(i, p) * p = i # <<<<<<<<<<<<<< @@ -16565,7 +16565,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ */ __pyx_v_p = __pyx_v_i; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":349 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":349 * self.swap(i, p) * p = i * for k from i+1 <= k < j: # <<<<<<<<<<<<<< @@ -16575,7 +16575,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ __pyx_t_3 = __pyx_v_j; for (__pyx_v_k = (__pyx_v_i + 1); __pyx_v_k < __pyx_t_3; __pyx_v_k++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":350 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":350 * p = i * for k from i+1 <= k < j: * if pval >= self.e_index.arr[k]: # <<<<<<<<<<<<<< @@ -16585,7 +16585,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ __pyx_t_1 = (__pyx_v_pval >= (__pyx_v_self->e_index->arr[__pyx_v_k])); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":351 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":351 * for k from i+1 <= k < j: * if pval >= self.e_index.arr[k]: * self.swap(p+1, k) # <<<<<<<<<<<<<< @@ -16596,7 +16596,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":352 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":352 * if pval >= self.e_index.arr[k]: * self.swap(p+1, k) * self.swap(p, p+1) # <<<<<<<<<<<<<< @@ -16607,7 +16607,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":353 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":353 * self.swap(p+1, k) * self.swap(p, p+1) * p = p + 1 # <<<<<<<<<<<<<< @@ -16620,7 +16620,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ __pyx_L8:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":354 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":354 * self.swap(p, p+1) * p = p + 1 * self.qsort(i,p, pad+" ") # <<<<<<<<<<<<<< @@ -16634,7 +16634,7 @@ static PyObject *__pyx_f_3_sa_5BiLex_qsort(struct __pyx_obj_3_sa_BiLex *__pyx_v_ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":355 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":355 * p = p + 1 * self.qsort(i,p, pad+" ") * self.qsort(p+1,j, pad+" ") # <<<<<<<<<<<<<< @@ -16682,7 +16682,7 @@ static PyObject *__pyx_pw_3_sa_5BiLex_13write_enhanced(PyObject *__pyx_v_self, P return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":358 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":358 * * * def write_enhanced(self, char* filename): # <<<<<<<<<<<<<< @@ -16719,7 +16719,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_enhanced", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":359 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":359 * * def write_enhanced(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -16759,7 +16759,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL __pyx_v_f = __pyx_t_4; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":360 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":360 * def write_enhanced(self, char* filename): * with open(filename, "w") as f: * for i in self.f_index: # <<<<<<<<<<<<<< @@ -16804,7 +16804,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL __pyx_v_i = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":361 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":361 * with open(filename, "w") as f: * for i in self.f_index: * f.write("%d " % i) # <<<<<<<<<<<<<< @@ -16828,7 +16828,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL } __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":362 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":362 * for i in self.f_index: * f.write("%d " % i) * f.write("\n") # <<<<<<<<<<<<<< @@ -16842,7 +16842,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":363 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":363 * f.write("%d " % i) * f.write("\n") * for i, s1, s2 in zip(self.e_index, self.col1, self.col2): # <<<<<<<<<<<<<< @@ -16963,7 +16963,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL __pyx_v_s2 = __pyx_t_11; __pyx_t_11 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":364 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":364 * f.write("\n") * for i, s1, s2 in zip(self.e_index, self.col1, self.col2): * f.write("%d %f %f " % (i, s1, s2)) # <<<<<<<<<<<<<< @@ -16999,7 +16999,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":365 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":365 * for i, s1, s2 in zip(self.e_index, self.col1, self.col2): * f.write("%d %f %f " % (i, s1, s2)) * f.write("\n") # <<<<<<<<<<<<<< @@ -17013,7 +17013,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":366 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":366 * f.write("%d %f %f " % (i, s1, s2)) * f.write("\n") * for i, w in enumerate(self.id2fword): # <<<<<<<<<<<<<< @@ -17068,7 +17068,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL __pyx_t_1 = __pyx_t_11; __pyx_t_11 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":367 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":367 * f.write("\n") * for i, w in enumerate(self.id2fword): * f.write("%d %s " % (i, w)) # <<<<<<<<<<<<<< @@ -17102,7 +17102,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":368 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":368 * for i, w in enumerate(self.id2fword): * f.write("%d %s " % (i, w)) * f.write("\n") # <<<<<<<<<<<<<< @@ -17116,7 +17116,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":369 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":369 * f.write("%d %s " % (i, w)) * f.write("\n") * for i, w in enumerate(self.id2eword): # <<<<<<<<<<<<<< @@ -17171,7 +17171,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL __pyx_t_2 = __pyx_t_10; __pyx_t_10 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":370 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":370 * f.write("\n") * for i, w in enumerate(self.id2eword): * f.write("%d %s " % (i, w)) # <<<<<<<<<<<<<< @@ -17205,7 +17205,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":371 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":371 * for i, w in enumerate(self.id2eword): * f.write("%d %s " % (i, w)) * f.write("\n") # <<<<<<<<<<<<<< @@ -17231,7 +17231,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_12write_enhanced(struct __pyx_obj_3_sa_BiL __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":359 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":359 * * def write_enhanced(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -17398,7 +17398,7 @@ static PyObject *__pyx_pw_3_sa_5BiLex_15get_score(PyObject *__pyx_v_self, PyObje return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":374 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":374 * * * def get_score(self, fword, eword, col): # <<<<<<<<<<<<<< @@ -17424,7 +17424,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("get_score", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":377 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":377 * cdef e_id, f_id, low, high, midpoint, val * * if eword not in self.eword2id: # <<<<<<<<<<<<<< @@ -17434,7 +17434,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ __pyx_t_1 = (__Pyx_PySequence_Contains(__pyx_v_eword, __pyx_v_self->eword2id, Py_NE)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 377; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":378 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":378 * * if eword not in self.eword2id: * return None # <<<<<<<<<<<<<< @@ -17449,7 +17449,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":379 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":379 * if eword not in self.eword2id: * return None * if fword not in self.fword2id: # <<<<<<<<<<<<<< @@ -17459,7 +17459,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ __pyx_t_1 = (__Pyx_PySequence_Contains(__pyx_v_fword, __pyx_v_self->fword2id, Py_NE)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 379; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":380 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":380 * return None * if fword not in self.fword2id: * return None # <<<<<<<<<<<<<< @@ -17474,7 +17474,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":381 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":381 * if fword not in self.fword2id: * return None * f_id = self.fword2id[fword] # <<<<<<<<<<<<<< @@ -17486,7 +17486,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_f_id = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":382 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":382 * return None * f_id = self.fword2id[fword] * e_id = self.eword2id[eword] # <<<<<<<<<<<<<< @@ -17498,7 +17498,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_e_id = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":383 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":383 * f_id = self.fword2id[fword] * e_id = self.eword2id[eword] * low = self.f_index.arr[f_id] # <<<<<<<<<<<<<< @@ -17511,7 +17511,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_low = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":384 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":384 * e_id = self.eword2id[eword] * low = self.f_index.arr[f_id] * high = self.f_index.arr[f_id+1] # <<<<<<<<<<<<<< @@ -17527,7 +17527,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_high = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":385 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":385 * low = self.f_index.arr[f_id] * high = self.f_index.arr[f_id+1] * while high - low > 0: # <<<<<<<<<<<<<< @@ -17543,7 +17543,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (!__pyx_t_1) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":386 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":386 * high = self.f_index.arr[f_id+1] * while high - low > 0: * midpoint = (low+high)/2 # <<<<<<<<<<<<<< @@ -17559,7 +17559,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_midpoint = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":387 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":387 * while high - low > 0: * midpoint = (low+high)/2 * val = self.e_index.arr[midpoint] # <<<<<<<<<<<<<< @@ -17573,7 +17573,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ __pyx_v_val = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":388 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":388 * midpoint = (low+high)/2 * val = self.e_index.arr[midpoint] * if val == e_id: # <<<<<<<<<<<<<< @@ -17585,7 +17585,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":389 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":389 * val = self.e_index.arr[midpoint] * if val == e_id: * if col == 0: # <<<<<<<<<<<<<< @@ -17597,7 +17597,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":390 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":390 * if val == e_id: * if col == 0: * return self.col1.arr[midpoint] # <<<<<<<<<<<<<< @@ -17615,7 +17615,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ } __pyx_L8:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":391 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":391 * if col == 0: * return self.col1.arr[midpoint] * if col == 1: # <<<<<<<<<<<<<< @@ -17627,7 +17627,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":392 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":392 * return self.col1.arr[midpoint] * if col == 1: * return self.col2.arr[midpoint] # <<<<<<<<<<<<<< @@ -17648,7 +17648,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ } __pyx_L7:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":393 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":393 * if col == 1: * return self.col2.arr[midpoint] * if val > e_id: # <<<<<<<<<<<<<< @@ -17660,7 +17660,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":394 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":394 * return self.col2.arr[midpoint] * if val > e_id: * high = midpoint # <<<<<<<<<<<<<< @@ -17674,7 +17674,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ } __pyx_L10:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":395 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":395 * if val > e_id: * high = midpoint * if val < e_id: # <<<<<<<<<<<<<< @@ -17686,7 +17686,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":396 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":396 * high = midpoint * if val < e_id: * low = midpoint + 1 # <<<<<<<<<<<<<< @@ -17703,7 +17703,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_14get_score(struct __pyx_obj_3_sa_BiLex *_ __pyx_L11:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":397 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":397 * if val < e_id: * low = midpoint + 1 * return None # <<<<<<<<<<<<<< @@ -17756,7 +17756,7 @@ static PyObject *__pyx_pw_3_sa_5BiLex_17write_text(PyObject *__pyx_v_self, PyObj return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":400 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":400 * * * def write_text(self, char* filename): # <<<<<<<<<<<<<< @@ -17793,7 +17793,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex * int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_text", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":404 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":404 * cdef i, N, e_id, f_id * * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -17833,7 +17833,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex * __pyx_v_f = __pyx_t_4; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":405 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":405 * * with open(filename, "w") as f: * N = len(self.e_index) # <<<<<<<<<<<<<< @@ -17849,7 +17849,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex * __pyx_v_N = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":406 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":406 * with open(filename, "w") as f: * N = len(self.e_index) * f_id = 0 # <<<<<<<<<<<<<< @@ -17859,7 +17859,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex * __Pyx_INCREF(__pyx_int_0); __pyx_v_f_id = __pyx_int_0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":407 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":407 * N = len(self.e_index) * f_id = 0 * for i from 0 <= i < N: # <<<<<<<<<<<<<< @@ -17874,7 +17874,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex * __pyx_v_i = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":408 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":408 * f_id = 0 * for i from 0 <= i < N: * while self.f_index.arr[f_id+1] == i: # <<<<<<<<<<<<<< @@ -17894,7 +17894,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex * __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (!__pyx_t_11) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":409 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":409 * for i from 0 <= i < N: * while self.f_index.arr[f_id+1] == i: * f_id = f_id + 1 # <<<<<<<<<<<<<< @@ -17908,7 +17908,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex * __pyx_t_1 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":410 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":410 * while self.f_index.arr[f_id+1] == i: * f_id = f_id + 1 * e_id = self.e_index.arr[i] # <<<<<<<<<<<<<< @@ -17922,7 +17922,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex * __pyx_v_e_id = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":411 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":411 * f_id = f_id + 1 * e_id = self.e_index.arr[i] * score1 = self.col1.arr[i] # <<<<<<<<<<<<<< @@ -17936,7 +17936,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex * __pyx_v_score1 = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":412 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":412 * e_id = self.e_index.arr[i] * score1 = self.col1.arr[i] * score2 = self.col2.arr[i] # <<<<<<<<<<<<<< @@ -17949,7 +17949,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex * __pyx_v_score2 = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":413 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":413 * score1 = self.col1.arr[i] * score2 = self.col2.arr[i] * f.write("%s %s %.6f %.6f\n" % (self.id2fword[f_id], self.id2eword[e_id], score1, score2)) # <<<<<<<<<<<<<< @@ -17990,7 +17990,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex * __pyx_t_10 = __Pyx_PyInt_AsLong(__pyx_v_i); if (unlikely((__pyx_t_10 == (long)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L7_error;} } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":407 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":407 * N = len(self.e_index) * f_id = 0 * for i from 0 <= i < N: # <<<<<<<<<<<<<< @@ -18013,7 +18013,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex * __Pyx_XDECREF(__pyx_t_12); __pyx_t_12 = 0; __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":404 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":404 * cdef i, N, e_id, f_id * * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -18115,7 +18115,7 @@ static PyObject *__pyx_pf_3_sa_5BiLex_16write_text(struct __pyx_obj_3_sa_BiLex * return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":21 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":21 * cdef int LOWER_MASK[32] * * cdef void _init_lower_mask(): # <<<<<<<<<<<<<< @@ -18131,7 +18131,7 @@ static void __pyx_f_3_sa__init_lower_mask(void) { unsigned int __pyx_t_2; __Pyx_RefNannySetupContext("_init_lower_mask", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":23 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":23 * cdef void _init_lower_mask(): * cdef unsigned i * cdef int mask = 0 # <<<<<<<<<<<<<< @@ -18140,7 +18140,7 @@ static void __pyx_f_3_sa__init_lower_mask(void) { */ __pyx_v_mask = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":24 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":24 * cdef unsigned i * cdef int mask = 0 * for i in range(MIN_BOTTOM_SIZE): # <<<<<<<<<<<<<< @@ -18151,7 +18151,7 @@ static void __pyx_f_3_sa__init_lower_mask(void) { for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_i = __pyx_t_2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":25 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":25 * cdef int mask = 0 * for i in range(MIN_BOTTOM_SIZE): * mask = (mask << 1) + 1 # <<<<<<<<<<<<<< @@ -18160,7 +18160,7 @@ static void __pyx_f_3_sa__init_lower_mask(void) { */ __pyx_v_mask = ((__pyx_v_mask << 1) + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":26 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":26 * for i in range(MIN_BOTTOM_SIZE): * mask = (mask << 1) + 1 * LOWER_MASK[i] = mask # <<<<<<<<<<<<<< @@ -18173,7 +18173,7 @@ static void __pyx_f_3_sa__init_lower_mask(void) { __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":37 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":37 * * * cdef _BitSet* new_BitSet(): # <<<<<<<<<<<<<< @@ -18187,7 +18187,7 @@ static struct __pyx_t_3_sa__BitSet *__pyx_f_3_sa_new_BitSet(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("new_BitSet", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":40 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":40 * cdef _BitSet* b * * b = <_BitSet*> malloc(sizeof(_BitSet)) # <<<<<<<<<<<<<< @@ -18196,7 +18196,7 @@ static struct __pyx_t_3_sa__BitSet *__pyx_f_3_sa_new_BitSet(void) { */ __pyx_v_b = ((struct __pyx_t_3_sa__BitSet *)malloc((sizeof(struct __pyx_t_3_sa__BitSet)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":41 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":41 * * b = <_BitSet*> malloc(sizeof(_BitSet)) * b.bitset = 0 # <<<<<<<<<<<<<< @@ -18205,7 +18205,7 @@ static struct __pyx_t_3_sa__BitSet *__pyx_f_3_sa_new_BitSet(void) { */ __pyx_v_b->bitset = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":42 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":42 * b = <_BitSet*> malloc(sizeof(_BitSet)) * b.bitset = 0 * b.min_val = -1 # <<<<<<<<<<<<<< @@ -18214,7 +18214,7 @@ static struct __pyx_t_3_sa__BitSet *__pyx_f_3_sa_new_BitSet(void) { */ __pyx_v_b->min_val = -1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":43 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":43 * b.bitset = 0 * b.min_val = -1 * b.max_val = -1 # <<<<<<<<<<<<<< @@ -18223,7 +18223,7 @@ static struct __pyx_t_3_sa__BitSet *__pyx_f_3_sa_new_BitSet(void) { */ __pyx_v_b->max_val = -1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":44 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":44 * b.min_val = -1 * b.max_val = -1 * b.size = 0 # <<<<<<<<<<<<<< @@ -18232,7 +18232,7 @@ static struct __pyx_t_3_sa__BitSet *__pyx_f_3_sa_new_BitSet(void) { */ __pyx_v_b->size = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":45 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":45 * b.max_val = -1 * b.size = 0 * return b # <<<<<<<<<<<<<< @@ -18248,7 +18248,7 @@ static struct __pyx_t_3_sa__BitSet *__pyx_f_3_sa_new_BitSet(void) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":48 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":48 * * * cdef int bitset_findsucc(_BitSet* b, int i): # <<<<<<<<<<<<<< @@ -18269,7 +18269,7 @@ static int __pyx_f_3_sa_bitset_findsucc(struct __pyx_t_3_sa__BitSet *__pyx_v_b, int __pyx_t_3; __Pyx_RefNannySetupContext("bitset_findsucc", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":52 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":52 * cdef int low, high, mid * * if b.max_val == -1 or i >= b.max_val: # <<<<<<<<<<<<<< @@ -18285,7 +18285,7 @@ static int __pyx_f_3_sa_bitset_findsucc(struct __pyx_t_3_sa__BitSet *__pyx_v_b, } if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":53 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":53 * * if b.max_val == -1 or i >= b.max_val: * return -1 # <<<<<<<<<<<<<< @@ -18298,7 +18298,7 @@ static int __pyx_f_3_sa_bitset_findsucc(struct __pyx_t_3_sa__BitSet *__pyx_v_b, } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":54 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":54 * if b.max_val == -1 or i >= b.max_val: * return -1 * if i < b.min_val: # <<<<<<<<<<<<<< @@ -18308,7 +18308,7 @@ static int __pyx_f_3_sa_bitset_findsucc(struct __pyx_t_3_sa__BitSet *__pyx_v_b, __pyx_t_3 = (__pyx_v_i < __pyx_v_b->min_val); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":55 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":55 * return -1 * if i < b.min_val: * return b.min_val # <<<<<<<<<<<<<< @@ -18321,7 +18321,7 @@ static int __pyx_f_3_sa_bitset_findsucc(struct __pyx_t_3_sa__BitSet *__pyx_v_b, } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":57 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":57 * return b.min_val * * bitset = b.bitset & ~LOWER_MASK[i] # <<<<<<<<<<<<<< @@ -18330,7 +18330,7 @@ static int __pyx_f_3_sa_bitset_findsucc(struct __pyx_t_3_sa__BitSet *__pyx_v_b, */ __pyx_v_bitset = (__pyx_v_b->bitset & (~(__pyx_v_3_sa_LOWER_MASK[__pyx_v_i]))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":58 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":58 * * bitset = b.bitset & ~LOWER_MASK[i] * low = i+1 # <<<<<<<<<<<<<< @@ -18339,7 +18339,7 @@ static int __pyx_f_3_sa_bitset_findsucc(struct __pyx_t_3_sa__BitSet *__pyx_v_b, */ __pyx_v_low = (__pyx_v_i + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":59 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":59 * bitset = b.bitset & ~LOWER_MASK[i] * low = i+1 * high = b.max_val+1 # <<<<<<<<<<<<<< @@ -18348,7 +18348,7 @@ static int __pyx_f_3_sa_bitset_findsucc(struct __pyx_t_3_sa__BitSet *__pyx_v_b, */ __pyx_v_high = (__pyx_v_b->max_val + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":60 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":60 * low = i+1 * high = b.max_val+1 * while low < high-1: # <<<<<<<<<<<<<< @@ -18359,7 +18359,7 @@ static int __pyx_f_3_sa_bitset_findsucc(struct __pyx_t_3_sa__BitSet *__pyx_v_b, __pyx_t_3 = (__pyx_v_low < (__pyx_v_high - 1)); if (!__pyx_t_3) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":61 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":61 * high = b.max_val+1 * while low < high-1: * mid = (high + low)/2 # <<<<<<<<<<<<<< @@ -18368,7 +18368,7 @@ static int __pyx_f_3_sa_bitset_findsucc(struct __pyx_t_3_sa__BitSet *__pyx_v_b, */ __pyx_v_mid = __Pyx_div_long((__pyx_v_high + __pyx_v_low), 2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":62 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":62 * while low < high-1: * mid = (high + low)/2 * mask = ~(LOWER_MASK[high-1] ^ LOWER_MASK[mid-1]) # <<<<<<<<<<<<<< @@ -18377,7 +18377,7 @@ static int __pyx_f_3_sa_bitset_findsucc(struct __pyx_t_3_sa__BitSet *__pyx_v_b, */ __pyx_v_mask = (~((__pyx_v_3_sa_LOWER_MASK[(__pyx_v_high - 1)]) ^ (__pyx_v_3_sa_LOWER_MASK[(__pyx_v_mid - 1)]))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":63 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":63 * mid = (high + low)/2 * mask = ~(LOWER_MASK[high-1] ^ LOWER_MASK[mid-1]) * if bitset & mask == 0: # <<<<<<<<<<<<<< @@ -18387,7 +18387,7 @@ static int __pyx_f_3_sa_bitset_findsucc(struct __pyx_t_3_sa__BitSet *__pyx_v_b, __pyx_t_3 = ((__pyx_v_bitset & __pyx_v_mask) == 0); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":64 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":64 * mask = ~(LOWER_MASK[high-1] ^ LOWER_MASK[mid-1]) * if bitset & mask == 0: * low = mid # <<<<<<<<<<<<<< @@ -18399,7 +18399,7 @@ static int __pyx_f_3_sa_bitset_findsucc(struct __pyx_t_3_sa__BitSet *__pyx_v_b, } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":66 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":66 * low = mid * else: * bitset = bitset & mask # <<<<<<<<<<<<<< @@ -18408,7 +18408,7 @@ static int __pyx_f_3_sa_bitset_findsucc(struct __pyx_t_3_sa__BitSet *__pyx_v_b, */ __pyx_v_bitset = (__pyx_v_bitset & __pyx_v_mask); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":67 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":67 * else: * bitset = bitset & mask * high = mid # <<<<<<<<<<<<<< @@ -18420,7 +18420,7 @@ static int __pyx_f_3_sa_bitset_findsucc(struct __pyx_t_3_sa__BitSet *__pyx_v_b, __pyx_L7:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":68 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":68 * bitset = bitset & mask * high = mid * return low # <<<<<<<<<<<<<< @@ -18436,7 +18436,7 @@ static int __pyx_f_3_sa_bitset_findsucc(struct __pyx_t_3_sa__BitSet *__pyx_v_b, return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":71 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":71 * * * cdef int bitset_insert(_BitSet* b, int i): # <<<<<<<<<<<<<< @@ -18451,7 +18451,7 @@ static int __pyx_f_3_sa_bitset_insert(struct __pyx_t_3_sa__BitSet *__pyx_v_b, in int __pyx_t_1; __Pyx_RefNannySetupContext("bitset_insert", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":74 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":74 * cdef int val * * val = 1 << i # <<<<<<<<<<<<<< @@ -18460,7 +18460,7 @@ static int __pyx_f_3_sa_bitset_insert(struct __pyx_t_3_sa__BitSet *__pyx_v_b, in */ __pyx_v_val = (1 << __pyx_v_i); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":75 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":75 * * val = 1 << i * if b.bitset & val == 0: # <<<<<<<<<<<<<< @@ -18470,7 +18470,7 @@ static int __pyx_f_3_sa_bitset_insert(struct __pyx_t_3_sa__BitSet *__pyx_v_b, in __pyx_t_1 = ((__pyx_v_b->bitset & __pyx_v_val) == 0); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":76 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":76 * val = 1 << i * if b.bitset & val == 0: * b.bitset = b.bitset | val # <<<<<<<<<<<<<< @@ -18479,7 +18479,7 @@ static int __pyx_f_3_sa_bitset_insert(struct __pyx_t_3_sa__BitSet *__pyx_v_b, in */ __pyx_v_b->bitset = (__pyx_v_b->bitset | __pyx_v_val); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":77 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":77 * if b.bitset & val == 0: * b.bitset = b.bitset | val * if b.size == 0: # <<<<<<<<<<<<<< @@ -18489,7 +18489,7 @@ static int __pyx_f_3_sa_bitset_insert(struct __pyx_t_3_sa__BitSet *__pyx_v_b, in __pyx_t_1 = (__pyx_v_b->size == 0); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":78 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":78 * b.bitset = b.bitset | val * if b.size == 0: * b.min_val = i # <<<<<<<<<<<<<< @@ -18498,7 +18498,7 @@ static int __pyx_f_3_sa_bitset_insert(struct __pyx_t_3_sa__BitSet *__pyx_v_b, in */ __pyx_v_b->min_val = __pyx_v_i; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":79 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":79 * if b.size == 0: * b.min_val = i * b.max_val = i # <<<<<<<<<<<<<< @@ -18510,7 +18510,7 @@ static int __pyx_f_3_sa_bitset_insert(struct __pyx_t_3_sa__BitSet *__pyx_v_b, in } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":81 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":81 * b.max_val = i * else: * if i < b.min_val: # <<<<<<<<<<<<<< @@ -18520,7 +18520,7 @@ static int __pyx_f_3_sa_bitset_insert(struct __pyx_t_3_sa__BitSet *__pyx_v_b, in __pyx_t_1 = (__pyx_v_i < __pyx_v_b->min_val); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":82 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":82 * else: * if i < b.min_val: * b.min_val = i # <<<<<<<<<<<<<< @@ -18532,7 +18532,7 @@ static int __pyx_f_3_sa_bitset_insert(struct __pyx_t_3_sa__BitSet *__pyx_v_b, in } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":83 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":83 * if i < b.min_val: * b.min_val = i * if i > b.max_val: # <<<<<<<<<<<<<< @@ -18542,7 +18542,7 @@ static int __pyx_f_3_sa_bitset_insert(struct __pyx_t_3_sa__BitSet *__pyx_v_b, in __pyx_t_1 = (__pyx_v_i > __pyx_v_b->max_val); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":84 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":84 * b.min_val = i * if i > b.max_val: * b.max_val = i # <<<<<<<<<<<<<< @@ -18556,7 +18556,7 @@ static int __pyx_f_3_sa_bitset_insert(struct __pyx_t_3_sa__BitSet *__pyx_v_b, in } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":85 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":85 * if i > b.max_val: * b.max_val = i * b.size = b.size + 1 # <<<<<<<<<<<<<< @@ -18565,7 +18565,7 @@ static int __pyx_f_3_sa_bitset_insert(struct __pyx_t_3_sa__BitSet *__pyx_v_b, in */ __pyx_v_b->size = (__pyx_v_b->size + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":86 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":86 * b.max_val = i * b.size = b.size + 1 * return 1 # <<<<<<<<<<<<<< @@ -18578,7 +18578,7 @@ static int __pyx_f_3_sa_bitset_insert(struct __pyx_t_3_sa__BitSet *__pyx_v_b, in } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":87 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":87 * b.size = b.size + 1 * return 1 * return 0 # <<<<<<<<<<<<<< @@ -18594,7 +18594,7 @@ static int __pyx_f_3_sa_bitset_insert(struct __pyx_t_3_sa__BitSet *__pyx_v_b, in return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":90 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":90 * * * cdef int bitset_contains(_BitSet* b, int i): # <<<<<<<<<<<<<< @@ -18609,7 +18609,7 @@ static int __pyx_f_3_sa_bitset_contains(struct __pyx_t_3_sa__BitSet *__pyx_v_b, int __pyx_t_1; __Pyx_RefNannySetupContext("bitset_contains", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":93 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":93 * cdef int val * * val = 1 << i # <<<<<<<<<<<<<< @@ -18618,7 +18618,7 @@ static int __pyx_f_3_sa_bitset_contains(struct __pyx_t_3_sa__BitSet *__pyx_v_b, */ __pyx_v_val = (1 << __pyx_v_i); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":94 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":94 * * val = 1 << i * if b.bitset & val == 0: # <<<<<<<<<<<<<< @@ -18628,7 +18628,7 @@ static int __pyx_f_3_sa_bitset_contains(struct __pyx_t_3_sa__BitSet *__pyx_v_b, __pyx_t_1 = ((__pyx_v_b->bitset & __pyx_v_val) == 0); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":95 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":95 * val = 1 << i * if b.bitset & val == 0: * return 0 # <<<<<<<<<<<<<< @@ -18641,7 +18641,7 @@ static int __pyx_f_3_sa_bitset_contains(struct __pyx_t_3_sa__BitSet *__pyx_v_b, } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":97 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":97 * return 0 * else: * return 1 # <<<<<<<<<<<<<< @@ -18670,7 +18670,7 @@ static PyObject *__pyx_pw_3_sa_14BitSetIterator_1__next__(PyObject *__pyx_v_self return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":104 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":104 * cdef int next_val * * def __next__(self): # <<<<<<<<<<<<<< @@ -18689,7 +18689,7 @@ static PyObject *__pyx_pf_3_sa_14BitSetIterator___next__(struct __pyx_obj_3_sa_B int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__next__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":107 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":107 * cdef int ret_val * * if self.next_val == -1: # <<<<<<<<<<<<<< @@ -18699,7 +18699,7 @@ static PyObject *__pyx_pf_3_sa_14BitSetIterator___next__(struct __pyx_obj_3_sa_B __pyx_t_1 = (__pyx_v_self->next_val == -1); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":108 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":108 * * if self.next_val == -1: * raise StopIteration() # <<<<<<<<<<<<<< @@ -18715,7 +18715,7 @@ static PyObject *__pyx_pf_3_sa_14BitSetIterator___next__(struct __pyx_obj_3_sa_B } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":109 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":109 * if self.next_val == -1: * raise StopIteration() * ret_val = self.next_val # <<<<<<<<<<<<<< @@ -18724,7 +18724,7 @@ static PyObject *__pyx_pf_3_sa_14BitSetIterator___next__(struct __pyx_obj_3_sa_B */ __pyx_v_ret_val = __pyx_v_self->next_val; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":110 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":110 * raise StopIteration() * ret_val = self.next_val * self.next_val = bitset_findsucc(self.b, ret_val) # <<<<<<<<<<<<<< @@ -18733,7 +18733,7 @@ static PyObject *__pyx_pf_3_sa_14BitSetIterator___next__(struct __pyx_obj_3_sa_B */ __pyx_v_self->next_val = __pyx_f_3_sa_bitset_findsucc(__pyx_v_self->b, __pyx_v_ret_val); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":111 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":111 * ret_val = self.next_val * self.next_val = bitset_findsucc(self.b, ret_val) * return ret_val # <<<<<<<<<<<<<< @@ -18773,7 +18773,7 @@ static int __pyx_pw_3_sa_6BitSet_1__cinit__(PyObject *__pyx_v_self, PyObject *__ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":122 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":122 * cdef _BitSet* b * * def __cinit__(self): # <<<<<<<<<<<<<< @@ -18786,7 +18786,7 @@ static int __pyx_pf_3_sa_6BitSet___cinit__(struct __pyx_obj_3_sa_BitSet *__pyx_v __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":123 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":123 * * def __cinit__(self): * self.b = new_BitSet() # <<<<<<<<<<<<<< @@ -18809,7 +18809,7 @@ static void __pyx_pw_3_sa_6BitSet_3__dealloc__(PyObject *__pyx_v_self) { __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":125 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":125 * self.b = new_BitSet() * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -18821,7 +18821,7 @@ static void __pyx_pf_3_sa_6BitSet_2__dealloc__(struct __pyx_obj_3_sa_BitSet *__p __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":126 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":126 * * def __dealloc__(self): * free(self.b) # <<<<<<<<<<<<<< @@ -18844,7 +18844,7 @@ static PyObject *__pyx_pw_3_sa_6BitSet_5__iter__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":128 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":128 * free(self.b) * * def __iter__(self): # <<<<<<<<<<<<<< @@ -18862,7 +18862,7 @@ static PyObject *__pyx_pf_3_sa_6BitSet_4__iter__(struct __pyx_obj_3_sa_BitSet *_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__iter__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":130 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":130 * def __iter__(self): * cdef BitSetIterator it * it = BitSetIterator() # <<<<<<<<<<<<<< @@ -18874,7 +18874,7 @@ static PyObject *__pyx_pf_3_sa_6BitSet_4__iter__(struct __pyx_obj_3_sa_BitSet *_ __pyx_v_it = ((struct __pyx_obj_3_sa_BitSetIterator *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":131 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":131 * cdef BitSetIterator it * it = BitSetIterator() * it.b = self.b # <<<<<<<<<<<<<< @@ -18883,7 +18883,7 @@ static PyObject *__pyx_pf_3_sa_6BitSet_4__iter__(struct __pyx_obj_3_sa_BitSet *_ */ __pyx_v_it->b = __pyx_v_self->b; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":132 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":132 * it = BitSetIterator() * it.b = self.b * it.next_val = self.b.min_val # <<<<<<<<<<<<<< @@ -18892,7 +18892,7 @@ static PyObject *__pyx_pf_3_sa_6BitSet_4__iter__(struct __pyx_obj_3_sa_BitSet *_ */ __pyx_v_it->next_val = __pyx_v_self->b->min_val; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":133 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":133 * it.b = self.b * it.next_val = self.b.min_val * return it # <<<<<<<<<<<<<< @@ -18928,7 +18928,7 @@ static PyObject *__pyx_pw_3_sa_6BitSet_7insert(PyObject *__pyx_v_self, PyObject return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":135 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":135 * return it * * def insert(self, i): # <<<<<<<<<<<<<< @@ -18946,7 +18946,7 @@ static PyObject *__pyx_pf_3_sa_6BitSet_6insert(struct __pyx_obj_3_sa_BitSet *__p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("insert", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":136 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":136 * * def insert(self, i): * return bitset_insert(self.b, i) # <<<<<<<<<<<<<< @@ -18984,7 +18984,7 @@ static PyObject *__pyx_pw_3_sa_6BitSet_9findsucc(PyObject *__pyx_v_self, PyObjec return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":138 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":138 * return bitset_insert(self.b, i) * * def findsucc(self, i): # <<<<<<<<<<<<<< @@ -19002,7 +19002,7 @@ static PyObject *__pyx_pf_3_sa_6BitSet_8findsucc(struct __pyx_obj_3_sa_BitSet *_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("findsucc", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":139 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":139 * * def findsucc(self, i): * return bitset_findsucc(self.b, i) # <<<<<<<<<<<<<< @@ -19040,7 +19040,7 @@ static PyObject *__pyx_pw_3_sa_6BitSet_11__str__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":141 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":141 * return bitset_findsucc(self.b, i) * * def __str__(self): # <<<<<<<<<<<<<< @@ -19059,7 +19059,7 @@ static PyObject *__pyx_pf_3_sa_6BitSet_10__str__(struct __pyx_obj_3_sa_BitSet *_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__str__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":142 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":142 * * def __str__(self): * return dec2bin(self.b.bitset)+" ("+str(self.b.size)+","+str(self.b.min_val)+","+str(self.b.max_val)+")" # <<<<<<<<<<<<<< @@ -19152,7 +19152,7 @@ static PyObject *__pyx_pw_3_sa_6BitSet_13min(PyObject *__pyx_v_self, CYTHON_UNUS return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":144 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":144 * return dec2bin(self.b.bitset)+" ("+str(self.b.size)+","+str(self.b.min_val)+","+str(self.b.max_val)+")" * * def min(self): # <<<<<<<<<<<<<< @@ -19169,7 +19169,7 @@ static PyObject *__pyx_pf_3_sa_6BitSet_12min(struct __pyx_obj_3_sa_BitSet *__pyx int __pyx_clineno = 0; __Pyx_RefNannySetupContext("min", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":145 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":145 * * def min(self): * return self.b.min_val # <<<<<<<<<<<<<< @@ -19206,7 +19206,7 @@ static PyObject *__pyx_pw_3_sa_6BitSet_15max(PyObject *__pyx_v_self, CYTHON_UNUS return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":147 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":147 * return self.b.min_val * * def max(self): # <<<<<<<<<<<<<< @@ -19223,7 +19223,7 @@ static PyObject *__pyx_pf_3_sa_6BitSet_14max(struct __pyx_obj_3_sa_BitSet *__pyx int __pyx_clineno = 0; __Pyx_RefNannySetupContext("max", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":148 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":148 * * def max(self): * return self.b.max_val # <<<<<<<<<<<<<< @@ -19260,7 +19260,7 @@ static Py_ssize_t __pyx_pw_3_sa_6BitSet_17__len__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":150 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":150 * return self.b.max_val * * def __len__(self): # <<<<<<<<<<<<<< @@ -19273,7 +19273,7 @@ static Py_ssize_t __pyx_pf_3_sa_6BitSet_16__len__(struct __pyx_obj_3_sa_BitSet * __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__len__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":151 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":151 * * def __len__(self): * return self.b.size # <<<<<<<<<<<<<< @@ -19300,7 +19300,7 @@ static int __pyx_pw_3_sa_6BitSet_19__contains__(PyObject *__pyx_v_self, PyObject return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":153 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":153 * return self.b.size * * def __contains__(self, i): # <<<<<<<<<<<<<< @@ -19319,7 +19319,7 @@ static int __pyx_pf_3_sa_6BitSet_18__contains__(struct __pyx_obj_3_sa_BitSet *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__contains__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":154 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":154 * * def __contains__(self, i): * return bool(bitset_contains(self.b, i)) # <<<<<<<<<<<<<< @@ -19345,7 +19345,7 @@ static int __pyx_pf_3_sa_6BitSet_18__contains__(struct __pyx_obj_3_sa_BitSet *__ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":157 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":157 * * * cdef str dec2bin(long i): # <<<<<<<<<<<<<< @@ -19367,7 +19367,7 @@ static PyObject *__pyx_f_3_sa_dec2bin(long __pyx_v_i) { int __pyx_clineno = 0; __Pyx_RefNannySetupContext("dec2bin", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":158 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":158 * * cdef str dec2bin(long i): * cdef str result = "" # <<<<<<<<<<<<<< @@ -19377,7 +19377,7 @@ static PyObject *__pyx_f_3_sa_dec2bin(long __pyx_v_i) { __Pyx_INCREF(((PyObject *)__pyx_kp_s_45)); __pyx_v_result = __pyx_kp_s_45; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":160 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":160 * cdef str result = "" * cdef unsigned d * for d in range(MIN_BOTTOM_SIZE): # <<<<<<<<<<<<<< @@ -19388,7 +19388,7 @@ static PyObject *__pyx_f_3_sa_dec2bin(long __pyx_v_i) { for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":161 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":161 * cdef unsigned d * for d in range(MIN_BOTTOM_SIZE): * if i & LOWER_MASK[0] == 0: # <<<<<<<<<<<<<< @@ -19398,7 +19398,7 @@ static PyObject *__pyx_f_3_sa_dec2bin(long __pyx_v_i) { __pyx_t_3 = ((__pyx_v_i & (__pyx_v_3_sa_LOWER_MASK[0])) == 0); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":162 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":162 * for d in range(MIN_BOTTOM_SIZE): * if i & LOWER_MASK[0] == 0: * result = "0"+result # <<<<<<<<<<<<<< @@ -19414,7 +19414,7 @@ static PyObject *__pyx_f_3_sa_dec2bin(long __pyx_v_i) { } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":164 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":164 * result = "0"+result * else: * result = "1"+result # <<<<<<<<<<<<<< @@ -19429,7 +19429,7 @@ static PyObject *__pyx_f_3_sa_dec2bin(long __pyx_v_i) { } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":165 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":165 * else: * result = "1"+result * i = i >> 1 # <<<<<<<<<<<<<< @@ -19439,7 +19439,7 @@ static PyObject *__pyx_f_3_sa_dec2bin(long __pyx_v_i) { __pyx_v_i = (__pyx_v_i >> 1); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":166 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":166 * result = "1"+result * i = i >> 1 * return result # <<<<<<<<<<<<<< @@ -19464,7 +19464,7 @@ static PyObject *__pyx_f_3_sa_dec2bin(long __pyx_v_i) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":177 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":177 * void** bottom * * cdef _VEB* new_VEB(int n): # <<<<<<<<<<<<<< @@ -19485,7 +19485,7 @@ static struct __pyx_t_3_sa__VEB *__pyx_f_3_sa_new_VEB(int __pyx_v_n) { int __pyx_clineno = 0; __Pyx_RefNannySetupContext("new_VEB", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":181 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":181 * cdef int num_bits, num_top_bits, i * * veb = <_VEB*> malloc(sizeof(_VEB)) # <<<<<<<<<<<<<< @@ -19494,7 +19494,7 @@ static struct __pyx_t_3_sa__VEB *__pyx_f_3_sa_new_VEB(int __pyx_v_n) { */ __pyx_v_veb = ((struct __pyx_t_3_sa__VEB *)malloc((sizeof(struct __pyx_t_3_sa__VEB)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":183 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":183 * veb = <_VEB*> malloc(sizeof(_VEB)) * * num_bits = int(ceil(log(n) / log(2))) # <<<<<<<<<<<<<< @@ -19509,7 +19509,7 @@ static struct __pyx_t_3_sa__VEB *__pyx_f_3_sa_new_VEB(int __pyx_v_n) { } __pyx_v_num_bits = ((int)ceil((__pyx_t_1 / __pyx_t_2))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":184 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":184 * * num_bits = int(ceil(log(n) / log(2))) * veb.num_bottom_bits = num_bits/2 # <<<<<<<<<<<<<< @@ -19518,7 +19518,7 @@ static struct __pyx_t_3_sa__VEB *__pyx_f_3_sa_new_VEB(int __pyx_v_n) { */ __pyx_v_veb->num_bottom_bits = __Pyx_div_long(__pyx_v_num_bits, 2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":185 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":185 * num_bits = int(ceil(log(n) / log(2))) * veb.num_bottom_bits = num_bits/2 * if veb.num_bottom_bits < MIN_BOTTOM_BITS: # <<<<<<<<<<<<<< @@ -19528,7 +19528,7 @@ static struct __pyx_t_3_sa__VEB *__pyx_f_3_sa_new_VEB(int __pyx_v_n) { __pyx_t_3 = (__pyx_v_veb->num_bottom_bits < __pyx_v_3_sa_MIN_BOTTOM_BITS); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":186 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":186 * veb.num_bottom_bits = num_bits/2 * if veb.num_bottom_bits < MIN_BOTTOM_BITS: * veb.num_bottom_bits = MIN_BOTTOM_BITS # <<<<<<<<<<<<<< @@ -19540,7 +19540,7 @@ static struct __pyx_t_3_sa__VEB *__pyx_f_3_sa_new_VEB(int __pyx_v_n) { } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":187 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":187 * if veb.num_bottom_bits < MIN_BOTTOM_BITS: * veb.num_bottom_bits = MIN_BOTTOM_BITS * veb.top_universe_size = (n >> veb.num_bottom_bits) + 1 # <<<<<<<<<<<<<< @@ -19549,7 +19549,7 @@ static struct __pyx_t_3_sa__VEB *__pyx_f_3_sa_new_VEB(int __pyx_v_n) { */ __pyx_v_veb->top_universe_size = ((__pyx_v_n >> __pyx_v_veb->num_bottom_bits) + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":189 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":189 * veb.top_universe_size = (n >> veb.num_bottom_bits) + 1 * * veb.bottom = malloc(veb.top_universe_size * sizeof(void*)) # <<<<<<<<<<<<<< @@ -19558,7 +19558,7 @@ static struct __pyx_t_3_sa__VEB *__pyx_f_3_sa_new_VEB(int __pyx_v_n) { */ __pyx_v_veb->bottom = ((void **)malloc((__pyx_v_veb->top_universe_size * (sizeof(void *))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":190 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":190 * * veb.bottom = malloc(veb.top_universe_size * sizeof(void*)) * memset(veb.bottom, 0, veb.top_universe_size * sizeof(void*)) # <<<<<<<<<<<<<< @@ -19567,7 +19567,7 @@ static struct __pyx_t_3_sa__VEB *__pyx_f_3_sa_new_VEB(int __pyx_v_n) { */ memset(__pyx_v_veb->bottom, 0, (__pyx_v_veb->top_universe_size * (sizeof(void *)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":192 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":192 * memset(veb.bottom, 0, veb.top_universe_size * sizeof(void*)) * * if veb.top_universe_size > MIN_BOTTOM_SIZE: # <<<<<<<<<<<<<< @@ -19577,7 +19577,7 @@ static struct __pyx_t_3_sa__VEB *__pyx_f_3_sa_new_VEB(int __pyx_v_n) { __pyx_t_3 = (__pyx_v_veb->top_universe_size > __pyx_v_3_sa_MIN_BOTTOM_SIZE); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":193 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":193 * * if veb.top_universe_size > MIN_BOTTOM_SIZE: * veb.top = new_VEB(veb.top_universe_size) # <<<<<<<<<<<<<< @@ -19589,7 +19589,7 @@ static struct __pyx_t_3_sa__VEB *__pyx_f_3_sa_new_VEB(int __pyx_v_n) { } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":195 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":195 * veb.top = new_VEB(veb.top_universe_size) * else: * veb.top = new_BitSet() # <<<<<<<<<<<<<< @@ -19600,7 +19600,7 @@ static struct __pyx_t_3_sa__VEB *__pyx_f_3_sa_new_VEB(int __pyx_v_n) { } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":197 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":197 * veb.top = new_BitSet() * * veb.max_val = -1 # <<<<<<<<<<<<<< @@ -19609,7 +19609,7 @@ static struct __pyx_t_3_sa__VEB *__pyx_f_3_sa_new_VEB(int __pyx_v_n) { */ __pyx_v_veb->max_val = -1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":198 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":198 * * veb.max_val = -1 * veb.min_val = -1 # <<<<<<<<<<<<<< @@ -19618,7 +19618,7 @@ static struct __pyx_t_3_sa__VEB *__pyx_f_3_sa_new_VEB(int __pyx_v_n) { */ __pyx_v_veb->min_val = -1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":199 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":199 * veb.max_val = -1 * veb.min_val = -1 * veb.size = 0 # <<<<<<<<<<<<<< @@ -19627,7 +19627,7 @@ static struct __pyx_t_3_sa__VEB *__pyx_f_3_sa_new_VEB(int __pyx_v_n) { */ __pyx_v_veb->size = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":200 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":200 * veb.min_val = -1 * veb.size = 0 * return veb # <<<<<<<<<<<<<< @@ -19647,7 +19647,7 @@ static struct __pyx_t_3_sa__VEB *__pyx_f_3_sa_new_VEB(int __pyx_v_n) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":203 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":203 * * * cdef int VEB_insert(_VEB* veb, int i): # <<<<<<<<<<<<<< @@ -19668,7 +19668,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ int __pyx_t_3; __Pyx_RefNannySetupContext("VEB_insert", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":208 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":208 * cdef int a, b, tmp * * if veb.size == 0: # <<<<<<<<<<<<<< @@ -19678,7 +19678,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ __pyx_t_1 = (__pyx_v_veb->size == 0); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":209 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":209 * * if veb.size == 0: * veb.min_val = i # <<<<<<<<<<<<<< @@ -19687,7 +19687,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ */ __pyx_v_veb->min_val = __pyx_v_i; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":210 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":210 * if veb.size == 0: * veb.min_val = i * veb.max_val = i # <<<<<<<<<<<<<< @@ -19698,7 +19698,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ goto __pyx_L3; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":211 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":211 * veb.min_val = i * veb.max_val = i * elif i == veb.min_val or i == veb.max_val: # <<<<<<<<<<<<<< @@ -19714,7 +19714,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ } if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":212 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":212 * veb.max_val = i * elif i == veb.min_val or i == veb.max_val: * return 0 # <<<<<<<<<<<<<< @@ -19727,7 +19727,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":214 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":214 * return 0 * else: * if i < veb.min_val: # <<<<<<<<<<<<<< @@ -19737,7 +19737,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ __pyx_t_3 = (__pyx_v_i < __pyx_v_veb->min_val); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":215 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":215 * else: * if i < veb.min_val: * tmp = i # <<<<<<<<<<<<<< @@ -19746,7 +19746,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ */ __pyx_v_tmp = __pyx_v_i; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":216 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":216 * if i < veb.min_val: * tmp = i * i = veb.min_val # <<<<<<<<<<<<<< @@ -19755,7 +19755,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ */ __pyx_v_i = __pyx_v_veb->min_val; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":217 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":217 * tmp = i * i = veb.min_val * veb.min_val = tmp # <<<<<<<<<<<<<< @@ -19767,7 +19767,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":218 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":218 * i = veb.min_val * veb.min_val = tmp * a = i >> veb.num_bottom_bits # <<<<<<<<<<<<<< @@ -19776,7 +19776,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ */ __pyx_v_a = (__pyx_v_i >> __pyx_v_veb->num_bottom_bits); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":219 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":219 * veb.min_val = tmp * a = i >> veb.num_bottom_bits * b = i & LOWER_MASK[veb.num_bottom_bits-1] # <<<<<<<<<<<<<< @@ -19785,7 +19785,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ */ __pyx_v_b = (__pyx_v_i & (__pyx_v_3_sa_LOWER_MASK[(__pyx_v_veb->num_bottom_bits - 1)])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":220 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":220 * a = i >> veb.num_bottom_bits * b = i & LOWER_MASK[veb.num_bottom_bits-1] * if veb.bottom[a] == NULL: # <<<<<<<<<<<<<< @@ -19795,7 +19795,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ __pyx_t_3 = ((__pyx_v_veb->bottom[__pyx_v_a]) == NULL); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":221 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":221 * b = i & LOWER_MASK[veb.num_bottom_bits-1] * if veb.bottom[a] == NULL: * if veb.top_universe_size > MIN_BOTTOM_SIZE: # <<<<<<<<<<<<<< @@ -19805,7 +19805,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ __pyx_t_3 = (__pyx_v_veb->top_universe_size > __pyx_v_3_sa_MIN_BOTTOM_SIZE); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":222 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":222 * if veb.bottom[a] == NULL: * if veb.top_universe_size > MIN_BOTTOM_SIZE: * subv = <_VEB*> veb.top # <<<<<<<<<<<<<< @@ -19814,7 +19814,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ */ __pyx_v_subv = ((struct __pyx_t_3_sa__VEB *)__pyx_v_veb->top); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":223 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":223 * if veb.top_universe_size > MIN_BOTTOM_SIZE: * subv = <_VEB*> veb.top * VEB_insert(subv, a) # <<<<<<<<<<<<<< @@ -19826,7 +19826,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":225 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":225 * VEB_insert(subv, a) * else: * subb = <_BitSet*> veb.top # <<<<<<<<<<<<<< @@ -19835,7 +19835,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ */ __pyx_v_subb = ((struct __pyx_t_3_sa__BitSet *)__pyx_v_veb->top); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":226 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":226 * else: * subb = <_BitSet*> veb.top * bitset_insert(subb, a) # <<<<<<<<<<<<<< @@ -19846,7 +19846,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ } __pyx_L6:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":227 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":227 * subb = <_BitSet*> veb.top * bitset_insert(subb, a) * if veb.num_bottom_bits > MIN_BOTTOM_BITS: # <<<<<<<<<<<<<< @@ -19856,7 +19856,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ __pyx_t_3 = (__pyx_v_veb->num_bottom_bits > __pyx_v_3_sa_MIN_BOTTOM_BITS); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":228 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":228 * bitset_insert(subb, a) * if veb.num_bottom_bits > MIN_BOTTOM_BITS: * veb.bottom[a] = new_VEB(1 << veb.num_bottom_bits) # <<<<<<<<<<<<<< @@ -19868,7 +19868,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":230 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":230 * veb.bottom[a] = new_VEB(1 << veb.num_bottom_bits) * else: * veb.bottom[a] = new_BitSet() # <<<<<<<<<<<<<< @@ -19882,7 +19882,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":231 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":231 * else: * veb.bottom[a] = new_BitSet() * if veb.num_bottom_bits > MIN_BOTTOM_BITS: # <<<<<<<<<<<<<< @@ -19892,7 +19892,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ __pyx_t_3 = (__pyx_v_veb->num_bottom_bits > __pyx_v_3_sa_MIN_BOTTOM_BITS); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":232 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":232 * veb.bottom[a] = new_BitSet() * if veb.num_bottom_bits > MIN_BOTTOM_BITS: * subv = <_VEB*> veb.bottom[a] # <<<<<<<<<<<<<< @@ -19901,7 +19901,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ */ __pyx_v_subv = ((struct __pyx_t_3_sa__VEB *)(__pyx_v_veb->bottom[__pyx_v_a])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":233 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":233 * if veb.num_bottom_bits > MIN_BOTTOM_BITS: * subv = <_VEB*> veb.bottom[a] * if VEB_insert(subv, b) == 0: # <<<<<<<<<<<<<< @@ -19911,7 +19911,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ __pyx_t_3 = (__pyx_f_3_sa_VEB_insert(__pyx_v_subv, __pyx_v_b) == 0); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":234 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":234 * subv = <_VEB*> veb.bottom[a] * if VEB_insert(subv, b) == 0: * return 0 # <<<<<<<<<<<<<< @@ -19927,7 +19927,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":236 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":236 * return 0 * else: * subb = <_BitSet*> veb.bottom[a] # <<<<<<<<<<<<<< @@ -19936,7 +19936,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ */ __pyx_v_subb = ((struct __pyx_t_3_sa__BitSet *)(__pyx_v_veb->bottom[__pyx_v_a])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":237 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":237 * else: * subb = <_BitSet*> veb.bottom[a] * if bitset_insert(subb, b) == 0: # <<<<<<<<<<<<<< @@ -19946,7 +19946,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ __pyx_t_3 = (__pyx_f_3_sa_bitset_insert(__pyx_v_subb, __pyx_v_b) == 0); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":238 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":238 * subb = <_BitSet*> veb.bottom[a] * if bitset_insert(subb, b) == 0: * return 0 # <<<<<<<<<<<<<< @@ -19961,7 +19961,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ } __pyx_L8:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":240 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":240 * return 0 * * if i > veb.max_val: # <<<<<<<<<<<<<< @@ -19971,7 +19971,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ __pyx_t_3 = (__pyx_v_i > __pyx_v_veb->max_val); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":241 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":241 * * if i > veb.max_val: * veb.max_val = i # <<<<<<<<<<<<<< @@ -19985,7 +19985,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":242 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":242 * if i > veb.max_val: * veb.max_val = i * veb.size = veb.size + 1 # <<<<<<<<<<<<<< @@ -19994,7 +19994,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ */ __pyx_v_veb->size = (__pyx_v_veb->size + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":243 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":243 * veb.max_val = i * veb.size = veb.size + 1 * return 1 # <<<<<<<<<<<<<< @@ -20010,7 +20010,7 @@ static int __pyx_f_3_sa_VEB_insert(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":246 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":246 * * * cdef del_VEB(_VEB* veb): # <<<<<<<<<<<<<< @@ -20029,7 +20029,7 @@ static PyObject *__pyx_f_3_sa_del_VEB(struct __pyx_t_3_sa__VEB *__pyx_v_veb) { int __pyx_clineno = 0; __Pyx_RefNannySetupContext("del_VEB", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":249 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":249 * cdef int i * * if veb.top_universe_size > MIN_BOTTOM_SIZE: # <<<<<<<<<<<<<< @@ -20039,7 +20039,7 @@ static PyObject *__pyx_f_3_sa_del_VEB(struct __pyx_t_3_sa__VEB *__pyx_v_veb) { __pyx_t_1 = (__pyx_v_veb->top_universe_size > __pyx_v_3_sa_MIN_BOTTOM_SIZE); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":250 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":250 * * if veb.top_universe_size > MIN_BOTTOM_SIZE: * i = (<_VEB*> veb.top).min_val # <<<<<<<<<<<<<< @@ -20051,7 +20051,7 @@ static PyObject *__pyx_f_3_sa_del_VEB(struct __pyx_t_3_sa__VEB *__pyx_v_veb) { } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":252 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":252 * i = (<_VEB*> veb.top).min_val * else: * i = (<_BitSet*> veb.top).min_val # <<<<<<<<<<<<<< @@ -20062,7 +20062,7 @@ static PyObject *__pyx_f_3_sa_del_VEB(struct __pyx_t_3_sa__VEB *__pyx_v_veb) { } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":254 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":254 * i = (<_BitSet*> veb.top).min_val * * while i != -1: # <<<<<<<<<<<<<< @@ -20073,7 +20073,7 @@ static PyObject *__pyx_f_3_sa_del_VEB(struct __pyx_t_3_sa__VEB *__pyx_v_veb) { __pyx_t_1 = (__pyx_v_i != -1); if (!__pyx_t_1) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":255 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":255 * * while i != -1: * if veb.num_bottom_bits > MIN_BOTTOM_BITS: # <<<<<<<<<<<<<< @@ -20083,7 +20083,7 @@ static PyObject *__pyx_f_3_sa_del_VEB(struct __pyx_t_3_sa__VEB *__pyx_v_veb) { __pyx_t_1 = (__pyx_v_veb->num_bottom_bits > __pyx_v_3_sa_MIN_BOTTOM_BITS); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":256 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":256 * while i != -1: * if veb.num_bottom_bits > MIN_BOTTOM_BITS: * del_VEB(<_VEB*> veb.bottom[i]) # <<<<<<<<<<<<<< @@ -20097,7 +20097,7 @@ static PyObject *__pyx_f_3_sa_del_VEB(struct __pyx_t_3_sa__VEB *__pyx_v_veb) { } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":258 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":258 * del_VEB(<_VEB*> veb.bottom[i]) * else: * free(<_BitSet*> veb.bottom[i]) # <<<<<<<<<<<<<< @@ -20108,7 +20108,7 @@ static PyObject *__pyx_f_3_sa_del_VEB(struct __pyx_t_3_sa__VEB *__pyx_v_veb) { } __pyx_L6:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":260 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":260 * free(<_BitSet*> veb.bottom[i]) * * if veb.top_universe_size > MIN_BOTTOM_SIZE: # <<<<<<<<<<<<<< @@ -20118,7 +20118,7 @@ static PyObject *__pyx_f_3_sa_del_VEB(struct __pyx_t_3_sa__VEB *__pyx_v_veb) { __pyx_t_1 = (__pyx_v_veb->top_universe_size > __pyx_v_3_sa_MIN_BOTTOM_SIZE); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":261 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":261 * * if veb.top_universe_size > MIN_BOTTOM_SIZE: * i = VEB_findsucc(<_VEB*> veb.top, i) # <<<<<<<<<<<<<< @@ -20130,7 +20130,7 @@ static PyObject *__pyx_f_3_sa_del_VEB(struct __pyx_t_3_sa__VEB *__pyx_v_veb) { } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":263 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":263 * i = VEB_findsucc(<_VEB*> veb.top, i) * else: * i = bitset_findsucc(<_BitSet*> veb.top, i) # <<<<<<<<<<<<<< @@ -20142,7 +20142,7 @@ static PyObject *__pyx_f_3_sa_del_VEB(struct __pyx_t_3_sa__VEB *__pyx_v_veb) { __pyx_L7:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":265 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":265 * i = bitset_findsucc(<_BitSet*> veb.top, i) * * if veb.top_universe_size > MIN_BOTTOM_SIZE: # <<<<<<<<<<<<<< @@ -20152,7 +20152,7 @@ static PyObject *__pyx_f_3_sa_del_VEB(struct __pyx_t_3_sa__VEB *__pyx_v_veb) { __pyx_t_1 = (__pyx_v_veb->top_universe_size > __pyx_v_3_sa_MIN_BOTTOM_SIZE); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":266 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":266 * * if veb.top_universe_size > MIN_BOTTOM_SIZE: * del_VEB(<_VEB*> veb.top) # <<<<<<<<<<<<<< @@ -20166,7 +20166,7 @@ static PyObject *__pyx_f_3_sa_del_VEB(struct __pyx_t_3_sa__VEB *__pyx_v_veb) { } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":268 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":268 * del_VEB(<_VEB*> veb.top) * else: * free(<_BitSet*> veb.top) # <<<<<<<<<<<<<< @@ -20177,7 +20177,7 @@ static PyObject *__pyx_f_3_sa_del_VEB(struct __pyx_t_3_sa__VEB *__pyx_v_veb) { } __pyx_L8:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":269 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":269 * else: * free(<_BitSet*> veb.top) * free(veb.bottom) # <<<<<<<<<<<<<< @@ -20186,7 +20186,7 @@ static PyObject *__pyx_f_3_sa_del_VEB(struct __pyx_t_3_sa__VEB *__pyx_v_veb) { */ free(__pyx_v_veb->bottom); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":270 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":270 * free(<_BitSet*> veb.top) * free(veb.bottom) * free(veb) # <<<<<<<<<<<<<< @@ -20207,7 +20207,7 @@ static PyObject *__pyx_f_3_sa_del_VEB(struct __pyx_t_3_sa__VEB *__pyx_v_veb) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":273 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":273 * * * cdef int VEB_findsucc(_VEB* veb, int i): # <<<<<<<<<<<<<< @@ -20230,7 +20230,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int int __pyx_t_3; __Pyx_RefNannySetupContext("VEB_findsucc", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":278 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":278 * cdef int a, b, j, c, found * * if veb.max_val == -1 or i>=veb.max_val: # <<<<<<<<<<<<<< @@ -20246,7 +20246,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int } if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":279 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":279 * * if veb.max_val == -1 or i>=veb.max_val: * return -1 # <<<<<<<<<<<<<< @@ -20259,7 +20259,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":280 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":280 * if veb.max_val == -1 or i>=veb.max_val: * return -1 * if i < veb.min_val: # <<<<<<<<<<<<<< @@ -20269,7 +20269,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __pyx_t_3 = (__pyx_v_i < __pyx_v_veb->min_val); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":281 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":281 * return -1 * if i < veb.min_val: * return veb.min_val # <<<<<<<<<<<<<< @@ -20282,7 +20282,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":283 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":283 * return veb.min_val * * a = i >> veb.num_bottom_bits # <<<<<<<<<<<<<< @@ -20291,7 +20291,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int */ __pyx_v_a = (__pyx_v_i >> __pyx_v_veb->num_bottom_bits); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":284 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":284 * * a = i >> veb.num_bottom_bits * b = i & LOWER_MASK[veb.num_bottom_bits-1] # <<<<<<<<<<<<<< @@ -20300,7 +20300,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int */ __pyx_v_b = (__pyx_v_i & (__pyx_v_3_sa_LOWER_MASK[(__pyx_v_veb->num_bottom_bits - 1)])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":285 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":285 * a = i >> veb.num_bottom_bits * b = i & LOWER_MASK[veb.num_bottom_bits-1] * found = 0 # <<<<<<<<<<<<<< @@ -20309,7 +20309,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int */ __pyx_v_found = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":286 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":286 * b = i & LOWER_MASK[veb.num_bottom_bits-1] * found = 0 * if veb.bottom[a] != NULL: # <<<<<<<<<<<<<< @@ -20319,7 +20319,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __pyx_t_3 = ((__pyx_v_veb->bottom[__pyx_v_a]) != NULL); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":287 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":287 * found = 0 * if veb.bottom[a] != NULL: * if veb.num_bottom_bits > MIN_BOTTOM_BITS: # <<<<<<<<<<<<<< @@ -20329,7 +20329,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __pyx_t_3 = (__pyx_v_veb->num_bottom_bits > __pyx_v_3_sa_MIN_BOTTOM_BITS); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":288 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":288 * if veb.bottom[a] != NULL: * if veb.num_bottom_bits > MIN_BOTTOM_BITS: * subv = <_VEB*> veb.bottom[a] # <<<<<<<<<<<<<< @@ -20338,7 +20338,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int */ __pyx_v_subv = ((struct __pyx_t_3_sa__VEB *)(__pyx_v_veb->bottom[__pyx_v_a])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":289 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":289 * if veb.num_bottom_bits > MIN_BOTTOM_BITS: * subv = <_VEB*> veb.bottom[a] * if subv.max_val > b: # <<<<<<<<<<<<<< @@ -20348,7 +20348,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __pyx_t_3 = (__pyx_v_subv->max_val > __pyx_v_b); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":290 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":290 * subv = <_VEB*> veb.bottom[a] * if subv.max_val > b: * j = (a << veb.num_bottom_bits) + VEB_findsucc(subv, b) # <<<<<<<<<<<<<< @@ -20357,7 +20357,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int */ __pyx_v_j = ((__pyx_v_a << __pyx_v_veb->num_bottom_bits) + __pyx_f_3_sa_VEB_findsucc(__pyx_v_subv, __pyx_v_b)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":291 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":291 * if subv.max_val > b: * j = (a << veb.num_bottom_bits) + VEB_findsucc(subv, b) * found = 1 # <<<<<<<<<<<<<< @@ -20372,7 +20372,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":293 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":293 * found = 1 * else: * subb = <_BitSet*> veb.bottom[a] # <<<<<<<<<<<<<< @@ -20381,7 +20381,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int */ __pyx_v_subb = ((struct __pyx_t_3_sa__BitSet *)(__pyx_v_veb->bottom[__pyx_v_a])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":294 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":294 * else: * subb = <_BitSet*> veb.bottom[a] * if subb.max_val > b: # <<<<<<<<<<<<<< @@ -20391,7 +20391,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __pyx_t_3 = (__pyx_v_subb->max_val > __pyx_v_b); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":295 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":295 * subb = <_BitSet*> veb.bottom[a] * if subb.max_val > b: * j = (a << veb.num_bottom_bits) + bitset_findsucc(subb, b) # <<<<<<<<<<<<<< @@ -20400,7 +20400,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int */ __pyx_v_j = ((__pyx_v_a << __pyx_v_veb->num_bottom_bits) + __pyx_f_3_sa_bitset_findsucc(__pyx_v_subb, __pyx_v_b)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":296 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":296 * if subb.max_val > b: * j = (a << veb.num_bottom_bits) + bitset_findsucc(subb, b) * found = 1 # <<<<<<<<<<<<<< @@ -20417,7 +20417,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":297 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":297 * j = (a << veb.num_bottom_bits) + bitset_findsucc(subb, b) * found = 1 * if found==0: # <<<<<<<<<<<<<< @@ -20427,7 +20427,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __pyx_t_3 = (__pyx_v_found == 0); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":298 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":298 * found = 1 * if found==0: * if veb.top_universe_size > MIN_BOTTOM_SIZE: # <<<<<<<<<<<<<< @@ -20437,7 +20437,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __pyx_t_3 = (__pyx_v_veb->top_universe_size > __pyx_v_3_sa_MIN_BOTTOM_SIZE); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":299 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":299 * if found==0: * if veb.top_universe_size > MIN_BOTTOM_SIZE: * subv = <_VEB*> veb.top # <<<<<<<<<<<<<< @@ -20446,7 +20446,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int */ __pyx_v_subv = ((struct __pyx_t_3_sa__VEB *)__pyx_v_veb->top); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":300 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":300 * if veb.top_universe_size > MIN_BOTTOM_SIZE: * subv = <_VEB*> veb.top * c = VEB_findsucc(subv, a) # <<<<<<<<<<<<<< @@ -20458,7 +20458,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":302 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":302 * c = VEB_findsucc(subv, a) * else: * subb = <_BitSet*> veb.top # <<<<<<<<<<<<<< @@ -20467,7 +20467,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int */ __pyx_v_subb = ((struct __pyx_t_3_sa__BitSet *)__pyx_v_veb->top); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":303 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":303 * else: * subb = <_BitSet*> veb.top * c = bitset_findsucc(subb, a) # <<<<<<<<<<<<<< @@ -20478,7 +20478,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int } __pyx_L10:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":304 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":304 * subb = <_BitSet*> veb.top * c = bitset_findsucc(subb, a) * if veb.num_bottom_bits > MIN_BOTTOM_BITS: # <<<<<<<<<<<<<< @@ -20488,7 +20488,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __pyx_t_3 = (__pyx_v_veb->num_bottom_bits > __pyx_v_3_sa_MIN_BOTTOM_BITS); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":305 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":305 * c = bitset_findsucc(subb, a) * if veb.num_bottom_bits > MIN_BOTTOM_BITS: * subv = <_VEB*> veb.bottom[c] # <<<<<<<<<<<<<< @@ -20497,7 +20497,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int */ __pyx_v_subv = ((struct __pyx_t_3_sa__VEB *)(__pyx_v_veb->bottom[__pyx_v_c])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":306 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":306 * if veb.num_bottom_bits > MIN_BOTTOM_BITS: * subv = <_VEB*> veb.bottom[c] * j = (c << veb.num_bottom_bits) + subv.min_val # <<<<<<<<<<<<<< @@ -20509,7 +20509,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":308 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":308 * j = (c << veb.num_bottom_bits) + subv.min_val * else: * subb = <_BitSet*> veb.bottom[c] # <<<<<<<<<<<<<< @@ -20518,7 +20518,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int */ __pyx_v_subb = ((struct __pyx_t_3_sa__BitSet *)(__pyx_v_veb->bottom[__pyx_v_c])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":309 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":309 * else: * subb = <_BitSet*> veb.bottom[c] * j = (c << veb.num_bottom_bits) + subb.min_val # <<<<<<<<<<<<<< @@ -20532,7 +20532,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int } __pyx_L9:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":310 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":310 * subb = <_BitSet*> veb.bottom[c] * j = (c << veb.num_bottom_bits) + subb.min_val * return j # <<<<<<<<<<<<<< @@ -20548,7 +20548,7 @@ static int __pyx_f_3_sa_VEB_findsucc(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":313 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":313 * * * cdef int VEB_contains(_VEB* veb, int i): # <<<<<<<<<<<<<< @@ -20569,7 +20569,7 @@ static int __pyx_f_3_sa_VEB_contains(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int int __pyx_t_4; __Pyx_RefNannySetupContext("VEB_contains", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":318 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":318 * cdef int a, b * * if veb.size == 0 or i < veb.min_val or i > veb.max_val: # <<<<<<<<<<<<<< @@ -20591,7 +20591,7 @@ static int __pyx_f_3_sa_VEB_contains(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int } if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":319 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":319 * * if veb.size == 0 or i < veb.min_val or i > veb.max_val: * return 0 # <<<<<<<<<<<<<< @@ -20604,7 +20604,7 @@ static int __pyx_f_3_sa_VEB_contains(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":321 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":321 * return 0 * * if veb.min_val == i: # <<<<<<<<<<<<<< @@ -20614,7 +20614,7 @@ static int __pyx_f_3_sa_VEB_contains(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __pyx_t_2 = (__pyx_v_veb->min_val == __pyx_v_i); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":322 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":322 * * if veb.min_val == i: * return 1 # <<<<<<<<<<<<<< @@ -20627,7 +20627,7 @@ static int __pyx_f_3_sa_VEB_contains(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":324 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":324 * return 1 * else: * if veb.size == 1: # <<<<<<<<<<<<<< @@ -20637,7 +20637,7 @@ static int __pyx_f_3_sa_VEB_contains(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __pyx_t_2 = (__pyx_v_veb->size == 1); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":325 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":325 * else: * if veb.size == 1: * return 0 # <<<<<<<<<<<<<< @@ -20652,7 +20652,7 @@ static int __pyx_f_3_sa_VEB_contains(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":327 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":327 * return 0 * * a = i >> veb.num_bottom_bits # <<<<<<<<<<<<<< @@ -20661,7 +20661,7 @@ static int __pyx_f_3_sa_VEB_contains(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int */ __pyx_v_a = (__pyx_v_i >> __pyx_v_veb->num_bottom_bits); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":328 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":328 * * a = i >> veb.num_bottom_bits * b = i & LOWER_MASK[veb.num_bottom_bits-1] # <<<<<<<<<<<<<< @@ -20670,7 +20670,7 @@ static int __pyx_f_3_sa_VEB_contains(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int */ __pyx_v_b = (__pyx_v_i & (__pyx_v_3_sa_LOWER_MASK[(__pyx_v_veb->num_bottom_bits - 1)])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":329 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":329 * a = i >> veb.num_bottom_bits * b = i & LOWER_MASK[veb.num_bottom_bits-1] * if veb.bottom[a] == NULL: # <<<<<<<<<<<<<< @@ -20680,7 +20680,7 @@ static int __pyx_f_3_sa_VEB_contains(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __pyx_t_2 = ((__pyx_v_veb->bottom[__pyx_v_a]) == NULL); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":330 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":330 * b = i & LOWER_MASK[veb.num_bottom_bits-1] * if veb.bottom[a] == NULL: * return 0 # <<<<<<<<<<<<<< @@ -20693,7 +20693,7 @@ static int __pyx_f_3_sa_VEB_contains(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":332 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":332 * return 0 * else: * if veb.num_bottom_bits > MIN_BOTTOM_BITS: # <<<<<<<<<<<<<< @@ -20703,7 +20703,7 @@ static int __pyx_f_3_sa_VEB_contains(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int __pyx_t_2 = (__pyx_v_veb->num_bottom_bits > __pyx_v_3_sa_MIN_BOTTOM_BITS); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":333 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":333 * else: * if veb.num_bottom_bits > MIN_BOTTOM_BITS: * subv = <_VEB*> veb.bottom[a] # <<<<<<<<<<<<<< @@ -20712,7 +20712,7 @@ static int __pyx_f_3_sa_VEB_contains(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int */ __pyx_v_subv = ((struct __pyx_t_3_sa__VEB *)(__pyx_v_veb->bottom[__pyx_v_a])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":334 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":334 * if veb.num_bottom_bits > MIN_BOTTOM_BITS: * subv = <_VEB*> veb.bottom[a] * return VEB_contains(subv, b) # <<<<<<<<<<<<<< @@ -20725,7 +20725,7 @@ static int __pyx_f_3_sa_VEB_contains(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":336 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":336 * return VEB_contains(subv, b) * else: * subb = <_BitSet*> veb.bottom[a] # <<<<<<<<<<<<<< @@ -20734,7 +20734,7 @@ static int __pyx_f_3_sa_VEB_contains(struct __pyx_t_3_sa__VEB *__pyx_v_veb, int */ __pyx_v_subb = ((struct __pyx_t_3_sa__BitSet *)(__pyx_v_veb->bottom[__pyx_v_a])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":337 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":337 * else: * subb = <_BitSet*> veb.bottom[a] * return bitset_contains(subb, b) # <<<<<<<<<<<<<< @@ -20765,7 +20765,7 @@ static PyObject *__pyx_pw_3_sa_11VEBIterator_1__next__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":344 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":344 * cdef int next_val * * def __next__(self): # <<<<<<<<<<<<<< @@ -20784,7 +20784,7 @@ static PyObject *__pyx_pf_3_sa_11VEBIterator___next__(struct __pyx_obj_3_sa_VEBI int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__next__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":347 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":347 * cdef int ret_val * * if self.next_val == -1: # <<<<<<<<<<<<<< @@ -20794,7 +20794,7 @@ static PyObject *__pyx_pf_3_sa_11VEBIterator___next__(struct __pyx_obj_3_sa_VEBI __pyx_t_1 = (__pyx_v_self->next_val == -1); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":348 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":348 * * if self.next_val == -1: * raise StopIteration() # <<<<<<<<<<<<<< @@ -20810,7 +20810,7 @@ static PyObject *__pyx_pf_3_sa_11VEBIterator___next__(struct __pyx_obj_3_sa_VEBI } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":349 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":349 * if self.next_val == -1: * raise StopIteration() * ret_val = self.next_val # <<<<<<<<<<<<<< @@ -20819,7 +20819,7 @@ static PyObject *__pyx_pf_3_sa_11VEBIterator___next__(struct __pyx_obj_3_sa_VEBI */ __pyx_v_ret_val = __pyx_v_self->next_val; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":350 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":350 * raise StopIteration() * ret_val = self.next_val * self.next_val = VEB_findsucc(self.v, ret_val) # <<<<<<<<<<<<<< @@ -20828,7 +20828,7 @@ static PyObject *__pyx_pf_3_sa_11VEBIterator___next__(struct __pyx_obj_3_sa_VEBI */ __pyx_v_self->next_val = __pyx_f_3_sa_VEB_findsucc(__pyx_v_self->v, __pyx_v_ret_val); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":351 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":351 * ret_val = self.next_val * self.next_val = VEB_findsucc(self.v, ret_val) * return ret_val # <<<<<<<<<<<<<< @@ -20901,7 +20901,7 @@ static int __pyx_pw_3_sa_3VEB_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":360 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":360 * cdef int _first(self) * * def __cinit__(self, int size): # <<<<<<<<<<<<<< @@ -20914,7 +20914,7 @@ static int __pyx_pf_3_sa_3VEB___cinit__(struct __pyx_obj_3_sa_VEB *__pyx_v_self, __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":361 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":361 * * def __cinit__(self, int size): * self.veb = new_VEB(size) # <<<<<<<<<<<<<< @@ -20937,7 +20937,7 @@ static void __pyx_pw_3_sa_3VEB_3__dealloc__(PyObject *__pyx_v_self) { __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":363 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":363 * self.veb = new_VEB(size) * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -20953,7 +20953,7 @@ static void __pyx_pf_3_sa_3VEB_2__dealloc__(struct __pyx_obj_3_sa_VEB *__pyx_v_s int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":364 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":364 * * def __dealloc__(self): * del_VEB(self.veb) # <<<<<<<<<<<<<< @@ -20983,7 +20983,7 @@ static PyObject *__pyx_pw_3_sa_3VEB_5__iter__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":366 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":366 * del_VEB(self.veb) * * def __iter__(self): # <<<<<<<<<<<<<< @@ -21001,7 +21001,7 @@ static PyObject *__pyx_pf_3_sa_3VEB_4__iter__(struct __pyx_obj_3_sa_VEB *__pyx_v int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__iter__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":368 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":368 * def __iter__(self): * cdef VEBIterator it * it = VEBIterator() # <<<<<<<<<<<<<< @@ -21013,7 +21013,7 @@ static PyObject *__pyx_pf_3_sa_3VEB_4__iter__(struct __pyx_obj_3_sa_VEB *__pyx_v __pyx_v_it = ((struct __pyx_obj_3_sa_VEBIterator *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":369 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":369 * cdef VEBIterator it * it = VEBIterator() * it.v = self.veb # <<<<<<<<<<<<<< @@ -21022,7 +21022,7 @@ static PyObject *__pyx_pf_3_sa_3VEB_4__iter__(struct __pyx_obj_3_sa_VEB *__pyx_v */ __pyx_v_it->v = __pyx_v_self->veb; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":370 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":370 * it = VEBIterator() * it.v = self.veb * it.next_val = self.veb.min_val # <<<<<<<<<<<<<< @@ -21031,7 +21031,7 @@ static PyObject *__pyx_pf_3_sa_3VEB_4__iter__(struct __pyx_obj_3_sa_VEB *__pyx_v */ __pyx_v_it->next_val = __pyx_v_self->veb->min_val; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":371 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":371 * it.v = self.veb * it.next_val = self.veb.min_val * return it # <<<<<<<<<<<<<< @@ -21067,7 +21067,7 @@ static PyObject *__pyx_pw_3_sa_3VEB_7insert(PyObject *__pyx_v_self, PyObject *__ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":373 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":373 * return it * * def insert(self, i): # <<<<<<<<<<<<<< @@ -21085,7 +21085,7 @@ static PyObject *__pyx_pf_3_sa_3VEB_6insert(struct __pyx_obj_3_sa_VEB *__pyx_v_s int __pyx_clineno = 0; __Pyx_RefNannySetupContext("insert", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":374 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":374 * * def insert(self, i): * return VEB_insert(self.veb, i) # <<<<<<<<<<<<<< @@ -21112,7 +21112,7 @@ static PyObject *__pyx_pf_3_sa_3VEB_6insert(struct __pyx_obj_3_sa_VEB *__pyx_v_s return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":376 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":376 * return VEB_insert(self.veb, i) * * cdef int _insert(self, int i): # <<<<<<<<<<<<<< @@ -21125,7 +21125,7 @@ static int __pyx_f_3_sa_3VEB__insert(struct __pyx_obj_3_sa_VEB *__pyx_v_self, in __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("_insert", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":377 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":377 * * cdef int _insert(self, int i): * return VEB_insert(self.veb, i) # <<<<<<<<<<<<<< @@ -21152,7 +21152,7 @@ static PyObject *__pyx_pw_3_sa_3VEB_9findsucc(PyObject *__pyx_v_self, PyObject * return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":379 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":379 * return VEB_insert(self.veb, i) * * def findsucc(self, i): # <<<<<<<<<<<<<< @@ -21170,7 +21170,7 @@ static PyObject *__pyx_pf_3_sa_3VEB_8findsucc(struct __pyx_obj_3_sa_VEB *__pyx_v int __pyx_clineno = 0; __Pyx_RefNannySetupContext("findsucc", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":380 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":380 * * def findsucc(self, i): * return VEB_findsucc(self.veb, i) # <<<<<<<<<<<<<< @@ -21197,7 +21197,7 @@ static PyObject *__pyx_pf_3_sa_3VEB_8findsucc(struct __pyx_obj_3_sa_VEB *__pyx_v return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":382 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":382 * return VEB_findsucc(self.veb, i) * * cdef int _first(self): # <<<<<<<<<<<<<< @@ -21210,7 +21210,7 @@ static int __pyx_f_3_sa_3VEB__first(struct __pyx_obj_3_sa_VEB *__pyx_v_self) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("_first", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":383 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":383 * * cdef int _first(self): * return self.veb.min_val # <<<<<<<<<<<<<< @@ -21226,7 +21226,7 @@ static int __pyx_f_3_sa_3VEB__first(struct __pyx_obj_3_sa_VEB *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":385 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":385 * return self.veb.min_val * * cdef int _findsucc(self, int i): # <<<<<<<<<<<<<< @@ -21239,7 +21239,7 @@ static int __pyx_f_3_sa_3VEB__findsucc(struct __pyx_obj_3_sa_VEB *__pyx_v_self, __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("_findsucc", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":386 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":386 * * cdef int _findsucc(self, int i): * return VEB_findsucc(self.veb, i) # <<<<<<<<<<<<<< @@ -21266,7 +21266,7 @@ static Py_ssize_t __pyx_pw_3_sa_3VEB_11__len__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":388 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":388 * return VEB_findsucc(self.veb, i) * * def __len__(self): # <<<<<<<<<<<<<< @@ -21279,7 +21279,7 @@ static Py_ssize_t __pyx_pf_3_sa_3VEB_10__len__(struct __pyx_obj_3_sa_VEB *__pyx_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__len__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":389 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":389 * * def __len__(self): * return self.veb.size # <<<<<<<<<<<<<< @@ -21306,7 +21306,7 @@ static int __pyx_pw_3_sa_3VEB_13__contains__(PyObject *__pyx_v_self, PyObject *_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":391 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":391 * return self.veb.size * * def __contains__(self, i): # <<<<<<<<<<<<<< @@ -21322,7 +21322,7 @@ static int __pyx_pf_3_sa_3VEB_12__contains__(struct __pyx_obj_3_sa_VEB *__pyx_v_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__contains__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":392 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":392 * * def __contains__(self, i): * return VEB_contains(self.veb, i) # <<<<<<<<<<<<<< @@ -21393,7 +21393,7 @@ static int __pyx_pw_3_sa_3LCP_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":9 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":9 * cdef IntList lcp * * def __cinit__(self, SuffixArray sa): # <<<<<<<<<<<<<< @@ -21422,7 +21422,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":13 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":13 * cdef IntList rank * * logger.info("Constructing LCP array") # <<<<<<<<<<<<<< @@ -21439,7 +21439,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":14 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":14 * * logger.info("Constructing LCP array") * self.sa = sa # <<<<<<<<<<<<<< @@ -21452,7 +21452,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, __Pyx_DECREF(((PyObject *)__pyx_v_self->sa)); __pyx_v_self->sa = __pyx_v_sa; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":15 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":15 * logger.info("Constructing LCP array") * self.sa = sa * n = self.sa.sa.len # <<<<<<<<<<<<<< @@ -21461,7 +21461,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, */ __pyx_v_n = __pyx_v_self->sa->sa->len; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":16 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":16 * self.sa = sa * n = self.sa.sa.len * self.lcp = IntList(initial_len=n) # <<<<<<<<<<<<<< @@ -21483,7 +21483,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, __pyx_v_self->lcp = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":18 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":18 * self.lcp = IntList(initial_len=n) * * rank = IntList(initial_len=n) # <<<<<<<<<<<<<< @@ -21502,7 +21502,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, __pyx_v_rank = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":19 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":19 * * rank = IntList(initial_len=n) * for i from 0 <= i < n: # <<<<<<<<<<<<<< @@ -21512,7 +21512,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, __pyx_t_3 = __pyx_v_n; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_3; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":20 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":20 * rank = IntList(initial_len=n) * for i from 0 <= i < n: * rank.arr[sa.sa.arr[i]] = i # <<<<<<<<<<<<<< @@ -21522,7 +21522,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, (__pyx_v_rank->arr[(__pyx_v_sa->sa->arr[__pyx_v_i])]) = __pyx_v_i; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":22 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":22 * rank.arr[sa.sa.arr[i]] = i * * h = 0 # <<<<<<<<<<<<<< @@ -21531,7 +21531,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, */ __pyx_v_h = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":23 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":23 * * h = 0 * for i from 0 <= i < n: # <<<<<<<<<<<<<< @@ -21541,7 +21541,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, __pyx_t_3 = __pyx_v_n; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_3; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":24 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":24 * h = 0 * for i from 0 <= i < n: * k = rank.arr[i] # <<<<<<<<<<<<<< @@ -21550,7 +21550,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, */ __pyx_v_k = (__pyx_v_rank->arr[__pyx_v_i]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":25 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":25 * for i from 0 <= i < n: * k = rank.arr[i] * if k == 0: # <<<<<<<<<<<<<< @@ -21560,7 +21560,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, __pyx_t_4 = (__pyx_v_k == 0); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":26 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":26 * k = rank.arr[i] * if k == 0: * self.lcp.arr[k] = -1 # <<<<<<<<<<<<<< @@ -21572,7 +21572,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":28 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":28 * self.lcp.arr[k] = -1 * else: * j = sa.sa.arr[k-1] # <<<<<<<<<<<<<< @@ -21581,7 +21581,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, */ __pyx_v_j = (__pyx_v_sa->sa->arr[(__pyx_v_k - 1)]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":29 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":29 * else: * j = sa.sa.arr[k-1] * while i+h < n and j+h < n and sa.darray.data.arr[i+h] == sa.darray.data.arr[j+h]: # <<<<<<<<<<<<<< @@ -21604,7 +21604,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, } if (!__pyx_t_5) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":30 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":30 * j = sa.sa.arr[k-1] * while i+h < n and j+h < n and sa.darray.data.arr[i+h] == sa.darray.data.arr[j+h]: * h = h+1 # <<<<<<<<<<<<<< @@ -21614,7 +21614,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, __pyx_v_h = (__pyx_v_h + 1); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":31 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":31 * while i+h < n and j+h < n and sa.darray.data.arr[i+h] == sa.darray.data.arr[j+h]: * h = h+1 * self.lcp.arr[k] = h # <<<<<<<<<<<<<< @@ -21625,7 +21625,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, } __pyx_L7:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":32 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":32 * h = h+1 * self.lcp.arr[k] = h * if h > 0: # <<<<<<<<<<<<<< @@ -21635,7 +21635,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, __pyx_t_5 = (__pyx_v_h > 0); if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":33 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":33 * self.lcp.arr[k] = h * if h > 0: * h = h-1 # <<<<<<<<<<<<<< @@ -21648,7 +21648,7 @@ static int __pyx_pf_3_sa_3LCP___cinit__(struct __pyx_obj_3_sa_LCP *__pyx_v_self, __pyx_L10:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":34 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":34 * if h > 0: * h = h-1 * logger.info("LCP array completed") # <<<<<<<<<<<<<< @@ -21701,7 +21701,7 @@ static PyObject *__pyx_pw_3_sa_3LCP_3compute_stats(PyObject *__pyx_v_self, PyObj return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":36 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":36 * logger.info("LCP array completed") * * def compute_stats(self, int max_n): # <<<<<<<<<<<<<< @@ -21772,7 +21772,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[9]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":48 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":48 * cdef VEB veb * * N = self.sa.sa.len # <<<<<<<<<<<<<< @@ -21781,7 +21781,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen */ __pyx_cur_scope->__pyx_v_N = __pyx_cur_scope->__pyx_v_self->sa->sa->len; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":50 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":50 * N = self.sa.sa.len * * ngram_starts = [] # <<<<<<<<<<<<<< @@ -21794,7 +21794,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_cur_scope->__pyx_v_ngram_starts = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":51 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":51 * * ngram_starts = [] * for n from 0 <= n < max_n: # <<<<<<<<<<<<<< @@ -21804,7 +21804,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_t_2 = __pyx_cur_scope->__pyx_v_max_n; for (__pyx_cur_scope->__pyx_v_n = 0; __pyx_cur_scope->__pyx_v_n < __pyx_t_2; __pyx_cur_scope->__pyx_v_n++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":52 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":52 * ngram_starts = [] * for n from 0 <= n < max_n: * ngram_starts.append(IntList(initial_len=N)) # <<<<<<<<<<<<<< @@ -21824,7 +21824,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":54 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":54 * ngram_starts.append(IntList(initial_len=N)) * * run_start = IntList(initial_len=max_n) # <<<<<<<<<<<<<< @@ -21844,7 +21844,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_cur_scope->__pyx_v_run_start = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":55 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":55 * * run_start = IntList(initial_len=max_n) * veb = VEB(N) # <<<<<<<<<<<<<< @@ -21865,7 +21865,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_cur_scope->__pyx_v_veb = ((struct __pyx_obj_3_sa_VEB *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":57 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":57 * veb = VEB(N) * * for i from 0 <= i < N: # <<<<<<<<<<<<<< @@ -21875,7 +21875,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_t_2 = __pyx_cur_scope->__pyx_v_N; for (__pyx_cur_scope->__pyx_v_i = 0; __pyx_cur_scope->__pyx_v_i < __pyx_t_2; __pyx_cur_scope->__pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":58 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":58 * * for i from 0 <= i < N: * h = self.lcp.arr[i] # <<<<<<<<<<<<<< @@ -21884,7 +21884,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen */ __pyx_cur_scope->__pyx_v_h = (__pyx_cur_scope->__pyx_v_self->lcp->arr[__pyx_cur_scope->__pyx_v_i]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":59 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":59 * for i from 0 <= i < N: * h = self.lcp.arr[i] * if h < 0: # <<<<<<<<<<<<<< @@ -21894,7 +21894,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_t_5 = (__pyx_cur_scope->__pyx_v_h < 0); if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":60 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":60 * h = self.lcp.arr[i] * if h < 0: * h = 0 # <<<<<<<<<<<<<< @@ -21906,7 +21906,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen } __pyx_L8:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":61 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":61 * if h < 0: * h = 0 * for n from h <= n < max_n: # <<<<<<<<<<<<<< @@ -21916,7 +21916,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_t_6 = __pyx_cur_scope->__pyx_v_max_n; for (__pyx_cur_scope->__pyx_v_n = __pyx_cur_scope->__pyx_v_h; __pyx_cur_scope->__pyx_v_n < __pyx_t_6; __pyx_cur_scope->__pyx_v_n++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":62 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":62 * h = 0 * for n from h <= n < max_n: * rs = run_start.arr[n] # <<<<<<<<<<<<<< @@ -21925,7 +21925,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen */ __pyx_cur_scope->__pyx_v_rs = (__pyx_cur_scope->__pyx_v_run_start->arr[__pyx_cur_scope->__pyx_v_n]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":63 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":63 * for n from h <= n < max_n: * rs = run_start.arr[n] * run_start.arr[n] = i # <<<<<<<<<<<<<< @@ -21934,7 +21934,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen */ (__pyx_cur_scope->__pyx_v_run_start->arr[__pyx_cur_scope->__pyx_v_n]) = __pyx_cur_scope->__pyx_v_i; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":64 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":64 * rs = run_start.arr[n] * run_start.arr[n] = i * freq = i - rs # <<<<<<<<<<<<<< @@ -21943,7 +21943,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen */ __pyx_cur_scope->__pyx_v_freq = (__pyx_cur_scope->__pyx_v_i - __pyx_cur_scope->__pyx_v_rs); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":65 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":65 * run_start.arr[n] = i * freq = i - rs * if freq > 1000: # arbitrary, but see note below # <<<<<<<<<<<<<< @@ -21953,7 +21953,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_t_5 = (__pyx_cur_scope->__pyx_v_freq > 1000); if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":66 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":66 * freq = i - rs * if freq > 1000: # arbitrary, but see note below * veb._insert(freq) # <<<<<<<<<<<<<< @@ -21962,7 +21962,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen */ ((struct __pyx_vtabstruct_3_sa_VEB *)__pyx_cur_scope->__pyx_v_veb->__pyx_vtab)->_insert(__pyx_cur_scope->__pyx_v_veb, __pyx_cur_scope->__pyx_v_freq); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":67 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":67 * if freq > 1000: # arbitrary, but see note below * veb._insert(freq) * ngram_start = ngram_starts[n] # <<<<<<<<<<<<<< @@ -21978,7 +21978,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_cur_scope->__pyx_v_ngram_start = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":68 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":68 * veb._insert(freq) * ngram_start = ngram_starts[n] * while ngram_start.arr[freq] > 0: # <<<<<<<<<<<<<< @@ -21989,7 +21989,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_t_5 = ((__pyx_cur_scope->__pyx_v_ngram_start->arr[__pyx_cur_scope->__pyx_v_freq]) > 0); if (!__pyx_t_5) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":69 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":69 * ngram_start = ngram_starts[n] * while ngram_start.arr[freq] > 0: * freq = freq + 1 # cheating a bit, should be ok for sparse histogram # <<<<<<<<<<<<<< @@ -21999,7 +21999,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_cur_scope->__pyx_v_freq = (__pyx_cur_scope->__pyx_v_freq + 1); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":70 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":70 * while ngram_start.arr[freq] > 0: * freq = freq + 1 # cheating a bit, should be ok for sparse histogram * ngram_start.arr[freq] = rs # <<<<<<<<<<<<<< @@ -22013,7 +22013,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen } } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":71 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":71 * freq = freq + 1 # cheating a bit, should be ok for sparse histogram * ngram_start.arr[freq] = rs * i = veb.veb.min_val # <<<<<<<<<<<<<< @@ -22022,7 +22022,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen */ __pyx_cur_scope->__pyx_v_i = __pyx_cur_scope->__pyx_v_veb->veb->min_val; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":72 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":72 * ngram_start.arr[freq] = rs * i = veb.veb.min_val * while i != -1: # <<<<<<<<<<<<<< @@ -22033,7 +22033,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_t_5 = (__pyx_cur_scope->__pyx_v_i != -1); if (!__pyx_t_5) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":73 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":73 * i = veb.veb.min_val * while i != -1: * ii = veb._findsucc(i) # <<<<<<<<<<<<<< @@ -22042,7 +22042,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen */ __pyx_cur_scope->__pyx_v_ii = ((struct __pyx_vtabstruct_3_sa_VEB *)__pyx_cur_scope->__pyx_v_veb->__pyx_vtab)->_findsucc(__pyx_cur_scope->__pyx_v_veb, __pyx_cur_scope->__pyx_v_i); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":74 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":74 * while i != -1: * ii = veb._findsucc(i) * for n from 0 <= n < max_n: # <<<<<<<<<<<<<< @@ -22052,7 +22052,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_t_2 = __pyx_cur_scope->__pyx_v_max_n; for (__pyx_cur_scope->__pyx_v_n = 0; __pyx_cur_scope->__pyx_v_n < __pyx_t_2; __pyx_cur_scope->__pyx_v_n++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":75 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":75 * ii = veb._findsucc(i) * for n from 0 <= n < max_n: * ngram_start = ngram_starts[n] # <<<<<<<<<<<<<< @@ -22068,7 +22068,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_cur_scope->__pyx_v_ngram_start = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":76 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":76 * for n from 0 <= n < max_n: * ngram_start = ngram_starts[n] * iii = i # <<<<<<<<<<<<<< @@ -22077,7 +22077,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen */ __pyx_cur_scope->__pyx_v_iii = __pyx_cur_scope->__pyx_v_i; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":77 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":77 * ngram_start = ngram_starts[n] * iii = i * rs = ngram_start.arr[iii] # <<<<<<<<<<<<<< @@ -22086,7 +22086,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen */ __pyx_cur_scope->__pyx_v_rs = (__pyx_cur_scope->__pyx_v_ngram_start->arr[__pyx_cur_scope->__pyx_v_iii]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":78 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":78 * iii = i * rs = ngram_start.arr[iii] * while (ii==-1 or iii < ii) and rs != 0: # <<<<<<<<<<<<<< @@ -22109,7 +22109,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen } if (!__pyx_t_7) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":79 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":79 * rs = ngram_start.arr[iii] * while (ii==-1 or iii < ii) and rs != 0: * j = self.sa.sa.arr[rs] # <<<<<<<<<<<<<< @@ -22118,7 +22118,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen */ __pyx_cur_scope->__pyx_v_j = (__pyx_cur_scope->__pyx_v_self->sa->sa->arr[__pyx_cur_scope->__pyx_v_rs]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":80 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":80 * while (ii==-1 or iii < ii) and rs != 0: * j = self.sa.sa.arr[rs] * valid = 1 # <<<<<<<<<<<<<< @@ -22127,7 +22127,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen */ __pyx_cur_scope->__pyx_v_valid = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":81 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":81 * j = self.sa.sa.arr[rs] * valid = 1 * for k from 0 <= k < n+1: # <<<<<<<<<<<<<< @@ -22137,7 +22137,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_t_9 = (__pyx_cur_scope->__pyx_v_n + 1); for (__pyx_cur_scope->__pyx_v_k = 0; __pyx_cur_scope->__pyx_v_k < __pyx_t_9; __pyx_cur_scope->__pyx_v_k++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":82 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":82 * valid = 1 * for k from 0 <= k < n+1: * if self.sa.darray.data.arr[j+k] < 2: # <<<<<<<<<<<<<< @@ -22147,7 +22147,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_t_7 = ((__pyx_cur_scope->__pyx_v_self->sa->darray->data->arr[(__pyx_cur_scope->__pyx_v_j + __pyx_cur_scope->__pyx_v_k)]) < 2); if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":83 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":83 * for k from 0 <= k < n+1: * if self.sa.darray.data.arr[j+k] < 2: * valid = 0 # <<<<<<<<<<<<<< @@ -22160,7 +22160,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_L22:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":84 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":84 * if self.sa.darray.data.arr[j+k] < 2: * valid = 0 * if valid: # <<<<<<<<<<<<<< @@ -22169,7 +22169,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen */ if (__pyx_cur_scope->__pyx_v_valid) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":85 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":85 * valid = 0 * if valid: * ngram = tuple([self.sa.darray.data.arr[j+k] for k in range(n+1)]) # <<<<<<<<<<<<<< @@ -22195,7 +22195,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen __pyx_cur_scope->__pyx_v_ngram = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":86 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":86 * if valid: * ngram = tuple([self.sa.darray.data.arr[j+k] for k in range(n+1)]) * yield i, n+1, ngram # <<<<<<<<<<<<<< @@ -22232,7 +22232,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen } __pyx_L23:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":87 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":87 * ngram = tuple([self.sa.darray.data.arr[j+k] for k in range(n+1)]) * yield i, n+1, ngram * iii = iii + 1 # <<<<<<<<<<<<<< @@ -22241,7 +22241,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen */ __pyx_cur_scope->__pyx_v_iii = (__pyx_cur_scope->__pyx_v_iii + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":88 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":88 * yield i, n+1, ngram * iii = iii + 1 * rs = ngram_start.arr[iii] # <<<<<<<<<<<<<< @@ -22251,7 +22251,7 @@ static PyObject *__pyx_gb_3_sa_3LCP_4generator1(__pyx_GeneratorObject *__pyx_gen } } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":89 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":89 * iii = iii + 1 * rs = ngram_start.arr[iii] * i = ii # <<<<<<<<<<<<<< @@ -22287,7 +22287,7 @@ static int __pyx_pw_3_sa_8Alphabet_1__cinit__(PyObject *__pyx_v_self, PyObject * return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":12 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":12 * cdef dict id2sym * * def __cinit__(self): # <<<<<<<<<<<<<< @@ -22304,7 +22304,7 @@ static int __pyx_pf_3_sa_8Alphabet___cinit__(struct __pyx_obj_3_sa_Alphabet *__p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":13 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":13 * * def __cinit__(self): * self.terminals = StringMap() # <<<<<<<<<<<<<< @@ -22319,7 +22319,7 @@ static int __pyx_pf_3_sa_8Alphabet___cinit__(struct __pyx_obj_3_sa_Alphabet *__p __pyx_v_self->terminals = ((struct __pyx_obj_3_sa_StringMap *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":14 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":14 * def __cinit__(self): * self.terminals = StringMap() * self.nonterminals = StringMap() # <<<<<<<<<<<<<< @@ -22334,7 +22334,7 @@ static int __pyx_pf_3_sa_8Alphabet___cinit__(struct __pyx_obj_3_sa_Alphabet *__p __pyx_v_self->nonterminals = ((struct __pyx_obj_3_sa_StringMap *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":15 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":15 * self.terminals = StringMap() * self.nonterminals = StringMap() * self.id2sym = {} # <<<<<<<<<<<<<< @@ -22349,7 +22349,7 @@ static int __pyx_pf_3_sa_8Alphabet___cinit__(struct __pyx_obj_3_sa_Alphabet *__p __pyx_v_self->id2sym = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":16 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":16 * self.nonterminals = StringMap() * self.id2sym = {} * self.first_nonterminal = -1 # <<<<<<<<<<<<<< @@ -22378,7 +22378,7 @@ static void __pyx_pw_3_sa_8Alphabet_3__dealloc__(PyObject *__pyx_v_self) { __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":18 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":18 * self.first_nonterminal = -1 * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -22393,7 +22393,7 @@ static void __pyx_pf_3_sa_8Alphabet_2__dealloc__(CYTHON_UNUSED struct __pyx_obj_ __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":21 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":21 * pass * * cdef int isvar(self, int sym): # <<<<<<<<<<<<<< @@ -22406,7 +22406,7 @@ static int __pyx_f_3_sa_8Alphabet_isvar(CYTHON_UNUSED struct __pyx_obj_3_sa_Alph __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("isvar", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":22 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":22 * * cdef int isvar(self, int sym): * return sym < 0 # <<<<<<<<<<<<<< @@ -22422,7 +22422,7 @@ static int __pyx_f_3_sa_8Alphabet_isvar(CYTHON_UNUSED struct __pyx_obj_3_sa_Alph return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":24 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":24 * return sym < 0 * * cdef int isword(self, int sym): # <<<<<<<<<<<<<< @@ -22435,7 +22435,7 @@ static int __pyx_f_3_sa_8Alphabet_isword(CYTHON_UNUSED struct __pyx_obj_3_sa_Alp __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("isword", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":25 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":25 * * cdef int isword(self, int sym): * return sym >= 0 # <<<<<<<<<<<<<< @@ -22451,7 +22451,7 @@ static int __pyx_f_3_sa_8Alphabet_isword(CYTHON_UNUSED struct __pyx_obj_3_sa_Alp return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":27 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":27 * return sym >= 0 * * cdef int getindex(self, int sym): # <<<<<<<<<<<<<< @@ -22464,7 +22464,7 @@ static int __pyx_f_3_sa_8Alphabet_getindex(CYTHON_UNUSED struct __pyx_obj_3_sa_A __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("getindex", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":28 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":28 * * cdef int getindex(self, int sym): * return -sym & INDEX_MASK # <<<<<<<<<<<<<< @@ -22480,7 +22480,7 @@ static int __pyx_f_3_sa_8Alphabet_getindex(CYTHON_UNUSED struct __pyx_obj_3_sa_A return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":30 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":30 * return -sym & INDEX_MASK * * cdef int setindex(self, int sym, int ind): # <<<<<<<<<<<<<< @@ -22493,7 +22493,7 @@ static int __pyx_f_3_sa_8Alphabet_setindex(CYTHON_UNUSED struct __pyx_obj_3_sa_A __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("setindex", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":31 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":31 * * cdef int setindex(self, int sym, int ind): * return -(-sym & ~INDEX_MASK | ind) # <<<<<<<<<<<<<< @@ -22509,7 +22509,7 @@ static int __pyx_f_3_sa_8Alphabet_setindex(CYTHON_UNUSED struct __pyx_obj_3_sa_A return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":33 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":33 * return -(-sym & ~INDEX_MASK | ind) * * cdef int clearindex(self, int sym): # <<<<<<<<<<<<<< @@ -22522,7 +22522,7 @@ static int __pyx_f_3_sa_8Alphabet_clearindex(CYTHON_UNUSED struct __pyx_obj_3_sa __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("clearindex", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":34 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":34 * * cdef int clearindex(self, int sym): * return -(-sym& ~INDEX_MASK) # <<<<<<<<<<<<<< @@ -22538,7 +22538,7 @@ static int __pyx_f_3_sa_8Alphabet_clearindex(CYTHON_UNUSED struct __pyx_obj_3_sa return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":36 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":36 * return -(-sym& ~INDEX_MASK) * * cdef int match(self, int sym1, int sym2): # <<<<<<<<<<<<<< @@ -22551,7 +22551,7 @@ static int __pyx_f_3_sa_8Alphabet_match(struct __pyx_obj_3_sa_Alphabet *__pyx_v_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("match", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":37 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":37 * * cdef int match(self, int sym1, int sym2): * return self.clearindex(sym1) == self.clearindex(sym2); # <<<<<<<<<<<<<< @@ -22567,7 +22567,7 @@ static int __pyx_f_3_sa_8Alphabet_match(struct __pyx_obj_3_sa_Alphabet *__pyx_v_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":39 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":39 * return self.clearindex(sym1) == self.clearindex(sym2); * * cdef char* tocat(self, int sym): # <<<<<<<<<<<<<< @@ -22580,7 +22580,7 @@ static char *__pyx_f_3_sa_8Alphabet_tocat(struct __pyx_obj_3_sa_Alphabet *__pyx_ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("tocat", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":40 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":40 * * cdef char* tocat(self, int sym): * return self.nonterminals.word((-sym >> INDEX_SHIFT)-1) # <<<<<<<<<<<<<< @@ -22596,7 +22596,7 @@ static char *__pyx_f_3_sa_8Alphabet_tocat(struct __pyx_obj_3_sa_Alphabet *__pyx_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":42 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":42 * return self.nonterminals.word((-sym >> INDEX_SHIFT)-1) * * cdef int fromcat(self, char *s): # <<<<<<<<<<<<<< @@ -22611,7 +22611,7 @@ static int __pyx_f_3_sa_8Alphabet_fromcat(struct __pyx_obj_3_sa_Alphabet *__pyx_ int __pyx_t_1; __Pyx_RefNannySetupContext("fromcat", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":44 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":44 * cdef int fromcat(self, char *s): * cdef int i * i = self.nonterminals.index(s) # <<<<<<<<<<<<<< @@ -22620,7 +22620,7 @@ static int __pyx_f_3_sa_8Alphabet_fromcat(struct __pyx_obj_3_sa_Alphabet *__pyx_ */ __pyx_v_i = ((struct __pyx_vtabstruct_3_sa_StringMap *)__pyx_v_self->nonterminals->__pyx_vtab)->index(__pyx_v_self->nonterminals, __pyx_v_s); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":45 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":45 * cdef int i * i = self.nonterminals.index(s) * if self.first_nonterminal == -1: # <<<<<<<<<<<<<< @@ -22630,7 +22630,7 @@ static int __pyx_f_3_sa_8Alphabet_fromcat(struct __pyx_obj_3_sa_Alphabet *__pyx_ __pyx_t_1 = (__pyx_v_self->first_nonterminal == -1); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":46 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":46 * i = self.nonterminals.index(s) * if self.first_nonterminal == -1: * self.first_nonterminal = i # <<<<<<<<<<<<<< @@ -22642,7 +22642,7 @@ static int __pyx_f_3_sa_8Alphabet_fromcat(struct __pyx_obj_3_sa_Alphabet *__pyx_ } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":47 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":47 * if self.first_nonterminal == -1: * self.first_nonterminal = i * if i > self.last_nonterminal: # <<<<<<<<<<<<<< @@ -22652,7 +22652,7 @@ static int __pyx_f_3_sa_8Alphabet_fromcat(struct __pyx_obj_3_sa_Alphabet *__pyx_ __pyx_t_1 = (__pyx_v_i > __pyx_v_self->last_nonterminal); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":48 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":48 * self.first_nonterminal = i * if i > self.last_nonterminal: * self.last_nonterminal = i # <<<<<<<<<<<<<< @@ -22664,7 +22664,7 @@ static int __pyx_f_3_sa_8Alphabet_fromcat(struct __pyx_obj_3_sa_Alphabet *__pyx_ } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":49 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":49 * if i > self.last_nonterminal: * self.last_nonterminal = i * return -(i+1 << INDEX_SHIFT) # <<<<<<<<<<<<<< @@ -22680,7 +22680,7 @@ static int __pyx_f_3_sa_8Alphabet_fromcat(struct __pyx_obj_3_sa_Alphabet *__pyx_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":51 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":51 * return -(i+1 << INDEX_SHIFT) * * cdef char* tostring(self, int sym): # <<<<<<<<<<<<<< @@ -22703,7 +22703,7 @@ static char *__pyx_f_3_sa_8Alphabet_tostring(struct __pyx_obj_3_sa_Alphabet *__p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("tostring", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":53 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":53 * cdef char* tostring(self, int sym): * cdef int ind * if self.isvar(sym): # <<<<<<<<<<<<<< @@ -22713,7 +22713,7 @@ static char *__pyx_f_3_sa_8Alphabet_tostring(struct __pyx_obj_3_sa_Alphabet *__p __pyx_t_1 = ((struct __pyx_vtabstruct_3_sa_Alphabet *)__pyx_v_self->__pyx_vtab)->isvar(__pyx_v_self, __pyx_v_sym); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":54 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":54 * cdef int ind * if self.isvar(sym): * if sym in self.id2sym: # <<<<<<<<<<<<<< @@ -22730,7 +22730,7 @@ static char *__pyx_f_3_sa_8Alphabet_tostring(struct __pyx_obj_3_sa_Alphabet *__p __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":55 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":55 * if self.isvar(sym): * if sym in self.id2sym: * return self.id2sym[sym] # <<<<<<<<<<<<<< @@ -22751,7 +22751,7 @@ static char *__pyx_f_3_sa_8Alphabet_tostring(struct __pyx_obj_3_sa_Alphabet *__p } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":56 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":56 * if sym in self.id2sym: * return self.id2sym[sym] * ind = self.getindex(sym) # <<<<<<<<<<<<<< @@ -22760,7 +22760,7 @@ static char *__pyx_f_3_sa_8Alphabet_tostring(struct __pyx_obj_3_sa_Alphabet *__p */ __pyx_v_ind = ((struct __pyx_vtabstruct_3_sa_Alphabet *)__pyx_v_self->__pyx_vtab)->getindex(__pyx_v_self, __pyx_v_sym); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":57 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":57 * return self.id2sym[sym] * ind = self.getindex(sym) * if ind > 0: # <<<<<<<<<<<<<< @@ -22770,7 +22770,7 @@ static char *__pyx_f_3_sa_8Alphabet_tostring(struct __pyx_obj_3_sa_Alphabet *__p __pyx_t_3 = (__pyx_v_ind > 0); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":58 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":58 * ind = self.getindex(sym) * if ind > 0: * self.id2sym[sym] = "[%s,%d]" % (self.tocat(sym), ind) # <<<<<<<<<<<<<< @@ -22802,7 +22802,7 @@ static char *__pyx_f_3_sa_8Alphabet_tostring(struct __pyx_obj_3_sa_Alphabet *__p } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":60 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":60 * self.id2sym[sym] = "[%s,%d]" % (self.tocat(sym), ind) * else: * self.id2sym[sym] = "[%s]" % self.tocat(sym) # <<<<<<<<<<<<<< @@ -22823,7 +22823,7 @@ static char *__pyx_f_3_sa_8Alphabet_tostring(struct __pyx_obj_3_sa_Alphabet *__p } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":61 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":61 * else: * self.id2sym[sym] = "[%s]" % self.tocat(sym) * return self.id2sym[sym] # <<<<<<<<<<<<<< @@ -22844,7 +22844,7 @@ static char *__pyx_f_3_sa_8Alphabet_tostring(struct __pyx_obj_3_sa_Alphabet *__p } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":63 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":63 * return self.id2sym[sym] * else: * return self.terminals.word(sym) # <<<<<<<<<<<<<< @@ -22869,7 +22869,7 @@ static char *__pyx_f_3_sa_8Alphabet_tostring(struct __pyx_obj_3_sa_Alphabet *__p return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":65 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":65 * return self.terminals.word(sym) * * cdef int fromstring(self, char *s, bint terminal): # <<<<<<<<<<<<<< @@ -22897,7 +22897,7 @@ static int __pyx_f_3_sa_8Alphabet_fromstring(struct __pyx_obj_3_sa_Alphabet *__p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("fromstring", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":69 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":69 * cdef char *comma * cdef int n * n = strlen(s) # <<<<<<<<<<<<<< @@ -22906,7 +22906,7 @@ static int __pyx_f_3_sa_8Alphabet_fromstring(struct __pyx_obj_3_sa_Alphabet *__p */ __pyx_v_n = strlen(__pyx_v_s); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":71 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":71 * n = strlen(s) * cdef char *sep * sep = strstr(s,"_SEP_") # <<<<<<<<<<<<<< @@ -22915,7 +22915,7 @@ static int __pyx_f_3_sa_8Alphabet_fromstring(struct __pyx_obj_3_sa_Alphabet *__p */ __pyx_v_sep = strstr(__pyx_v_s, __pyx_k___SEP_); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":72 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":72 * cdef char *sep * sep = strstr(s,"_SEP_") * if n >= 3 and s[0] == c'[' and s[n-1] == c']' and sep == NULL: # <<<<<<<<<<<<<< @@ -22943,7 +22943,7 @@ static int __pyx_f_3_sa_8Alphabet_fromstring(struct __pyx_obj_3_sa_Alphabet *__p } if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":73 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":73 * sep = strstr(s,"_SEP_") * if n >= 3 and s[0] == c'[' and s[n-1] == c']' and sep == NULL: * if terminal: # <<<<<<<<<<<<<< @@ -22952,7 +22952,7 @@ static int __pyx_f_3_sa_8Alphabet_fromstring(struct __pyx_obj_3_sa_Alphabet *__p */ if (__pyx_v_terminal) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":74 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":74 * if n >= 3 and s[0] == c'[' and s[n-1] == c']' and sep == NULL: * if terminal: * s1 = "\\"+s # <<<<<<<<<<<<<< @@ -22967,7 +22967,7 @@ static int __pyx_f_3_sa_8Alphabet_fromstring(struct __pyx_obj_3_sa_Alphabet *__p __pyx_v_s1 = __pyx_t_7; __pyx_t_7 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":75 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":75 * if terminal: * s1 = "\\"+s * return self.terminals.index(s1) # <<<<<<<<<<<<<< @@ -22981,7 +22981,7 @@ static int __pyx_f_3_sa_8Alphabet_fromstring(struct __pyx_obj_3_sa_Alphabet *__p } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":76 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":76 * s1 = "\\"+s * return self.terminals.index(s1) * s[n-1] = c'\0' # <<<<<<<<<<<<<< @@ -22990,7 +22990,7 @@ static int __pyx_f_3_sa_8Alphabet_fromstring(struct __pyx_obj_3_sa_Alphabet *__p */ (__pyx_v_s[(__pyx_v_n - 1)]) = '\x00'; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":77 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":77 * return self.terminals.index(s1) * s[n-1] = c'\0' * s = s + 1 # <<<<<<<<<<<<<< @@ -22999,7 +22999,7 @@ static int __pyx_f_3_sa_8Alphabet_fromstring(struct __pyx_obj_3_sa_Alphabet *__p */ __pyx_v_s = (__pyx_v_s + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":78 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":78 * s[n-1] = c'\0' * s = s + 1 * comma = strrchr(s, c',') # <<<<<<<<<<<<<< @@ -23008,7 +23008,7 @@ static int __pyx_f_3_sa_8Alphabet_fromstring(struct __pyx_obj_3_sa_Alphabet *__p */ __pyx_v_comma = strrchr(__pyx_v_s, ','); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":79 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":79 * s = s + 1 * comma = strrchr(s, c',') * if comma != NULL: # <<<<<<<<<<<<<< @@ -23018,7 +23018,7 @@ static int __pyx_f_3_sa_8Alphabet_fromstring(struct __pyx_obj_3_sa_Alphabet *__p __pyx_t_2 = (__pyx_v_comma != NULL); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":80 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":80 * comma = strrchr(s, c',') * if comma != NULL: * comma[0] = c'\0' # <<<<<<<<<<<<<< @@ -23027,7 +23027,7 @@ static int __pyx_f_3_sa_8Alphabet_fromstring(struct __pyx_obj_3_sa_Alphabet *__p */ (__pyx_v_comma[0]) = '\x00'; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":81 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":81 * if comma != NULL: * comma[0] = c'\0' * return self.setindex(self.fromcat(s), strtol(comma+1, NULL, 10)) # <<<<<<<<<<<<<< @@ -23040,7 +23040,7 @@ static int __pyx_f_3_sa_8Alphabet_fromstring(struct __pyx_obj_3_sa_Alphabet *__p } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":83 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":83 * return self.setindex(self.fromcat(s), strtol(comma+1, NULL, 10)) * else: * return self.fromcat(s) # <<<<<<<<<<<<<< @@ -23055,7 +23055,7 @@ static int __pyx_f_3_sa_8Alphabet_fromstring(struct __pyx_obj_3_sa_Alphabet *__p } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":85 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":85 * return self.fromcat(s) * else: * return self.terminals.index(s) # <<<<<<<<<<<<<< @@ -23091,7 +23091,7 @@ static PyObject *__pyx_pw_3_sa_8Alphabet_9terminals_1__get__(PyObject *__pyx_v_s return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":8 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":8 * * cdef class Alphabet: * cdef readonly StringMap terminals, nonterminals # <<<<<<<<<<<<<< @@ -23142,7 +23142,7 @@ static PyObject *__pyx_pf_3_sa_8Alphabet_12nonterminals___get__(struct __pyx_obj return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":89 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":89 * cdef Alphabet ALPHABET = Alphabet() * * cdef char* sym_tostring(int sym): # <<<<<<<<<<<<<< @@ -23155,7 +23155,7 @@ static char *__pyx_f_3_sa_sym_tostring(int __pyx_v_sym) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("sym_tostring", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":90 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":90 * * cdef char* sym_tostring(int sym): * return ALPHABET.tostring(sym) # <<<<<<<<<<<<<< @@ -23171,7 +23171,7 @@ static char *__pyx_f_3_sa_sym_tostring(int __pyx_v_sym) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":92 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":92 * return ALPHABET.tostring(sym) * * cdef char* sym_tocat(int sym): # <<<<<<<<<<<<<< @@ -23184,7 +23184,7 @@ static char *__pyx_f_3_sa_sym_tocat(int __pyx_v_sym) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("sym_tocat", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":93 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":93 * * cdef char* sym_tocat(int sym): * return ALPHABET.tocat(sym) # <<<<<<<<<<<<<< @@ -23200,7 +23200,7 @@ static char *__pyx_f_3_sa_sym_tocat(int __pyx_v_sym) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":95 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":95 * return ALPHABET.tocat(sym) * * cdef int sym_isvar(int sym): # <<<<<<<<<<<<<< @@ -23213,7 +23213,7 @@ static int __pyx_f_3_sa_sym_isvar(int __pyx_v_sym) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("sym_isvar", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":96 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":96 * * cdef int sym_isvar(int sym): * return ALPHABET.isvar(sym) # <<<<<<<<<<<<<< @@ -23229,7 +23229,7 @@ static int __pyx_f_3_sa_sym_isvar(int __pyx_v_sym) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":98 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":98 * return ALPHABET.isvar(sym) * * cdef int sym_getindex(int sym): # <<<<<<<<<<<<<< @@ -23242,7 +23242,7 @@ static int __pyx_f_3_sa_sym_getindex(int __pyx_v_sym) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("sym_getindex", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":99 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":99 * * cdef int sym_getindex(int sym): * return ALPHABET.getindex(sym) # <<<<<<<<<<<<<< @@ -23258,7 +23258,7 @@ static int __pyx_f_3_sa_sym_getindex(int __pyx_v_sym) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":101 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":101 * return ALPHABET.getindex(sym) * * cdef int sym_setindex(int sym, int id): # <<<<<<<<<<<<<< @@ -23271,7 +23271,7 @@ static int __pyx_f_3_sa_sym_setindex(int __pyx_v_sym, int __pyx_v_id) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("sym_setindex", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":102 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":102 * * cdef int sym_setindex(int sym, int id): * return ALPHABET.setindex(sym, id) # <<<<<<<<<<<<<< @@ -23287,7 +23287,7 @@ static int __pyx_f_3_sa_sym_setindex(int __pyx_v_sym, int __pyx_v_id) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":104 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":104 * return ALPHABET.setindex(sym, id) * * cdef int sym_fromstring(char* string, bint terminal): # <<<<<<<<<<<<<< @@ -23300,7 +23300,7 @@ static int __pyx_f_3_sa_sym_fromstring(char *__pyx_v_string, int __pyx_v_termina __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("sym_fromstring", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":105 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":105 * * cdef int sym_fromstring(char* string, bint terminal): * return ALPHABET.fromstring(string, terminal) # <<<<<<<<<<<<<< @@ -23329,7 +23329,7 @@ static PyObject *__pyx_pw_3_sa_3make_lattice(PyObject *__pyx_self, PyObject *__p } static PyObject *__pyx_gb_3_sa_12make_lattice_2generator7(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":108 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":108 * * def make_lattice(words): * word_ids = (sym_fromstring(word, True) for word in words) # <<<<<<<<<<<<<< @@ -23471,7 +23471,7 @@ static PyObject *__pyx_gb_3_sa_12make_lattice_2generator7(__pyx_GeneratorObject } static PyObject *__pyx_gb_3_sa_12make_lattice_5generator8(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":109 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":109 * def make_lattice(words): * word_ids = (sym_fromstring(word, True) for word in words) * return tuple(((word, None, 1), ) for word in word_ids) # <<<<<<<<<<<<<< @@ -23626,7 +23626,7 @@ static PyObject *__pyx_gb_3_sa_12make_lattice_5generator8(__pyx_GeneratorObject return NULL; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":107 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":107 * return ALPHABET.fromstring(string, terminal) * * def make_lattice(words): # <<<<<<<<<<<<<< @@ -23654,7 +23654,7 @@ static PyObject *__pyx_pf_3_sa_2make_lattice(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_INCREF(__pyx_cur_scope->__pyx_v_words); __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_words); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":108 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":108 * * def make_lattice(words): * word_ids = (sym_fromstring(word, True) for word in words) # <<<<<<<<<<<<<< @@ -23667,7 +23667,7 @@ static PyObject *__pyx_pf_3_sa_2make_lattice(CYTHON_UNUSED PyObject *__pyx_self, __pyx_cur_scope->__pyx_v_word_ids = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":109 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":109 * def make_lattice(words): * word_ids = (sym_fromstring(word, True) for word in words) * return tuple(((word, None, 1), ) for word in word_ids) # <<<<<<<<<<<<<< @@ -23716,7 +23716,7 @@ static PyObject *__pyx_pw_3_sa_5decode_lattice(PyObject *__pyx_self, PyObject *_ } static PyObject *__pyx_gb_3_sa_14decode_lattice_2generator9(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":112 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":112 * * def decode_lattice(lattice): * return tuple((sym_tostring(sym), weight, dist) for (sym, weight, dist) in arc # <<<<<<<<<<<<<< @@ -23790,7 +23790,7 @@ static PyObject *__pyx_gb_3_sa_14decode_lattice_2generator9(__pyx_GeneratorObjec __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 112; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":113 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":113 * def decode_lattice(lattice): * return tuple((sym_tostring(sym), weight, dist) for (sym, weight, dist) in arc * for arc in node for node in lattice) # <<<<<<<<<<<<<< @@ -23808,7 +23808,7 @@ static PyObject *__pyx_gb_3_sa_14decode_lattice_2generator9(__pyx_GeneratorObjec } for (;;) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":112 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":112 * * def decode_lattice(lattice): * return tuple((sym_tostring(sym), weight, dist) for (sym, weight, dist) in arc # <<<<<<<<<<<<<< @@ -23911,7 +23911,7 @@ static PyObject *__pyx_gb_3_sa_14decode_lattice_2generator9(__pyx_GeneratorObjec __pyx_cur_scope->__pyx_v_dist = __pyx_t_7; __pyx_t_7 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":113 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":113 * def decode_lattice(lattice): * return tuple((sym_tostring(sym), weight, dist) for (sym, weight, dist) in arc * for arc in node for node in lattice) # <<<<<<<<<<<<<< @@ -23999,7 +23999,7 @@ static PyObject *__pyx_gb_3_sa_14decode_lattice_2generator9(__pyx_GeneratorObjec __pyx_cur_scope->__pyx_v_node = __pyx_t_6; __pyx_t_6 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":112 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":112 * * def decode_lattice(lattice): * return tuple((sym_tostring(sym), weight, dist) for (sym, weight, dist) in arc # <<<<<<<<<<<<<< @@ -24080,7 +24080,7 @@ static PyObject *__pyx_gb_3_sa_14decode_lattice_2generator9(__pyx_GeneratorObjec return NULL; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":111 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":111 * return tuple(((word, None, 1), ) for word in word_ids) * * def decode_lattice(lattice): # <<<<<<<<<<<<<< @@ -24108,7 +24108,7 @@ static PyObject *__pyx_pf_3_sa_4decode_lattice(CYTHON_UNUSED PyObject *__pyx_sel __Pyx_INCREF(__pyx_cur_scope->__pyx_v_lattice); __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_lattice); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":112 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":112 * * def decode_lattice(lattice): * return tuple((sym_tostring(sym), weight, dist) for (sym, weight, dist) in arc # <<<<<<<<<<<<<< @@ -24157,7 +24157,7 @@ static PyObject *__pyx_pw_3_sa_7decode_sentence(PyObject *__pyx_self, PyObject * } static PyObject *__pyx_gb_3_sa_15decode_sentence_2generator10(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":116 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":116 * * def decode_sentence(lattice): * return tuple(sym_tostring(sym) for ((sym, _, _),) in lattice) # <<<<<<<<<<<<<< @@ -24415,7 +24415,7 @@ static PyObject *__pyx_gb_3_sa_15decode_sentence_2generator10(__pyx_GeneratorObj return NULL; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":115 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":115 * for arc in node for node in lattice) * * def decode_sentence(lattice): # <<<<<<<<<<<<<< @@ -24442,7 +24442,7 @@ static PyObject *__pyx_pf_3_sa_6decode_sentence(CYTHON_UNUSED PyObject *__pyx_se __Pyx_INCREF(__pyx_cur_scope->__pyx_v_lattice); __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_lattice); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":116 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":116 * * def decode_sentence(lattice): * return tuple(sym_tostring(sym) for ((sym, _, _),) in lattice) # <<<<<<<<<<<<<< @@ -24523,7 +24523,7 @@ static int __pyx_pw_3_sa_6Phrase_1__cinit__(PyObject *__pyx_v_self, PyObject *__ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":6 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":6 * cdef class Phrase: * * def __cinit__(self, words): # <<<<<<<<<<<<<< @@ -24547,7 +24547,7 @@ static int __pyx_pf_3_sa_6Phrase___cinit__(struct __pyx_obj_3_sa_Phrase *__pyx_v int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":8 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":8 * def __cinit__(self, words): * cdef int i, j, n, n_vars * n_vars = 0 # <<<<<<<<<<<<<< @@ -24556,7 +24556,7 @@ static int __pyx_pf_3_sa_6Phrase___cinit__(struct __pyx_obj_3_sa_Phrase *__pyx_v */ __pyx_v_n_vars = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":9 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":9 * cdef int i, j, n, n_vars * n_vars = 0 * n = len(words) # <<<<<<<<<<<<<< @@ -24566,7 +24566,7 @@ static int __pyx_pf_3_sa_6Phrase___cinit__(struct __pyx_obj_3_sa_Phrase *__pyx_v __pyx_t_1 = PyObject_Length(__pyx_v_words); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 9; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_n = __pyx_t_1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":10 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":10 * n_vars = 0 * n = len(words) * self.syms = malloc(n*sizeof(int)) # <<<<<<<<<<<<<< @@ -24575,7 +24575,7 @@ static int __pyx_pf_3_sa_6Phrase___cinit__(struct __pyx_obj_3_sa_Phrase *__pyx_v */ __pyx_v_self->syms = ((int *)malloc((__pyx_v_n * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":11 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":11 * n = len(words) * self.syms = malloc(n*sizeof(int)) * for i from 0 <= i < n: # <<<<<<<<<<<<<< @@ -24585,7 +24585,7 @@ static int __pyx_pf_3_sa_6Phrase___cinit__(struct __pyx_obj_3_sa_Phrase *__pyx_v __pyx_t_2 = __pyx_v_n; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_2; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":12 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":12 * self.syms = malloc(n*sizeof(int)) * for i from 0 <= i < n: * self.syms[i] = words[i] # <<<<<<<<<<<<<< @@ -24598,7 +24598,7 @@ static int __pyx_pf_3_sa_6Phrase___cinit__(struct __pyx_obj_3_sa_Phrase *__pyx_v __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; (__pyx_v_self->syms[__pyx_v_i]) = __pyx_t_4; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":13 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":13 * for i from 0 <= i < n: * self.syms[i] = words[i] * if sym_isvar(self.syms[i]): # <<<<<<<<<<<<<< @@ -24608,7 +24608,7 @@ static int __pyx_pf_3_sa_6Phrase___cinit__(struct __pyx_obj_3_sa_Phrase *__pyx_v __pyx_t_4 = __pyx_f_3_sa_sym_isvar((__pyx_v_self->syms[__pyx_v_i])); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":14 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":14 * self.syms[i] = words[i] * if sym_isvar(self.syms[i]): * n_vars += 1 # <<<<<<<<<<<<<< @@ -24621,7 +24621,7 @@ static int __pyx_pf_3_sa_6Phrase___cinit__(struct __pyx_obj_3_sa_Phrase *__pyx_v __pyx_L5:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":15 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":15 * if sym_isvar(self.syms[i]): * n_vars += 1 * self.n = n # <<<<<<<<<<<<<< @@ -24630,7 +24630,7 @@ static int __pyx_pf_3_sa_6Phrase___cinit__(struct __pyx_obj_3_sa_Phrase *__pyx_v */ __pyx_v_self->n = __pyx_v_n; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":16 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":16 * n_vars += 1 * self.n = n * self.n_vars = n_vars # <<<<<<<<<<<<<< @@ -24639,7 +24639,7 @@ static int __pyx_pf_3_sa_6Phrase___cinit__(struct __pyx_obj_3_sa_Phrase *__pyx_v */ __pyx_v_self->n_vars = __pyx_v_n_vars; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":17 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":17 * self.n = n * self.n_vars = n_vars * self.varpos = malloc(n_vars*sizeof(int)) # <<<<<<<<<<<<<< @@ -24648,7 +24648,7 @@ static int __pyx_pf_3_sa_6Phrase___cinit__(struct __pyx_obj_3_sa_Phrase *__pyx_v */ __pyx_v_self->varpos = ((int *)malloc((__pyx_v_n_vars * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":18 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":18 * self.n_vars = n_vars * self.varpos = malloc(n_vars*sizeof(int)) * j = 0 # <<<<<<<<<<<<<< @@ -24657,7 +24657,7 @@ static int __pyx_pf_3_sa_6Phrase___cinit__(struct __pyx_obj_3_sa_Phrase *__pyx_v */ __pyx_v_j = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":19 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":19 * self.varpos = malloc(n_vars*sizeof(int)) * j = 0 * for i from 0 <= i < n: # <<<<<<<<<<<<<< @@ -24667,7 +24667,7 @@ static int __pyx_pf_3_sa_6Phrase___cinit__(struct __pyx_obj_3_sa_Phrase *__pyx_v __pyx_t_2 = __pyx_v_n; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_2; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":20 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":20 * j = 0 * for i from 0 <= i < n: * if sym_isvar(self.syms[i]): # <<<<<<<<<<<<<< @@ -24677,7 +24677,7 @@ static int __pyx_pf_3_sa_6Phrase___cinit__(struct __pyx_obj_3_sa_Phrase *__pyx_v __pyx_t_4 = __pyx_f_3_sa_sym_isvar((__pyx_v_self->syms[__pyx_v_i])); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":21 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":21 * for i from 0 <= i < n: * if sym_isvar(self.syms[i]): * self.varpos[j] = i # <<<<<<<<<<<<<< @@ -24686,7 +24686,7 @@ static int __pyx_pf_3_sa_6Phrase___cinit__(struct __pyx_obj_3_sa_Phrase *__pyx_v */ (__pyx_v_self->varpos[__pyx_v_j]) = __pyx_v_i; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":22 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":22 * if sym_isvar(self.syms[i]): * self.varpos[j] = i * j = j + 1 # <<<<<<<<<<<<<< @@ -24719,7 +24719,7 @@ static void __pyx_pw_3_sa_6Phrase_3__dealloc__(PyObject *__pyx_v_self) { __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":24 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":24 * j = j + 1 * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -24731,7 +24731,7 @@ static void __pyx_pf_3_sa_6Phrase_2__dealloc__(struct __pyx_obj_3_sa_Phrase *__p __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":25 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":25 * * def __dealloc__(self): * free(self.syms) # <<<<<<<<<<<<<< @@ -24740,7 +24740,7 @@ static void __pyx_pf_3_sa_6Phrase_2__dealloc__(struct __pyx_obj_3_sa_Phrase *__p */ free(__pyx_v_self->syms); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":26 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":26 * def __dealloc__(self): * free(self.syms) * free(self.varpos) # <<<<<<<<<<<<<< @@ -24763,7 +24763,7 @@ static PyObject *__pyx_pw_3_sa_6Phrase_5__str__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":28 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":28 * free(self.varpos) * * def __str__(self): # <<<<<<<<<<<<<< @@ -24787,7 +24787,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_4__str__(struct __pyx_obj_3_sa_Phrase *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__str__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":29 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":29 * * def __str__(self): * strs = [] # <<<<<<<<<<<<<< @@ -24799,7 +24799,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_4__str__(struct __pyx_obj_3_sa_Phrase *__ __pyx_v_strs = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":31 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":31 * strs = [] * cdef int i, s * for i from 0 <= i < self.n: # <<<<<<<<<<<<<< @@ -24809,7 +24809,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_4__str__(struct __pyx_obj_3_sa_Phrase *__ __pyx_t_2 = __pyx_v_self->n; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_2; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":32 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":32 * cdef int i, s * for i from 0 <= i < self.n: * s = self.syms[i] # <<<<<<<<<<<<<< @@ -24818,7 +24818,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_4__str__(struct __pyx_obj_3_sa_Phrase *__ */ __pyx_v_s = (__pyx_v_self->syms[__pyx_v_i]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":33 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":33 * for i from 0 <= i < self.n: * s = self.syms[i] * strs.append(sym_tostring(s)) # <<<<<<<<<<<<<< @@ -24831,7 +24831,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_4__str__(struct __pyx_obj_3_sa_Phrase *__ __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":34 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":34 * s = self.syms[i] * strs.append(sym_tostring(s)) * return ' '.join(strs) # <<<<<<<<<<<<<< @@ -24881,7 +24881,7 @@ static PyObject *__pyx_pw_3_sa_6Phrase_7handle(PyObject *__pyx_v_self, CYTHON_UN return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":36 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":36 * return ' '.join(strs) * * def handle(self): # <<<<<<<<<<<<<< @@ -24905,7 +24905,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_6handle(struct __pyx_obj_3_sa_Phrase *__p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("handle", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":39 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":39 * """return a hashable representation that normalizes the ordering * of the nonterminal indices""" * norm = [] # <<<<<<<<<<<<<< @@ -24917,7 +24917,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_6handle(struct __pyx_obj_3_sa_Phrase *__p __pyx_v_norm = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":41 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":41 * norm = [] * cdef int i, j, s * i = 1 # <<<<<<<<<<<<<< @@ -24926,7 +24926,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_6handle(struct __pyx_obj_3_sa_Phrase *__p */ __pyx_v_i = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":42 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":42 * cdef int i, j, s * i = 1 * j = 0 # <<<<<<<<<<<<<< @@ -24935,7 +24935,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_6handle(struct __pyx_obj_3_sa_Phrase *__p */ __pyx_v_j = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":43 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":43 * i = 1 * j = 0 * for j from 0 <= j < self.n: # <<<<<<<<<<<<<< @@ -24945,7 +24945,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_6handle(struct __pyx_obj_3_sa_Phrase *__p __pyx_t_2 = __pyx_v_self->n; for (__pyx_v_j = 0; __pyx_v_j < __pyx_t_2; __pyx_v_j++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":44 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":44 * j = 0 * for j from 0 <= j < self.n: * s = self.syms[j] # <<<<<<<<<<<<<< @@ -24954,7 +24954,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_6handle(struct __pyx_obj_3_sa_Phrase *__p */ __pyx_v_s = (__pyx_v_self->syms[__pyx_v_j]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":45 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":45 * for j from 0 <= j < self.n: * s = self.syms[j] * if sym_isvar(s): # <<<<<<<<<<<<<< @@ -24964,7 +24964,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_6handle(struct __pyx_obj_3_sa_Phrase *__p __pyx_t_3 = __pyx_f_3_sa_sym_isvar(__pyx_v_s); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":46 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":46 * s = self.syms[j] * if sym_isvar(s): * s = sym_setindex(s,i) # <<<<<<<<<<<<<< @@ -24973,7 +24973,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_6handle(struct __pyx_obj_3_sa_Phrase *__p */ __pyx_v_s = __pyx_f_3_sa_sym_setindex(__pyx_v_s, __pyx_v_i); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":47 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":47 * if sym_isvar(s): * s = sym_setindex(s,i) * i = i + 1 # <<<<<<<<<<<<<< @@ -24985,7 +24985,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_6handle(struct __pyx_obj_3_sa_Phrase *__p } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":48 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":48 * s = sym_setindex(s,i) * i = i + 1 * norm.append(s) # <<<<<<<<<<<<<< @@ -24998,7 +24998,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_6handle(struct __pyx_obj_3_sa_Phrase *__p __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":49 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":49 * i = i + 1 * norm.append(s) * return tuple(norm) # <<<<<<<<<<<<<< @@ -25036,7 +25036,7 @@ static PyObject *__pyx_pw_3_sa_6Phrase_9strhandle(PyObject *__pyx_v_self, CYTHON return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":51 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":51 * return tuple(norm) * * def strhandle(self): # <<<<<<<<<<<<<< @@ -25063,7 +25063,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_8strhandle(struct __pyx_obj_3_sa_Phrase * int __pyx_clineno = 0; __Pyx_RefNannySetupContext("strhandle", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":52 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":52 * * def strhandle(self): * strs = [] # <<<<<<<<<<<<<< @@ -25075,7 +25075,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_8strhandle(struct __pyx_obj_3_sa_Phrase * __pyx_v_strs = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":53 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":53 * def strhandle(self): * strs = [] * norm = [] # <<<<<<<<<<<<<< @@ -25087,7 +25087,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_8strhandle(struct __pyx_obj_3_sa_Phrase * __pyx_v_norm = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":55 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":55 * norm = [] * cdef int i, j, s * i = 1 # <<<<<<<<<<<<<< @@ -25096,7 +25096,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_8strhandle(struct __pyx_obj_3_sa_Phrase * */ __pyx_v_i = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":56 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":56 * cdef int i, j, s * i = 1 * j = 0 # <<<<<<<<<<<<<< @@ -25105,7 +25105,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_8strhandle(struct __pyx_obj_3_sa_Phrase * */ __pyx_v_j = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":57 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":57 * i = 1 * j = 0 * for j from 0 <= j < self.n: # <<<<<<<<<<<<<< @@ -25115,7 +25115,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_8strhandle(struct __pyx_obj_3_sa_Phrase * __pyx_t_2 = __pyx_v_self->n; for (__pyx_v_j = 0; __pyx_v_j < __pyx_t_2; __pyx_v_j++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":58 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":58 * j = 0 * for j from 0 <= j < self.n: * s = self.syms[j] # <<<<<<<<<<<<<< @@ -25124,7 +25124,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_8strhandle(struct __pyx_obj_3_sa_Phrase * */ __pyx_v_s = (__pyx_v_self->syms[__pyx_v_j]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":59 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":59 * for j from 0 <= j < self.n: * s = self.syms[j] * if sym_isvar(s): # <<<<<<<<<<<<<< @@ -25134,7 +25134,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_8strhandle(struct __pyx_obj_3_sa_Phrase * __pyx_t_3 = __pyx_f_3_sa_sym_isvar(__pyx_v_s); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":60 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":60 * s = self.syms[j] * if sym_isvar(s): * s = sym_setindex(s,i) # <<<<<<<<<<<<<< @@ -25143,7 +25143,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_8strhandle(struct __pyx_obj_3_sa_Phrase * */ __pyx_v_s = __pyx_f_3_sa_sym_setindex(__pyx_v_s, __pyx_v_i); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":61 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":61 * if sym_isvar(s): * s = sym_setindex(s,i) * i = i + 1 # <<<<<<<<<<<<<< @@ -25155,7 +25155,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_8strhandle(struct __pyx_obj_3_sa_Phrase * } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":62 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":62 * s = sym_setindex(s,i) * i = i + 1 * norm.append(sym_tostring(s)) # <<<<<<<<<<<<<< @@ -25168,7 +25168,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_8strhandle(struct __pyx_obj_3_sa_Phrase * __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":63 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":63 * i = i + 1 * norm.append(sym_tostring(s)) * return ' '.join(norm) # <<<<<<<<<<<<<< @@ -25218,7 +25218,7 @@ static PyObject *__pyx_pw_3_sa_6Phrase_11arity(PyObject *__pyx_v_self, CYTHON_UN return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":65 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":65 * return ' '.join(norm) * * def arity(self): # <<<<<<<<<<<<<< @@ -25235,7 +25235,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_10arity(struct __pyx_obj_3_sa_Phrase *__p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("arity", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":66 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":66 * * def arity(self): * return self.n_vars # <<<<<<<<<<<<<< @@ -25272,7 +25272,7 @@ static PyObject *__pyx_pw_3_sa_6Phrase_13getvarpos(PyObject *__pyx_v_self, PyObj return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":68 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":68 * return self.n_vars * * def getvarpos(self, i): # <<<<<<<<<<<<<< @@ -25292,7 +25292,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_12getvarpos(struct __pyx_obj_3_sa_Phrase int __pyx_clineno = 0; __Pyx_RefNannySetupContext("getvarpos", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":69 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":69 * * def getvarpos(self, i): * if 0 <= i < self.n_vars: # <<<<<<<<<<<<<< @@ -25311,7 +25311,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_12getvarpos(struct __pyx_obj_3_sa_Phrase __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":70 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":70 * def getvarpos(self, i): * if 0 <= i < self.n_vars: * return self.varpos[i] # <<<<<<<<<<<<<< @@ -25329,7 +25329,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_12getvarpos(struct __pyx_obj_3_sa_Phrase } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":72 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":72 * return self.varpos[i] * else: * raise IndexError # <<<<<<<<<<<<<< @@ -25365,7 +25365,7 @@ static PyObject *__pyx_pw_3_sa_6Phrase_15getvar(PyObject *__pyx_v_self, PyObject return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":74 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":74 * raise IndexError * * def getvar(self, i): # <<<<<<<<<<<<<< @@ -25385,7 +25385,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_14getvar(struct __pyx_obj_3_sa_Phrase *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("getvar", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":75 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":75 * * def getvar(self, i): * if 0 <= i < self.n_vars: # <<<<<<<<<<<<<< @@ -25404,7 +25404,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_14getvar(struct __pyx_obj_3_sa_Phrase *__ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":76 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":76 * def getvar(self, i): * if 0 <= i < self.n_vars: * return self.syms[self.varpos[i]] # <<<<<<<<<<<<<< @@ -25422,7 +25422,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_14getvar(struct __pyx_obj_3_sa_Phrase *__ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":78 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":78 * return self.syms[self.varpos[i]] * else: * raise IndexError # <<<<<<<<<<<<<< @@ -25447,7 +25447,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_14getvar(struct __pyx_obj_3_sa_Phrase *__ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":80 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":80 * raise IndexError * * cdef int chunkpos(self, int k): # <<<<<<<<<<<<<< @@ -25461,7 +25461,7 @@ int __pyx_f_3_sa_6Phrase_chunkpos(struct __pyx_obj_3_sa_Phrase *__pyx_v_self, in int __pyx_t_1; __Pyx_RefNannySetupContext("chunkpos", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":81 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":81 * * cdef int chunkpos(self, int k): * if k == 0: # <<<<<<<<<<<<<< @@ -25471,7 +25471,7 @@ int __pyx_f_3_sa_6Phrase_chunkpos(struct __pyx_obj_3_sa_Phrase *__pyx_v_self, in __pyx_t_1 = (__pyx_v_k == 0); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":82 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":82 * cdef int chunkpos(self, int k): * if k == 0: * return 0 # <<<<<<<<<<<<<< @@ -25484,7 +25484,7 @@ int __pyx_f_3_sa_6Phrase_chunkpos(struct __pyx_obj_3_sa_Phrase *__pyx_v_self, in } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":84 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":84 * return 0 * else: * return self.varpos[k-1]+1 # <<<<<<<<<<<<<< @@ -25502,7 +25502,7 @@ int __pyx_f_3_sa_6Phrase_chunkpos(struct __pyx_obj_3_sa_Phrase *__pyx_v_self, in return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":86 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":86 * return self.varpos[k-1]+1 * * cdef int chunklen(self, int k): # <<<<<<<<<<<<<< @@ -25516,7 +25516,7 @@ int __pyx_f_3_sa_6Phrase_chunklen(struct __pyx_obj_3_sa_Phrase *__pyx_v_self, in int __pyx_t_1; __Pyx_RefNannySetupContext("chunklen", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":87 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":87 * * cdef int chunklen(self, int k): * if self.n_vars == 0: # <<<<<<<<<<<<<< @@ -25526,7 +25526,7 @@ int __pyx_f_3_sa_6Phrase_chunklen(struct __pyx_obj_3_sa_Phrase *__pyx_v_self, in __pyx_t_1 = (__pyx_v_self->n_vars == 0); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":88 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":88 * cdef int chunklen(self, int k): * if self.n_vars == 0: * return self.n # <<<<<<<<<<<<<< @@ -25538,7 +25538,7 @@ int __pyx_f_3_sa_6Phrase_chunklen(struct __pyx_obj_3_sa_Phrase *__pyx_v_self, in goto __pyx_L3; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":89 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":89 * if self.n_vars == 0: * return self.n * elif k == 0: # <<<<<<<<<<<<<< @@ -25548,7 +25548,7 @@ int __pyx_f_3_sa_6Phrase_chunklen(struct __pyx_obj_3_sa_Phrase *__pyx_v_self, in __pyx_t_1 = (__pyx_v_k == 0); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":90 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":90 * return self.n * elif k == 0: * return self.varpos[0] # <<<<<<<<<<<<<< @@ -25560,7 +25560,7 @@ int __pyx_f_3_sa_6Phrase_chunklen(struct __pyx_obj_3_sa_Phrase *__pyx_v_self, in goto __pyx_L3; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":91 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":91 * elif k == 0: * return self.varpos[0] * elif k == self.n_vars: # <<<<<<<<<<<<<< @@ -25570,7 +25570,7 @@ int __pyx_f_3_sa_6Phrase_chunklen(struct __pyx_obj_3_sa_Phrase *__pyx_v_self, in __pyx_t_1 = (__pyx_v_k == __pyx_v_self->n_vars); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":92 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":92 * return self.varpos[0] * elif k == self.n_vars: * return self.n-self.varpos[k-1]-1 # <<<<<<<<<<<<<< @@ -25583,7 +25583,7 @@ int __pyx_f_3_sa_6Phrase_chunklen(struct __pyx_obj_3_sa_Phrase *__pyx_v_self, in } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":94 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":94 * return self.n-self.varpos[k-1]-1 * else: * return self.varpos[k]-self.varpos[k-1]-1 # <<<<<<<<<<<<<< @@ -25612,7 +25612,7 @@ static PyObject *__pyx_pw_3_sa_6Phrase_17clen(PyObject *__pyx_v_self, PyObject * return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":96 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":96 * return self.varpos[k]-self.varpos[k-1]-1 * * def clen(self, k): # <<<<<<<<<<<<<< @@ -25630,7 +25630,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_16clen(struct __pyx_obj_3_sa_Phrase *__py int __pyx_clineno = 0; __Pyx_RefNannySetupContext("clen", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":97 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":97 * * def clen(self, k): * return self.chunklen(k) # <<<<<<<<<<<<<< @@ -25668,7 +25668,7 @@ static PyObject *__pyx_pw_3_sa_6Phrase_19getchunk(PyObject *__pyx_v_self, PyObje return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":99 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":99 * return self.chunklen(k) * * def getchunk(self, ci): # <<<<<<<<<<<<<< @@ -25691,7 +25691,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_18getchunk(struct __pyx_obj_3_sa_Phrase * int __pyx_clineno = 0; __Pyx_RefNannySetupContext("getchunk", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":101 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":101 * def getchunk(self, ci): * cdef int start, stop * start = self.chunkpos(ci) # <<<<<<<<<<<<<< @@ -25701,7 +25701,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_18getchunk(struct __pyx_obj_3_sa_Phrase * __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_v_ci); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_start = ((struct __pyx_vtabstruct_3_sa_Phrase *)__pyx_v_self->__pyx_vtab)->chunkpos(__pyx_v_self, __pyx_t_1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":102 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":102 * cdef int start, stop * start = self.chunkpos(ci) * stop = start+self.chunklen(ci) # <<<<<<<<<<<<<< @@ -25711,7 +25711,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_18getchunk(struct __pyx_obj_3_sa_Phrase * __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_v_ci); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_stop = (__pyx_v_start + ((struct __pyx_vtabstruct_3_sa_Phrase *)__pyx_v_self->__pyx_vtab)->chunklen(__pyx_v_self, __pyx_t_1)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":103 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":103 * start = self.chunkpos(ci) * stop = start+self.chunklen(ci) * chunk = [] # <<<<<<<<<<<<<< @@ -25723,7 +25723,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_18getchunk(struct __pyx_obj_3_sa_Phrase * __pyx_v_chunk = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":104 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":104 * stop = start+self.chunklen(ci) * chunk = [] * for i from start <= i < stop: # <<<<<<<<<<<<<< @@ -25733,7 +25733,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_18getchunk(struct __pyx_obj_3_sa_Phrase * __pyx_t_1 = __pyx_v_stop; for (__pyx_v_i = __pyx_v_start; __pyx_v_i < __pyx_t_1; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":105 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":105 * chunk = [] * for i from start <= i < stop: * chunk.append(self.syms[i]) # <<<<<<<<<<<<<< @@ -25746,7 +25746,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_18getchunk(struct __pyx_obj_3_sa_Phrase * __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":106 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":106 * for i from start <= i < stop: * chunk.append(self.syms[i]) * return chunk # <<<<<<<<<<<<<< @@ -25784,7 +25784,7 @@ static int __pyx_pw_3_sa_6Phrase_21__cmp__(PyObject *__pyx_v_self, PyObject *__p } #endif /*!(#if PY_MAJOR_VERSION < 3)*/ -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":108 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":108 * return chunk * * def __cmp__(self, other): # <<<<<<<<<<<<<< @@ -25807,7 +25807,7 @@ static int __pyx_pf_3_sa_6Phrase_20__cmp__(struct __pyx_obj_3_sa_Phrase *__pyx_v int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cmp__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":111 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":111 * cdef Phrase otherp * cdef int i * otherp = other # <<<<<<<<<<<<<< @@ -25818,7 +25818,7 @@ static int __pyx_pf_3_sa_6Phrase_20__cmp__(struct __pyx_obj_3_sa_Phrase *__pyx_v __Pyx_INCREF(__pyx_v_other); __pyx_v_otherp = ((struct __pyx_obj_3_sa_Phrase *)__pyx_v_other); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":112 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":112 * cdef int i * otherp = other * for i from 0 <= i < min(self.n, otherp.n): # <<<<<<<<<<<<<< @@ -25835,7 +25835,7 @@ static int __pyx_pf_3_sa_6Phrase_20__cmp__(struct __pyx_obj_3_sa_Phrase *__pyx_v __pyx_t_1 = __pyx_t_3; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_1; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":113 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":113 * otherp = other * for i from 0 <= i < min(self.n, otherp.n): * if self.syms[i] < otherp.syms[i]: # <<<<<<<<<<<<<< @@ -25845,7 +25845,7 @@ static int __pyx_pf_3_sa_6Phrase_20__cmp__(struct __pyx_obj_3_sa_Phrase *__pyx_v __pyx_t_4 = ((__pyx_v_self->syms[__pyx_v_i]) < (__pyx_v_otherp->syms[__pyx_v_i])); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":114 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":114 * for i from 0 <= i < min(self.n, otherp.n): * if self.syms[i] < otherp.syms[i]: * return -1 # <<<<<<<<<<<<<< @@ -25857,7 +25857,7 @@ static int __pyx_pf_3_sa_6Phrase_20__cmp__(struct __pyx_obj_3_sa_Phrase *__pyx_v goto __pyx_L5; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":115 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":115 * if self.syms[i] < otherp.syms[i]: * return -1 * elif self.syms[i] > otherp.syms[i]: # <<<<<<<<<<<<<< @@ -25867,7 +25867,7 @@ static int __pyx_pf_3_sa_6Phrase_20__cmp__(struct __pyx_obj_3_sa_Phrase *__pyx_v __pyx_t_4 = ((__pyx_v_self->syms[__pyx_v_i]) > (__pyx_v_otherp->syms[__pyx_v_i])); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":116 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":116 * return -1 * elif self.syms[i] > otherp.syms[i]: * return 1 # <<<<<<<<<<<<<< @@ -25881,7 +25881,7 @@ static int __pyx_pf_3_sa_6Phrase_20__cmp__(struct __pyx_obj_3_sa_Phrase *__pyx_v __pyx_L5:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":117 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":117 * elif self.syms[i] > otherp.syms[i]: * return 1 * if self.n < otherp.n: # <<<<<<<<<<<<<< @@ -25891,7 +25891,7 @@ static int __pyx_pf_3_sa_6Phrase_20__cmp__(struct __pyx_obj_3_sa_Phrase *__pyx_v __pyx_t_4 = (__pyx_v_self->n < __pyx_v_otherp->n); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":118 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":118 * return 1 * if self.n < otherp.n: * return -1 # <<<<<<<<<<<<<< @@ -25903,7 +25903,7 @@ static int __pyx_pf_3_sa_6Phrase_20__cmp__(struct __pyx_obj_3_sa_Phrase *__pyx_v goto __pyx_L6; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":119 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":119 * if self.n < otherp.n: * return -1 * elif self.n > otherp.n: # <<<<<<<<<<<<<< @@ -25913,7 +25913,7 @@ static int __pyx_pf_3_sa_6Phrase_20__cmp__(struct __pyx_obj_3_sa_Phrase *__pyx_v __pyx_t_4 = (__pyx_v_self->n > __pyx_v_otherp->n); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":120 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":120 * return -1 * elif self.n > otherp.n: * return 1 # <<<<<<<<<<<<<< @@ -25926,7 +25926,7 @@ static int __pyx_pf_3_sa_6Phrase_20__cmp__(struct __pyx_obj_3_sa_Phrase *__pyx_v } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":122 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":122 * return 1 * else: * return 0 # <<<<<<<<<<<<<< @@ -25961,7 +25961,7 @@ static Py_hash_t __pyx_pw_3_sa_6Phrase_23__hash__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":124 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":124 * return 0 * * def __hash__(self): # <<<<<<<<<<<<<< @@ -25978,7 +25978,7 @@ static Py_hash_t __pyx_pf_3_sa_6Phrase_22__hash__(struct __pyx_obj_3_sa_Phrase * int __pyx_t_2; __Pyx_RefNannySetupContext("__hash__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":127 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":127 * cdef int i * cdef unsigned h * h = 0 # <<<<<<<<<<<<<< @@ -25987,7 +25987,7 @@ static Py_hash_t __pyx_pf_3_sa_6Phrase_22__hash__(struct __pyx_obj_3_sa_Phrase * */ __pyx_v_h = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":128 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":128 * cdef unsigned h * h = 0 * for i from 0 <= i < self.n: # <<<<<<<<<<<<<< @@ -25997,7 +25997,7 @@ static Py_hash_t __pyx_pf_3_sa_6Phrase_22__hash__(struct __pyx_obj_3_sa_Phrase * __pyx_t_1 = __pyx_v_self->n; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_1; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":129 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":129 * h = 0 * for i from 0 <= i < self.n: * if self.syms[i] > 0: # <<<<<<<<<<<<<< @@ -26007,7 +26007,7 @@ static Py_hash_t __pyx_pf_3_sa_6Phrase_22__hash__(struct __pyx_obj_3_sa_Phrase * __pyx_t_2 = ((__pyx_v_self->syms[__pyx_v_i]) > 0); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":130 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":130 * for i from 0 <= i < self.n: * if self.syms[i] > 0: * h = (h << 1) + self.syms[i] # <<<<<<<<<<<<<< @@ -26019,7 +26019,7 @@ static Py_hash_t __pyx_pf_3_sa_6Phrase_22__hash__(struct __pyx_obj_3_sa_Phrase * } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":132 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":132 * h = (h << 1) + self.syms[i] * else: * h = (h << 1) + -self.syms[i] # <<<<<<<<<<<<<< @@ -26031,7 +26031,7 @@ static Py_hash_t __pyx_pf_3_sa_6Phrase_22__hash__(struct __pyx_obj_3_sa_Phrase * __pyx_L5:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":133 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":133 * else: * h = (h << 1) + -self.syms[i] * return h # <<<<<<<<<<<<<< @@ -26059,7 +26059,7 @@ static Py_ssize_t __pyx_pw_3_sa_6Phrase_25__len__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":135 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":135 * return h * * def __len__(self): # <<<<<<<<<<<<<< @@ -26072,7 +26072,7 @@ static Py_ssize_t __pyx_pf_3_sa_6Phrase_24__len__(struct __pyx_obj_3_sa_Phrase * __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__len__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":136 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":136 * * def __len__(self): * return self.n # <<<<<<<<<<<<<< @@ -26099,7 +26099,7 @@ static PyObject *__pyx_pw_3_sa_6Phrase_27__getitem__(PyObject *__pyx_v_self, PyO return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":138 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":138 * return self.n * * def __getitem__(self, i): # <<<<<<<<<<<<<< @@ -26117,7 +26117,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_26__getitem__(struct __pyx_obj_3_sa_Phras int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__getitem__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":139 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":139 * * def __getitem__(self, i): * return self.syms[i] # <<<<<<<<<<<<<< @@ -26156,7 +26156,7 @@ static PyObject *__pyx_pw_3_sa_6Phrase_29__iter__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":141 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":141 * return self.syms[i] * * def __iter__(self): # <<<<<<<<<<<<<< @@ -26218,7 +26218,7 @@ static PyObject *__pyx_gb_3_sa_6Phrase_30generator2(__pyx_GeneratorObject *__pyx __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":143 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":143 * def __iter__(self): * cdef int i * for i from 0 <= i < self.n: # <<<<<<<<<<<<<< @@ -26228,7 +26228,7 @@ static PyObject *__pyx_gb_3_sa_6Phrase_30generator2(__pyx_GeneratorObject *__pyx __pyx_t_1 = __pyx_cur_scope->__pyx_v_self->n; for (__pyx_cur_scope->__pyx_v_i = 0; __pyx_cur_scope->__pyx_v_i < __pyx_t_1; __pyx_cur_scope->__pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":144 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":144 * cdef int i * for i from 0 <= i < self.n: * yield self.syms[i] # <<<<<<<<<<<<<< @@ -26318,7 +26318,7 @@ static PyObject *__pyx_pw_3_sa_6Phrase_32subst(PyObject *__pyx_v_self, PyObject return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":146 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":146 * yield self.syms[i] * * def subst(self, start, children): # <<<<<<<<<<<<<< @@ -26341,7 +26341,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_31subst(struct __pyx_obj_3_sa_Phrase *__p __Pyx_RefNannySetupContext("subst", 0); __Pyx_INCREF(__pyx_v_start); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":148 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":148 * def subst(self, start, children): * cdef int i * for i from 0 <= i < self.n: # <<<<<<<<<<<<<< @@ -26351,7 +26351,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_31subst(struct __pyx_obj_3_sa_Phrase *__p __pyx_t_1 = __pyx_v_self->n; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_1; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":149 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":149 * cdef int i * for i from 0 <= i < self.n: * if sym_isvar(self.syms[i]): # <<<<<<<<<<<<<< @@ -26361,7 +26361,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_31subst(struct __pyx_obj_3_sa_Phrase *__p __pyx_t_2 = __pyx_f_3_sa_sym_isvar((__pyx_v_self->syms[__pyx_v_i])); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":150 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":150 * for i from 0 <= i < self.n: * if sym_isvar(self.syms[i]): * start = start + children[sym_getindex(self.syms[i])-1] # <<<<<<<<<<<<<< @@ -26381,7 +26381,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_31subst(struct __pyx_obj_3_sa_Phrase *__p } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":152 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":152 * start = start + children[sym_getindex(self.syms[i])-1] * else: * start = start + (self.syms[i],) # <<<<<<<<<<<<<< @@ -26405,7 +26405,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_31subst(struct __pyx_obj_3_sa_Phrase *__p __pyx_L5:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":153 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":153 * else: * start = start + (self.syms[i],) * return start # <<<<<<<<<<<<<< @@ -26442,7 +26442,7 @@ static PyObject *__pyx_pw_3_sa_6Phrase_5words_1__get__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":156 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":156 * * property words: * def __get__(self): # <<<<<<<<<<<<<< @@ -26466,7 +26466,7 @@ static PyObject *__pyx_pf_3_sa_6Phrase_5words___get__(struct __pyx_obj_3_sa_Phra int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":157 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":157 * property words: * def __get__(self): * return [sym_tostring(w) for w in self if not sym_isvar(w)] # <<<<<<<<<<<<<< @@ -26561,7 +26561,7 @@ static int __pyx_pw_3_sa_4Rule_1__cinit__(PyObject *__pyx_v_self, PyObject *__py static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__lhs,&__pyx_n_s__f,&__pyx_n_s__e,&__pyx_n_s__scores,&__pyx_n_s__word_alignments,0}; PyObject* values[5] = {0,0,0,0,0}; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":161 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":161 * cdef class Rule: * * def __cinit__(self, int lhs, Phrase f, Phrase e, scores=None, word_alignments=None): # <<<<<<<<<<<<<< @@ -26658,7 +26658,7 @@ static int __pyx_pf_3_sa_4Rule___cinit__(struct __pyx_obj_3_sa_Rule *__pyx_v_sel int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":162 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":162 * * def __cinit__(self, int lhs, Phrase f, Phrase e, scores=None, word_alignments=None): * if not sym_isvar(lhs): raise Exception('Invalid LHS symbol: %d' % lhs) # <<<<<<<<<<<<<< @@ -26687,7 +26687,7 @@ static int __pyx_pf_3_sa_4Rule___cinit__(struct __pyx_obj_3_sa_Rule *__pyx_v_sel } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":163 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":163 * def __cinit__(self, int lhs, Phrase f, Phrase e, scores=None, word_alignments=None): * if not sym_isvar(lhs): raise Exception('Invalid LHS symbol: %d' % lhs) * self.lhs = lhs # <<<<<<<<<<<<<< @@ -26696,7 +26696,7 @@ static int __pyx_pf_3_sa_4Rule___cinit__(struct __pyx_obj_3_sa_Rule *__pyx_v_sel */ __pyx_v_self->lhs = __pyx_v_lhs; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":164 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":164 * if not sym_isvar(lhs): raise Exception('Invalid LHS symbol: %d' % lhs) * self.lhs = lhs * self.f = f # <<<<<<<<<<<<<< @@ -26709,7 +26709,7 @@ static int __pyx_pf_3_sa_4Rule___cinit__(struct __pyx_obj_3_sa_Rule *__pyx_v_sel __Pyx_DECREF(((PyObject *)__pyx_v_self->f)); __pyx_v_self->f = __pyx_v_f; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":165 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":165 * self.lhs = lhs * self.f = f * self.e = e # <<<<<<<<<<<<<< @@ -26722,7 +26722,7 @@ static int __pyx_pf_3_sa_4Rule___cinit__(struct __pyx_obj_3_sa_Rule *__pyx_v_sel __Pyx_DECREF(((PyObject *)__pyx_v_self->e)); __pyx_v_self->e = __pyx_v_e; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":166 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":166 * self.f = f * self.e = e * self.word_alignments = word_alignments # <<<<<<<<<<<<<< @@ -26735,7 +26735,7 @@ static int __pyx_pf_3_sa_4Rule___cinit__(struct __pyx_obj_3_sa_Rule *__pyx_v_sel __Pyx_DECREF(__pyx_v_self->word_alignments); __pyx_v_self->word_alignments = __pyx_v_word_alignments; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":167 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":167 * self.e = e * self.word_alignments = word_alignments * self.scores = scores # <<<<<<<<<<<<<< @@ -26772,7 +26772,7 @@ static Py_hash_t __pyx_pw_3_sa_4Rule_3__hash__(PyObject *__pyx_v_self) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":169 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":169 * self.scores = scores * * def __hash__(self): # <<<<<<<<<<<<<< @@ -26791,7 +26791,7 @@ static Py_hash_t __pyx_pf_3_sa_4Rule_2__hash__(struct __pyx_obj_3_sa_Rule *__pyx int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__hash__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":170 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":170 * * def __hash__(self): * return hash((self.lhs, self.f, self.e)) # <<<<<<<<<<<<<< @@ -26847,7 +26847,7 @@ static int __pyx_pw_3_sa_4Rule_5__cmp__(PyObject *__pyx_v_self, PyObject *__pyx_ } #endif /*!(#if PY_MAJOR_VERSION < 3)*/ -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":172 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":172 * return hash((self.lhs, self.f, self.e)) * * def __cmp__(self, Rule other): # <<<<<<<<<<<<<< @@ -26868,7 +26868,7 @@ static int __pyx_pf_3_sa_4Rule_4__cmp__(struct __pyx_obj_3_sa_Rule *__pyx_v_self int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cmp__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":173 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":173 * * def __cmp__(self, Rule other): * return cmp((self.lhs, self.f, self.e, self.word_alignments), # <<<<<<<<<<<<<< @@ -26892,7 +26892,7 @@ static int __pyx_pf_3_sa_4Rule_4__cmp__(struct __pyx_obj_3_sa_Rule *__pyx_v_self __Pyx_GIVEREF(__pyx_v_self->word_alignments); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":174 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":174 * def __cmp__(self, Rule other): * return cmp((self.lhs, self.f, self.e, self.word_alignments), * (other.lhs, other.f, other.e, self.word_alignments)) # <<<<<<<<<<<<<< @@ -26961,7 +26961,7 @@ static PyObject *__pyx_pw_3_sa_4Rule_7fmerge(PyObject *__pyx_v_self, PyObject *_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":176 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":176 * (other.lhs, other.f, other.e, self.word_alignments)) * * def fmerge(self, Phrase f): # <<<<<<<<<<<<<< @@ -26979,7 +26979,7 @@ static PyObject *__pyx_pf_3_sa_4Rule_6fmerge(struct __pyx_obj_3_sa_Rule *__pyx_v int __pyx_clineno = 0; __Pyx_RefNannySetupContext("fmerge", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":177 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":177 * * def fmerge(self, Phrase f): * if self.f == f: # <<<<<<<<<<<<<< @@ -26991,7 +26991,7 @@ static PyObject *__pyx_pf_3_sa_4Rule_6fmerge(struct __pyx_obj_3_sa_Rule *__pyx_v __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":178 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":178 * def fmerge(self, Phrase f): * if self.f == f: * self.f = f # <<<<<<<<<<<<<< @@ -27030,7 +27030,7 @@ static PyObject *__pyx_pw_3_sa_4Rule_9arity(PyObject *__pyx_v_self, CYTHON_UNUSE return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":180 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":180 * self.f = f * * def arity(self): # <<<<<<<<<<<<<< @@ -27048,7 +27048,7 @@ static PyObject *__pyx_pf_3_sa_4Rule_8arity(struct __pyx_obj_3_sa_Rule *__pyx_v_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("arity", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":181 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":181 * * def arity(self): * return self.f.arity() # <<<<<<<<<<<<<< @@ -27090,7 +27090,7 @@ static PyObject *__pyx_pw_3_sa_4Rule_11__str__(PyObject *__pyx_v_self) { } static PyObject *__pyx_gb_3_sa_4Rule_7__str___2generator11(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":187 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":187 * fields = [sym_tostring(self.lhs), str(self.f), str(self.e), str(self.scores)] * if self.word_alignments is not None: * fields.append(' '.join('%d-%d' % a for a in self.alignments())) # <<<<<<<<<<<<<< @@ -27235,7 +27235,7 @@ static PyObject *__pyx_gb_3_sa_4Rule_7__str___2generator11(__pyx_GeneratorObject return NULL; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":183 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":183 * return self.f.arity() * * def __str__(self): # <<<<<<<<<<<<<< @@ -27269,7 +27269,7 @@ static PyObject *__pyx_pf_3_sa_4Rule_10__str__(struct __pyx_obj_3_sa_Rule *__pyx __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":185 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":185 * def __str__(self): * cdef unsigned i * fields = [sym_tostring(self.lhs), str(self.f), str(self.e), str(self.scores)] # <<<<<<<<<<<<<< @@ -27319,7 +27319,7 @@ static PyObject *__pyx_pf_3_sa_4Rule_10__str__(struct __pyx_obj_3_sa_Rule *__pyx __pyx_v_fields = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":186 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":186 * cdef unsigned i * fields = [sym_tostring(self.lhs), str(self.f), str(self.e), str(self.scores)] * if self.word_alignments is not None: # <<<<<<<<<<<<<< @@ -27329,7 +27329,7 @@ static PyObject *__pyx_pf_3_sa_4Rule_10__str__(struct __pyx_obj_3_sa_Rule *__pyx __pyx_t_6 = (__pyx_cur_scope->__pyx_v_self->word_alignments != Py_None); if (__pyx_t_6) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":187 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":187 * fields = [sym_tostring(self.lhs), str(self.f), str(self.e), str(self.scores)] * if self.word_alignments is not None: * fields.append(' '.join('%d-%d' % a for a in self.alignments())) # <<<<<<<<<<<<<< @@ -27355,7 +27355,7 @@ static PyObject *__pyx_pf_3_sa_4Rule_10__str__(struct __pyx_obj_3_sa_Rule *__pyx } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":188 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":188 * if self.word_alignments is not None: * fields.append(' '.join('%d-%d' % a for a in self.alignments())) * return ' ||| '.join(fields) # <<<<<<<<<<<<<< @@ -27408,7 +27408,7 @@ static PyObject *__pyx_pw_3_sa_4Rule_13alignments(PyObject *__pyx_v_self, CYTHON return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":190 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":190 * return ' ||| '.join(fields) * * def alignments(self): # <<<<<<<<<<<<<< @@ -27474,7 +27474,7 @@ static PyObject *__pyx_gb_3_sa_4Rule_14generator3(__pyx_GeneratorObject *__pyx_g __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":191 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":191 * * def alignments(self): * for point in self.word_alignments: # <<<<<<<<<<<<<< @@ -27520,7 +27520,7 @@ static PyObject *__pyx_gb_3_sa_4Rule_14generator3(__pyx_GeneratorObject *__pyx_g __pyx_cur_scope->__pyx_v_point = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rule.pxi":192 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rule.pxi":192 * def alignments(self): * for point in self.word_alignments: * yield point/65536, point%65536 # <<<<<<<<<<<<<< @@ -27635,7 +27635,7 @@ static PyObject *__pyx_pf_3_sa_4Rule_1e___get__(struct __pyx_obj_3_sa_Rule *__py return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":21 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":21 * int arr_len * * cdef _Trie_Node* new_trie_node(): # <<<<<<<<<<<<<< @@ -27649,7 +27649,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_new_trie_node(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("new_trie_node", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":23 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":23 * cdef _Trie_Node* new_trie_node(): * cdef _Trie_Node* node * node = <_Trie_Node*> malloc(sizeof(_Trie_Node)) # <<<<<<<<<<<<<< @@ -27658,7 +27658,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_new_trie_node(void) { */ __pyx_v_node = ((struct __pyx_t_3_sa__Trie_Node *)malloc((sizeof(struct __pyx_t_3_sa__Trie_Node)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":24 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":24 * cdef _Trie_Node* node * node = <_Trie_Node*> malloc(sizeof(_Trie_Node)) * node.root = NULL # <<<<<<<<<<<<<< @@ -27667,7 +27667,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_new_trie_node(void) { */ __pyx_v_node->root = NULL; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":25 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":25 * node = <_Trie_Node*> malloc(sizeof(_Trie_Node)) * node.root = NULL * node.arr_len = 0 # <<<<<<<<<<<<<< @@ -27676,7 +27676,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_new_trie_node(void) { */ __pyx_v_node->arr_len = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":26 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":26 * node.root = NULL * node.arr_len = 0 * node.arr = malloc(sizeof(0*sizeof(int))) # <<<<<<<<<<<<<< @@ -27685,7 +27685,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_new_trie_node(void) { */ __pyx_v_node->arr = ((int *)malloc((sizeof((0 * (sizeof(int))))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":27 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":27 * node.arr_len = 0 * node.arr = malloc(sizeof(0*sizeof(int))) * return node # <<<<<<<<<<<<<< @@ -27701,7 +27701,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_new_trie_node(void) { return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":29 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":29 * return node * * cdef _Trie_Edge* new_trie_edge(int val): # <<<<<<<<<<<<<< @@ -27715,7 +27715,7 @@ static struct __pyx_t_3_sa__Trie_Edge *__pyx_f_3_sa_new_trie_edge(int __pyx_v_va __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("new_trie_edge", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":31 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":31 * cdef _Trie_Edge* new_trie_edge(int val): * cdef _Trie_Edge* edge * edge = <_Trie_Edge*> malloc(sizeof(_Trie_Edge)) # <<<<<<<<<<<<<< @@ -27724,7 +27724,7 @@ static struct __pyx_t_3_sa__Trie_Edge *__pyx_f_3_sa_new_trie_edge(int __pyx_v_va */ __pyx_v_edge = ((struct __pyx_t_3_sa__Trie_Edge *)malloc((sizeof(struct __pyx_t_3_sa__Trie_Edge)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":32 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":32 * cdef _Trie_Edge* edge * edge = <_Trie_Edge*> malloc(sizeof(_Trie_Edge)) * edge.node = new_trie_node() # <<<<<<<<<<<<<< @@ -27733,7 +27733,7 @@ static struct __pyx_t_3_sa__Trie_Edge *__pyx_f_3_sa_new_trie_edge(int __pyx_v_va */ __pyx_v_edge->node = __pyx_f_3_sa_new_trie_node(); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":33 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":33 * edge = <_Trie_Edge*> malloc(sizeof(_Trie_Edge)) * edge.node = new_trie_node() * edge.bigger = NULL # <<<<<<<<<<<<<< @@ -27742,7 +27742,7 @@ static struct __pyx_t_3_sa__Trie_Edge *__pyx_f_3_sa_new_trie_edge(int __pyx_v_va */ __pyx_v_edge->bigger = NULL; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":34 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":34 * edge.node = new_trie_node() * edge.bigger = NULL * edge.smaller = NULL # <<<<<<<<<<<<<< @@ -27751,7 +27751,7 @@ static struct __pyx_t_3_sa__Trie_Edge *__pyx_f_3_sa_new_trie_edge(int __pyx_v_va */ __pyx_v_edge->smaller = NULL; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":35 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":35 * edge.bigger = NULL * edge.smaller = NULL * edge.val = val # <<<<<<<<<<<<<< @@ -27760,7 +27760,7 @@ static struct __pyx_t_3_sa__Trie_Edge *__pyx_f_3_sa_new_trie_edge(int __pyx_v_va */ __pyx_v_edge->val = __pyx_v_val; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":36 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":36 * edge.smaller = NULL * edge.val = val * return edge # <<<<<<<<<<<<<< @@ -27776,7 +27776,7 @@ static struct __pyx_t_3_sa__Trie_Edge *__pyx_f_3_sa_new_trie_edge(int __pyx_v_va return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":38 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":38 * return edge * * cdef free_trie_node(_Trie_Node* node): # <<<<<<<<<<<<<< @@ -27794,7 +27794,7 @@ static PyObject *__pyx_f_3_sa_free_trie_node(struct __pyx_t_3_sa__Trie_Node *__p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("free_trie_node", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":39 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":39 * * cdef free_trie_node(_Trie_Node* node): * if node != NULL: # <<<<<<<<<<<<<< @@ -27804,7 +27804,7 @@ static PyObject *__pyx_f_3_sa_free_trie_node(struct __pyx_t_3_sa__Trie_Node *__p __pyx_t_1 = (__pyx_v_node != NULL); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":40 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":40 * cdef free_trie_node(_Trie_Node* node): * if node != NULL: * free_trie_edge(node.root) # <<<<<<<<<<<<<< @@ -27815,7 +27815,7 @@ static PyObject *__pyx_f_3_sa_free_trie_node(struct __pyx_t_3_sa__Trie_Node *__p __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":41 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":41 * if node != NULL: * free_trie_edge(node.root) * free(node.arr) # <<<<<<<<<<<<<< @@ -27839,7 +27839,7 @@ static PyObject *__pyx_f_3_sa_free_trie_node(struct __pyx_t_3_sa__Trie_Node *__p return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":43 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":43 * free(node.arr) * * cdef free_trie_edge(_Trie_Edge* edge): # <<<<<<<<<<<<<< @@ -27857,7 +27857,7 @@ static PyObject *__pyx_f_3_sa_free_trie_edge(struct __pyx_t_3_sa__Trie_Edge *__p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("free_trie_edge", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":44 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":44 * * cdef free_trie_edge(_Trie_Edge* edge): * if edge != NULL: # <<<<<<<<<<<<<< @@ -27867,7 +27867,7 @@ static PyObject *__pyx_f_3_sa_free_trie_edge(struct __pyx_t_3_sa__Trie_Edge *__p __pyx_t_1 = (__pyx_v_edge != NULL); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":45 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":45 * cdef free_trie_edge(_Trie_Edge* edge): * if edge != NULL: * free_trie_node(edge.node) # <<<<<<<<<<<<<< @@ -27878,7 +27878,7 @@ static PyObject *__pyx_f_3_sa_free_trie_edge(struct __pyx_t_3_sa__Trie_Edge *__p __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":46 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":46 * if edge != NULL: * free_trie_node(edge.node) * free_trie_edge(edge.bigger) # <<<<<<<<<<<<<< @@ -27889,7 +27889,7 @@ static PyObject *__pyx_f_3_sa_free_trie_edge(struct __pyx_t_3_sa__Trie_Edge *__p __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":47 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":47 * free_trie_node(edge.node) * free_trie_edge(edge.bigger) * free_trie_edge(edge.smaller) # <<<<<<<<<<<<<< @@ -27915,7 +27915,7 @@ static PyObject *__pyx_f_3_sa_free_trie_edge(struct __pyx_t_3_sa__Trie_Edge *__p return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":49 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":49 * free_trie_edge(edge.smaller) * * cdef _Trie_Node* trie_find(_Trie_Node* node, int val): # <<<<<<<<<<<<<< @@ -27932,7 +27932,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_find(struct __pyx_t_3_s int __pyx_t_3; __Pyx_RefNannySetupContext("trie_find", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":51 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":51 * cdef _Trie_Node* trie_find(_Trie_Node* node, int val): * cdef _Trie_Edge* cur * cur = node.root # <<<<<<<<<<<<<< @@ -27941,7 +27941,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_find(struct __pyx_t_3_s */ __pyx_v_cur = __pyx_v_node->root; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":52 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":52 * cdef _Trie_Edge* cur * cur = node.root * while cur != NULL and cur.val != val: # <<<<<<<<<<<<<< @@ -27958,7 +27958,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_find(struct __pyx_t_3_s } if (!__pyx_t_3) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":53 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":53 * cur = node.root * while cur != NULL and cur.val != val: * if val > cur.val: # <<<<<<<<<<<<<< @@ -27968,7 +27968,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_find(struct __pyx_t_3_s __pyx_t_3 = (__pyx_v_val > __pyx_v_cur->val); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":54 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":54 * while cur != NULL and cur.val != val: * if val > cur.val: * cur = cur.bigger # <<<<<<<<<<<<<< @@ -27979,7 +27979,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_find(struct __pyx_t_3_s goto __pyx_L5; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":55 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":55 * if val > cur.val: * cur = cur.bigger * elif val < cur.val: # <<<<<<<<<<<<<< @@ -27989,7 +27989,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_find(struct __pyx_t_3_s __pyx_t_3 = (__pyx_v_val < __pyx_v_cur->val); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":56 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":56 * cur = cur.bigger * elif val < cur.val: * cur = cur.smaller # <<<<<<<<<<<<<< @@ -28002,7 +28002,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_find(struct __pyx_t_3_s __pyx_L5:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":57 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":57 * elif val < cur.val: * cur = cur.smaller * if cur == NULL: # <<<<<<<<<<<<<< @@ -28012,7 +28012,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_find(struct __pyx_t_3_s __pyx_t_3 = (__pyx_v_cur == NULL); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":58 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":58 * cur = cur.smaller * if cur == NULL: * return NULL # <<<<<<<<<<<<<< @@ -28025,7 +28025,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_find(struct __pyx_t_3_s } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":60 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":60 * return NULL * else: * return cur.node # <<<<<<<<<<<<<< @@ -28043,7 +28043,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_find(struct __pyx_t_3_s return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":62 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":62 * return cur.node * * cdef trie_node_data_append(_Trie_Node* node, int val): # <<<<<<<<<<<<<< @@ -28057,7 +28057,7 @@ static PyObject *__pyx_f_3_sa_trie_node_data_append(struct __pyx_t_3_sa__Trie_No __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("trie_node_data_append", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":64 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":64 * cdef trie_node_data_append(_Trie_Node* node, int val): * cdef int new_len * new_len = node.arr_len + 1 # <<<<<<<<<<<<<< @@ -28066,7 +28066,7 @@ static PyObject *__pyx_f_3_sa_trie_node_data_append(struct __pyx_t_3_sa__Trie_No */ __pyx_v_new_len = (__pyx_v_node->arr_len + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":65 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":65 * cdef int new_len * new_len = node.arr_len + 1 * node.arr = realloc(node.arr, new_len*sizeof(int)) # <<<<<<<<<<<<<< @@ -28075,7 +28075,7 @@ static PyObject *__pyx_f_3_sa_trie_node_data_append(struct __pyx_t_3_sa__Trie_No */ __pyx_v_node->arr = ((int *)realloc(__pyx_v_node->arr, (__pyx_v_new_len * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":66 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":66 * new_len = node.arr_len + 1 * node.arr = realloc(node.arr, new_len*sizeof(int)) * node.arr[node.arr_len] = val # <<<<<<<<<<<<<< @@ -28084,7 +28084,7 @@ static PyObject *__pyx_f_3_sa_trie_node_data_append(struct __pyx_t_3_sa__Trie_No */ (__pyx_v_node->arr[__pyx_v_node->arr_len]) = __pyx_v_val; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":67 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":67 * node.arr = realloc(node.arr, new_len*sizeof(int)) * node.arr[node.arr_len] = val * node.arr_len = new_len # <<<<<<<<<<<<<< @@ -28099,7 +28099,7 @@ static PyObject *__pyx_f_3_sa_trie_node_data_append(struct __pyx_t_3_sa__Trie_No return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":69 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":69 * node.arr_len = new_len * * cdef trie_node_data_extend(_Trie_Node* node, int* vals, int num_vals): # <<<<<<<<<<<<<< @@ -28113,7 +28113,7 @@ static PyObject *__pyx_f_3_sa_trie_node_data_extend(struct __pyx_t_3_sa__Trie_No __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("trie_node_data_extend", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":71 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":71 * cdef trie_node_data_extend(_Trie_Node* node, int* vals, int num_vals): * cdef int new_len * new_len = node.arr_len + num_vals # <<<<<<<<<<<<<< @@ -28122,7 +28122,7 @@ static PyObject *__pyx_f_3_sa_trie_node_data_extend(struct __pyx_t_3_sa__Trie_No */ __pyx_v_new_len = (__pyx_v_node->arr_len + __pyx_v_num_vals); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":72 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":72 * cdef int new_len * new_len = node.arr_len + num_vals * node.arr = realloc(node.arr, new_len*sizeof(int)) # <<<<<<<<<<<<<< @@ -28131,7 +28131,7 @@ static PyObject *__pyx_f_3_sa_trie_node_data_extend(struct __pyx_t_3_sa__Trie_No */ __pyx_v_node->arr = ((int *)realloc(__pyx_v_node->arr, (__pyx_v_new_len * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":73 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":73 * new_len = node.arr_len + num_vals * node.arr = realloc(node.arr, new_len*sizeof(int)) * memcpy(node.arr + node.arr_len, vals, num_vals*sizeof(int)) # <<<<<<<<<<<<<< @@ -28140,7 +28140,7 @@ static PyObject *__pyx_f_3_sa_trie_node_data_extend(struct __pyx_t_3_sa__Trie_No */ memcpy((__pyx_v_node->arr + __pyx_v_node->arr_len), __pyx_v_vals, (__pyx_v_num_vals * (sizeof(int)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":74 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":74 * node.arr = realloc(node.arr, new_len*sizeof(int)) * memcpy(node.arr + node.arr_len, vals, num_vals*sizeof(int)) * node.arr_len = new_len # <<<<<<<<<<<<<< @@ -28155,7 +28155,7 @@ static PyObject *__pyx_f_3_sa_trie_node_data_extend(struct __pyx_t_3_sa__Trie_No return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":77 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":77 * * * cdef _Trie_Node* trie_insert(_Trie_Node* node, int val): # <<<<<<<<<<<<<< @@ -28172,7 +28172,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_insert(struct __pyx_t_3 int __pyx_t_3; __Pyx_RefNannySetupContext("trie_insert", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":79 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":79 * cdef _Trie_Node* trie_insert(_Trie_Node* node, int val): * cdef _Trie_Edge** cur * cur = &node.root # <<<<<<<<<<<<<< @@ -28181,7 +28181,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_insert(struct __pyx_t_3 */ __pyx_v_cur = (&__pyx_v_node->root); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":80 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":80 * cdef _Trie_Edge** cur * cur = &node.root * while cur[0] != NULL and cur[0].val != val: # <<<<<<<<<<<<<< @@ -28198,7 +28198,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_insert(struct __pyx_t_3 } if (!__pyx_t_3) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":81 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":81 * cur = &node.root * while cur[0] != NULL and cur[0].val != val: * if val > cur[0].val: # <<<<<<<<<<<<<< @@ -28208,7 +28208,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_insert(struct __pyx_t_3 __pyx_t_3 = (__pyx_v_val > (__pyx_v_cur[0])->val); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":82 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":82 * while cur[0] != NULL and cur[0].val != val: * if val > cur[0].val: * cur = &cur[0].bigger # <<<<<<<<<<<<<< @@ -28219,7 +28219,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_insert(struct __pyx_t_3 goto __pyx_L5; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":83 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":83 * if val > cur[0].val: * cur = &cur[0].bigger * elif val < cur[0].val: # <<<<<<<<<<<<<< @@ -28229,7 +28229,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_insert(struct __pyx_t_3 __pyx_t_3 = (__pyx_v_val < (__pyx_v_cur[0])->val); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":84 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":84 * cur = &cur[0].bigger * elif val < cur[0].val: * cur = &cur[0].smaller # <<<<<<<<<<<<<< @@ -28242,7 +28242,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_insert(struct __pyx_t_3 __pyx_L5:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":85 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":85 * elif val < cur[0].val: * cur = &cur[0].smaller * if cur[0] == NULL: # <<<<<<<<<<<<<< @@ -28252,7 +28252,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_insert(struct __pyx_t_3 __pyx_t_3 = ((__pyx_v_cur[0]) == NULL); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":86 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":86 * cur = &cur[0].smaller * if cur[0] == NULL: * cur[0] = new_trie_edge(val) # <<<<<<<<<<<<<< @@ -28264,7 +28264,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_insert(struct __pyx_t_3 } __pyx_L6:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":87 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":87 * if cur[0] == NULL: * cur[0] = new_trie_edge(val) * return cur[0].node # <<<<<<<<<<<<<< @@ -28280,7 +28280,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_trie_insert(struct __pyx_t_3 return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":89 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":89 * return cur[0].node * * cdef trie_node_to_map(_Trie_Node* node, result, prefix, int include_zeros): # <<<<<<<<<<<<<< @@ -28300,7 +28300,7 @@ static PyObject *__pyx_f_3_sa_trie_node_to_map(struct __pyx_t_3_sa__Trie_Node *_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("trie_node_to_map", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":92 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":92 * cdef IntList arr * * if include_zeros or node.arr_len > 0: # <<<<<<<<<<<<<< @@ -28315,7 +28315,7 @@ static PyObject *__pyx_f_3_sa_trie_node_to_map(struct __pyx_t_3_sa__Trie_Node *_ } if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":93 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":93 * * if include_zeros or node.arr_len > 0: * arr = IntList() # <<<<<<<<<<<<<< @@ -28327,7 +28327,7 @@ static PyObject *__pyx_f_3_sa_trie_node_to_map(struct __pyx_t_3_sa__Trie_Node *_ __pyx_v_arr = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":94 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":94 * if include_zeros or node.arr_len > 0: * arr = IntList() * free(arr.arr) # <<<<<<<<<<<<<< @@ -28336,7 +28336,7 @@ static PyObject *__pyx_f_3_sa_trie_node_to_map(struct __pyx_t_3_sa__Trie_Node *_ */ free(__pyx_v_arr->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":95 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":95 * arr = IntList() * free(arr.arr) * arr.arr = malloc(node.arr_len * sizeof(int)) # <<<<<<<<<<<<<< @@ -28345,7 +28345,7 @@ static PyObject *__pyx_f_3_sa_trie_node_to_map(struct __pyx_t_3_sa__Trie_Node *_ */ __pyx_v_arr->arr = ((int *)malloc((__pyx_v_node->arr_len * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":96 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":96 * free(arr.arr) * arr.arr = malloc(node.arr_len * sizeof(int)) * memcpy(arr.arr, node.arr, node.arr_len * sizeof(int)) # <<<<<<<<<<<<<< @@ -28354,7 +28354,7 @@ static PyObject *__pyx_f_3_sa_trie_node_to_map(struct __pyx_t_3_sa__Trie_Node *_ */ memcpy(__pyx_v_arr->arr, __pyx_v_node->arr, (__pyx_v_node->arr_len * (sizeof(int)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":97 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":97 * arr.arr = malloc(node.arr_len * sizeof(int)) * memcpy(arr.arr, node.arr, node.arr_len * sizeof(int)) * arr.len = node.arr_len # <<<<<<<<<<<<<< @@ -28363,7 +28363,7 @@ static PyObject *__pyx_f_3_sa_trie_node_to_map(struct __pyx_t_3_sa__Trie_Node *_ */ __pyx_v_arr->len = __pyx_v_node->arr_len; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":98 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":98 * memcpy(arr.arr, node.arr, node.arr_len * sizeof(int)) * arr.len = node.arr_len * arr.size = node.arr_len # <<<<<<<<<<<<<< @@ -28372,7 +28372,7 @@ static PyObject *__pyx_f_3_sa_trie_node_to_map(struct __pyx_t_3_sa__Trie_Node *_ */ __pyx_v_arr->size = __pyx_v_node->arr_len; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":99 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":99 * arr.len = node.arr_len * arr.size = node.arr_len * result[prefix] = arr # <<<<<<<<<<<<<< @@ -28384,7 +28384,7 @@ static PyObject *__pyx_f_3_sa_trie_node_to_map(struct __pyx_t_3_sa__Trie_Node *_ } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":100 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":100 * arr.size = node.arr_len * result[prefix] = arr * trie_edge_to_map(node.root, result, prefix, include_zeros) # <<<<<<<<<<<<<< @@ -28408,7 +28408,7 @@ static PyObject *__pyx_f_3_sa_trie_node_to_map(struct __pyx_t_3_sa__Trie_Node *_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":102 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":102 * trie_edge_to_map(node.root, result, prefix, include_zeros) * * cdef trie_edge_to_map(_Trie_Edge* edge, result, prefix, int include_zeros): # <<<<<<<<<<<<<< @@ -28428,7 +28428,7 @@ static PyObject *__pyx_f_3_sa_trie_edge_to_map(struct __pyx_t_3_sa__Trie_Edge *_ __Pyx_RefNannySetupContext("trie_edge_to_map", 0); __Pyx_INCREF(__pyx_v_prefix); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":103 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":103 * * cdef trie_edge_to_map(_Trie_Edge* edge, result, prefix, int include_zeros): * if edge != NULL: # <<<<<<<<<<<<<< @@ -28438,7 +28438,7 @@ static PyObject *__pyx_f_3_sa_trie_edge_to_map(struct __pyx_t_3_sa__Trie_Edge *_ __pyx_t_1 = (__pyx_v_edge != NULL); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":104 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":104 * cdef trie_edge_to_map(_Trie_Edge* edge, result, prefix, int include_zeros): * if edge != NULL: * trie_edge_to_map(edge.smaller, result, prefix, include_zeros) # <<<<<<<<<<<<<< @@ -28449,7 +28449,7 @@ static PyObject *__pyx_f_3_sa_trie_edge_to_map(struct __pyx_t_3_sa__Trie_Edge *_ __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":105 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":105 * if edge != NULL: * trie_edge_to_map(edge.smaller, result, prefix, include_zeros) * trie_edge_to_map(edge.bigger, result, prefix, include_zeros) # <<<<<<<<<<<<<< @@ -28460,7 +28460,7 @@ static PyObject *__pyx_f_3_sa_trie_edge_to_map(struct __pyx_t_3_sa__Trie_Edge *_ __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":106 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":106 * trie_edge_to_map(edge.smaller, result, prefix, include_zeros) * trie_edge_to_map(edge.bigger, result, prefix, include_zeros) * prefix = prefix + (edge.val,) # <<<<<<<<<<<<<< @@ -28481,7 +28481,7 @@ static PyObject *__pyx_f_3_sa_trie_edge_to_map(struct __pyx_t_3_sa__Trie_Edge *_ __pyx_v_prefix = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":107 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":107 * trie_edge_to_map(edge.bigger, result, prefix, include_zeros) * prefix = prefix + (edge.val,) * trie_node_to_map(edge.node, result, prefix, include_zeros) # <<<<<<<<<<<<<< @@ -28556,7 +28556,7 @@ static int __pyx_pw_3_sa_7TrieMap_1__cinit__(PyObject *__pyx_v_self, PyObject *_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":114 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":114 * cdef int V * * def __cinit__(self, int alphabet_size): # <<<<<<<<<<<<<< @@ -28569,7 +28569,7 @@ static int __pyx_pf_3_sa_7TrieMap___cinit__(struct __pyx_obj_3_sa_TrieMap *__pyx __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":115 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":115 * * def __cinit__(self, int alphabet_size): * self.V = alphabet_size # <<<<<<<<<<<<<< @@ -28578,7 +28578,7 @@ static int __pyx_pf_3_sa_7TrieMap___cinit__(struct __pyx_obj_3_sa_TrieMap *__pyx */ __pyx_v_self->V = __pyx_v_alphabet_size; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":116 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":116 * def __cinit__(self, int alphabet_size): * self.V = alphabet_size * self.root = <_Trie_Node**> malloc(self.V * sizeof(_Trie_Node*)) # <<<<<<<<<<<<<< @@ -28587,7 +28587,7 @@ static int __pyx_pf_3_sa_7TrieMap___cinit__(struct __pyx_obj_3_sa_TrieMap *__pyx */ __pyx_v_self->root = ((struct __pyx_t_3_sa__Trie_Node **)malloc((__pyx_v_self->V * (sizeof(struct __pyx_t_3_sa__Trie_Node *))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":117 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":117 * self.V = alphabet_size * self.root = <_Trie_Node**> malloc(self.V * sizeof(_Trie_Node*)) * memset(self.root, 0, self.V * sizeof(_Trie_Node*)) # <<<<<<<<<<<<<< @@ -28610,7 +28610,7 @@ static void __pyx_pw_3_sa_7TrieMap_3__dealloc__(PyObject *__pyx_v_self) { __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":120 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":120 * * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -28629,7 +28629,7 @@ static void __pyx_pf_3_sa_7TrieMap_2__dealloc__(struct __pyx_obj_3_sa_TrieMap *_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":122 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":122 * def __dealloc__(self): * cdef int i * for i from 0 <= i < self.V: # <<<<<<<<<<<<<< @@ -28639,7 +28639,7 @@ static void __pyx_pf_3_sa_7TrieMap_2__dealloc__(struct __pyx_obj_3_sa_TrieMap *_ __pyx_t_1 = __pyx_v_self->V; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_1; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":123 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":123 * cdef int i * for i from 0 <= i < self.V: * if self.root[i] != NULL: # <<<<<<<<<<<<<< @@ -28649,7 +28649,7 @@ static void __pyx_pf_3_sa_7TrieMap_2__dealloc__(struct __pyx_obj_3_sa_TrieMap *_ __pyx_t_2 = ((__pyx_v_self->root[__pyx_v_i]) != NULL); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":124 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":124 * for i from 0 <= i < self.V: * if self.root[i] != NULL: * free_trie_node(self.root[i]) # <<<<<<<<<<<<<< @@ -28664,7 +28664,7 @@ static void __pyx_pf_3_sa_7TrieMap_2__dealloc__(struct __pyx_obj_3_sa_TrieMap *_ __pyx_L5:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":125 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":125 * if self.root[i] != NULL: * free_trie_node(self.root[i]) * free(self.root) # <<<<<<<<<<<<<< @@ -28692,7 +28692,7 @@ static PyObject *__pyx_pw_3_sa_7TrieMap_5insert(PyObject *__pyx_v_self, PyObject return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":128 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":128 * * * def insert(self, pattern): # <<<<<<<<<<<<<< @@ -28715,7 +28715,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_4insert(struct __pyx_obj_3_sa_TrieMap *_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("insert", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":131 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":131 * cdef int* p * cdef int i, l * l = len(pattern) # <<<<<<<<<<<<<< @@ -28725,7 +28725,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_4insert(struct __pyx_obj_3_sa_TrieMap *_ __pyx_t_1 = PyObject_Length(__pyx_v_pattern); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_l = __pyx_t_1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":132 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":132 * cdef int i, l * l = len(pattern) * p = malloc(l*sizeof(int)) # <<<<<<<<<<<<<< @@ -28734,7 +28734,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_4insert(struct __pyx_obj_3_sa_TrieMap *_ */ __pyx_v_p = ((int *)malloc((__pyx_v_l * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":133 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":133 * l = len(pattern) * p = malloc(l*sizeof(int)) * for i from 0 <= i < l: # <<<<<<<<<<<<<< @@ -28744,7 +28744,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_4insert(struct __pyx_obj_3_sa_TrieMap *_ __pyx_t_2 = __pyx_v_l; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_2; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":134 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":134 * p = malloc(l*sizeof(int)) * for i from 0 <= i < l: * p[i] = pattern[i] # <<<<<<<<<<<<<< @@ -28758,7 +28758,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_4insert(struct __pyx_obj_3_sa_TrieMap *_ (__pyx_v_p[__pyx_v_i]) = __pyx_t_4; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":135 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":135 * for i from 0 <= i < l: * p[i] = pattern[i] * self._insert(p,l) # <<<<<<<<<<<<<< @@ -28767,7 +28767,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_4insert(struct __pyx_obj_3_sa_TrieMap *_ */ ((struct __pyx_vtabstruct_3_sa_TrieMap *)__pyx_v_self->__pyx_vtab)->_insert(__pyx_v_self, __pyx_v_p, __pyx_v_l); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":136 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":136 * p[i] = pattern[i] * self._insert(p,l) * free(p) # <<<<<<<<<<<<<< @@ -28788,7 +28788,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_4insert(struct __pyx_obj_3_sa_TrieMap *_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":139 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":139 * * * cdef _Trie_Node* _insert(self, int* pattern, int pattern_len): # <<<<<<<<<<<<<< @@ -28805,7 +28805,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_7TrieMap__insert(struct __py int __pyx_t_2; __Pyx_RefNannySetupContext("_insert", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":142 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":142 * cdef int i * cdef _Trie_Node* node * if self.root[pattern[0]] == NULL: # <<<<<<<<<<<<<< @@ -28815,7 +28815,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_7TrieMap__insert(struct __py __pyx_t_1 = ((__pyx_v_self->root[(__pyx_v_pattern[0])]) == NULL); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":143 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":143 * cdef _Trie_Node* node * if self.root[pattern[0]] == NULL: * self.root[pattern[0]] = new_trie_node() # <<<<<<<<<<<<<< @@ -28827,7 +28827,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_7TrieMap__insert(struct __py } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":144 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":144 * if self.root[pattern[0]] == NULL: * self.root[pattern[0]] = new_trie_node() * node = self.root[pattern[0]] # <<<<<<<<<<<<<< @@ -28836,7 +28836,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_7TrieMap__insert(struct __py */ __pyx_v_node = (__pyx_v_self->root[(__pyx_v_pattern[0])]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":145 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":145 * self.root[pattern[0]] = new_trie_node() * node = self.root[pattern[0]] * for i from 1 <= i < pattern_len: # <<<<<<<<<<<<<< @@ -28846,7 +28846,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_7TrieMap__insert(struct __py __pyx_t_2 = __pyx_v_pattern_len; for (__pyx_v_i = 1; __pyx_v_i < __pyx_t_2; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":146 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":146 * node = self.root[pattern[0]] * for i from 1 <= i < pattern_len: * node = trie_insert(node, pattern[i]) # <<<<<<<<<<<<<< @@ -28856,7 +28856,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_7TrieMap__insert(struct __py __pyx_v_node = __pyx_f_3_sa_trie_insert(__pyx_v_node, (__pyx_v_pattern[__pyx_v_i])); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":147 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":147 * for i from 1 <= i < pattern_len: * node = trie_insert(node, pattern[i]) * return node # <<<<<<<<<<<<<< @@ -28883,7 +28883,7 @@ static PyObject *__pyx_pw_3_sa_7TrieMap_7contains(PyObject *__pyx_v_self, PyObje return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":149 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":149 * return node * * def contains(self, pattern): # <<<<<<<<<<<<<< @@ -28908,7 +28908,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_6contains(struct __pyx_obj_3_sa_TrieMap int __pyx_clineno = 0; __Pyx_RefNannySetupContext("contains", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":153 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":153 * cdef int i, l * cdef _Trie_Node* node * l = len(pattern) # <<<<<<<<<<<<<< @@ -28918,7 +28918,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_6contains(struct __pyx_obj_3_sa_TrieMap __pyx_t_1 = PyObject_Length(__pyx_v_pattern); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_l = __pyx_t_1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":154 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":154 * cdef _Trie_Node* node * l = len(pattern) * p = malloc(l*sizeof(int)) # <<<<<<<<<<<<<< @@ -28927,7 +28927,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_6contains(struct __pyx_obj_3_sa_TrieMap */ __pyx_v_p = ((int *)malloc((__pyx_v_l * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":155 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":155 * l = len(pattern) * p = malloc(l*sizeof(int)) * for i from 0 <= i < l: # <<<<<<<<<<<<<< @@ -28937,7 +28937,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_6contains(struct __pyx_obj_3_sa_TrieMap __pyx_t_2 = __pyx_v_l; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_2; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":156 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":156 * p = malloc(l*sizeof(int)) * for i from 0 <= i < l: * p[i] = pattern[i] # <<<<<<<<<<<<<< @@ -28951,7 +28951,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_6contains(struct __pyx_obj_3_sa_TrieMap (__pyx_v_p[__pyx_v_i]) = __pyx_t_4; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":157 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":157 * for i from 0 <= i < l: * p[i] = pattern[i] * node = self._contains(p,l) # <<<<<<<<<<<<<< @@ -28960,7 +28960,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_6contains(struct __pyx_obj_3_sa_TrieMap */ __pyx_v_node = ((struct __pyx_vtabstruct_3_sa_TrieMap *)__pyx_v_self->__pyx_vtab)->_contains(__pyx_v_self, __pyx_v_p, __pyx_v_l); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":158 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":158 * p[i] = pattern[i] * node = self._contains(p,l) * free(p) # <<<<<<<<<<<<<< @@ -28969,7 +28969,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_6contains(struct __pyx_obj_3_sa_TrieMap */ free(__pyx_v_p); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":159 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":159 * node = self._contains(p,l) * free(p) * if node == NULL: # <<<<<<<<<<<<<< @@ -28979,7 +28979,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_6contains(struct __pyx_obj_3_sa_TrieMap __pyx_t_5 = (__pyx_v_node == NULL); if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":160 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":160 * free(p) * if node == NULL: * return False # <<<<<<<<<<<<<< @@ -28996,7 +28996,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_6contains(struct __pyx_obj_3_sa_TrieMap } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":162 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":162 * return False * else: * return True # <<<<<<<<<<<<<< @@ -29024,7 +29024,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_6contains(struct __pyx_obj_3_sa_TrieMap return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":164 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":164 * return True * * cdef _Trie_Node* _contains(self, int* pattern, int pattern_len): # <<<<<<<<<<<<<< @@ -29042,7 +29042,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_7TrieMap__contains(struct __ int __pyx_t_3; __Pyx_RefNannySetupContext("_contains", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":167 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":167 * cdef int i * cdef _Trie_Node* node * node = self.root[pattern[0]] # <<<<<<<<<<<<<< @@ -29051,7 +29051,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_7TrieMap__contains(struct __ */ __pyx_v_node = (__pyx_v_self->root[(__pyx_v_pattern[0])]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":168 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":168 * cdef _Trie_Node* node * node = self.root[pattern[0]] * i = 1 # <<<<<<<<<<<<<< @@ -29060,7 +29060,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_7TrieMap__contains(struct __ */ __pyx_v_i = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":169 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":169 * node = self.root[pattern[0]] * i = 1 * while node != NULL and i < pattern_len: # <<<<<<<<<<<<<< @@ -29077,7 +29077,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_7TrieMap__contains(struct __ } if (!__pyx_t_3) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":170 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":170 * i = 1 * while node != NULL and i < pattern_len: * node = trie_find(node, pattern[i]) # <<<<<<<<<<<<<< @@ -29086,7 +29086,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_7TrieMap__contains(struct __ */ __pyx_v_node = __pyx_f_3_sa_trie_find(__pyx_v_node, (__pyx_v_pattern[__pyx_v_i])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":171 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":171 * while node != NULL and i < pattern_len: * node = trie_find(node, pattern[i]) * i = i+1 # <<<<<<<<<<<<<< @@ -29096,7 +29096,7 @@ static struct __pyx_t_3_sa__Trie_Node *__pyx_f_3_sa_7TrieMap__contains(struct __ __pyx_v_i = (__pyx_v_i + 1); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":172 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":172 * node = trie_find(node, pattern[i]) * i = i+1 * return node # <<<<<<<<<<<<<< @@ -29123,7 +29123,7 @@ static PyObject *__pyx_pw_3_sa_7TrieMap_9toMap(PyObject *__pyx_v_self, PyObject return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":174 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":174 * return node * * def toMap(self, flag): # <<<<<<<<<<<<<< @@ -29146,7 +29146,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_8toMap(struct __pyx_obj_3_sa_TrieMap *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("toMap", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":177 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":177 * cdef int i, include_zeros * * if flag: # <<<<<<<<<<<<<< @@ -29156,7 +29156,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_8toMap(struct __pyx_obj_3_sa_TrieMap *__ __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_flag); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":178 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":178 * * if flag: * include_zeros=1 # <<<<<<<<<<<<<< @@ -29168,7 +29168,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_8toMap(struct __pyx_obj_3_sa_TrieMap *__ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":180 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":180 * include_zeros=1 * else: * include_zeros=0 # <<<<<<<<<<<<<< @@ -29179,7 +29179,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_8toMap(struct __pyx_obj_3_sa_TrieMap *__ } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":181 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":181 * else: * include_zeros=0 * result = {} # <<<<<<<<<<<<<< @@ -29191,7 +29191,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_8toMap(struct __pyx_obj_3_sa_TrieMap *__ __pyx_v_result = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":182 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":182 * include_zeros=0 * result = {} * for i from 0 <= i < self.V: # <<<<<<<<<<<<<< @@ -29201,7 +29201,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_8toMap(struct __pyx_obj_3_sa_TrieMap *__ __pyx_t_3 = __pyx_v_self->V; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_3; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":183 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":183 * result = {} * for i from 0 <= i < self.V: * if self.root[i] != NULL: # <<<<<<<<<<<<<< @@ -29211,7 +29211,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_8toMap(struct __pyx_obj_3_sa_TrieMap *__ __pyx_t_1 = ((__pyx_v_self->root[__pyx_v_i]) != NULL); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":184 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":184 * for i from 0 <= i < self.V: * if self.root[i] != NULL: * trie_node_to_map(self.root[i], result, (i,), include_zeros) # <<<<<<<<<<<<<< @@ -29234,7 +29234,7 @@ static PyObject *__pyx_pf_3_sa_7TrieMap_8toMap(struct __pyx_obj_3_sa_TrieMap *__ __pyx_L6:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":185 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":185 * if self.root[i] != NULL: * trie_node_to_map(self.root[i], result, (i,), include_zeros) * return result # <<<<<<<<<<<<<< @@ -29279,7 +29279,7 @@ static int __pyx_pw_3_sa_14Precomputation_1__cinit__(PyObject *__pyx_v_self, PyO static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__fsarray,&__pyx_n_s__from_stats,&__pyx_n_s__from_binary,&__pyx_n_s__precompute_rank,&__pyx_n_s_70,&__pyx_n_s__max_length,&__pyx_n_s__max_nonterminals,&__pyx_n_s_71,&__pyx_n_s__train_min_gap_size,0}; PyObject* values[9] = {0,0,0,0,0,0,0,0,0}; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":200 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":200 * cdef write_map(self, m, FILE* f) * * def __cinit__(self, fsarray=None, from_stats=None, from_binary=None, # <<<<<<<<<<<<<< @@ -29413,7 +29413,7 @@ static int __pyx_pf_3_sa_14Precomputation___cinit__(struct __pyx_obj_3_sa_Precom int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":204 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":204 * max_length=5, max_nonterminals=2, * train_max_initial_size=10, train_min_gap_size=2): * self.precompute_rank = precompute_rank # <<<<<<<<<<<<<< @@ -29423,7 +29423,7 @@ static int __pyx_pf_3_sa_14Precomputation___cinit__(struct __pyx_obj_3_sa_Precom __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_v_precompute_rank); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 204; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_self->precompute_rank = __pyx_t_1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":205 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":205 * train_max_initial_size=10, train_min_gap_size=2): * self.precompute_rank = precompute_rank * self.precompute_secondary_rank = precompute_secondary_rank # <<<<<<<<<<<<<< @@ -29433,7 +29433,7 @@ static int __pyx_pf_3_sa_14Precomputation___cinit__(struct __pyx_obj_3_sa_Precom __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_v_precompute_secondary_rank); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 205; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_self->precompute_secondary_rank = __pyx_t_1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":206 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":206 * self.precompute_rank = precompute_rank * self.precompute_secondary_rank = precompute_secondary_rank * self.max_length = max_length # <<<<<<<<<<<<<< @@ -29443,7 +29443,7 @@ static int __pyx_pf_3_sa_14Precomputation___cinit__(struct __pyx_obj_3_sa_Precom __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_v_max_length); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 206; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_self->max_length = __pyx_t_1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":207 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":207 * self.precompute_secondary_rank = precompute_secondary_rank * self.max_length = max_length * self.max_nonterminals = max_nonterminals # <<<<<<<<<<<<<< @@ -29453,7 +29453,7 @@ static int __pyx_pf_3_sa_14Precomputation___cinit__(struct __pyx_obj_3_sa_Precom __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_v_max_nonterminals); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 207; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_self->max_nonterminals = __pyx_t_1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":208 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":208 * self.max_length = max_length * self.max_nonterminals = max_nonterminals * self.train_max_initial_size = train_max_initial_size # <<<<<<<<<<<<<< @@ -29463,7 +29463,7 @@ static int __pyx_pf_3_sa_14Precomputation___cinit__(struct __pyx_obj_3_sa_Precom __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_v_train_max_initial_size); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_self->train_max_initial_size = __pyx_t_1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":209 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":209 * self.max_nonterminals = max_nonterminals * self.train_max_initial_size = train_max_initial_size * self.train_min_gap_size = train_min_gap_size # <<<<<<<<<<<<<< @@ -29473,7 +29473,7 @@ static int __pyx_pf_3_sa_14Precomputation___cinit__(struct __pyx_obj_3_sa_Precom __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_v_train_min_gap_size); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 209; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_self->train_min_gap_size = __pyx_t_1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":210 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":210 * self.train_max_initial_size = train_max_initial_size * self.train_min_gap_size = train_min_gap_size * if from_binary: # <<<<<<<<<<<<<< @@ -29483,7 +29483,7 @@ static int __pyx_pf_3_sa_14Precomputation___cinit__(struct __pyx_obj_3_sa_Precom __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_from_binary); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":211 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":211 * self.train_min_gap_size = train_min_gap_size * if from_binary: * self.read_binary(from_binary) # <<<<<<<<<<<<<< @@ -29505,7 +29505,7 @@ static int __pyx_pf_3_sa_14Precomputation___cinit__(struct __pyx_obj_3_sa_Precom goto __pyx_L3; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":212 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":212 * if from_binary: * self.read_binary(from_binary) * elif from_stats: # <<<<<<<<<<<<<< @@ -29515,7 +29515,7 @@ static int __pyx_pf_3_sa_14Precomputation___cinit__(struct __pyx_obj_3_sa_Precom __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_from_stats); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 212; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":213 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":213 * self.read_binary(from_binary) * elif from_stats: * self.precompute(from_stats, fsarray) # <<<<<<<<<<<<<< @@ -29575,7 +29575,7 @@ static PyObject *__pyx_pw_3_sa_14Precomputation_3read_binary(PyObject *__pyx_v_s return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":216 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":216 * * * def read_binary(self, char* filename): # <<<<<<<<<<<<<< @@ -29593,7 +29593,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_2read_binary(struct __pyx_obj_3_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("read_binary", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":218 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":218 * def read_binary(self, char* filename): * cdef FILE* f * f = fopen(filename, "r") # <<<<<<<<<<<<<< @@ -29602,7 +29602,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_2read_binary(struct __pyx_obj_3_ */ __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__r); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":219 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":219 * cdef FILE* f * f = fopen(filename, "r") * fread(&(self.precompute_rank), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -29611,7 +29611,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_2read_binary(struct __pyx_obj_3_ */ fread((&__pyx_v_self->precompute_rank), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":220 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":220 * f = fopen(filename, "r") * fread(&(self.precompute_rank), sizeof(int), 1, f) * fread(&(self.precompute_secondary_rank), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -29620,7 +29620,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_2read_binary(struct __pyx_obj_3_ */ fread((&__pyx_v_self->precompute_secondary_rank), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":221 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":221 * fread(&(self.precompute_rank), sizeof(int), 1, f) * fread(&(self.precompute_secondary_rank), sizeof(int), 1, f) * fread(&(self.max_length), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -29629,7 +29629,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_2read_binary(struct __pyx_obj_3_ */ fread((&__pyx_v_self->max_length), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":222 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":222 * fread(&(self.precompute_secondary_rank), sizeof(int), 1, f) * fread(&(self.max_length), sizeof(int), 1, f) * fread(&(self.max_nonterminals), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -29638,7 +29638,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_2read_binary(struct __pyx_obj_3_ */ fread((&__pyx_v_self->max_nonterminals), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":223 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":223 * fread(&(self.max_length), sizeof(int), 1, f) * fread(&(self.max_nonterminals), sizeof(int), 1, f) * fread(&(self.train_max_initial_size), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -29647,7 +29647,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_2read_binary(struct __pyx_obj_3_ */ fread((&__pyx_v_self->train_max_initial_size), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":224 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":224 * fread(&(self.max_nonterminals), sizeof(int), 1, f) * fread(&(self.train_max_initial_size), sizeof(int), 1, f) * fread(&(self.train_min_gap_size), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -29656,7 +29656,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_2read_binary(struct __pyx_obj_3_ */ fread((&__pyx_v_self->train_min_gap_size), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":225 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":225 * fread(&(self.train_max_initial_size), sizeof(int), 1, f) * fread(&(self.train_min_gap_size), sizeof(int), 1, f) * self.precomputed_index = self.read_map(f) # <<<<<<<<<<<<<< @@ -29671,7 +29671,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_2read_binary(struct __pyx_obj_3_ __pyx_v_self->precomputed_index = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":226 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":226 * fread(&(self.train_min_gap_size), sizeof(int), 1, f) * self.precomputed_index = self.read_map(f) * self.precomputed_collocations = self.read_map(f) # <<<<<<<<<<<<<< @@ -29686,7 +29686,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_2read_binary(struct __pyx_obj_3_ __pyx_v_self->precomputed_collocations = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":227 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":227 * self.precomputed_index = self.read_map(f) * self.precomputed_collocations = self.read_map(f) * fclose(f) # <<<<<<<<<<<<<< @@ -29728,7 +29728,7 @@ static PyObject *__pyx_pw_3_sa_14Precomputation_5write_binary(PyObject *__pyx_v_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":230 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":230 * * * def write_binary(self, char* filename): # <<<<<<<<<<<<<< @@ -29747,7 +29747,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_4write_binary(struct __pyx_obj_3 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_binary", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":232 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":232 * def write_binary(self, char* filename): * cdef FILE* f * f = fopen(filename, "w") # <<<<<<<<<<<<<< @@ -29756,7 +29756,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_4write_binary(struct __pyx_obj_3 */ __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__w); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":233 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":233 * cdef FILE* f * f = fopen(filename, "w") * fwrite(&(self.precompute_rank), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -29765,7 +29765,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_4write_binary(struct __pyx_obj_3 */ fwrite((&__pyx_v_self->precompute_rank), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":234 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":234 * f = fopen(filename, "w") * fwrite(&(self.precompute_rank), sizeof(int), 1, f) * fwrite(&(self.precompute_secondary_rank), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -29774,7 +29774,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_4write_binary(struct __pyx_obj_3 */ fwrite((&__pyx_v_self->precompute_secondary_rank), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":235 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":235 * fwrite(&(self.precompute_rank), sizeof(int), 1, f) * fwrite(&(self.precompute_secondary_rank), sizeof(int), 1, f) * fwrite(&(self.max_length), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -29783,7 +29783,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_4write_binary(struct __pyx_obj_3 */ fwrite((&__pyx_v_self->max_length), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":236 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":236 * fwrite(&(self.precompute_secondary_rank), sizeof(int), 1, f) * fwrite(&(self.max_length), sizeof(int), 1, f) * fwrite(&(self.max_nonterminals), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -29792,7 +29792,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_4write_binary(struct __pyx_obj_3 */ fwrite((&__pyx_v_self->max_nonterminals), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":237 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":237 * fwrite(&(self.max_length), sizeof(int), 1, f) * fwrite(&(self.max_nonterminals), sizeof(int), 1, f) * fwrite(&(self.train_max_initial_size), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -29801,7 +29801,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_4write_binary(struct __pyx_obj_3 */ fwrite((&__pyx_v_self->train_max_initial_size), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":238 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":238 * fwrite(&(self.max_nonterminals), sizeof(int), 1, f) * fwrite(&(self.train_max_initial_size), sizeof(int), 1, f) * fwrite(&(self.train_min_gap_size), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -29810,7 +29810,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_4write_binary(struct __pyx_obj_3 */ fwrite((&__pyx_v_self->train_min_gap_size), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":239 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":239 * fwrite(&(self.train_max_initial_size), sizeof(int), 1, f) * fwrite(&(self.train_min_gap_size), sizeof(int), 1, f) * self.write_map(self.precomputed_index, f) # <<<<<<<<<<<<<< @@ -29824,7 +29824,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_4write_binary(struct __pyx_obj_3 __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":240 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":240 * fwrite(&(self.train_min_gap_size), sizeof(int), 1, f) * self.write_map(self.precomputed_index, f) * self.write_map(self.precomputed_collocations, f) # <<<<<<<<<<<<<< @@ -29838,7 +29838,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_4write_binary(struct __pyx_obj_3 __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":241 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":241 * self.write_map(self.precomputed_index, f) * self.write_map(self.precomputed_collocations, f) * fclose(f) # <<<<<<<<<<<<<< @@ -29860,7 +29860,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_4write_binary(struct __pyx_obj_3 return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":244 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":244 * * * cdef write_map(self, m, FILE* f): # <<<<<<<<<<<<<< @@ -29891,7 +29891,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_write_map(CYTHON_UNUSED struct __ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_map", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":248 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":248 * cdef IntList arr * * N = len(m) # <<<<<<<<<<<<<< @@ -29901,7 +29901,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_write_map(CYTHON_UNUSED struct __ __pyx_t_1 = PyObject_Length(__pyx_v_m); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 248; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_N = __pyx_t_1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":249 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":249 * * N = len(m) * fwrite(&(N), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -29910,7 +29910,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_write_map(CYTHON_UNUSED struct __ */ fwrite((&__pyx_v_N), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":250 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":250 * N = len(m) * fwrite(&(N), sizeof(int), 1, f) * for pattern, val in m.iteritems(): # <<<<<<<<<<<<<< @@ -29940,7 +29940,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_write_map(CYTHON_UNUSED struct __ __pyx_v_val = __pyx_t_6; __pyx_t_6 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":251 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":251 * fwrite(&(N), sizeof(int), 1, f) * for pattern, val in m.iteritems(): * N = len(pattern) # <<<<<<<<<<<<<< @@ -29950,7 +29950,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_write_map(CYTHON_UNUSED struct __ __pyx_t_8 = PyObject_Length(__pyx_v_pattern); if (unlikely(__pyx_t_8 == -1)) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_N = __pyx_t_8; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":252 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":252 * for pattern, val in m.iteritems(): * N = len(pattern) * fwrite(&(N), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -29959,7 +29959,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_write_map(CYTHON_UNUSED struct __ */ fwrite((&__pyx_v_N), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":253 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":253 * N = len(pattern) * fwrite(&(N), sizeof(int), 1, f) * for word_id in pattern: # <<<<<<<<<<<<<< @@ -30004,7 +30004,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_write_map(CYTHON_UNUSED struct __ __pyx_v_word_id = __pyx_t_5; __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":254 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":254 * fwrite(&(N), sizeof(int), 1, f) * for word_id in pattern: * i = word_id # <<<<<<<<<<<<<< @@ -30014,7 +30014,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_write_map(CYTHON_UNUSED struct __ __pyx_t_7 = __Pyx_PyInt_AsInt(__pyx_v_word_id); if (unlikely((__pyx_t_7 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 254; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_i = __pyx_t_7; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":255 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":255 * for word_id in pattern: * i = word_id * fwrite(&(i), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -30025,7 +30025,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_write_map(CYTHON_UNUSED struct __ } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":256 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":256 * i = word_id * fwrite(&(i), sizeof(int), 1, f) * arr = val # <<<<<<<<<<<<<< @@ -30037,7 +30037,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_write_map(CYTHON_UNUSED struct __ __Pyx_XDECREF(((PyObject *)__pyx_v_arr)); __pyx_v_arr = ((struct __pyx_obj_3_sa_IntList *)__pyx_v_val); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":257 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":257 * fwrite(&(i), sizeof(int), 1, f) * arr = val * arr.write_handle(f) # <<<<<<<<<<<<<< @@ -30066,7 +30066,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_write_map(CYTHON_UNUSED struct __ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":260 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":260 * * * cdef read_map(self, FILE* f): # <<<<<<<<<<<<<< @@ -30094,7 +30094,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_read_map(CYTHON_UNUSED struct __p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("read_map", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":264 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":264 * cdef IntList arr * * m = {} # <<<<<<<<<<<<<< @@ -30106,7 +30106,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_read_map(CYTHON_UNUSED struct __p __pyx_v_m = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":265 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":265 * * m = {} * fread(&(N), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -30115,7 +30115,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_read_map(CYTHON_UNUSED struct __p */ fread((&__pyx_v_N), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":266 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":266 * m = {} * fread(&(N), sizeof(int), 1, f) * for j from 0 <= j < N: # <<<<<<<<<<<<<< @@ -30125,7 +30125,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_read_map(CYTHON_UNUSED struct __p __pyx_t_2 = __pyx_v_N; for (__pyx_v_j = 0; __pyx_v_j < __pyx_t_2; __pyx_v_j++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":267 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":267 * fread(&(N), sizeof(int), 1, f) * for j from 0 <= j < N: * fread(&(i), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -30134,7 +30134,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_read_map(CYTHON_UNUSED struct __p */ fread((&__pyx_v_i), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":268 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":268 * for j from 0 <= j < N: * fread(&(i), sizeof(int), 1, f) * key = () # <<<<<<<<<<<<<< @@ -30145,7 +30145,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_read_map(CYTHON_UNUSED struct __p __Pyx_XDECREF(((PyObject *)__pyx_v_key)); __pyx_v_key = __pyx_empty_tuple; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":269 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":269 * fread(&(i), sizeof(int), 1, f) * key = () * for k from 0 <= k < i: # <<<<<<<<<<<<<< @@ -30155,7 +30155,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_read_map(CYTHON_UNUSED struct __p __pyx_t_3 = __pyx_v_i; for (__pyx_v_k = 0; __pyx_v_k < __pyx_t_3; __pyx_v_k++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":270 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":270 * key = () * for k from 0 <= k < i: * fread(&(word_id), sizeof(int), 1, f) # <<<<<<<<<<<<<< @@ -30164,7 +30164,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_read_map(CYTHON_UNUSED struct __p */ fread((&__pyx_v_word_id), (sizeof(int)), 1, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":271 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":271 * for k from 0 <= k < i: * fread(&(word_id), sizeof(int), 1, f) * key = key + (word_id,) # <<<<<<<<<<<<<< @@ -30186,7 +30186,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_read_map(CYTHON_UNUSED struct __p __pyx_t_1 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":272 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":272 * fread(&(word_id), sizeof(int), 1, f) * key = key + (word_id,) * arr = IntList() # <<<<<<<<<<<<<< @@ -30199,7 +30199,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_read_map(CYTHON_UNUSED struct __p __pyx_v_arr = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":273 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":273 * key = key + (word_id,) * arr = IntList() * arr.read_handle(f) # <<<<<<<<<<<<<< @@ -30208,7 +30208,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_read_map(CYTHON_UNUSED struct __p */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_arr->__pyx_vtab)->read_handle(__pyx_v_arr, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":274 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":274 * arr = IntList() * arr.read_handle(f) * m[key] = arr # <<<<<<<<<<<<<< @@ -30218,7 +30218,7 @@ static PyObject *__pyx_f_3_sa_14Precomputation_read_map(CYTHON_UNUSED struct __p if (PyDict_SetItem(((PyObject *)__pyx_v_m), ((PyObject *)__pyx_v_key), ((PyObject *)__pyx_v_arr)) < 0) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 274; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":275 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":275 * arr.read_handle(f) * m[key] = arr * return m # <<<<<<<<<<<<<< @@ -30307,7 +30307,7 @@ static PyObject *__pyx_pw_3_sa_14Precomputation_7precompute(PyObject *__pyx_v_se return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":278 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":278 * * * def precompute(self, stats, SuffixArray sarray): # <<<<<<<<<<<<<< @@ -30392,7 +30392,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s int __pyx_clineno = 0; __Pyx_RefNannySetupContext("precompute", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":280 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":280 * def precompute(self, stats, SuffixArray sarray): * cdef int i, l, N, max_pattern_len, i1, l1, i2, l2, i3, l3, ptr1, ptr2, ptr3, is_super, sent_count, max_rank * cdef DataArray darray = sarray.darray # <<<<<<<<<<<<<< @@ -30402,7 +30402,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_INCREF(((PyObject *)__pyx_v_sarray->darray)); __pyx_v_darray = __pyx_v_sarray->darray; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":285 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":285 * cdef _Trie_Node* node * * data = darray.data # <<<<<<<<<<<<<< @@ -30412,7 +30412,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_INCREF(((PyObject *)__pyx_v_darray->data)); __pyx_v_data = __pyx_v_darray->data; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":287 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":287 * data = darray.data * * frequent_patterns = TrieMap(len(darray.id2word)) # <<<<<<<<<<<<<< @@ -30436,7 +30436,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_frequent_patterns = ((struct __pyx_obj_3_sa_TrieMap *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":288 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":288 * * frequent_patterns = TrieMap(len(darray.id2word)) * super_frequent_patterns = TrieMap(len(darray.id2word)) # <<<<<<<<<<<<<< @@ -30460,7 +30460,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_super_frequent_patterns = ((struct __pyx_obj_3_sa_TrieMap *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":289 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":289 * frequent_patterns = TrieMap(len(darray.id2word)) * super_frequent_patterns = TrieMap(len(darray.id2word)) * collocations = TrieMap(len(darray.id2word)) # <<<<<<<<<<<<<< @@ -30484,7 +30484,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_collocations = ((struct __pyx_obj_3_sa_TrieMap *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":291 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":291 * collocations = TrieMap(len(darray.id2word)) * * I_set = set() # <<<<<<<<<<<<<< @@ -30496,7 +30496,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_I_set = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":292 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":292 * * I_set = set() * J_set = set() # <<<<<<<<<<<<<< @@ -30508,7 +30508,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_J_set = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":293 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":293 * I_set = set() * J_set = set() * J2_set = set() # <<<<<<<<<<<<<< @@ -30520,7 +30520,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_J2_set = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":294 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":294 * J_set = set() * J2_set = set() * IJ_set = set() # <<<<<<<<<<<<<< @@ -30532,7 +30532,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_IJ_set = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":295 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":295 * J2_set = set() * IJ_set = set() * pattern_rank = {} # <<<<<<<<<<<<<< @@ -30544,7 +30544,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_pattern_rank = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":297 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":297 * pattern_rank = {} * * logger.info("Precomputing frequent intersections") # <<<<<<<<<<<<<< @@ -30561,7 +30561,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":298 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":298 * * logger.info("Precomputing frequent intersections") * cdef float start_time = monitor_cpu() # <<<<<<<<<<<<<< @@ -30570,7 +30570,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_start_time = __pyx_f_3_sa_monitor_cpu(); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":300 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":300 * cdef float start_time = monitor_cpu() * * max_pattern_len = 0 # <<<<<<<<<<<<<< @@ -30579,7 +30579,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_max_pattern_len = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":301 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":301 * * max_pattern_len = 0 * for rank, (_, _, phrase) in enumerate(stats): # <<<<<<<<<<<<<< @@ -30695,7 +30695,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_1 = __pyx_t_5; __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":302 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":302 * max_pattern_len = 0 * for rank, (_, _, phrase) in enumerate(stats): * if rank >= self.precompute_rank: # <<<<<<<<<<<<<< @@ -30710,7 +30710,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; if (__pyx_t_11) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":303 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":303 * for rank, (_, _, phrase) in enumerate(stats): * if rank >= self.precompute_rank: * break # <<<<<<<<<<<<<< @@ -30722,7 +30722,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __pyx_L7:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":304 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":304 * if rank >= self.precompute_rank: * break * max_pattern_len = max(max_pattern_len, len(phrase)) # <<<<<<<<<<<<<< @@ -30738,7 +30738,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __pyx_v_max_pattern_len = __pyx_t_14; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":305 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":305 * break * max_pattern_len = max(max_pattern_len, len(phrase)) * frequent_patterns.insert(phrase) # <<<<<<<<<<<<<< @@ -30758,7 +30758,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":306 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":306 * max_pattern_len = max(max_pattern_len, len(phrase)) * frequent_patterns.insert(phrase) * I_set.add(phrase) # <<<<<<<<<<<<<< @@ -30767,7 +30767,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_t_15 = PySet_Add(__pyx_v_I_set, __pyx_v_phrase); if (unlikely(__pyx_t_15 == -1)) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":307 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":307 * frequent_patterns.insert(phrase) * I_set.add(phrase) * pattern_rank[phrase] = rank # <<<<<<<<<<<<<< @@ -30776,7 +30776,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ if (PyDict_SetItem(((PyObject *)__pyx_v_pattern_rank), __pyx_v_phrase, __pyx_v_rank) < 0) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":308 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":308 * I_set.add(phrase) * pattern_rank[phrase] = rank * if rank < self.precompute_secondary_rank: # <<<<<<<<<<<<<< @@ -30791,7 +30791,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_11) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":309 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":309 * pattern_rank[phrase] = rank * if rank < self.precompute_secondary_rank: * super_frequent_patterns.insert(phrase) # <<<<<<<<<<<<<< @@ -30811,7 +30811,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_DECREF(((PyObject *)__pyx_t_7)); __pyx_t_7 = 0; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":310 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":310 * if rank < self.precompute_secondary_rank: * super_frequent_patterns.insert(phrase) * J_set.add(phrase) # <<<<<<<<<<<<<< @@ -30827,7 +30827,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":312 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":312 * J_set.add(phrase) * * queue = IntList(increment=1000) # <<<<<<<<<<<<<< @@ -30843,7 +30843,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_queue = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":314 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":314 * queue = IntList(increment=1000) * * logger.info(" Computing inverted indexes...") # <<<<<<<<<<<<<< @@ -30860,7 +30860,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":315 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":315 * * logger.info(" Computing inverted indexes...") * N = len(data) # <<<<<<<<<<<<<< @@ -30870,7 +30870,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_2 = PyObject_Length(((PyObject *)__pyx_v_data)); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 315; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_N = __pyx_t_2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":316 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":316 * logger.info(" Computing inverted indexes...") * N = len(data) * for i from 0 <= i < N: # <<<<<<<<<<<<<< @@ -30880,7 +30880,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_13 = __pyx_v_N; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_13; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":317 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":317 * N = len(data) * for i from 0 <= i < N: * sa_word_id = data.arr[i] # <<<<<<<<<<<<<< @@ -30889,7 +30889,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_sa_word_id = (__pyx_v_data->arr[__pyx_v_i]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":318 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":318 * for i from 0 <= i < N: * sa_word_id = data.arr[i] * if sa_word_id == 1: # <<<<<<<<<<<<<< @@ -30899,7 +30899,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_11 = (__pyx_v_sa_word_id == 1); if (__pyx_t_11) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":319 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":319 * sa_word_id = data.arr[i] * if sa_word_id == 1: * queue._append(-1) # <<<<<<<<<<<<<< @@ -30911,7 +30911,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":321 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":321 * queue._append(-1) * else: * for l from 1 <= l <= max_pattern_len: # <<<<<<<<<<<<<< @@ -30921,7 +30921,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_16 = __pyx_v_max_pattern_len; for (__pyx_v_l = 1; __pyx_v_l <= __pyx_t_16; __pyx_v_l++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":322 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":322 * else: * for l from 1 <= l <= max_pattern_len: * node = frequent_patterns._contains(data.arr+i, l) # <<<<<<<<<<<<<< @@ -30930,7 +30930,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_node = ((struct __pyx_vtabstruct_3_sa_TrieMap *)__pyx_v_frequent_patterns->__pyx_vtab)->_contains(__pyx_v_frequent_patterns, (__pyx_v_data->arr + __pyx_v_i), __pyx_v_l); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":323 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":323 * for l from 1 <= l <= max_pattern_len: * node = frequent_patterns._contains(data.arr+i, l) * if node == NULL: # <<<<<<<<<<<<<< @@ -30940,7 +30940,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_11 = (__pyx_v_node == NULL); if (__pyx_t_11) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":324 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":324 * node = frequent_patterns._contains(data.arr+i, l) * if node == NULL: * break # <<<<<<<<<<<<<< @@ -30952,7 +30952,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __pyx_L14:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":325 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":325 * if node == NULL: * break * queue._append(i) # <<<<<<<<<<<<<< @@ -30961,7 +30961,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_queue->__pyx_vtab)->_append(__pyx_v_queue, __pyx_v_i); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":326 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":326 * break * queue._append(i) * queue._append(l) # <<<<<<<<<<<<<< @@ -30970,7 +30970,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_queue->__pyx_vtab)->_append(__pyx_v_queue, __pyx_v_l); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":327 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":327 * queue._append(i) * queue._append(l) * trie_node_data_append(node, i) # <<<<<<<<<<<<<< @@ -30986,7 +30986,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_L11:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":329 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":329 * trie_node_data_append(node, i) * * logger.info(" Computing collocations...") # <<<<<<<<<<<<<< @@ -31003,7 +31003,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":330 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":330 * * logger.info(" Computing collocations...") * N = len(queue) # <<<<<<<<<<<<<< @@ -31013,7 +31013,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_2 = PyObject_Length(((PyObject *)__pyx_v_queue)); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 330; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_N = __pyx_t_2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":331 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":331 * logger.info(" Computing collocations...") * N = len(queue) * ptr1 = 0 # <<<<<<<<<<<<<< @@ -31022,7 +31022,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_ptr1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":332 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":332 * N = len(queue) * ptr1 = 0 * sent_count = 0 # <<<<<<<<<<<<<< @@ -31031,7 +31031,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_sent_count = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":333 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":333 * ptr1 = 0 * sent_count = 0 * while ptr1 < N: # main loop # <<<<<<<<<<<<<< @@ -31042,7 +31042,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_11 = (__pyx_v_ptr1 < __pyx_v_N); if (!__pyx_t_11) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":334 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":334 * sent_count = 0 * while ptr1 < N: # main loop * i1 = queue.arr[ptr1] # <<<<<<<<<<<<<< @@ -31051,7 +31051,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_i1 = (__pyx_v_queue->arr[__pyx_v_ptr1]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":335 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":335 * while ptr1 < N: # main loop * i1 = queue.arr[ptr1] * if i1 > -1: # <<<<<<<<<<<<<< @@ -31061,7 +31061,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_11 = (__pyx_v_i1 > -1); if (__pyx_t_11) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":336 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":336 * i1 = queue.arr[ptr1] * if i1 > -1: * l1 = queue.arr[ptr1+1] # <<<<<<<<<<<<<< @@ -31070,7 +31070,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_l1 = (__pyx_v_queue->arr[(__pyx_v_ptr1 + 1)]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":337 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":337 * if i1 > -1: * l1 = queue.arr[ptr1+1] * ptr2 = ptr1 + 2 # <<<<<<<<<<<<<< @@ -31079,7 +31079,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_ptr2 = (__pyx_v_ptr1 + 2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":338 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":338 * l1 = queue.arr[ptr1+1] * ptr2 = ptr1 + 2 * while ptr2 < N: # <<<<<<<<<<<<<< @@ -31090,7 +31090,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_11 = (__pyx_v_ptr2 < __pyx_v_N); if (!__pyx_t_11) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":339 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":339 * ptr2 = ptr1 + 2 * while ptr2 < N: * i2 = queue.arr[ptr2] # <<<<<<<<<<<<<< @@ -31099,7 +31099,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_i2 = (__pyx_v_queue->arr[__pyx_v_ptr2]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":340 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":340 * while ptr2 < N: * i2 = queue.arr[ptr2] * if i2 == -1 or i2 - i1 >= self.train_max_initial_size: # <<<<<<<<<<<<<< @@ -31115,7 +31115,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } if (__pyx_t_18) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":341 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":341 * i2 = queue.arr[ptr2] * if i2 == -1 or i2 - i1 >= self.train_max_initial_size: * break # <<<<<<<<<<<<<< @@ -31127,7 +31127,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __pyx_L20:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":342 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":342 * if i2 == -1 or i2 - i1 >= self.train_max_initial_size: * break * l2 = queue.arr[ptr2+1] # <<<<<<<<<<<<<< @@ -31136,7 +31136,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_l2 = (__pyx_v_queue->arr[(__pyx_v_ptr2 + 1)]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":343 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":343 * break * l2 = queue.arr[ptr2+1] * if (i2 - i1 - l1 >= self.train_min_gap_size and # <<<<<<<<<<<<<< @@ -31146,7 +31146,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_18 = (((__pyx_v_i2 - __pyx_v_i1) - __pyx_v_l1) >= __pyx_v_self->train_min_gap_size); if (__pyx_t_18) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":344 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":344 * l2 = queue.arr[ptr2+1] * if (i2 - i1 - l1 >= self.train_min_gap_size and * i2 + l2 - i1 <= self.train_max_initial_size and # <<<<<<<<<<<<<< @@ -31156,7 +31156,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_11 = (((__pyx_v_i2 + __pyx_v_l2) - __pyx_v_i1) <= __pyx_v_self->train_max_initial_size); if (__pyx_t_11) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":345 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":345 * if (i2 - i1 - l1 >= self.train_min_gap_size and * i2 + l2 - i1 <= self.train_max_initial_size and * l1+l2+1 <= self.max_length): # <<<<<<<<<<<<<< @@ -31174,7 +31174,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } if (__pyx_t_11) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":346 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":346 * i2 + l2 - i1 <= self.train_max_initial_size and * l1+l2+1 <= self.max_length): * node = collocations._insert(data.arr+i1, l1) # <<<<<<<<<<<<<< @@ -31183,7 +31183,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_node = ((struct __pyx_vtabstruct_3_sa_TrieMap *)__pyx_v_collocations->__pyx_vtab)->_insert(__pyx_v_collocations, (__pyx_v_data->arr + __pyx_v_i1), __pyx_v_l1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":347 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":347 * l1+l2+1 <= self.max_length): * node = collocations._insert(data.arr+i1, l1) * node = trie_insert(node, -1) # <<<<<<<<<<<<<< @@ -31192,7 +31192,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_node = __pyx_f_3_sa_trie_insert(__pyx_v_node, -1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":348 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":348 * node = collocations._insert(data.arr+i1, l1) * node = trie_insert(node, -1) * for i from i2 <= i < i2+l2: # <<<<<<<<<<<<<< @@ -31202,7 +31202,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_13 = (__pyx_v_i2 + __pyx_v_l2); for (__pyx_v_i = __pyx_v_i2; __pyx_v_i < __pyx_t_13; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":349 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":349 * node = trie_insert(node, -1) * for i from i2 <= i < i2+l2: * node = trie_insert(node, data.arr[i]) # <<<<<<<<<<<<<< @@ -31212,7 +31212,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_node = __pyx_f_3_sa_trie_insert(__pyx_v_node, (__pyx_v_data->arr[__pyx_v_i])); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":350 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":350 * for i from i2 <= i < i2+l2: * node = trie_insert(node, data.arr[i]) * trie_node_data_append(node, i1) # <<<<<<<<<<<<<< @@ -31223,7 +31223,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":351 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":351 * node = trie_insert(node, data.arr[i]) * trie_node_data_append(node, i1) * trie_node_data_append(node, i2) # <<<<<<<<<<<<<< @@ -31234,7 +31234,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":352 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":352 * trie_node_data_append(node, i1) * trie_node_data_append(node, i2) * if super_frequent_patterns._contains(data.arr+i2, l2) != NULL: # <<<<<<<<<<<<<< @@ -31244,7 +31244,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_11 = (((struct __pyx_vtabstruct_3_sa_TrieMap *)__pyx_v_super_frequent_patterns->__pyx_vtab)->_contains(__pyx_v_super_frequent_patterns, (__pyx_v_data->arr + __pyx_v_i2), __pyx_v_l2) != NULL); if (__pyx_t_11) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":353 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":353 * trie_node_data_append(node, i2) * if super_frequent_patterns._contains(data.arr+i2, l2) != NULL: * if super_frequent_patterns._contains(data.arr+i1, l1) != NULL: # <<<<<<<<<<<<<< @@ -31254,7 +31254,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_11 = (((struct __pyx_vtabstruct_3_sa_TrieMap *)__pyx_v_super_frequent_patterns->__pyx_vtab)->_contains(__pyx_v_super_frequent_patterns, (__pyx_v_data->arr + __pyx_v_i1), __pyx_v_l1) != NULL); if (__pyx_t_11) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":354 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":354 * if super_frequent_patterns._contains(data.arr+i2, l2) != NULL: * if super_frequent_patterns._contains(data.arr+i1, l1) != NULL: * is_super = 1 # <<<<<<<<<<<<<< @@ -31266,7 +31266,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":356 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":356 * is_super = 1 * else: * is_super = 0 # <<<<<<<<<<<<<< @@ -31277,7 +31277,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __pyx_L25:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":357 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":357 * else: * is_super = 0 * ptr3 = ptr2 + 2 # <<<<<<<<<<<<<< @@ -31286,7 +31286,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_ptr3 = (__pyx_v_ptr2 + 2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":358 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":358 * is_super = 0 * ptr3 = ptr2 + 2 * while ptr3 < N: # <<<<<<<<<<<<<< @@ -31297,7 +31297,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_11 = (__pyx_v_ptr3 < __pyx_v_N); if (!__pyx_t_11) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":359 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":359 * ptr3 = ptr2 + 2 * while ptr3 < N: * i3 = queue.arr[ptr3] # <<<<<<<<<<<<<< @@ -31306,7 +31306,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_i3 = (__pyx_v_queue->arr[__pyx_v_ptr3]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":360 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":360 * while ptr3 < N: * i3 = queue.arr[ptr3] * if i3 == -1 or i3 - i1 >= self.train_max_initial_size: # <<<<<<<<<<<<<< @@ -31322,7 +31322,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } if (__pyx_t_19) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":361 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":361 * i3 = queue.arr[ptr3] * if i3 == -1 or i3 - i1 >= self.train_max_initial_size: * break # <<<<<<<<<<<<<< @@ -31334,7 +31334,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __pyx_L28:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":362 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":362 * if i3 == -1 or i3 - i1 >= self.train_max_initial_size: * break * l3 = queue.arr[ptr3+1] # <<<<<<<<<<<<<< @@ -31343,7 +31343,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_l3 = (__pyx_v_queue->arr[(__pyx_v_ptr3 + 1)]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":363 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":363 * break * l3 = queue.arr[ptr3+1] * if (i3 - i2 - l2 >= self.train_min_gap_size and # <<<<<<<<<<<<<< @@ -31353,7 +31353,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_19 = (((__pyx_v_i3 - __pyx_v_i2) - __pyx_v_l2) >= __pyx_v_self->train_min_gap_size); if (__pyx_t_19) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":364 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":364 * l3 = queue.arr[ptr3+1] * if (i3 - i2 - l2 >= self.train_min_gap_size and * i3 + l3 - i1 <= self.train_max_initial_size and # <<<<<<<<<<<<<< @@ -31363,7 +31363,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_11 = (((__pyx_v_i3 + __pyx_v_l3) - __pyx_v_i1) <= __pyx_v_self->train_max_initial_size); if (__pyx_t_11) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":365 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":365 * if (i3 - i2 - l2 >= self.train_min_gap_size and * i3 + l3 - i1 <= self.train_max_initial_size and * l1+l2+l3+2 <= self.max_length): # <<<<<<<<<<<<<< @@ -31381,7 +31381,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } if (__pyx_t_11) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":366 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":366 * i3 + l3 - i1 <= self.train_max_initial_size and * l1+l2+l3+2 <= self.max_length): * if is_super or super_frequent_patterns._contains(data.arr+i3, l3) != NULL: # <<<<<<<<<<<<<< @@ -31396,7 +31396,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } if (__pyx_t_19) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":367 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":367 * l1+l2+l3+2 <= self.max_length): * if is_super or super_frequent_patterns._contains(data.arr+i3, l3) != NULL: * node = collocations._insert(data.arr+i1, l1) # <<<<<<<<<<<<<< @@ -31405,7 +31405,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_node = ((struct __pyx_vtabstruct_3_sa_TrieMap *)__pyx_v_collocations->__pyx_vtab)->_insert(__pyx_v_collocations, (__pyx_v_data->arr + __pyx_v_i1), __pyx_v_l1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":368 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":368 * if is_super or super_frequent_patterns._contains(data.arr+i3, l3) != NULL: * node = collocations._insert(data.arr+i1, l1) * node = trie_insert(node, -1) # <<<<<<<<<<<<<< @@ -31414,7 +31414,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_node = __pyx_f_3_sa_trie_insert(__pyx_v_node, -1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":369 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":369 * node = collocations._insert(data.arr+i1, l1) * node = trie_insert(node, -1) * for i from i2 <= i < i2+l2: # <<<<<<<<<<<<<< @@ -31424,7 +31424,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_13 = (__pyx_v_i2 + __pyx_v_l2); for (__pyx_v_i = __pyx_v_i2; __pyx_v_i < __pyx_t_13; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":370 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":370 * node = trie_insert(node, -1) * for i from i2 <= i < i2+l2: * node = trie_insert(node, data.arr[i]) # <<<<<<<<<<<<<< @@ -31434,7 +31434,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_node = __pyx_f_3_sa_trie_insert(__pyx_v_node, (__pyx_v_data->arr[__pyx_v_i])); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":371 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":371 * for i from i2 <= i < i2+l2: * node = trie_insert(node, data.arr[i]) * node = trie_insert(node, -1) # <<<<<<<<<<<<<< @@ -31443,7 +31443,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_node = __pyx_f_3_sa_trie_insert(__pyx_v_node, -1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":372 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":372 * node = trie_insert(node, data.arr[i]) * node = trie_insert(node, -1) * for i from i3 <= i < i3+l3: # <<<<<<<<<<<<<< @@ -31453,7 +31453,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_13 = (__pyx_v_i3 + __pyx_v_l3); for (__pyx_v_i = __pyx_v_i3; __pyx_v_i < __pyx_t_13; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":373 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":373 * node = trie_insert(node, -1) * for i from i3 <= i < i3+l3: * node = trie_insert(node, data.arr[i]) # <<<<<<<<<<<<<< @@ -31463,7 +31463,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_node = __pyx_f_3_sa_trie_insert(__pyx_v_node, (__pyx_v_data->arr[__pyx_v_i])); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":374 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":374 * for i from i3 <= i < i3+l3: * node = trie_insert(node, data.arr[i]) * trie_node_data_append(node, i1) # <<<<<<<<<<<<<< @@ -31474,7 +31474,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":375 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":375 * node = trie_insert(node, data.arr[i]) * trie_node_data_append(node, i1) * trie_node_data_append(node, i2) # <<<<<<<<<<<<<< @@ -31485,7 +31485,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":376 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":376 * trie_node_data_append(node, i1) * trie_node_data_append(node, i2) * trie_node_data_append(node, i3) # <<<<<<<<<<<<<< @@ -31502,7 +31502,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __pyx_L29:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":377 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":377 * trie_node_data_append(node, i2) * trie_node_data_append(node, i3) * ptr3 = ptr3 + 2 # <<<<<<<<<<<<<< @@ -31519,7 +31519,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __pyx_L21:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":378 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":378 * trie_node_data_append(node, i3) * ptr3 = ptr3 + 2 * ptr2 = ptr2 + 2 # <<<<<<<<<<<<<< @@ -31530,7 +31530,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __pyx_L19_break:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":379 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":379 * ptr3 = ptr3 + 2 * ptr2 = ptr2 + 2 * ptr1 = ptr1 + 2 # <<<<<<<<<<<<<< @@ -31542,7 +31542,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":381 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":381 * ptr1 = ptr1 + 2 * else: * sent_count = sent_count + 1 # <<<<<<<<<<<<<< @@ -31551,7 +31551,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_sent_count = (__pyx_v_sent_count + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":382 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":382 * else: * sent_count = sent_count + 1 * if sent_count % 10000 == 0: # <<<<<<<<<<<<<< @@ -31561,7 +31561,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_19 = (__Pyx_mod_long(__pyx_v_sent_count, 10000) == 0); if (__pyx_t_19) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":383 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":383 * sent_count = sent_count + 1 * if sent_count % 10000 == 0: * logger.debug(" %d sentences", sent_count) # <<<<<<<<<<<<<< @@ -31592,7 +31592,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __pyx_L35:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":384 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":384 * if sent_count % 10000 == 0: * logger.debug(" %d sentences", sent_count) * ptr1 = ptr1 + 1 # <<<<<<<<<<<<<< @@ -31604,7 +31604,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_L17:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":386 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":386 * ptr1 = ptr1 + 1 * * self.precomputed_collocations = collocations.toMap(False) # <<<<<<<<<<<<<< @@ -31630,7 +31630,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_self->precomputed_collocations = __pyx_t_8; __pyx_t_8 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":387 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":387 * * self.precomputed_collocations = collocations.toMap(False) * self.precomputed_index = frequent_patterns.toMap(True) # <<<<<<<<<<<<<< @@ -31656,7 +31656,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_self->precomputed_index = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":389 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":389 * self.precomputed_index = frequent_patterns.toMap(True) * * x = 0 # <<<<<<<<<<<<<< @@ -31666,7 +31666,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_INCREF(__pyx_int_0); __pyx_v_x = __pyx_int_0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":390 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":390 * * x = 0 * for pattern1 in J_set: # <<<<<<<<<<<<<< @@ -31692,7 +31692,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_pattern1 = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":391 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":391 * x = 0 * for pattern1 in J_set: * for pattern2 in J_set: # <<<<<<<<<<<<<< @@ -31718,7 +31718,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_pattern2 = __pyx_t_8; __pyx_t_8 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":392 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":392 * for pattern1 in J_set: * for pattern2 in J_set: * if len(pattern1) + len(pattern2) + 1 < self.max_length: # <<<<<<<<<<<<<< @@ -31730,7 +31730,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_19 = (((__pyx_t_2 + __pyx_t_14) + 1) < __pyx_v_self->max_length); if (__pyx_t_19) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":393 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":393 * for pattern2 in J_set: * if len(pattern1) + len(pattern2) + 1 < self.max_length: * combined_pattern = pattern1 + (-1,) + pattern2 # <<<<<<<<<<<<<< @@ -31746,7 +31746,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_combined_pattern = __pyx_t_7; __pyx_t_7 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":394 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":394 * if len(pattern1) + len(pattern2) + 1 < self.max_length: * combined_pattern = pattern1 + (-1,) + pattern2 * J2_set.add(combined_pattern) # <<<<<<<<<<<<<< @@ -31762,7 +31762,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":396 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":396 * J2_set.add(combined_pattern) * * for pattern1 in I_set: # <<<<<<<<<<<<<< @@ -31788,7 +31788,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_pattern1 = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":397 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":397 * * for pattern1 in I_set: * for pattern2 in I_set: # <<<<<<<<<<<<<< @@ -31814,7 +31814,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_pattern2 = __pyx_t_7; __pyx_t_7 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":398 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":398 * for pattern1 in I_set: * for pattern2 in I_set: * x = x+1 # <<<<<<<<<<<<<< @@ -31827,7 +31827,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_x = __pyx_t_7; __pyx_t_7 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":399 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":399 * for pattern2 in I_set: * x = x+1 * if len(pattern1) + len(pattern2) + 1 <= self.max_length: # <<<<<<<<<<<<<< @@ -31839,7 +31839,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_19 = (((__pyx_t_14 + __pyx_t_2) + 1) <= __pyx_v_self->max_length); if (__pyx_t_19) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":400 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":400 * x = x+1 * if len(pattern1) + len(pattern2) + 1 <= self.max_length: * combined_pattern = pattern1 + (-1,) + pattern2 # <<<<<<<<<<<<<< @@ -31855,7 +31855,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_combined_pattern = __pyx_t_8; __pyx_t_8 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":401 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":401 * if len(pattern1) + len(pattern2) + 1 <= self.max_length: * combined_pattern = pattern1 + (-1,) + pattern2 * IJ_set.add(combined_pattern) # <<<<<<<<<<<<<< @@ -31871,7 +31871,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":403 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":403 * IJ_set.add(combined_pattern) * * for pattern1 in I_set: # <<<<<<<<<<<<<< @@ -31897,7 +31897,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_pattern1 = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":404 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":404 * * for pattern1 in I_set: * for pattern2 in J2_set: # <<<<<<<<<<<<<< @@ -31923,7 +31923,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_pattern2 = __pyx_t_8; __pyx_t_8 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":405 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":405 * for pattern1 in I_set: * for pattern2 in J2_set: * x = x+2 # <<<<<<<<<<<<<< @@ -31936,7 +31936,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_x = __pyx_t_8; __pyx_t_8 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":406 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":406 * for pattern2 in J2_set: * x = x+2 * if len(pattern1) + len(pattern2) + 1<= self.max_length: # <<<<<<<<<<<<<< @@ -31948,7 +31948,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_19 = (((__pyx_t_2 + __pyx_t_14) + 1) <= __pyx_v_self->max_length); if (__pyx_t_19) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":407 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":407 * x = x+2 * if len(pattern1) + len(pattern2) + 1<= self.max_length: * combined_pattern = pattern1 + (-1,) + pattern2 # <<<<<<<<<<<<<< @@ -31964,7 +31964,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_combined_pattern = __pyx_t_7; __pyx_t_7 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":408 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":408 * if len(pattern1) + len(pattern2) + 1<= self.max_length: * combined_pattern = pattern1 + (-1,) + pattern2 * IJ_set.add(combined_pattern) # <<<<<<<<<<<<<< @@ -31973,7 +31973,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_t_15 = PySet_Add(__pyx_v_IJ_set, __pyx_v_combined_pattern); if (unlikely(__pyx_t_15 == -1)) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 408; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":409 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":409 * combined_pattern = pattern1 + (-1,) + pattern2 * IJ_set.add(combined_pattern) * combined_pattern = pattern2 + (-1,) + pattern1 # <<<<<<<<<<<<<< @@ -31989,7 +31989,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_combined_pattern = __pyx_t_8; __pyx_t_8 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":410 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":410 * IJ_set.add(combined_pattern) * combined_pattern = pattern2 + (-1,) + pattern1 * IJ_set.add(combined_pattern) # <<<<<<<<<<<<<< @@ -32005,7 +32005,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":412 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":412 * IJ_set.add(combined_pattern) * * N = len(pattern_rank) # <<<<<<<<<<<<<< @@ -32015,7 +32015,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_14 = PyDict_Size(((PyObject *)__pyx_v_pattern_rank)); if (unlikely(__pyx_t_14 == -1)) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_N = __pyx_t_14; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":413 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":413 * * N = len(pattern_rank) * cost_by_rank = IntList(initial_len=N) # <<<<<<<<<<<<<< @@ -32034,7 +32034,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_cost_by_rank = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":414 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":414 * N = len(pattern_rank) * cost_by_rank = IntList(initial_len=N) * count_by_rank = IntList(initial_len=N) # <<<<<<<<<<<<<< @@ -32053,7 +32053,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_count_by_rank = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":415 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":415 * cost_by_rank = IntList(initial_len=N) * count_by_rank = IntList(initial_len=N) * for pattern, arr in self.precomputed_collocations.iteritems(): # <<<<<<<<<<<<<< @@ -32083,7 +32083,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_arr = __pyx_t_8; __pyx_t_8 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":416 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":416 * count_by_rank = IntList(initial_len=N) * for pattern, arr in self.precomputed_collocations.iteritems(): * if pattern not in IJ_set: # <<<<<<<<<<<<<< @@ -32093,7 +32093,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_19 = (__Pyx_PySequence_Contains(__pyx_v_pattern, ((PyObject *)__pyx_v_IJ_set), Py_NE)); if (unlikely(__pyx_t_19 < 0)) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 416; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_19) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":417 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":417 * for pattern, arr in self.precomputed_collocations.iteritems(): * if pattern not in IJ_set: * s = "" # <<<<<<<<<<<<<< @@ -32104,7 +32104,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_XDECREF(__pyx_v_s); __pyx_v_s = ((PyObject *)__pyx_kp_s_45); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":418 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":418 * if pattern not in IJ_set: * s = "" * for word_id in pattern: # <<<<<<<<<<<<<< @@ -32149,7 +32149,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_word_id = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":419 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":419 * s = "" * for word_id in pattern: * if word_id == -1: # <<<<<<<<<<<<<< @@ -32161,7 +32161,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_19) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":420 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":420 * for word_id in pattern: * if word_id == -1: * s = s + "X " # <<<<<<<<<<<<<< @@ -32177,7 +32177,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":422 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":422 * s = s + "X " * else: * s = s + darray.id2word[word_id] + " " # <<<<<<<<<<<<<< @@ -32200,7 +32200,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":423 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":423 * else: * s = s + darray.id2word[word_id] + " " * logger.warn("ERROR: unexpected pattern %s in set of precomputed collocations", s) # <<<<<<<<<<<<<< @@ -32229,7 +32229,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":425 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":425 * logger.warn("ERROR: unexpected pattern %s in set of precomputed collocations", s) * else: * chunk = () # <<<<<<<<<<<<<< @@ -32240,7 +32240,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_XDECREF(((PyObject *)__pyx_v_chunk)); __pyx_v_chunk = __pyx_empty_tuple; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":426 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":426 * else: * chunk = () * max_rank = 0 # <<<<<<<<<<<<<< @@ -32249,7 +32249,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_max_rank = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":427 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":427 * chunk = () * max_rank = 0 * arity = 0 # <<<<<<<<<<<<<< @@ -32260,7 +32260,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_XDECREF(__pyx_v_arity); __pyx_v_arity = __pyx_int_0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":428 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":428 * max_rank = 0 * arity = 0 * for word_id in pattern: # <<<<<<<<<<<<<< @@ -32305,7 +32305,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_word_id = __pyx_t_8; __pyx_t_8 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":429 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":429 * arity = 0 * for word_id in pattern: * if word_id == -1: # <<<<<<<<<<<<<< @@ -32317,7 +32317,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; if (__pyx_t_19) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":430 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":430 * for word_id in pattern: * if word_id == -1: * max_rank = max(max_rank, pattern_rank[chunk]) # <<<<<<<<<<<<<< @@ -32347,7 +32347,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_max_rank = __pyx_t_16; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":431 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":431 * if word_id == -1: * max_rank = max(max_rank, pattern_rank[chunk]) * arity = arity + 1 # <<<<<<<<<<<<<< @@ -32360,7 +32360,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_arity = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":432 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":432 * max_rank = max(max_rank, pattern_rank[chunk]) * arity = arity + 1 * chunk = () # <<<<<<<<<<<<<< @@ -32374,7 +32374,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":434 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":434 * chunk = () * else: * chunk = chunk + (word_id,) # <<<<<<<<<<<<<< @@ -32397,7 +32397,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":435 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":435 * else: * chunk = chunk + (word_id,) * max_rank = max(max_rank, pattern_rank[chunk]) # <<<<<<<<<<<<<< @@ -32427,7 +32427,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_max_rank = __pyx_t_16; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":436 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":436 * chunk = chunk + (word_id,) * max_rank = max(max_rank, pattern_rank[chunk]) * cost_by_rank.arr[max_rank] = cost_by_rank.arr[max_rank] + (4*len(arr)) # <<<<<<<<<<<<<< @@ -32437,7 +32437,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_12 = PyObject_Length(__pyx_v_arr); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 436; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_cost_by_rank->arr[__pyx_v_max_rank]) = ((__pyx_v_cost_by_rank->arr[__pyx_v_max_rank]) + (4 * __pyx_t_12)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":437 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":437 * max_rank = max(max_rank, pattern_rank[chunk]) * cost_by_rank.arr[max_rank] = cost_by_rank.arr[max_rank] + (4*len(arr)) * count_by_rank.arr[max_rank] = count_by_rank.arr[max_rank] + (len(arr)/(arity+1)) # <<<<<<<<<<<<<< @@ -32467,7 +32467,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":439 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":439 * count_by_rank.arr[max_rank] = count_by_rank.arr[max_rank] + (len(arr)/(arity+1)) * * cumul_cost = 0 # <<<<<<<<<<<<<< @@ -32477,7 +32477,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_INCREF(__pyx_int_0); __pyx_v_cumul_cost = __pyx_int_0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":440 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":440 * * cumul_cost = 0 * cumul_count = 0 # <<<<<<<<<<<<<< @@ -32487,7 +32487,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_INCREF(__pyx_int_0); __pyx_v_cumul_count = __pyx_int_0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":441 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":441 * cumul_cost = 0 * cumul_count = 0 * for i from 0 <= i < N: # <<<<<<<<<<<<<< @@ -32497,7 +32497,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_13 = __pyx_v_N; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_13; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":442 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":442 * cumul_count = 0 * for i from 0 <= i < N: * cumul_cost = cumul_cost + cost_by_rank.arr[i] # <<<<<<<<<<<<<< @@ -32513,7 +32513,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_cumul_cost = __pyx_t_6; __pyx_t_6 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":443 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":443 * for i from 0 <= i < N: * cumul_cost = cumul_cost + cost_by_rank.arr[i] * cumul_count = cumul_count + count_by_rank.arr[i] # <<<<<<<<<<<<<< @@ -32529,7 +32529,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_cumul_count = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":444 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":444 * cumul_cost = cumul_cost + cost_by_rank.arr[i] * cumul_count = cumul_count + count_by_rank.arr[i] * logger.debug("RANK %d\tCOUNT, COST: %d %d\tCUMUL: %d, %d", i, count_by_rank.arr[i], cost_by_rank.arr[i], cumul_count, cumul_cost) # <<<<<<<<<<<<<< @@ -32574,7 +32574,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":446 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":446 * logger.debug("RANK %d\tCOUNT, COST: %d %d\tCUMUL: %d, %d", i, count_by_rank.arr[i], cost_by_rank.arr[i], cumul_count, cumul_cost) * * num_found_patterns = len(self.precomputed_collocations) # <<<<<<<<<<<<<< @@ -32590,7 +32590,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_num_found_patterns = __pyx_t_8; __pyx_t_8 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":447 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":447 * * num_found_patterns = len(self.precomputed_collocations) * for pattern in IJ_set: # <<<<<<<<<<<<<< @@ -32616,7 +32616,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_v_pattern = __pyx_t_7; __pyx_t_7 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":448 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":448 * num_found_patterns = len(self.precomputed_collocations) * for pattern in IJ_set: * if pattern not in self.precomputed_collocations: # <<<<<<<<<<<<<< @@ -32626,7 +32626,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __pyx_t_19 = (__Pyx_PySequence_Contains(__pyx_v_pattern, __pyx_v_self->precomputed_collocations, Py_NE)); if (unlikely(__pyx_t_19 < 0)) {__pyx_filename = __pyx_f[11]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_19) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":449 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":449 * for pattern in IJ_set: * if pattern not in self.precomputed_collocations: * self.precomputed_collocations[pattern] = IntList() # <<<<<<<<<<<<<< @@ -32643,7 +32643,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s } __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":451 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":451 * self.precomputed_collocations[pattern] = IntList() * * cdef float stop_time = monitor_cpu() # <<<<<<<<<<<<<< @@ -32652,7 +32652,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s */ __pyx_v_stop_time = __pyx_f_3_sa_monitor_cpu(); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":452 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":452 * * cdef float stop_time = monitor_cpu() * logger.info("Precomputed collocations for %d patterns out of %d possible (upper bound %d)", num_found_patterns, len(self.precomputed_collocations), x) # <<<<<<<<<<<<<< @@ -32690,7 +32690,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":453 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":453 * cdef float stop_time = monitor_cpu() * logger.info("Precomputed collocations for %d patterns out of %d possible (upper bound %d)", num_found_patterns, len(self.precomputed_collocations), x) * logger.info("Precomputed inverted index for %d patterns ", len(self.precomputed_index)) # <<<<<<<<<<<<<< @@ -32721,7 +32721,7 @@ static PyObject *__pyx_pf_3_sa_14Precomputation_6precompute(struct __pyx_obj_3_s __Pyx_DECREF(((PyObject *)__pyx_t_7)); __pyx_t_7 = 0; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":454 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":454 * logger.info("Precomputed collocations for %d patterns out of %d possible (upper bound %d)", num_found_patterns, len(self.precomputed_collocations), x) * logger.info("Precomputed inverted index for %d patterns ", len(self.precomputed_index)) * logger.info("Precomputation took %f seconds", (stop_time - start_time)) # <<<<<<<<<<<<<< @@ -32807,7 +32807,7 @@ static int __pyx_pw_3_sa_11SuffixArray_1__cinit__(PyObject *__pyx_v_self, PyObje static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__from_binary,&__pyx_n_s__from_text,&__pyx_n_s__side,0}; PyObject* values[3] = {0,0,0}; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":11 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":11 * cdef IntList ha * * def __cinit__(self, from_binary=None, from_text=None, side=None): # <<<<<<<<<<<<<< @@ -32886,7 +32886,7 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":12 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":12 * * def __cinit__(self, from_binary=None, from_text=None, side=None): * self.darray = DataArray() # <<<<<<<<<<<<<< @@ -32901,7 +32901,7 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr __pyx_v_self->darray = ((struct __pyx_obj_3_sa_DataArray *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":13 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":13 * def __cinit__(self, from_binary=None, from_text=None, side=None): * self.darray = DataArray() * self.sa = IntList() # <<<<<<<<<<<<<< @@ -32916,7 +32916,7 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr __pyx_v_self->sa = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":14 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":14 * self.darray = DataArray() * self.sa = IntList() * self.ha = IntList() # <<<<<<<<<<<<<< @@ -32931,7 +32931,7 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr __pyx_v_self->ha = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":15 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":15 * self.sa = IntList() * self.ha = IntList() * if from_binary: # <<<<<<<<<<<<<< @@ -32941,7 +32941,7 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_from_binary); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":16 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":16 * self.ha = IntList() * if from_binary: * self.read_binary(from_binary) # <<<<<<<<<<<<<< @@ -32963,7 +32963,7 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr goto __pyx_L3; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":17 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":17 * if from_binary: * self.read_binary(from_binary) * elif from_text: # <<<<<<<<<<<<<< @@ -32973,7 +32973,7 @@ static int __pyx_pf_3_sa_11SuffixArray___cinit__(struct __pyx_obj_3_sa_SuffixArr __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_from_text); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":18 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":18 * self.read_binary(from_binary) * elif from_text: * self.read_text(from_text, side) # <<<<<<<<<<<<<< @@ -33023,7 +33023,7 @@ static PyObject *__pyx_pw_3_sa_11SuffixArray_3__getitem__(PyObject *__pyx_v_self return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":20 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":20 * self.read_text(from_text, side) * * def __getitem__(self, i): # <<<<<<<<<<<<<< @@ -33041,7 +33041,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_2__getitem__(struct __pyx_obj_3_sa_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__getitem__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":21 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":21 * * def __getitem__(self, i): * return self.sa.arr[i] # <<<<<<<<<<<<<< @@ -33125,7 +33125,7 @@ static PyObject *__pyx_pw_3_sa_11SuffixArray_5read_text(PyObject *__pyx_v_self, return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":23 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":23 * return self.sa.arr[i] * * def read_text(self, filename, side): # <<<<<<<<<<<<<< @@ -33165,7 +33165,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su int __pyx_clineno = 0; __Pyx_RefNannySetupContext("read_text", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":29 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":29 * cdef IntList isa, word_count * * self.darray = DataArray(from_text=filename, side=side, use_sent_id=True) # <<<<<<<<<<<<<< @@ -33189,7 +33189,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_v_self->darray = ((struct __pyx_obj_3_sa_DataArray *)__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":30 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":30 * * self.darray = DataArray(from_text=filename, side=side, use_sent_id=True) * N = len(self.darray) # <<<<<<<<<<<<<< @@ -33202,7 +33202,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_N = __pyx_t_3; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":31 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":31 * self.darray = DataArray(from_text=filename, side=side, use_sent_id=True) * N = len(self.darray) * V = len(self.darray.id2word) # <<<<<<<<<<<<<< @@ -33215,7 +33215,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_V = __pyx_t_3; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":33 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":33 * V = len(self.darray.id2word) * * self.sa = IntList(initial_len=N) # <<<<<<<<<<<<<< @@ -33237,7 +33237,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_v_self->sa = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":34 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":34 * * self.sa = IntList(initial_len=N) * self.ha = IntList(initial_len=V+1) # <<<<<<<<<<<<<< @@ -33259,7 +33259,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_v_self->ha = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":36 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":36 * self.ha = IntList(initial_len=V+1) * * isa = IntList(initial_len=N) # <<<<<<<<<<<<<< @@ -33278,7 +33278,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_v_isa = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":37 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":37 * * isa = IntList(initial_len=N) * word_count = IntList(initial_len=V+1) # <<<<<<<<<<<<<< @@ -33297,7 +33297,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_v_word_count = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":40 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":40 * * '''Step 1: bucket sort data''' * cdef float sort_start_time = monitor_cpu() # <<<<<<<<<<<<<< @@ -33306,7 +33306,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ __pyx_v_sort_start_time = __pyx_f_3_sa_monitor_cpu(); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":41 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":41 * '''Step 1: bucket sort data''' * cdef float sort_start_time = monitor_cpu() * cdef float start_time = sort_start_time # <<<<<<<<<<<<<< @@ -33315,7 +33315,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ __pyx_v_start_time = __pyx_v_sort_start_time; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":42 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":42 * cdef float sort_start_time = monitor_cpu() * cdef float start_time = sort_start_time * for i from 0 <= i < N: # <<<<<<<<<<<<<< @@ -33325,7 +33325,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_t_4 = __pyx_v_N; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_4; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":43 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":43 * cdef float start_time = sort_start_time * for i from 0 <= i < N: * a_i = self.darray.data.arr[i] # <<<<<<<<<<<<<< @@ -33334,7 +33334,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ __pyx_v_a_i = (__pyx_v_self->darray->data->arr[__pyx_v_i]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":44 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":44 * for i from 0 <= i < N: * a_i = self.darray.data.arr[i] * word_count.arr[a_i] = word_count.arr[a_i] + 1 # <<<<<<<<<<<<<< @@ -33344,7 +33344,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su (__pyx_v_word_count->arr[__pyx_v_a_i]) = ((__pyx_v_word_count->arr[__pyx_v_a_i]) + 1); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":46 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":46 * word_count.arr[a_i] = word_count.arr[a_i] + 1 * * n = 0 # <<<<<<<<<<<<<< @@ -33353,7 +33353,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ __pyx_v_n = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":47 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":47 * * n = 0 * for i from 0 <= i < V+1: # <<<<<<<<<<<<<< @@ -33363,7 +33363,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_t_5 = (__pyx_v_V + 1); for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_5; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":48 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":48 * n = 0 * for i from 0 <= i < V+1: * self.ha.arr[i] = n # <<<<<<<<<<<<<< @@ -33372,7 +33372,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ (__pyx_v_self->ha->arr[__pyx_v_i]) = __pyx_v_n; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":49 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":49 * for i from 0 <= i < V+1: * self.ha.arr[i] = n * n = n + word_count.arr[i] # <<<<<<<<<<<<<< @@ -33381,7 +33381,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ __pyx_v_n = (__pyx_v_n + (__pyx_v_word_count->arr[__pyx_v_i])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":50 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":50 * self.ha.arr[i] = n * n = n + word_count.arr[i] * word_count.arr[i] = 0 # <<<<<<<<<<<<<< @@ -33391,7 +33391,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su (__pyx_v_word_count->arr[__pyx_v_i]) = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":52 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":52 * word_count.arr[i] = 0 * * for i from 0 <= i < N: # <<<<<<<<<<<<<< @@ -33401,7 +33401,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_t_4 = __pyx_v_N; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_4; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":53 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":53 * * for i from 0 <= i < N: * a_i = self.darray.data.arr[i] # <<<<<<<<<<<<<< @@ -33410,7 +33410,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ __pyx_v_a_i = (__pyx_v_self->darray->data->arr[__pyx_v_i]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":54 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":54 * for i from 0 <= i < N: * a_i = self.darray.data.arr[i] * self.sa.arr[self.ha.arr[a_i] + word_count.arr[a_i]] = i # <<<<<<<<<<<<<< @@ -33419,7 +33419,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ (__pyx_v_self->sa->arr[((__pyx_v_self->ha->arr[__pyx_v_a_i]) + (__pyx_v_word_count->arr[__pyx_v_a_i]))]) = __pyx_v_i; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":55 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":55 * a_i = self.darray.data.arr[i] * self.sa.arr[self.ha.arr[a_i] + word_count.arr[a_i]] = i * isa.arr[i] = self.ha.arr[a_i + 1] - 1 # bucket pointer is last index in bucket # <<<<<<<<<<<<<< @@ -33428,7 +33428,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ (__pyx_v_isa->arr[__pyx_v_i]) = ((__pyx_v_self->ha->arr[(__pyx_v_a_i + 1)]) - 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":56 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":56 * self.sa.arr[self.ha.arr[a_i] + word_count.arr[a_i]] = i * isa.arr[i] = self.ha.arr[a_i + 1] - 1 # bucket pointer is last index in bucket * word_count.arr[a_i] = word_count.arr[a_i] + 1 # <<<<<<<<<<<<<< @@ -33438,7 +33438,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su (__pyx_v_word_count->arr[__pyx_v_a_i]) = ((__pyx_v_word_count->arr[__pyx_v_a_i]) + 1); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":59 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":59 * * '''Determine size of initial runs''' * current_run = 0 # <<<<<<<<<<<<<< @@ -33447,7 +33447,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ __pyx_v_current_run = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":60 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":60 * '''Determine size of initial runs''' * current_run = 0 * for i from 0 <= i < V+1: # <<<<<<<<<<<<<< @@ -33457,7 +33457,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_t_5 = (__pyx_v_V + 1); for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_5; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":61 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":61 * current_run = 0 * for i from 0 <= i < V+1: * if i < V and self.ha.arr[i+1] - self.ha.arr[i] == 1: # <<<<<<<<<<<<<< @@ -33473,7 +33473,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su } if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":62 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":62 * for i from 0 <= i < V+1: * if i < V and self.ha.arr[i+1] - self.ha.arr[i] == 1: * current_run = current_run + 1 # <<<<<<<<<<<<<< @@ -33485,7 +33485,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":64 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":64 * current_run = current_run + 1 * else: * if current_run > 0: # <<<<<<<<<<<<<< @@ -33495,7 +33495,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_t_8 = (__pyx_v_current_run > 0); if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":65 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":65 * else: * if current_run > 0: * self.sa.arr[self.ha.arr[i] - current_run] = -current_run # <<<<<<<<<<<<<< @@ -33504,7 +33504,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ (__pyx_v_self->sa->arr[((__pyx_v_self->ha->arr[__pyx_v_i]) - __pyx_v_current_run)]) = (-__pyx_v_current_run); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":66 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":66 * if current_run > 0: * self.sa.arr[self.ha.arr[i] - current_run] = -current_run * current_run = 0 # <<<<<<<<<<<<<< @@ -33519,7 +33519,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_L11:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":68 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":68 * current_run = 0 * * logger.info(" Bucket sort took %f seconds", (monitor_cpu() - sort_start_time)) # <<<<<<<<<<<<<< @@ -33547,7 +33547,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __Pyx_DECREF(((PyObject *)__pyx_t_9)); __pyx_t_9 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":71 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":71 * * '''Step 2: prefix-doubling sort''' * h = 1 # <<<<<<<<<<<<<< @@ -33556,7 +33556,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ __pyx_v_h = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":72 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":72 * '''Step 2: prefix-doubling sort''' * h = 1 * while self.sa.arr[0] != -N: # <<<<<<<<<<<<<< @@ -33567,7 +33567,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_t_8 = ((__pyx_v_self->sa->arr[0]) != (-__pyx_v_N)); if (!__pyx_t_8) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":73 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":73 * h = 1 * while self.sa.arr[0] != -N: * sort_start_time = monitor_cpu() # <<<<<<<<<<<<<< @@ -33576,7 +33576,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ __pyx_v_sort_start_time = __pyx_f_3_sa_monitor_cpu(); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":74 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":74 * while self.sa.arr[0] != -N: * sort_start_time = monitor_cpu() * logger.debug(" Refining, sort depth = %d", h) # <<<<<<<<<<<<<< @@ -33604,7 +33604,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":75 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":75 * sort_start_time = monitor_cpu() * logger.debug(" Refining, sort depth = %d", h) * i = 0 # <<<<<<<<<<<<<< @@ -33613,7 +33613,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ __pyx_v_i = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":76 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":76 * logger.debug(" Refining, sort depth = %d", h) * i = 0 * skip = 0 # <<<<<<<<<<<<<< @@ -33622,7 +33622,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ __pyx_v_skip = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":77 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":77 * i = 0 * skip = 0 * while i < N: # <<<<<<<<<<<<<< @@ -33633,7 +33633,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_t_8 = (__pyx_v_i < __pyx_v_N); if (!__pyx_t_8) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":78 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":78 * skip = 0 * while i < N: * if self.sa.arr[i] < 0: # <<<<<<<<<<<<<< @@ -33643,7 +33643,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_t_8 = ((__pyx_v_self->sa->arr[__pyx_v_i]) < 0); if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":79 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":79 * while i < N: * if self.sa.arr[i] < 0: * skip = skip + self.sa.arr[i] # <<<<<<<<<<<<<< @@ -33652,7 +33652,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ __pyx_v_skip = (__pyx_v_skip + (__pyx_v_self->sa->arr[__pyx_v_i])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":80 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":80 * if self.sa.arr[i] < 0: * skip = skip + self.sa.arr[i] * i = i - self.sa.arr[i] # <<<<<<<<<<<<<< @@ -33664,7 +33664,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":82 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":82 * i = i - self.sa.arr[i] * else: * if skip < 0: # <<<<<<<<<<<<<< @@ -33674,7 +33674,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_t_8 = (__pyx_v_skip < 0); if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":83 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":83 * else: * if skip < 0: * self.sa.arr[i+skip] = skip # <<<<<<<<<<<<<< @@ -33683,7 +33683,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ (__pyx_v_self->sa->arr[(__pyx_v_i + __pyx_v_skip)]) = __pyx_v_skip; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":84 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":84 * if skip < 0: * self.sa.arr[i+skip] = skip * skip = 0 # <<<<<<<<<<<<<< @@ -33695,7 +33695,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su } __pyx_L18:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":85 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":85 * self.sa.arr[i+skip] = skip * skip = 0 * j = isa.arr[self.sa.arr[i]] # <<<<<<<<<<<<<< @@ -33704,7 +33704,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ __pyx_v_j = (__pyx_v_isa->arr[(__pyx_v_self->sa->arr[__pyx_v_i])]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":86 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":86 * skip = 0 * j = isa.arr[self.sa.arr[i]] * self.q3sort(i, j, h, isa) # <<<<<<<<<<<<<< @@ -33739,7 +33739,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __Pyx_DECREF(((PyObject *)__pyx_t_11)); __pyx_t_11 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":87 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":87 * j = isa.arr[self.sa.arr[i]] * self.q3sort(i, j, h, isa) * i = j+1 # <<<<<<<<<<<<<< @@ -33751,7 +33751,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_L17:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":88 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":88 * self.q3sort(i, j, h, isa) * i = j+1 * if skip < 0: # <<<<<<<<<<<<<< @@ -33761,7 +33761,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_t_8 = (__pyx_v_skip < 0); if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":89 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":89 * i = j+1 * if skip < 0: * self.sa.arr[i+skip] = skip # <<<<<<<<<<<<<< @@ -33773,7 +33773,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su } __pyx_L19:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":90 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":90 * if skip < 0: * self.sa.arr[i+skip] = skip * h = h * 2 # <<<<<<<<<<<<<< @@ -33782,7 +33782,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ __pyx_v_h = (__pyx_v_h * 2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":91 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":91 * self.sa.arr[i+skip] = skip * h = h * 2 * logger.debug(" Refinement took %f seconds", (monitor_cpu() - sort_start_time)) # <<<<<<<<<<<<<< @@ -33811,7 +33811,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":94 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":94 * * '''Step 3: read off suffix array from inverse suffix array''' * logger.info(" Finalizing sort...") # <<<<<<<<<<<<<< @@ -33828,7 +33828,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":95 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":95 * '''Step 3: read off suffix array from inverse suffix array''' * logger.info(" Finalizing sort...") * for i from 0 <= i < N: # <<<<<<<<<<<<<< @@ -33838,7 +33838,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su __pyx_t_4 = __pyx_v_N; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_4; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":96 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":96 * logger.info(" Finalizing sort...") * for i from 0 <= i < N: * j = isa.arr[i] # <<<<<<<<<<<<<< @@ -33847,7 +33847,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su */ __pyx_v_j = (__pyx_v_isa->arr[__pyx_v_i]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":97 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":97 * for i from 0 <= i < N: * j = isa.arr[i] * self.sa.arr[j] = i # <<<<<<<<<<<<<< @@ -33857,7 +33857,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_4read_text(struct __pyx_obj_3_sa_Su (__pyx_v_self->sa->arr[__pyx_v_j]) = __pyx_v_i; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":98 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":98 * j = isa.arr[i] * self.sa.arr[j] = i * logger.info("Suffix array construction took %f seconds", (monitor_cpu() - start_time)) # <<<<<<<<<<<<<< @@ -33995,7 +33995,7 @@ static PyObject *__pyx_pw_3_sa_11SuffixArray_7q3sort(PyObject *__pyx_v_self, PyO return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":100 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":100 * logger.info("Suffix array construction took %f seconds", (monitor_cpu() - start_time)) * * def q3sort(self, int i, int j, int h, IntList isa, pad=""): # <<<<<<<<<<<<<< @@ -34025,7 +34025,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi int __pyx_clineno = 0; __Pyx_RefNannySetupContext("q3sort", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":107 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":107 * cdef int k, midpoint, pval, phead, ptail, tmp * * if j-i < -1: # <<<<<<<<<<<<<< @@ -34035,7 +34035,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi __pyx_t_1 = ((__pyx_v_j - __pyx_v_i) < -1); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":108 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":108 * * if j-i < -1: * raise Exception("Unexpected condition found in q3sort: sort from %d to %d" % (i,j)) # <<<<<<<<<<<<<< @@ -34072,7 +34072,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":109 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":109 * if j-i < -1: * raise Exception("Unexpected condition found in q3sort: sort from %d to %d" % (i,j)) * if j-i == -1: # recursive base case -- empty interval # <<<<<<<<<<<<<< @@ -34082,7 +34082,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi __pyx_t_1 = ((__pyx_v_j - __pyx_v_i) == -1); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":110 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":110 * raise Exception("Unexpected condition found in q3sort: sort from %d to %d" % (i,j)) * if j-i == -1: # recursive base case -- empty interval * return # <<<<<<<<<<<<<< @@ -34096,7 +34096,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":111 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":111 * if j-i == -1: # recursive base case -- empty interval * return * if (j-i == 0): # recursive base case -- singleton interval # <<<<<<<<<<<<<< @@ -34106,7 +34106,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi __pyx_t_1 = ((__pyx_v_j - __pyx_v_i) == 0); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":112 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":112 * return * if (j-i == 0): # recursive base case -- singleton interval * isa.arr[self.sa.arr[i]] = i # <<<<<<<<<<<<<< @@ -34115,7 +34115,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi */ (__pyx_v_isa->arr[(__pyx_v_self->sa->arr[__pyx_v_i])]) = __pyx_v_i; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":113 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":113 * if (j-i == 0): # recursive base case -- singleton interval * isa.arr[self.sa.arr[i]] = i * self.sa.arr[i] = -1 # <<<<<<<<<<<<<< @@ -34124,7 +34124,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi */ (__pyx_v_self->sa->arr[__pyx_v_i]) = -1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":114 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":114 * isa.arr[self.sa.arr[i]] = i * self.sa.arr[i] = -1 * return # <<<<<<<<<<<<<< @@ -34138,7 +34138,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":123 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":123 * # If the method of assigning word_id's is changed, this method * # may need to be reconsidered as well. * midpoint = (i+j)/2 # <<<<<<<<<<<<<< @@ -34147,7 +34147,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi */ __pyx_v_midpoint = __Pyx_div_long((__pyx_v_i + __pyx_v_j), 2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":124 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":124 * # may need to be reconsidered as well. * midpoint = (i+j)/2 * pval = isa.arr[self.sa.arr[midpoint] + h] # <<<<<<<<<<<<<< @@ -34156,7 +34156,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi */ __pyx_v_pval = (__pyx_v_isa->arr[((__pyx_v_self->sa->arr[__pyx_v_midpoint]) + __pyx_v_h)]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":125 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":125 * midpoint = (i+j)/2 * pval = isa.arr[self.sa.arr[midpoint] + h] * if i != midpoint: # <<<<<<<<<<<<<< @@ -34166,7 +34166,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi __pyx_t_1 = (__pyx_v_i != __pyx_v_midpoint); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":126 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":126 * pval = isa.arr[self.sa.arr[midpoint] + h] * if i != midpoint: * tmp = self.sa.arr[midpoint] # <<<<<<<<<<<<<< @@ -34175,7 +34175,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi */ __pyx_v_tmp = (__pyx_v_self->sa->arr[__pyx_v_midpoint]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":127 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":127 * if i != midpoint: * tmp = self.sa.arr[midpoint] * self.sa.arr[midpoint] = self.sa.arr[i] # <<<<<<<<<<<<<< @@ -34184,7 +34184,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi */ (__pyx_v_self->sa->arr[__pyx_v_midpoint]) = (__pyx_v_self->sa->arr[__pyx_v_i]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":128 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":128 * tmp = self.sa.arr[midpoint] * self.sa.arr[midpoint] = self.sa.arr[i] * self.sa.arr[i] = tmp # <<<<<<<<<<<<<< @@ -34196,7 +34196,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi } __pyx_L6:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":129 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":129 * self.sa.arr[midpoint] = self.sa.arr[i] * self.sa.arr[i] = tmp * phead = i # <<<<<<<<<<<<<< @@ -34205,7 +34205,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi */ __pyx_v_phead = __pyx_v_i; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":130 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":130 * self.sa.arr[i] = tmp * phead = i * ptail = i # <<<<<<<<<<<<<< @@ -34214,7 +34214,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi */ __pyx_v_ptail = __pyx_v_i; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":134 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":134 * # find the three partitions. phead marks the first element * # of the middle partition, and ptail marks the last element * for k from i+1 <= k < j+1: # <<<<<<<<<<<<<< @@ -34224,7 +34224,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi __pyx_t_5 = (__pyx_v_j + 1); for (__pyx_v_k = (__pyx_v_i + 1); __pyx_v_k < __pyx_t_5; __pyx_v_k++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":135 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":135 * # of the middle partition, and ptail marks the last element * for k from i+1 <= k < j+1: * if isa.arr[self.sa.arr[k] + h] < pval: # <<<<<<<<<<<<<< @@ -34234,7 +34234,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi __pyx_t_1 = ((__pyx_v_isa->arr[((__pyx_v_self->sa->arr[__pyx_v_k]) + __pyx_v_h)]) < __pyx_v_pval); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":136 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":136 * for k from i+1 <= k < j+1: * if isa.arr[self.sa.arr[k] + h] < pval: * if k > ptail+1: # <<<<<<<<<<<<<< @@ -34244,7 +34244,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi __pyx_t_1 = (__pyx_v_k > (__pyx_v_ptail + 1)); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":137 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":137 * if isa.arr[self.sa.arr[k] + h] < pval: * if k > ptail+1: * tmp = self.sa.arr[phead] # <<<<<<<<<<<<<< @@ -34253,7 +34253,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi */ __pyx_v_tmp = (__pyx_v_self->sa->arr[__pyx_v_phead]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":138 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":138 * if k > ptail+1: * tmp = self.sa.arr[phead] * self.sa.arr[phead] = self.sa.arr[k] # <<<<<<<<<<<<<< @@ -34262,7 +34262,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi */ (__pyx_v_self->sa->arr[__pyx_v_phead]) = (__pyx_v_self->sa->arr[__pyx_v_k]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":139 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":139 * tmp = self.sa.arr[phead] * self.sa.arr[phead] = self.sa.arr[k] * self.sa.arr[k] = self.sa.arr[ptail+1] # <<<<<<<<<<<<<< @@ -34271,7 +34271,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi */ (__pyx_v_self->sa->arr[__pyx_v_k]) = (__pyx_v_self->sa->arr[(__pyx_v_ptail + 1)]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":140 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":140 * self.sa.arr[phead] = self.sa.arr[k] * self.sa.arr[k] = self.sa.arr[ptail+1] * self.sa.arr[ptail+1] = tmp # <<<<<<<<<<<<<< @@ -34283,7 +34283,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":142 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":142 * self.sa.arr[ptail+1] = tmp * else: # k == ptail+1 * tmp = self.sa.arr[phead] # <<<<<<<<<<<<<< @@ -34292,7 +34292,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi */ __pyx_v_tmp = (__pyx_v_self->sa->arr[__pyx_v_phead]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":143 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":143 * else: # k == ptail+1 * tmp = self.sa.arr[phead] * self.sa.arr[phead] = self.sa.arr[k] # <<<<<<<<<<<<<< @@ -34301,7 +34301,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi */ (__pyx_v_self->sa->arr[__pyx_v_phead]) = (__pyx_v_self->sa->arr[__pyx_v_k]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":144 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":144 * tmp = self.sa.arr[phead] * self.sa.arr[phead] = self.sa.arr[k] * self.sa.arr[k] = tmp # <<<<<<<<<<<<<< @@ -34312,7 +34312,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi } __pyx_L10:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":145 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":145 * self.sa.arr[phead] = self.sa.arr[k] * self.sa.arr[k] = tmp * phead = phead + 1 # <<<<<<<<<<<<<< @@ -34321,7 +34321,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi */ __pyx_v_phead = (__pyx_v_phead + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":146 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":146 * self.sa.arr[k] = tmp * phead = phead + 1 * ptail = ptail + 1 # <<<<<<<<<<<<<< @@ -34333,7 +34333,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":148 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":148 * ptail = ptail + 1 * else: * if isa.arr[self.sa.arr[k] + h] == pval: # <<<<<<<<<<<<<< @@ -34343,7 +34343,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi __pyx_t_1 = ((__pyx_v_isa->arr[((__pyx_v_self->sa->arr[__pyx_v_k]) + __pyx_v_h)]) == __pyx_v_pval); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":149 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":149 * else: * if isa.arr[self.sa.arr[k] + h] == pval: * if k > ptail+1: # <<<<<<<<<<<<<< @@ -34353,7 +34353,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi __pyx_t_1 = (__pyx_v_k > (__pyx_v_ptail + 1)); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":150 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":150 * if isa.arr[self.sa.arr[k] + h] == pval: * if k > ptail+1: * tmp = self.sa.arr[ptail+1] # <<<<<<<<<<<<<< @@ -34362,7 +34362,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi */ __pyx_v_tmp = (__pyx_v_self->sa->arr[(__pyx_v_ptail + 1)]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":151 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":151 * if k > ptail+1: * tmp = self.sa.arr[ptail+1] * self.sa.arr[ptail+1] = self.sa.arr[k] # <<<<<<<<<<<<<< @@ -34371,7 +34371,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi */ (__pyx_v_self->sa->arr[(__pyx_v_ptail + 1)]) = (__pyx_v_self->sa->arr[__pyx_v_k]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":152 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":152 * tmp = self.sa.arr[ptail+1] * self.sa.arr[ptail+1] = self.sa.arr[k] * self.sa.arr[k] = tmp # <<<<<<<<<<<<<< @@ -34383,7 +34383,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi } __pyx_L12:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":153 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":153 * self.sa.arr[ptail+1] = self.sa.arr[k] * self.sa.arr[k] = tmp * ptail = ptail + 1 # <<<<<<<<<<<<<< @@ -34398,7 +34398,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi __pyx_L9:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":156 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":156 * * # recursively sort smaller suffixes * self.q3sort(i, phead-1, h, isa, pad+" ") # <<<<<<<<<<<<<< @@ -34438,7 +34438,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi __Pyx_DECREF(((PyObject *)__pyx_t_8)); __pyx_t_8 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":160 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":160 * # update suffixes with pivot value * # corresponds to update_group function in Larsson & Sadakane * for k from phead <= k < ptail+1: # <<<<<<<<<<<<<< @@ -34448,7 +34448,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi __pyx_t_5 = (__pyx_v_ptail + 1); for (__pyx_v_k = __pyx_v_phead; __pyx_v_k < __pyx_t_5; __pyx_v_k++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":161 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":161 * # corresponds to update_group function in Larsson & Sadakane * for k from phead <= k < ptail+1: * isa.arr[self.sa.arr[k]] = ptail # <<<<<<<<<<<<<< @@ -34458,7 +34458,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi (__pyx_v_isa->arr[(__pyx_v_self->sa->arr[__pyx_v_k])]) = __pyx_v_ptail; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":162 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":162 * for k from phead <= k < ptail+1: * isa.arr[self.sa.arr[k]] = ptail * if phead == ptail: # <<<<<<<<<<<<<< @@ -34468,7 +34468,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi __pyx_t_1 = (__pyx_v_phead == __pyx_v_ptail); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":163 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":163 * isa.arr[self.sa.arr[k]] = ptail * if phead == ptail: * self.sa.arr[phead] = -1 # <<<<<<<<<<<<<< @@ -34480,7 +34480,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_6q3sort(struct __pyx_obj_3_sa_Suffi } __pyx_L15:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":166 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":166 * * # recursively sort larger suffixes * self.q3sort(ptail+1, j, h, isa, pad+" ") # <<<<<<<<<<<<<< @@ -34558,7 +34558,7 @@ static PyObject *__pyx_pw_3_sa_11SuffixArray_9write_text(PyObject *__pyx_v_self, return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":169 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":169 * * * def write_text(self, char* filename): # <<<<<<<<<<<<<< @@ -34577,7 +34577,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_8write_text(struct __pyx_obj_3_sa_S int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_text", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":170 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":170 * * def write_text(self, char* filename): * self.darray.write_text(filename) # <<<<<<<<<<<<<< @@ -34634,7 +34634,7 @@ static PyObject *__pyx_pw_3_sa_11SuffixArray_11read_binary(PyObject *__pyx_v_sel return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":172 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":172 * self.darray.write_text(filename) * * def read_binary(self, char* filename): # <<<<<<<<<<<<<< @@ -34648,7 +34648,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_binary(struct __pyx_obj_3_sa __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("read_binary", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":174 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":174 * def read_binary(self, char* filename): * cdef FILE *f * f = fopen(filename, "r") # <<<<<<<<<<<<<< @@ -34657,7 +34657,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_binary(struct __pyx_obj_3_sa */ __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__r); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":175 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":175 * cdef FILE *f * f = fopen(filename, "r") * self.darray.read_handle(f) # <<<<<<<<<<<<<< @@ -34666,7 +34666,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_binary(struct __pyx_obj_3_sa */ ((struct __pyx_vtabstruct_3_sa_DataArray *)__pyx_v_self->darray->__pyx_vtab)->read_handle(__pyx_v_self->darray, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":176 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":176 * f = fopen(filename, "r") * self.darray.read_handle(f) * self.sa.read_handle(f) # <<<<<<<<<<<<<< @@ -34675,7 +34675,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_binary(struct __pyx_obj_3_sa */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->sa->__pyx_vtab)->read_handle(__pyx_v_self->sa, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":177 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":177 * self.darray.read_handle(f) * self.sa.read_handle(f) * self.ha.read_handle(f) # <<<<<<<<<<<<<< @@ -34684,7 +34684,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_10read_binary(struct __pyx_obj_3_sa */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->ha->__pyx_vtab)->read_handle(__pyx_v_self->ha, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":178 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":178 * self.sa.read_handle(f) * self.ha.read_handle(f) * fclose(f) # <<<<<<<<<<<<<< @@ -34720,7 +34720,7 @@ static PyObject *__pyx_pw_3_sa_11SuffixArray_13write_binary(PyObject *__pyx_v_se return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":180 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":180 * fclose(f) * * def write_binary(self, char* filename): # <<<<<<<<<<<<<< @@ -34734,7 +34734,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_12write_binary(struct __pyx_obj_3_s __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("write_binary", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":182 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":182 * def write_binary(self, char* filename): * cdef FILE* f * f = fopen(filename, "w") # <<<<<<<<<<<<<< @@ -34743,7 +34743,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_12write_binary(struct __pyx_obj_3_s */ __pyx_v_f = fopen(__pyx_v_filename, __pyx_k__w); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":183 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":183 * cdef FILE* f * f = fopen(filename, "w") * self.darray.write_handle(f) # <<<<<<<<<<<<<< @@ -34752,7 +34752,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_12write_binary(struct __pyx_obj_3_s */ ((struct __pyx_vtabstruct_3_sa_DataArray *)__pyx_v_self->darray->__pyx_vtab)->write_handle(__pyx_v_self->darray, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":184 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":184 * f = fopen(filename, "w") * self.darray.write_handle(f) * self.sa.write_handle(f) # <<<<<<<<<<<<<< @@ -34761,7 +34761,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_12write_binary(struct __pyx_obj_3_s */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->sa->__pyx_vtab)->write_handle(__pyx_v_self->sa, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":185 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":185 * self.darray.write_handle(f) * self.sa.write_handle(f) * self.ha.write_handle(f) # <<<<<<<<<<<<<< @@ -34770,7 +34770,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_12write_binary(struct __pyx_obj_3_s */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_self->ha->__pyx_vtab)->write_handle(__pyx_v_self->ha, __pyx_v_f); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":186 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":186 * self.sa.write_handle(f) * self.ha.write_handle(f) * fclose(f) # <<<<<<<<<<<<<< @@ -34806,7 +34806,7 @@ static PyObject *__pyx_pw_3_sa_11SuffixArray_15write_enhanced(PyObject *__pyx_v_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":188 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":188 * fclose(f) * * def write_enhanced(self, char* filename): # <<<<<<<<<<<<<< @@ -34838,7 +34838,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_14write_enhanced(struct __pyx_obj_3 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("write_enhanced", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":189 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":189 * * def write_enhanced(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -34878,7 +34878,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_14write_enhanced(struct __pyx_obj_3 __pyx_v_f = __pyx_t_4; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":190 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":190 * def write_enhanced(self, char* filename): * with open(filename, "w") as f: * self.darray.write_enhanced_handle(f) # <<<<<<<<<<<<<< @@ -34898,7 +34898,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_14write_enhanced(struct __pyx_obj_3 __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":191 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":191 * with open(filename, "w") as f: * self.darray.write_enhanced_handle(f) * for a_i in self.sa: # <<<<<<<<<<<<<< @@ -34943,7 +34943,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_14write_enhanced(struct __pyx_obj_3 __pyx_v_a_i = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":192 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":192 * self.darray.write_enhanced_handle(f) * for a_i in self.sa: * f.write("%d " % a_i) # <<<<<<<<<<<<<< @@ -34967,7 +34967,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_14write_enhanced(struct __pyx_obj_3 } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":193 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":193 * for a_i in self.sa: * f.write("%d " % a_i) * f.write("\n") # <<<<<<<<<<<<<< @@ -34981,7 +34981,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_14write_enhanced(struct __pyx_obj_3 __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":194 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":194 * f.write("%d " % a_i) * f.write("\n") * for w_i in self.ha: # <<<<<<<<<<<<<< @@ -35026,7 +35026,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_14write_enhanced(struct __pyx_obj_3 __pyx_v_w_i = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":195 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":195 * f.write("\n") * for w_i in self.ha: * f.write("%d " % w_i) # <<<<<<<<<<<<<< @@ -35050,7 +35050,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_14write_enhanced(struct __pyx_obj_3 } __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":196 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":196 * for w_i in self.ha: * f.write("%d " % w_i) * f.write("\n") # <<<<<<<<<<<<<< @@ -35074,7 +35074,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_14write_enhanced(struct __pyx_obj_3 __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_XDECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":189 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":189 * * def write_enhanced(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -35172,7 +35172,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_14write_enhanced(struct __pyx_obj_3 return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":198 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":198 * f.write("\n") * * cdef int __search_high(self, int word_id, int offset, int low, int high): # <<<<<<<<<<<<<< @@ -35187,7 +35187,7 @@ static int __pyx_f_3_sa_11SuffixArray___search_high(struct __pyx_obj_3_sa_Suffix int __pyx_t_1; __Pyx_RefNannySetupContext("__search_high", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":201 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":201 * cdef int midpoint * * if low >= high: # <<<<<<<<<<<<<< @@ -35197,7 +35197,7 @@ static int __pyx_f_3_sa_11SuffixArray___search_high(struct __pyx_obj_3_sa_Suffix __pyx_t_1 = (__pyx_v_low >= __pyx_v_high); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":202 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":202 * * if low >= high: * return high # <<<<<<<<<<<<<< @@ -35210,7 +35210,7 @@ static int __pyx_f_3_sa_11SuffixArray___search_high(struct __pyx_obj_3_sa_Suffix } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":203 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":203 * if low >= high: * return high * midpoint = (high + low) / 2 # <<<<<<<<<<<<<< @@ -35219,7 +35219,7 @@ static int __pyx_f_3_sa_11SuffixArray___search_high(struct __pyx_obj_3_sa_Suffix */ __pyx_v_midpoint = __Pyx_div_long((__pyx_v_high + __pyx_v_low), 2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":204 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":204 * return high * midpoint = (high + low) / 2 * if self.darray.data.arr[self.sa.arr[midpoint] + offset] == word_id: # <<<<<<<<<<<<<< @@ -35229,7 +35229,7 @@ static int __pyx_f_3_sa_11SuffixArray___search_high(struct __pyx_obj_3_sa_Suffix __pyx_t_1 = ((__pyx_v_self->darray->data->arr[((__pyx_v_self->sa->arr[__pyx_v_midpoint]) + __pyx_v_offset)]) == __pyx_v_word_id); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":205 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":205 * midpoint = (high + low) / 2 * if self.darray.data.arr[self.sa.arr[midpoint] + offset] == word_id: * return self.__search_high(word_id, offset, midpoint+1, high) # <<<<<<<<<<<<<< @@ -35242,7 +35242,7 @@ static int __pyx_f_3_sa_11SuffixArray___search_high(struct __pyx_obj_3_sa_Suffix } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":207 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":207 * return self.__search_high(word_id, offset, midpoint+1, high) * else: * return self.__search_high(word_id, offset, low, midpoint) # <<<<<<<<<<<<<< @@ -35260,7 +35260,7 @@ static int __pyx_f_3_sa_11SuffixArray___search_high(struct __pyx_obj_3_sa_Suffix return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":209 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":209 * return self.__search_high(word_id, offset, low, midpoint) * * cdef int __search_low(self, int word_id, int offset, int low, int high): # <<<<<<<<<<<<<< @@ -35275,7 +35275,7 @@ static int __pyx_f_3_sa_11SuffixArray___search_low(struct __pyx_obj_3_sa_SuffixA int __pyx_t_1; __Pyx_RefNannySetupContext("__search_low", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":212 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":212 * cdef int midpoint * * if low >= high: # <<<<<<<<<<<<<< @@ -35285,7 +35285,7 @@ static int __pyx_f_3_sa_11SuffixArray___search_low(struct __pyx_obj_3_sa_SuffixA __pyx_t_1 = (__pyx_v_low >= __pyx_v_high); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":213 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":213 * * if low >= high: * return high # <<<<<<<<<<<<<< @@ -35298,7 +35298,7 @@ static int __pyx_f_3_sa_11SuffixArray___search_low(struct __pyx_obj_3_sa_SuffixA } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":214 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":214 * if low >= high: * return high * midpoint = (high + low) / 2 # <<<<<<<<<<<<<< @@ -35307,7 +35307,7 @@ static int __pyx_f_3_sa_11SuffixArray___search_low(struct __pyx_obj_3_sa_SuffixA */ __pyx_v_midpoint = __Pyx_div_long((__pyx_v_high + __pyx_v_low), 2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":215 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":215 * return high * midpoint = (high + low) / 2 * if self.darray.data.arr[self.sa.arr[midpoint] + offset] == word_id: # <<<<<<<<<<<<<< @@ -35317,7 +35317,7 @@ static int __pyx_f_3_sa_11SuffixArray___search_low(struct __pyx_obj_3_sa_SuffixA __pyx_t_1 = ((__pyx_v_self->darray->data->arr[((__pyx_v_self->sa->arr[__pyx_v_midpoint]) + __pyx_v_offset)]) == __pyx_v_word_id); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":216 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":216 * midpoint = (high + low) / 2 * if self.darray.data.arr[self.sa.arr[midpoint] + offset] == word_id: * return self.__search_low(word_id, offset, low, midpoint) # <<<<<<<<<<<<<< @@ -35330,7 +35330,7 @@ static int __pyx_f_3_sa_11SuffixArray___search_low(struct __pyx_obj_3_sa_SuffixA } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":218 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":218 * return self.__search_low(word_id, offset, low, midpoint) * else: * return self.__search_low(word_id, offset, midpoint+1, high) # <<<<<<<<<<<<<< @@ -35348,7 +35348,7 @@ static int __pyx_f_3_sa_11SuffixArray___search_low(struct __pyx_obj_3_sa_SuffixA return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":220 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":220 * return self.__search_low(word_id, offset, midpoint+1, high) * * cdef __get_range(self, int word_id, int offset, int low, int high, int midpoint): # <<<<<<<<<<<<<< @@ -35367,7 +35367,7 @@ static PyObject *__pyx_f_3_sa_11SuffixArray___get_range(struct __pyx_obj_3_sa_Su int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get_range", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":221 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":221 * * cdef __get_range(self, int word_id, int offset, int low, int high, int midpoint): * return (self.__search_low(word_id, offset, low, midpoint), # <<<<<<<<<<<<<< @@ -35378,7 +35378,7 @@ static PyObject *__pyx_f_3_sa_11SuffixArray___get_range(struct __pyx_obj_3_sa_Su __pyx_t_1 = PyInt_FromLong(((struct __pyx_vtabstruct_3_sa_SuffixArray *)__pyx_v_self->__pyx_vtab)->__pyx___search_low(__pyx_v_self, __pyx_v_word_id, __pyx_v_offset, __pyx_v_low, __pyx_v_midpoint)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 221; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":222 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":222 * cdef __get_range(self, int word_id, int offset, int low, int high, int midpoint): * return (self.__search_low(word_id, offset, low, midpoint), * self.__search_high(word_id, offset, midpoint, high)) # <<<<<<<<<<<<<< @@ -35413,7 +35413,7 @@ static PyObject *__pyx_f_3_sa_11SuffixArray___get_range(struct __pyx_obj_3_sa_Su return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":224 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":224 * self.__search_high(word_id, offset, midpoint, high)) * * cdef __lookup_helper(self, int word_id, int offset, int low, int high): # <<<<<<<<<<<<<< @@ -35434,7 +35434,7 @@ static PyObject *__pyx_f_3_sa_11SuffixArray___lookup_helper(struct __pyx_obj_3_s int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__lookup_helper", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":227 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":227 * cdef int midpoint * * if offset == 0: # <<<<<<<<<<<<<< @@ -35444,7 +35444,7 @@ static PyObject *__pyx_f_3_sa_11SuffixArray___lookup_helper(struct __pyx_obj_3_s __pyx_t_1 = (__pyx_v_offset == 0); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":228 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":228 * * if offset == 0: * return (self.ha.arr[word_id], self.ha.arr[word_id+1]) # <<<<<<<<<<<<<< @@ -35471,7 +35471,7 @@ static PyObject *__pyx_f_3_sa_11SuffixArray___lookup_helper(struct __pyx_obj_3_s } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":229 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":229 * if offset == 0: * return (self.ha.arr[word_id], self.ha.arr[word_id+1]) * if low >= high: # <<<<<<<<<<<<<< @@ -35481,7 +35481,7 @@ static PyObject *__pyx_f_3_sa_11SuffixArray___lookup_helper(struct __pyx_obj_3_s __pyx_t_1 = (__pyx_v_low >= __pyx_v_high); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":230 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":230 * return (self.ha.arr[word_id], self.ha.arr[word_id+1]) * if low >= high: * return None # <<<<<<<<<<<<<< @@ -35496,7 +35496,7 @@ static PyObject *__pyx_f_3_sa_11SuffixArray___lookup_helper(struct __pyx_obj_3_s } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":232 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":232 * return None * * midpoint = (high + low) / 2 # <<<<<<<<<<<<<< @@ -35505,7 +35505,7 @@ static PyObject *__pyx_f_3_sa_11SuffixArray___lookup_helper(struct __pyx_obj_3_s */ __pyx_v_midpoint = __Pyx_div_long((__pyx_v_high + __pyx_v_low), 2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":233 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":233 * * midpoint = (high + low) / 2 * if self.darray.data.arr[self.sa.arr[midpoint] + offset] == word_id: # <<<<<<<<<<<<<< @@ -35515,7 +35515,7 @@ static PyObject *__pyx_f_3_sa_11SuffixArray___lookup_helper(struct __pyx_obj_3_s __pyx_t_1 = ((__pyx_v_self->darray->data->arr[((__pyx_v_self->sa->arr[__pyx_v_midpoint]) + __pyx_v_offset)]) == __pyx_v_word_id); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":234 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":234 * midpoint = (high + low) / 2 * if self.darray.data.arr[self.sa.arr[midpoint] + offset] == word_id: * return self.__get_range(word_id, offset, low, high, midpoint) # <<<<<<<<<<<<<< @@ -35532,7 +35532,7 @@ static PyObject *__pyx_f_3_sa_11SuffixArray___lookup_helper(struct __pyx_obj_3_s } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":235 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":235 * if self.darray.data.arr[self.sa.arr[midpoint] + offset] == word_id: * return self.__get_range(word_id, offset, low, high, midpoint) * if self.darray.data.arr[self.sa.arr[midpoint] + offset] > word_id: # <<<<<<<<<<<<<< @@ -35542,7 +35542,7 @@ static PyObject *__pyx_f_3_sa_11SuffixArray___lookup_helper(struct __pyx_obj_3_s __pyx_t_1 = ((__pyx_v_self->darray->data->arr[((__pyx_v_self->sa->arr[__pyx_v_midpoint]) + __pyx_v_offset)]) > __pyx_v_word_id); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":236 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":236 * return self.__get_range(word_id, offset, low, high, midpoint) * if self.darray.data.arr[self.sa.arr[midpoint] + offset] > word_id: * return self.__lookup_helper(word_id, offset, low, midpoint) # <<<<<<<<<<<<<< @@ -35559,7 +35559,7 @@ static PyObject *__pyx_f_3_sa_11SuffixArray___lookup_helper(struct __pyx_obj_3_s } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":238 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":238 * return self.__lookup_helper(word_id, offset, low, midpoint) * else: * return self.__lookup_helper(word_id, offset, midpoint+1, high) # <<<<<<<<<<<<<< @@ -35663,7 +35663,7 @@ static PyObject *__pyx_pw_3_sa_11SuffixArray_17lookup(PyObject *__pyx_v_self, Py return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":240 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":240 * return self.__lookup_helper(word_id, offset, midpoint+1, high) * * def lookup(self, word, int offset, int low, int high): # <<<<<<<<<<<<<< @@ -35684,7 +35684,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_16lookup(struct __pyx_obj_3_sa_Suff int __pyx_clineno = 0; __Pyx_RefNannySetupContext("lookup", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":242 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":242 * def lookup(self, word, int offset, int low, int high): * cdef int wordid * if low == -1: # <<<<<<<<<<<<<< @@ -35694,7 +35694,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_16lookup(struct __pyx_obj_3_sa_Suff __pyx_t_1 = (__pyx_v_low == -1); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":243 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":243 * cdef int wordid * if low == -1: * low = 0 # <<<<<<<<<<<<<< @@ -35706,7 +35706,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_16lookup(struct __pyx_obj_3_sa_Suff } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":244 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":244 * if low == -1: * low = 0 * if high == -1: # <<<<<<<<<<<<<< @@ -35716,7 +35716,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_16lookup(struct __pyx_obj_3_sa_Suff __pyx_t_1 = (__pyx_v_high == -1); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":245 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":245 * low = 0 * if high == -1: * high = len(self.sa) # <<<<<<<<<<<<<< @@ -35732,7 +35732,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_16lookup(struct __pyx_obj_3_sa_Suff } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":246 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":246 * if high == -1: * high = len(self.sa) * if word in self.darray.word2id: # <<<<<<<<<<<<<< @@ -35742,7 +35742,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_16lookup(struct __pyx_obj_3_sa_Suff __pyx_t_1 = (__Pyx_PySequence_Contains(__pyx_v_word, __pyx_v_self->darray->word2id, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[12]; __pyx_lineno = 246; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":247 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":247 * high = len(self.sa) * if word in self.darray.word2id: * word_id = self.darray.word2id[word] # <<<<<<<<<<<<<< @@ -35754,7 +35754,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_16lookup(struct __pyx_obj_3_sa_Suff __pyx_v_word_id = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":248 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":248 * if word in self.darray.word2id: * word_id = self.darray.word2id[word] * return self.__lookup_helper(word_id, offset, low, high) # <<<<<<<<<<<<<< @@ -35772,7 +35772,7 @@ static PyObject *__pyx_pf_3_sa_11SuffixArray_16lookup(struct __pyx_obj_3_sa_Suff } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":250 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":250 * return self.__lookup_helper(word_id, offset, low, high) * else: * return None # <<<<<<<<<<<<<< @@ -35811,7 +35811,7 @@ static int __pyx_pw_3_sa_8TrieNode_1__cinit__(PyObject *__pyx_v_self, PyObject * return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":39 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":39 * cdef public children * * def __cinit__(self): # <<<<<<<<<<<<<< @@ -35828,7 +35828,7 @@ static int __pyx_pf_3_sa_8TrieNode___cinit__(struct __pyx_obj_3_sa_TrieNode *__p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":40 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":40 * * def __cinit__(self): * self.children = {} # <<<<<<<<<<<<<< @@ -35865,7 +35865,7 @@ static PyObject *__pyx_pw_3_sa_8TrieNode_8children_1__get__(PyObject *__pyx_v_se return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":37 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":37 * * cdef class TrieNode: * cdef public children # <<<<<<<<<<<<<< @@ -35954,7 +35954,7 @@ static int __pyx_pw_3_sa_16ExtendedTrieNode_1__cinit__(PyObject *__pyx_v_self, P static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__phrase,&__pyx_n_s__phrase_location,&__pyx_n_s__suffix_link,0}; PyObject* values[3] = {0,0,0}; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":47 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":47 * cdef public suffix_link * * def __cinit__(self, phrase=None, phrase_location=None, suffix_link=None): # <<<<<<<<<<<<<< @@ -36026,7 +36026,7 @@ static int __pyx_pf_3_sa_16ExtendedTrieNode___cinit__(struct __pyx_obj_3_sa_Exte __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":48 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":48 * * def __cinit__(self, phrase=None, phrase_location=None, suffix_link=None): * self.phrase = phrase # <<<<<<<<<<<<<< @@ -36039,7 +36039,7 @@ static int __pyx_pf_3_sa_16ExtendedTrieNode___cinit__(struct __pyx_obj_3_sa_Exte __Pyx_DECREF(__pyx_v_self->phrase); __pyx_v_self->phrase = __pyx_v_phrase; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":49 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":49 * def __cinit__(self, phrase=None, phrase_location=None, suffix_link=None): * self.phrase = phrase * self.phrase_location = phrase_location # <<<<<<<<<<<<<< @@ -36052,7 +36052,7 @@ static int __pyx_pf_3_sa_16ExtendedTrieNode___cinit__(struct __pyx_obj_3_sa_Exte __Pyx_DECREF(__pyx_v_self->phrase_location); __pyx_v_self->phrase_location = __pyx_v_phrase_location; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":50 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":50 * self.phrase = phrase * self.phrase_location = phrase_location * self.suffix_link = suffix_link # <<<<<<<<<<<<<< @@ -36081,7 +36081,7 @@ static PyObject *__pyx_pw_3_sa_16ExtendedTrieNode_6phrase_1__get__(PyObject *__p return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":43 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":43 * * cdef class ExtendedTrieNode(TrieNode): * cdef public phrase # <<<<<<<<<<<<<< @@ -36168,7 +36168,7 @@ static PyObject *__pyx_pw_3_sa_16ExtendedTrieNode_15phrase_location_1__get__(PyO return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":44 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":44 * cdef class ExtendedTrieNode(TrieNode): * cdef public phrase * cdef public phrase_location # <<<<<<<<<<<<<< @@ -36255,7 +36255,7 @@ static PyObject *__pyx_pw_3_sa_16ExtendedTrieNode_11suffix_link_1__get__(PyObjec return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":45 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":45 * cdef public phrase * cdef public phrase_location * cdef public suffix_link # <<<<<<<<<<<<<< @@ -36383,7 +36383,7 @@ static int __pyx_pw_3_sa_9TrieTable_1__cinit__(PyObject *__pyx_v_self, PyObject return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":57 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":57 * cdef public int count * cdef public root * def __cinit__(self, extended=False): # <<<<<<<<<<<<<< @@ -36402,7 +36402,7 @@ static int __pyx_pf_3_sa_9TrieTable___cinit__(struct __pyx_obj_3_sa_TrieTable *_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":58 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":58 * cdef public root * def __cinit__(self, extended=False): * self.count = 0 # <<<<<<<<<<<<<< @@ -36411,7 +36411,7 @@ static int __pyx_pf_3_sa_9TrieTable___cinit__(struct __pyx_obj_3_sa_TrieTable *_ */ __pyx_v_self->count = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":59 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":59 * def __cinit__(self, extended=False): * self.count = 0 * self.extended = extended # <<<<<<<<<<<<<< @@ -36421,7 +36421,7 @@ static int __pyx_pf_3_sa_9TrieTable___cinit__(struct __pyx_obj_3_sa_TrieTable *_ __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_v_extended); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_self->extended = __pyx_t_1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":60 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":60 * self.count = 0 * self.extended = extended * if extended: # <<<<<<<<<<<<<< @@ -36431,7 +36431,7 @@ static int __pyx_pf_3_sa_9TrieTable___cinit__(struct __pyx_obj_3_sa_TrieTable *_ __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_extended); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":61 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":61 * self.extended = extended * if extended: * self.root = ExtendedTrieNode() # <<<<<<<<<<<<<< @@ -36449,7 +36449,7 @@ static int __pyx_pf_3_sa_9TrieTable___cinit__(struct __pyx_obj_3_sa_TrieTable *_ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":63 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":63 * self.root = ExtendedTrieNode() * else: * self.root = TrieNode() # <<<<<<<<<<<<<< @@ -36488,7 +36488,7 @@ static PyObject *__pyx_pw_3_sa_9TrieTable_8extended_1__get__(PyObject *__pyx_v_s return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":54 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":54 * * cdef class TrieTable: * cdef public int extended # <<<<<<<<<<<<<< @@ -36566,7 +36566,7 @@ static PyObject *__pyx_pw_3_sa_9TrieTable_5count_1__get__(PyObject *__pyx_v_self return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":55 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":55 * cdef class TrieTable: * cdef public int extended * cdef public int count # <<<<<<<<<<<<<< @@ -36644,7 +36644,7 @@ static PyObject *__pyx_pw_3_sa_9TrieTable_4root_1__get__(PyObject *__pyx_v_self) return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":56 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":56 * cdef public int extended * cdef public int count * cdef public root # <<<<<<<<<<<<<< @@ -36720,7 +36720,7 @@ static int __pyx_pf_3_sa_9TrieTable_4root_4__del__(struct __pyx_obj_3_sa_TrieTab return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":83 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":83 * * # returns true if sent_id is contained * cdef int contains(self, int sent_id): # <<<<<<<<<<<<<< @@ -36733,7 +36733,7 @@ static int __pyx_f_3_sa_14PhraseLocation_contains(CYTHON_UNUSED struct __pyx_obj __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("contains", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":84 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":84 * # returns true if sent_id is contained * cdef int contains(self, int sent_id): * return 1 # <<<<<<<<<<<<<< @@ -36765,7 +36765,7 @@ static int __pyx_pw_3_sa_14PhraseLocation_1__cinit__(PyObject *__pyx_v_self, PyO static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__sa_low,&__pyx_n_s__sa_high,&__pyx_n_s__arr_low,&__pyx_n_s__arr_high,&__pyx_n_s__arr,&__pyx_n_s__num_subpatterns,0}; PyObject* values[6] = {0,0,0,0,0,0}; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":87 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":87 * * def __cinit__(self, int sa_low=-1, int sa_high=-1, int arr_low=-1, int arr_high=-1, * arr=None, int num_subpatterns=1): # <<<<<<<<<<<<<< @@ -36874,7 +36874,7 @@ static int __pyx_pw_3_sa_14PhraseLocation_1__cinit__(PyObject *__pyx_v_self, PyO return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":86 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":86 * return 1 * * def __cinit__(self, int sa_low=-1, int sa_high=-1, int arr_low=-1, int arr_high=-1, # <<<<<<<<<<<<<< @@ -36890,7 +36890,7 @@ static int __pyx_pf_3_sa_14PhraseLocation___cinit__(struct __pyx_obj_3_sa_Phrase int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":88 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":88 * def __cinit__(self, int sa_low=-1, int sa_high=-1, int arr_low=-1, int arr_high=-1, * arr=None, int num_subpatterns=1): * self.sa_low = sa_low # <<<<<<<<<<<<<< @@ -36899,7 +36899,7 @@ static int __pyx_pf_3_sa_14PhraseLocation___cinit__(struct __pyx_obj_3_sa_Phrase */ __pyx_v_self->sa_low = __pyx_v_sa_low; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":89 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":89 * arr=None, int num_subpatterns=1): * self.sa_low = sa_low * self.sa_high = sa_high # <<<<<<<<<<<<<< @@ -36908,7 +36908,7 @@ static int __pyx_pf_3_sa_14PhraseLocation___cinit__(struct __pyx_obj_3_sa_Phrase */ __pyx_v_self->sa_high = __pyx_v_sa_high; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":90 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":90 * self.sa_low = sa_low * self.sa_high = sa_high * self.arr_low = arr_low # <<<<<<<<<<<<<< @@ -36917,7 +36917,7 @@ static int __pyx_pf_3_sa_14PhraseLocation___cinit__(struct __pyx_obj_3_sa_Phrase */ __pyx_v_self->arr_low = __pyx_v_arr_low; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":91 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":91 * self.sa_high = sa_high * self.arr_low = arr_low * self.arr_high = arr_high # <<<<<<<<<<<<<< @@ -36926,7 +36926,7 @@ static int __pyx_pf_3_sa_14PhraseLocation___cinit__(struct __pyx_obj_3_sa_Phrase */ __pyx_v_self->arr_high = __pyx_v_arr_high; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":92 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":92 * self.arr_low = arr_low * self.arr_high = arr_high * self.arr = arr # <<<<<<<<<<<<<< @@ -36940,7 +36940,7 @@ static int __pyx_pf_3_sa_14PhraseLocation___cinit__(struct __pyx_obj_3_sa_Phrase __Pyx_DECREF(((PyObject *)__pyx_v_self->arr)); __pyx_v_self->arr = ((struct __pyx_obj_3_sa_IntList *)__pyx_v_arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":93 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":93 * self.arr_high = arr_high * self.arr = arr * self.num_subpatterns = num_subpatterns # <<<<<<<<<<<<<< @@ -37020,7 +37020,7 @@ static int __pyx_pw_3_sa_7Sampler_1__cinit__(PyObject *__pyx_v_self, PyObject *_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":103 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":103 * cdef IntList sa * * def __cinit__(self, int sample_size, SuffixArray fsarray): # <<<<<<<<<<<<<< @@ -37040,7 +37040,7 @@ static int __pyx_pf_3_sa_7Sampler___cinit__(struct __pyx_obj_3_sa_Sampler *__pyx int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":104 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":104 * * def __cinit__(self, int sample_size, SuffixArray fsarray): * self.sample_size = sample_size # <<<<<<<<<<<<<< @@ -37049,7 +37049,7 @@ static int __pyx_pf_3_sa_7Sampler___cinit__(struct __pyx_obj_3_sa_Sampler *__pyx */ __pyx_v_self->sample_size = __pyx_v_sample_size; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":105 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":105 * def __cinit__(self, int sample_size, SuffixArray fsarray): * self.sample_size = sample_size * self.sa = fsarray.sa # <<<<<<<<<<<<<< @@ -37062,7 +37062,7 @@ static int __pyx_pf_3_sa_7Sampler___cinit__(struct __pyx_obj_3_sa_Sampler *__pyx __Pyx_DECREF(((PyObject *)__pyx_v_self->sa)); __pyx_v_self->sa = __pyx_v_fsarray->sa; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":106 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":106 * self.sample_size = sample_size * self.sa = fsarray.sa * if sample_size > 0: # <<<<<<<<<<<<<< @@ -37072,7 +37072,7 @@ static int __pyx_pf_3_sa_7Sampler___cinit__(struct __pyx_obj_3_sa_Sampler *__pyx __pyx_t_1 = (__pyx_v_sample_size > 0); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":107 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":107 * self.sa = fsarray.sa * if sample_size > 0: * logger.info("Sampling strategy: uniform, max sample size = %d", sample_size) # <<<<<<<<<<<<<< @@ -37103,7 +37103,7 @@ static int __pyx_pf_3_sa_7Sampler___cinit__(struct __pyx_obj_3_sa_Sampler *__pyx } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":109 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":109 * logger.info("Sampling strategy: uniform, max sample size = %d", sample_size) * else: * logger.info("Sampling strategy: no sampling") # <<<<<<<<<<<<<< @@ -37152,7 +37152,7 @@ static PyObject *__pyx_pw_3_sa_7Sampler_3sample(PyObject *__pyx_v_self, PyObject return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":111 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":111 * logger.info("Sampling strategy: no sampling") * * def sample(self, PhraseLocation phrase_location): # <<<<<<<<<<<<<< @@ -37179,7 +37179,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("sample", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":124 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":124 * cdef int num_locations, val, j * * sample = IntList() # <<<<<<<<<<<<<< @@ -37191,7 +37191,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ __pyx_v_sample = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":125 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":125 * * sample = IntList() * if phrase_location.arr is None: # <<<<<<<<<<<<<< @@ -37201,7 +37201,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ __pyx_t_2 = (((PyObject *)__pyx_v_phrase_location->arr) == Py_None); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":126 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":126 * sample = IntList() * if phrase_location.arr is None: * num_locations = phrase_location.sa_high - phrase_location.sa_low # <<<<<<<<<<<<<< @@ -37210,7 +37210,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ */ __pyx_v_num_locations = (__pyx_v_phrase_location->sa_high - __pyx_v_phrase_location->sa_low); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":127 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":127 * if phrase_location.arr is None: * num_locations = phrase_location.sa_high - phrase_location.sa_low * if self.sample_size == -1 or num_locations <= self.sample_size: # <<<<<<<<<<<<<< @@ -37226,7 +37226,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ } if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":128 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":128 * num_locations = phrase_location.sa_high - phrase_location.sa_low * if self.sample_size == -1 or num_locations <= self.sample_size: * sample._extend_arr(self.sa.arr + phrase_location.sa_low, num_locations) # <<<<<<<<<<<<<< @@ -37238,7 +37238,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":130 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":130 * sample._extend_arr(self.sa.arr + phrase_location.sa_low, num_locations) * else: * stepsize = float(num_locations)/float(self.sample_size) # <<<<<<<<<<<<<< @@ -37251,7 +37251,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ } __pyx_v_stepsize = (((double)__pyx_v_num_locations) / ((double)__pyx_v_self->sample_size)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":131 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":131 * else: * stepsize = float(num_locations)/float(self.sample_size) * i = phrase_location.sa_low # <<<<<<<<<<<<<< @@ -37260,7 +37260,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ */ __pyx_v_i = __pyx_v_phrase_location->sa_low; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":132 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":132 * stepsize = float(num_locations)/float(self.sample_size) * i = phrase_location.sa_low * while i < phrase_location.sa_high and sample.len < self.sample_size: # <<<<<<<<<<<<<< @@ -37277,7 +37277,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ } if (!__pyx_t_3) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":135 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":135 * '''Note: int(i) not guaranteed to have the desired * effect, according to the python documentation''' * if fmod(i,1.0) > 0.5: # <<<<<<<<<<<<<< @@ -37287,7 +37287,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ __pyx_t_3 = (fmod(__pyx_v_i, 1.0) > 0.5); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":136 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":136 * effect, according to the python documentation''' * if fmod(i,1.0) > 0.5: * val = int(ceil(i)) # <<<<<<<<<<<<<< @@ -37299,7 +37299,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":138 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":138 * val = int(ceil(i)) * else: * val = int(floor(i)) # <<<<<<<<<<<<<< @@ -37310,7 +37310,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ } __pyx_L7:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":139 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":139 * else: * val = int(floor(i)) * sample._append(self.sa.arr[val]) # <<<<<<<<<<<<<< @@ -37319,7 +37319,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_sample->__pyx_vtab)->_append(__pyx_v_sample, (__pyx_v_self->sa->arr[__pyx_v_val])); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":140 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":140 * val = int(floor(i)) * sample._append(self.sa.arr[val]) * i = i + stepsize # <<<<<<<<<<<<<< @@ -37334,7 +37334,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":142 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":142 * i = i + stepsize * else: * num_locations = (phrase_location.arr_high - phrase_location.arr_low) / phrase_location.num_subpatterns # <<<<<<<<<<<<<< @@ -37352,7 +37352,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ } __pyx_v_num_locations = __Pyx_div_int(__pyx_t_5, __pyx_v_phrase_location->num_subpatterns); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":143 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":143 * else: * num_locations = (phrase_location.arr_high - phrase_location.arr_low) / phrase_location.num_subpatterns * if self.sample_size == -1 or num_locations <= self.sample_size: # <<<<<<<<<<<<<< @@ -37368,7 +37368,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ } if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":144 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":144 * num_locations = (phrase_location.arr_high - phrase_location.arr_low) / phrase_location.num_subpatterns * if self.sample_size == -1 or num_locations <= self.sample_size: * sample = phrase_location.arr # <<<<<<<<<<<<<< @@ -37382,7 +37382,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":146 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":146 * sample = phrase_location.arr * else: * stepsize = float(num_locations)/float(self.sample_size) # <<<<<<<<<<<<<< @@ -37395,7 +37395,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ } __pyx_v_stepsize = (((double)__pyx_v_num_locations) / ((double)__pyx_v_self->sample_size)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":147 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":147 * else: * stepsize = float(num_locations)/float(self.sample_size) * i = phrase_location.arr_low # <<<<<<<<<<<<<< @@ -37404,7 +37404,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ */ __pyx_v_i = __pyx_v_phrase_location->arr_low; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":148 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":148 * stepsize = float(num_locations)/float(self.sample_size) * i = phrase_location.arr_low * while i < num_locations and sample.len < self.sample_size * phrase_location.num_subpatterns: # <<<<<<<<<<<<<< @@ -37421,7 +37421,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ } if (!__pyx_t_4) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":151 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":151 * '''Note: int(i) not guaranteed to have the desired * effect, according to the python documentation''' * if fmod(i,1.0) > 0.5: # <<<<<<<<<<<<<< @@ -37431,7 +37431,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ __pyx_t_4 = (fmod(__pyx_v_i, 1.0) > 0.5); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":152 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":152 * effect, according to the python documentation''' * if fmod(i,1.0) > 0.5: * val = int(ceil(i)) # <<<<<<<<<<<<<< @@ -37443,7 +37443,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":154 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":154 * val = int(ceil(i)) * else: * val = int(floor(i)) # <<<<<<<<<<<<<< @@ -37454,7 +37454,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ } __pyx_L11:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":155 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":155 * else: * val = int(floor(i)) * j = phrase_location.arr_low + (val*phrase_location.num_subpatterns) # <<<<<<<<<<<<<< @@ -37463,7 +37463,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ */ __pyx_v_j = (__pyx_v_phrase_location->arr_low + (__pyx_v_val * __pyx_v_phrase_location->num_subpatterns)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":156 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":156 * val = int(floor(i)) * j = phrase_location.arr_low + (val*phrase_location.num_subpatterns) * sample._extend_arr(phrase_location.arr.arr + j, phrase_location.num_subpatterns) # <<<<<<<<<<<<<< @@ -37472,7 +37472,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_sample->__pyx_vtab)->_extend_arr(__pyx_v_sample, (__pyx_v_phrase_location->arr->arr + __pyx_v_j), __pyx_v_phrase_location->num_subpatterns); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":157 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":157 * j = phrase_location.arr_low + (val*phrase_location.num_subpatterns) * sample._extend_arr(phrase_location.arr.arr + j, phrase_location.num_subpatterns) * i = i + stepsize # <<<<<<<<<<<<<< @@ -37486,7 +37486,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":158 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":158 * sample._extend_arr(phrase_location.arr.arr + j, phrase_location.num_subpatterns) * i = i + stepsize * return sample # <<<<<<<<<<<<<< @@ -37511,7 +37511,7 @@ static PyObject *__pyx_pf_3_sa_7Sampler_2sample(struct __pyx_obj_3_sa_Sampler *_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":170 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":170 * * * cdef void assign_matching(Matching* m, int* arr, int start, int step, int* sent_id_arr): # <<<<<<<<<<<<<< @@ -37523,7 +37523,7 @@ static void __pyx_f_3_sa_assign_matching(struct __pyx_t_3_sa_Matching *__pyx_v_m __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("assign_matching", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":171 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":171 * * cdef void assign_matching(Matching* m, int* arr, int start, int step, int* sent_id_arr): * m.arr = arr # <<<<<<<<<<<<<< @@ -37532,7 +37532,7 @@ static void __pyx_f_3_sa_assign_matching(struct __pyx_t_3_sa_Matching *__pyx_v_m */ __pyx_v_m->arr = __pyx_v_arr; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":172 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":172 * cdef void assign_matching(Matching* m, int* arr, int start, int step, int* sent_id_arr): * m.arr = arr * m.start = start # <<<<<<<<<<<<<< @@ -37541,7 +37541,7 @@ static void __pyx_f_3_sa_assign_matching(struct __pyx_t_3_sa_Matching *__pyx_v_m */ __pyx_v_m->start = __pyx_v_start; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":173 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":173 * m.arr = arr * m.start = start * m.end = start + step # <<<<<<<<<<<<<< @@ -37550,7 +37550,7 @@ static void __pyx_f_3_sa_assign_matching(struct __pyx_t_3_sa_Matching *__pyx_v_m */ __pyx_v_m->end = (__pyx_v_start + __pyx_v_step); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":174 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":174 * m.start = start * m.end = start + step * m.sent_id = sent_id_arr[arr[start]] # <<<<<<<<<<<<<< @@ -37559,7 +37559,7 @@ static void __pyx_f_3_sa_assign_matching(struct __pyx_t_3_sa_Matching *__pyx_v_m */ __pyx_v_m->sent_id = (__pyx_v_sent_id_arr[(__pyx_v_arr[__pyx_v_start])]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":175 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":175 * m.end = start + step * m.sent_id = sent_id_arr[arr[start]] * m.size = step # <<<<<<<<<<<<<< @@ -37571,7 +37571,7 @@ static void __pyx_f_3_sa_assign_matching(struct __pyx_t_3_sa_Matching *__pyx_v_m __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":178 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":178 * * * cdef int* append_combined_matching(int* arr, Matching* loc1, Matching* loc2, # <<<<<<<<<<<<<< @@ -37588,7 +37588,7 @@ static int *__pyx_f_3_sa_append_combined_matching(int *__pyx_v_arr, struct __pyx int __pyx_t_2; __Pyx_RefNannySetupContext("append_combined_matching", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":182 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":182 * cdef int i, new_len * * new_len = result_len[0] + num_subpatterns # <<<<<<<<<<<<<< @@ -37597,7 +37597,7 @@ static int *__pyx_f_3_sa_append_combined_matching(int *__pyx_v_arr, struct __pyx */ __pyx_v_new_len = ((__pyx_v_result_len[0]) + __pyx_v_num_subpatterns); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":183 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":183 * * new_len = result_len[0] + num_subpatterns * arr = realloc(arr, new_len*sizeof(int)) # <<<<<<<<<<<<<< @@ -37606,7 +37606,7 @@ static int *__pyx_f_3_sa_append_combined_matching(int *__pyx_v_arr, struct __pyx */ __pyx_v_arr = ((int *)realloc(__pyx_v_arr, (__pyx_v_new_len * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":185 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":185 * arr = realloc(arr, new_len*sizeof(int)) * * for i from 0 <= i < loc1.size: # <<<<<<<<<<<<<< @@ -37616,7 +37616,7 @@ static int *__pyx_f_3_sa_append_combined_matching(int *__pyx_v_arr, struct __pyx __pyx_t_1 = __pyx_v_loc1->size; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_1; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":186 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":186 * * for i from 0 <= i < loc1.size: * arr[result_len[0]+i] = loc1.arr[loc1.start+i] # <<<<<<<<<<<<<< @@ -37626,7 +37626,7 @@ static int *__pyx_f_3_sa_append_combined_matching(int *__pyx_v_arr, struct __pyx (__pyx_v_arr[((__pyx_v_result_len[0]) + __pyx_v_i)]) = (__pyx_v_loc1->arr[(__pyx_v_loc1->start + __pyx_v_i)]); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":187 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":187 * for i from 0 <= i < loc1.size: * arr[result_len[0]+i] = loc1.arr[loc1.start+i] * if num_subpatterns > loc1.size: # <<<<<<<<<<<<<< @@ -37636,7 +37636,7 @@ static int *__pyx_f_3_sa_append_combined_matching(int *__pyx_v_arr, struct __pyx __pyx_t_2 = (__pyx_v_num_subpatterns > __pyx_v_loc1->size); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":188 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":188 * arr[result_len[0]+i] = loc1.arr[loc1.start+i] * if num_subpatterns > loc1.size: * arr[new_len-1] = loc2.arr[loc2.end-1] # <<<<<<<<<<<<<< @@ -37648,7 +37648,7 @@ static int *__pyx_f_3_sa_append_combined_matching(int *__pyx_v_arr, struct __pyx } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":189 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":189 * if num_subpatterns > loc1.size: * arr[new_len-1] = loc2.arr[loc2.end-1] * result_len[0] = new_len # <<<<<<<<<<<<<< @@ -37657,7 +37657,7 @@ static int *__pyx_f_3_sa_append_combined_matching(int *__pyx_v_arr, struct __pyx */ (__pyx_v_result_len[0]) = __pyx_v_new_len; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":190 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":190 * arr[new_len-1] = loc2.arr[loc2.end-1] * result_len[0] = new_len * return arr # <<<<<<<<<<<<<< @@ -37673,7 +37673,7 @@ static int *__pyx_f_3_sa_append_combined_matching(int *__pyx_v_arr, struct __pyx return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":193 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":193 * * * cdef int* extend_arr(int* arr, int* arr_len, int* appendix, int appendix_len): # <<<<<<<<<<<<<< @@ -37687,7 +37687,7 @@ static int *__pyx_f_3_sa_extend_arr(int *__pyx_v_arr, int *__pyx_v_arr_len, int __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("extend_arr", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":196 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":196 * cdef int new_len * * new_len = arr_len[0] + appendix_len # <<<<<<<<<<<<<< @@ -37696,7 +37696,7 @@ static int *__pyx_f_3_sa_extend_arr(int *__pyx_v_arr, int *__pyx_v_arr_len, int */ __pyx_v_new_len = ((__pyx_v_arr_len[0]) + __pyx_v_appendix_len); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":197 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":197 * * new_len = arr_len[0] + appendix_len * arr = realloc(arr, new_len*sizeof(int)) # <<<<<<<<<<<<<< @@ -37705,7 +37705,7 @@ static int *__pyx_f_3_sa_extend_arr(int *__pyx_v_arr, int *__pyx_v_arr_len, int */ __pyx_v_arr = ((int *)realloc(__pyx_v_arr, (__pyx_v_new_len * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":198 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":198 * new_len = arr_len[0] + appendix_len * arr = realloc(arr, new_len*sizeof(int)) * memcpy(arr+arr_len[0], appendix, appendix_len*sizeof(int)) # <<<<<<<<<<<<<< @@ -37714,7 +37714,7 @@ static int *__pyx_f_3_sa_extend_arr(int *__pyx_v_arr, int *__pyx_v_arr_len, int */ memcpy((__pyx_v_arr + (__pyx_v_arr_len[0])), __pyx_v_appendix, (__pyx_v_appendix_len * (sizeof(int)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":199 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":199 * arr = realloc(arr, new_len*sizeof(int)) * memcpy(arr+arr_len[0], appendix, appendix_len*sizeof(int)) * arr_len[0] = new_len # <<<<<<<<<<<<<< @@ -37723,7 +37723,7 @@ static int *__pyx_f_3_sa_extend_arr(int *__pyx_v_arr, int *__pyx_v_arr_len, int */ (__pyx_v_arr_len[0]) = __pyx_v_new_len; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":200 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":200 * memcpy(arr+arr_len[0], appendix, appendix_len*sizeof(int)) * arr_len[0] = new_len * return arr # <<<<<<<<<<<<<< @@ -37739,7 +37739,7 @@ static int *__pyx_f_3_sa_extend_arr(int *__pyx_v_arr, int *__pyx_v_arr_len, int return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":202 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":202 * return arr * * cdef int median(int low, int high, int step): # <<<<<<<<<<<<<< @@ -37756,7 +37756,7 @@ static int __pyx_f_3_sa_median(int __pyx_v_low, int __pyx_v_high, int __pyx_v_st int __pyx_clineno = 0; __Pyx_RefNannySetupContext("median", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":203 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":203 * * cdef int median(int low, int high, int step): * return low + (((high - low)/step)/2)*step # <<<<<<<<<<<<<< @@ -37785,7 +37785,7 @@ static int __pyx_f_3_sa_median(int __pyx_v_low, int __pyx_v_high, int __pyx_v_st return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":206 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":206 * * * cdef void find_comparable_matchings(int low, int high, int* arr, int step, int loc, int* loc_minus, int* loc_plus): # <<<<<<<<<<<<<< @@ -37800,7 +37800,7 @@ static void __pyx_f_3_sa_find_comparable_matchings(int __pyx_v_low, int __pyx_v_ int __pyx_t_3; __Pyx_RefNannySetupContext("find_comparable_matchings", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":210 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":210 * # in which all matchings have the same first index as the one * # starting at loc * loc_plus[0] = loc + step # <<<<<<<<<<<<<< @@ -37809,7 +37809,7 @@ static void __pyx_f_3_sa_find_comparable_matchings(int __pyx_v_low, int __pyx_v_ */ (__pyx_v_loc_plus[0]) = (__pyx_v_loc + __pyx_v_step); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":211 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":211 * # starting at loc * loc_plus[0] = loc + step * while loc_plus[0] < high and arr[loc_plus[0]] == arr[loc]: # <<<<<<<<<<<<<< @@ -37826,7 +37826,7 @@ static void __pyx_f_3_sa_find_comparable_matchings(int __pyx_v_low, int __pyx_v_ } if (!__pyx_t_3) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":212 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":212 * loc_plus[0] = loc + step * while loc_plus[0] < high and arr[loc_plus[0]] == arr[loc]: * loc_plus[0] = loc_plus[0] + step # <<<<<<<<<<<<<< @@ -37836,7 +37836,7 @@ static void __pyx_f_3_sa_find_comparable_matchings(int __pyx_v_low, int __pyx_v_ (__pyx_v_loc_plus[0]) = ((__pyx_v_loc_plus[0]) + __pyx_v_step); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":213 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":213 * while loc_plus[0] < high and arr[loc_plus[0]] == arr[loc]: * loc_plus[0] = loc_plus[0] + step * loc_minus[0] = loc # <<<<<<<<<<<<<< @@ -37845,7 +37845,7 @@ static void __pyx_f_3_sa_find_comparable_matchings(int __pyx_v_low, int __pyx_v_ */ (__pyx_v_loc_minus[0]) = __pyx_v_loc; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":214 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":214 * loc_plus[0] = loc_plus[0] + step * loc_minus[0] = loc * while loc_minus[0]-step >= low and arr[loc_minus[0]-step] == arr[loc]: # <<<<<<<<<<<<<< @@ -37862,7 +37862,7 @@ static void __pyx_f_3_sa_find_comparable_matchings(int __pyx_v_low, int __pyx_v_ } if (!__pyx_t_2) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":215 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":215 * loc_minus[0] = loc * while loc_minus[0]-step >= low and arr[loc_minus[0]-step] == arr[loc]: * loc_minus[0] = loc_minus[0] - step # <<<<<<<<<<<<<< @@ -37906,7 +37906,7 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_ static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__alignment,&__pyx_n_s__by_slack_factor,&__pyx_n_s__category,&__pyx_n_s__max_chunks,&__pyx_n_s__max_initial_size,&__pyx_n_s__max_length,&__pyx_n_s__max_nonterminals,&__pyx_n_s__max_target_chunks,&__pyx_n_s__max_target_length,&__pyx_n_s__min_gap_size,&__pyx_n_s__precompute_file,&__pyx_n_s_70,&__pyx_n_s__precompute_rank,&__pyx_n_s_103,&__pyx_n_s_104,&__pyx_n_s_71,&__pyx_n_s__train_min_gap_size,&__pyx_n_s__tight_phrases,&__pyx_n_s__use_baeza_yates,&__pyx_n_s__use_collocations,&__pyx_n_s__use_index,0}; PyObject* values[21] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":275 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":275 * char* category="[X]", * # maximum number of contiguous chunks of terminal symbols in RHS of a rule. If None, defaults to max_nonterminals+1 * max_chunks=None, # <<<<<<<<<<<<<< @@ -37915,7 +37915,7 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_ */ values[3] = ((PyObject *)Py_None); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":283 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":283 * unsigned max_nonterminals=2, * # maximum number of contiguous chunks of terminal symbols in target-side RHS of a rule. If None, defaults to max_nonterminals+1 * max_target_chunks=None, # <<<<<<<<<<<<<< @@ -37924,7 +37924,7 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_ */ values[7] = ((PyObject *)Py_None); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":285 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":285 * max_target_chunks=None, * # maximum number of target side symbols (both T and NT) allowed in a rule. If None, defaults to max_initial_size * max_target_length=None, # <<<<<<<<<<<<<< @@ -37933,7 +37933,7 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_ */ values[8] = ((PyObject *)Py_None); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":289 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":289 * unsigned min_gap_size=2, * # filename of file containing precomputed collocations * precompute_file=None, # <<<<<<<<<<<<<< @@ -38110,7 +38110,7 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_ __pyx_v_by_slack_factor = __pyx_PyFloat_AsFloat(values[1]); if (unlikely((__pyx_v_by_slack_factor == (float)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 271; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } else { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":271 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":271 * Alignment alignment, * # parameter for double-binary search; doesn't seem to matter much * float by_slack_factor=1.0, # <<<<<<<<<<<<<< @@ -38162,7 +38162,7 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_ __pyx_v_require_aligned_terminal = __Pyx_PyObject_IsTrue(values[13]); if (unlikely((__pyx_v_require_aligned_terminal == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 295; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } else { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":295 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":295 * unsigned precompute_rank=100, * # require extracted rules to have at least one aligned word * bint require_aligned_terminal=True, # <<<<<<<<<<<<<< @@ -38175,7 +38175,7 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_ __pyx_v_require_aligned_chunks = __Pyx_PyObject_IsTrue(values[14]); if (unlikely((__pyx_v_require_aligned_chunks == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } else { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":297 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":297 * bint require_aligned_terminal=True, * # require each contiguous chunk of extracted rules to have at least one aligned word * bint require_aligned_chunks=False, # <<<<<<<<<<<<<< @@ -38198,21 +38198,21 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_ __pyx_v_tight_phrases = __Pyx_PyObject_IsTrue(values[17]); if (unlikely((__pyx_v_tight_phrases == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 303; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } else { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":303 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":303 * unsigned train_min_gap_size=2, - * # True if phrases should be tight, False otherwise (False == slower but better results) - * bint tight_phrases=False, # <<<<<<<<<<<<<< + * # False if phrases should be loose (better but slower), True otherwise + * bint tight_phrases=True, # <<<<<<<<<<<<<< * # True to require use of double-binary alg, false otherwise * bint use_baeza_yates=True, */ - __pyx_v_tight_phrases = ((int)0); + __pyx_v_tight_phrases = ((int)1); } if (values[18]) { __pyx_v_use_baeza_yates = __Pyx_PyObject_IsTrue(values[18]); if (unlikely((__pyx_v_use_baeza_yates == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } else { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":305 - * bint tight_phrases=False, + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":305 + * bint tight_phrases=True, * # True to require use of double-binary alg, false otherwise * bint use_baeza_yates=True, # <<<<<<<<<<<<<< * # True to enable used of precomputed collocations @@ -38224,7 +38224,7 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_ __pyx_v_use_collocations = __Pyx_PyObject_IsTrue(values[19]); if (unlikely((__pyx_v_use_collocations == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } else { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":307 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":307 * bint use_baeza_yates=True, * # True to enable used of precomputed collocations * bint use_collocations=True, # <<<<<<<<<<<<<< @@ -38237,7 +38237,7 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_ __pyx_v_use_index = __Pyx_PyObject_IsTrue(values[20]); if (unlikely((__pyx_v_use_index == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 309; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } else { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":309 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":309 * bint use_collocations=True, * # True to enable use of precomputed inverted indices * bint use_index=True): # <<<<<<<<<<<<<< @@ -38265,7 +38265,7 @@ static int __pyx_pw_3_sa_23HieroCachingRuleFactory_1__cinit__(PyObject *__pyx_v_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":267 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":267 * cdef IntList findexes1 * * def __cinit__(self, # <<<<<<<<<<<<<< @@ -38285,7 +38285,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":315 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":315 * respectively. This is because Chiang's model does not require * them to be the same, therefore we don't either.''' * self.rules = TrieTable(True) # cache # <<<<<<<<<<<<<< @@ -38308,7 +38308,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ __pyx_v_self->rules = ((struct __pyx_obj_3_sa_TrieTable *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":316 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":316 * them to be the same, therefore we don't either.''' * self.rules = TrieTable(True) # cache * self.rules.root = ExtendedTrieNode(phrase_location=PhraseLocation()) # <<<<<<<<<<<<<< @@ -38330,7 +38330,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ __pyx_v_self->rules->root = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":317 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":317 * self.rules = TrieTable(True) # cache * self.rules.root = ExtendedTrieNode(phrase_location=PhraseLocation()) * if alignment is None: # <<<<<<<<<<<<<< @@ -38340,7 +38340,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ __pyx_t_3 = (((PyObject *)__pyx_v_alignment) == Py_None); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":318 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":318 * self.rules.root = ExtendedTrieNode(phrase_location=PhraseLocation()) * if alignment is None: * raise Exception("Must specify an alignment object") # <<<<<<<<<<<<<< @@ -38356,7 +38356,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":319 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":319 * if alignment is None: * raise Exception("Must specify an alignment object") * self.alignment = alignment # <<<<<<<<<<<<<< @@ -38369,7 +38369,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ __Pyx_DECREF(((PyObject *)__pyx_v_self->alignment)); __pyx_v_self->alignment = __pyx_v_alignment; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":323 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":323 * # grammar parameters and settings * # NOTE: setting max_nonterminals > 2 is not currently supported in Hiero * self.max_length = max_length # <<<<<<<<<<<<<< @@ -38378,7 +38378,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ */ __pyx_v_self->max_length = __pyx_v_max_length; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":324 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":324 * # NOTE: setting max_nonterminals > 2 is not currently supported in Hiero * self.max_length = max_length * self.max_nonterminals = max_nonterminals # <<<<<<<<<<<<<< @@ -38387,7 +38387,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ */ __pyx_v_self->max_nonterminals = __pyx_v_max_nonterminals; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":325 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":325 * self.max_length = max_length * self.max_nonterminals = max_nonterminals * self.max_initial_size = max_initial_size # <<<<<<<<<<<<<< @@ -38396,7 +38396,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ */ __pyx_v_self->max_initial_size = __pyx_v_max_initial_size; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":326 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":326 * self.max_nonterminals = max_nonterminals * self.max_initial_size = max_initial_size * self.train_max_initial_size = train_max_initial_size # <<<<<<<<<<<<<< @@ -38405,7 +38405,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ */ __pyx_v_self->train_max_initial_size = __pyx_v_train_max_initial_size; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":327 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":327 * self.max_initial_size = max_initial_size * self.train_max_initial_size = train_max_initial_size * self.min_gap_size = min_gap_size # <<<<<<<<<<<<<< @@ -38414,7 +38414,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ */ __pyx_v_self->min_gap_size = __pyx_v_min_gap_size; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":328 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":328 * self.train_max_initial_size = train_max_initial_size * self.min_gap_size = min_gap_size * self.train_min_gap_size = train_min_gap_size # <<<<<<<<<<<<<< @@ -38423,7 +38423,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ */ __pyx_v_self->train_min_gap_size = __pyx_v_train_min_gap_size; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":329 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":329 * self.min_gap_size = min_gap_size * self.train_min_gap_size = train_min_gap_size * self.category = sym_fromstring(category, False) # <<<<<<<<<<<<<< @@ -38432,7 +38432,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ */ __pyx_v_self->category = __pyx_f_3_sa_sym_fromstring(__pyx_v_category, 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":331 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":331 * self.category = sym_fromstring(category, False) * * if max_chunks is None: # <<<<<<<<<<<<<< @@ -38442,7 +38442,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ __pyx_t_3 = (__pyx_v_max_chunks == Py_None); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":332 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":332 * * if max_chunks is None: * self.max_chunks = self.max_nonterminals + 1 # <<<<<<<<<<<<<< @@ -38454,7 +38454,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":334 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":334 * self.max_chunks = self.max_nonterminals + 1 * else: * self.max_chunks = max_chunks # <<<<<<<<<<<<<< @@ -38466,7 +38466,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":336 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":336 * self.max_chunks = max_chunks * * if max_target_chunks is None: # <<<<<<<<<<<<<< @@ -38476,7 +38476,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ __pyx_t_3 = (__pyx_v_max_target_chunks == Py_None); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":337 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":337 * * if max_target_chunks is None: * self.max_target_chunks = self.max_nonterminals + 1 # <<<<<<<<<<<<<< @@ -38488,7 +38488,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":339 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":339 * self.max_target_chunks = self.max_nonterminals + 1 * else: * self.max_target_chunks = max_target_chunks # <<<<<<<<<<<<<< @@ -38500,7 +38500,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":341 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":341 * self.max_target_chunks = max_target_chunks * * if max_target_length is None: # <<<<<<<<<<<<<< @@ -38510,7 +38510,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ __pyx_t_3 = (__pyx_v_max_target_length == Py_None); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":342 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":342 * * if max_target_length is None: * self.max_target_length = max_initial_size # <<<<<<<<<<<<<< @@ -38522,7 +38522,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":344 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":344 * self.max_target_length = max_initial_size * else: * self.max_target_length = max_target_length # <<<<<<<<<<<<<< @@ -38534,7 +38534,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ } __pyx_L6:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":347 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":347 * * # algorithmic parameters and settings * self.precomputed_collocations = {} # <<<<<<<<<<<<<< @@ -38549,7 +38549,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ __pyx_v_self->precomputed_collocations = ((PyObject *)__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":348 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":348 * # algorithmic parameters and settings * self.precomputed_collocations = {} * self.precomputed_index = {} # <<<<<<<<<<<<<< @@ -38564,7 +38564,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ __pyx_v_self->precomputed_index = ((PyObject *)__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":349 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":349 * self.precomputed_collocations = {} * self.precomputed_index = {} * self.use_index = use_index # <<<<<<<<<<<<<< @@ -38573,7 +38573,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ */ __pyx_v_self->use_index = __pyx_v_use_index; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":350 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":350 * self.precomputed_index = {} * self.use_index = use_index * self.use_collocations = use_collocations # <<<<<<<<<<<<<< @@ -38582,7 +38582,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ */ __pyx_v_self->use_collocations = __pyx_v_use_collocations; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":351 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":351 * self.use_index = use_index * self.use_collocations = use_collocations * self.max_rank = {} # <<<<<<<<<<<<<< @@ -38597,7 +38597,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ __pyx_v_self->max_rank = ((PyObject *)__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":352 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":352 * self.use_collocations = use_collocations * self.max_rank = {} * self.precompute_file = precompute_file # <<<<<<<<<<<<<< @@ -38610,7 +38610,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ __Pyx_DECREF(__pyx_v_self->precompute_file); __pyx_v_self->precompute_file = __pyx_v_precompute_file; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":353 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":353 * self.max_rank = {} * self.precompute_file = precompute_file * self.precompute_rank = precompute_rank # <<<<<<<<<<<<<< @@ -38619,7 +38619,7 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ */ __pyx_v_self->precompute_rank = __pyx_v_precompute_rank; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":354 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":354 * self.precompute_file = precompute_file * self.precompute_rank = precompute_rank * self.precompute_secondary_rank = precompute_secondary_rank # <<<<<<<<<<<<<< @@ -38628,136 +38628,97 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ */ __pyx_v_self->precompute_secondary_rank = __pyx_v_precompute_secondary_rank; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":355 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":355 * self.precompute_rank = precompute_rank * self.precompute_secondary_rank = precompute_secondary_rank * self.use_baeza_yates = use_baeza_yates # <<<<<<<<<<<<<< * self.by_slack_factor = by_slack_factor - * if tight_phrases: + * self.tight_phrases = tight_phrases */ __pyx_v_self->use_baeza_yates = __pyx_v_use_baeza_yates; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":356 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":356 * self.precompute_secondary_rank = precompute_secondary_rank * self.use_baeza_yates = use_baeza_yates * self.by_slack_factor = by_slack_factor # <<<<<<<<<<<<<< - * if tight_phrases: - * self.tight_phrases = 1 + * self.tight_phrases = tight_phrases + * */ __pyx_v_self->by_slack_factor = __pyx_v_by_slack_factor; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":357 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":357 * self.use_baeza_yates = use_baeza_yates * self.by_slack_factor = by_slack_factor - * if tight_phrases: # <<<<<<<<<<<<<< - * self.tight_phrases = 1 - * else: - */ - if (__pyx_v_tight_phrases) { - - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":358 - * self.by_slack_factor = by_slack_factor - * if tight_phrases: - * self.tight_phrases = 1 # <<<<<<<<<<<<<< - * else: - * self.tight_phrases = 0 - */ - __pyx_v_self->tight_phrases = 1; - goto __pyx_L7; - } - /*else*/ { - - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":360 - * self.tight_phrases = 1 - * else: - * self.tight_phrases = 0 # <<<<<<<<<<<<<< + * self.tight_phrases = tight_phrases # <<<<<<<<<<<<<< * * if require_aligned_chunks: */ - __pyx_v_self->tight_phrases = 0; - } - __pyx_L7:; + __pyx_v_self->tight_phrases = __pyx_v_tight_phrases; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":362 - * self.tight_phrases = 0 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":359 + * self.tight_phrases = tight_phrases * * if require_aligned_chunks: # <<<<<<<<<<<<<< * # one condition is a stronger version of the other. - * self.require_aligned_chunks = 1 + * self.require_aligned_chunks = self.require_aligned_terminal = True */ if (__pyx_v_require_aligned_chunks) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":364 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":361 * if require_aligned_chunks: * # one condition is a stronger version of the other. - * self.require_aligned_chunks = 1 # <<<<<<<<<<<<<< - * self.require_aligned_terminal = 1 + * self.require_aligned_chunks = self.require_aligned_terminal = True # <<<<<<<<<<<<<< * elif require_aligned_terminal: + * self.require_aligned_chunks = False */ __pyx_v_self->require_aligned_chunks = 1; - - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":365 - * # one condition is a stronger version of the other. - * self.require_aligned_chunks = 1 - * self.require_aligned_terminal = 1 # <<<<<<<<<<<<<< - * elif require_aligned_terminal: - * self.require_aligned_chunks = 0 - */ __pyx_v_self->require_aligned_terminal = 1; - goto __pyx_L8; + goto __pyx_L7; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":366 - * self.require_aligned_chunks = 1 - * self.require_aligned_terminal = 1 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":362 + * # one condition is a stronger version of the other. + * self.require_aligned_chunks = self.require_aligned_terminal = True * elif require_aligned_terminal: # <<<<<<<<<<<<<< - * self.require_aligned_chunks = 0 - * self.require_aligned_terminal = 1 + * self.require_aligned_chunks = False + * self.require_aligned_terminal = True */ if (__pyx_v_require_aligned_terminal) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":367 - * self.require_aligned_terminal = 1 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":363 + * self.require_aligned_chunks = self.require_aligned_terminal = True * elif require_aligned_terminal: - * self.require_aligned_chunks = 0 # <<<<<<<<<<<<<< - * self.require_aligned_terminal = 1 + * self.require_aligned_chunks = False # <<<<<<<<<<<<<< + * self.require_aligned_terminal = True * else: */ __pyx_v_self->require_aligned_chunks = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":368 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":364 * elif require_aligned_terminal: - * self.require_aligned_chunks = 0 - * self.require_aligned_terminal = 1 # <<<<<<<<<<<<<< + * self.require_aligned_chunks = False + * self.require_aligned_terminal = True # <<<<<<<<<<<<<< * else: - * self.require_aligned_chunks = 0 + * self.require_aligned_chunks = self.require_aligned_terminal = False */ __pyx_v_self->require_aligned_terminal = 1; - goto __pyx_L8; + goto __pyx_L7; } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":370 - * self.require_aligned_terminal = 1 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":366 + * self.require_aligned_terminal = True * else: - * self.require_aligned_chunks = 0 # <<<<<<<<<<<<<< - * self.require_aligned_terminal = 0 + * self.require_aligned_chunks = self.require_aligned_terminal = False # <<<<<<<<<<<<<< * + * # diagnostics */ __pyx_v_self->require_aligned_chunks = 0; - - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":371 - * else: - * self.require_aligned_chunks = 0 - * self.require_aligned_terminal = 0 # <<<<<<<<<<<<<< - * - * - */ __pyx_v_self->require_aligned_terminal = 0; } - __pyx_L8:; + __pyx_L7:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":375 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":369 * * # diagnostics * self.prev_norm_prefix = () # <<<<<<<<<<<<<< @@ -38770,17 +38731,17 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ __Pyx_DECREF(__pyx_v_self->prev_norm_prefix); __pyx_v_self->prev_norm_prefix = ((PyObject *)__pyx_empty_tuple); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":377 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":371 * self.prev_norm_prefix = () * * self.findexes = IntList(initial_len=10) # <<<<<<<<<<<<<< * self.findexes1 = IntList(initial_len=10) * */ - __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 377; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__initial_len), __pyx_int_10) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 377; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_2)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 377; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__initial_len), __pyx_int_10) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_2)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; __Pyx_GIVEREF(__pyx_t_1); @@ -38789,17 +38750,17 @@ static int __pyx_pf_3_sa_23HieroCachingRuleFactory___cinit__(struct __pyx_obj_3_ __pyx_v_self->findexes = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":378 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":372 * * self.findexes = IntList(initial_len=10) * self.findexes1 = IntList(initial_len=10) # <<<<<<<<<<<<<< * * def configure(self, SuffixArray fsarray, DataArray edarray, */ - __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 378; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_n_s__initial_len), __pyx_int_10) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 378; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_1)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 378; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_n_s__initial_len), __pyx_int_10) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_1)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; __Pyx_GIVEREF(__pyx_t_2); @@ -38853,21 +38814,21 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_3configure(PyObject *__ case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__edarray)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("configure", 1, 4, 4, 1); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 380; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("configure", 1, 4, 4, 1); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__sampler)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("configure", 1, 4, 4, 2); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 380; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("configure", 1, 4, 4, 2); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__scorer)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("configure", 1, 4, 4, 3); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 380; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("configure", 1, 4, 4, 3); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "configure") < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 380; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "configure") < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 4) { goto __pyx_L5_argtuple_error; @@ -38884,16 +38845,16 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_3configure(PyObject *__ } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("configure", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 380; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("configure", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_sa.HieroCachingRuleFactory.configure", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_fsarray), __pyx_ptype_3_sa_SuffixArray, 1, "fsarray", 0))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 380; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_edarray), __pyx_ptype_3_sa_DataArray, 1, "edarray", 0))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 380; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_sampler), __pyx_ptype_3_sa_Sampler, 1, "sampler", 0))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_scorer), __pyx_ptype_3_sa_Scorer, 1, "scorer", 0))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_fsarray), __pyx_ptype_3_sa_SuffixArray, 1, "fsarray", 0))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_edarray), __pyx_ptype_3_sa_DataArray, 1, "edarray", 0))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_sampler), __pyx_ptype_3_sa_Sampler, 1, "sampler", 0))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_scorer), __pyx_ptype_3_sa_Scorer, 1, "scorer", 0))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = __pyx_pf_3_sa_23HieroCachingRuleFactory_2configure(((struct __pyx_obj_3_sa_HieroCachingRuleFactory *)__pyx_v_self), __pyx_v_fsarray, __pyx_v_edarray, __pyx_v_sampler, __pyx_v_scorer); goto __pyx_L0; __pyx_L1_error:; @@ -38903,7 +38864,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_3configure(PyObject *__ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":380 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":374 * self.findexes1 = IntList(initial_len=10) * * def configure(self, SuffixArray fsarray, DataArray edarray, # <<<<<<<<<<<<<< @@ -38921,7 +38882,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_2configure(struct __pyx int __pyx_clineno = 0; __Pyx_RefNannySetupContext("configure", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":385 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":379 * Here we also use it to precompute the most expensive intersections * in the corpus quickly.''' * self.fsa = fsarray # <<<<<<<<<<<<<< @@ -38934,7 +38895,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_2configure(struct __pyx __Pyx_DECREF(((PyObject *)__pyx_v_self->fsa)); __pyx_v_self->fsa = __pyx_v_fsarray; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":386 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":380 * in the corpus quickly.''' * self.fsa = fsarray * self.fda = fsarray.darray # <<<<<<<<<<<<<< @@ -38947,7 +38908,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_2configure(struct __pyx __Pyx_DECREF(((PyObject *)__pyx_v_self->fda)); __pyx_v_self->fda = __pyx_v_fsarray->darray; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":387 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":381 * self.fsa = fsarray * self.fda = fsarray.darray * self.eda = edarray # <<<<<<<<<<<<<< @@ -38960,7 +38921,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_2configure(struct __pyx __Pyx_DECREF(((PyObject *)__pyx_v_self->eda)); __pyx_v_self->eda = __pyx_v_edarray; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":388 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":382 * self.fda = fsarray.darray * self.eda = edarray * self.fid2symid = self.set_idmap(self.fda) # <<<<<<<<<<<<<< @@ -38969,17 +38930,17 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_2configure(struct __pyx */ __pyx_t_1 = ((PyObject *)__pyx_v_self->fda); __Pyx_INCREF(__pyx_t_1); - __pyx_t_2 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->set_idmap(__pyx_v_self, ((struct __pyx_obj_3_sa_DataArray *)__pyx_t_1)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 388; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->set_idmap(__pyx_v_self, ((struct __pyx_obj_3_sa_DataArray *)__pyx_t_1)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_3_sa_IntList))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 388; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_3_sa_IntList))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GIVEREF(__pyx_t_2); __Pyx_GOTREF(__pyx_v_self->fid2symid); __Pyx_DECREF(((PyObject *)__pyx_v_self->fid2symid)); __pyx_v_self->fid2symid = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":389 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":383 * self.eda = edarray * self.fid2symid = self.set_idmap(self.fda) * self.eid2symid = self.set_idmap(self.eda) # <<<<<<<<<<<<<< @@ -38988,31 +38949,31 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_2configure(struct __pyx */ __pyx_t_2 = ((PyObject *)__pyx_v_self->eda); __Pyx_INCREF(__pyx_t_2); - __pyx_t_1 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->set_idmap(__pyx_v_self, ((struct __pyx_obj_3_sa_DataArray *)__pyx_t_2)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->set_idmap(__pyx_v_self, ((struct __pyx_obj_3_sa_DataArray *)__pyx_t_2)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_3_sa_IntList))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_3_sa_IntList))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GIVEREF(__pyx_t_1); __Pyx_GOTREF(__pyx_v_self->eid2symid); __Pyx_DECREF(((PyObject *)__pyx_v_self->eid2symid)); __pyx_v_self->eid2symid = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":390 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":384 * self.fid2symid = self.set_idmap(self.fda) * self.eid2symid = self.set_idmap(self.eda) * self.precompute() # <<<<<<<<<<<<<< * self.sampler = sampler * self.scorer = scorer */ - __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__precompute); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__precompute); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 384; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 384; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":391 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":385 * self.eid2symid = self.set_idmap(self.eda) * self.precompute() * self.sampler = sampler # <<<<<<<<<<<<<< @@ -39025,7 +38986,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_2configure(struct __pyx __Pyx_DECREF(((PyObject *)__pyx_v_self->sampler)); __pyx_v_self->sampler = __pyx_v_sampler; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":392 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":386 * self.precompute() * self.sampler = sampler * self.scorer = scorer # <<<<<<<<<<<<<< @@ -39051,7 +39012,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_2configure(struct __pyx return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":394 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":388 * self.scorer = scorer * * cdef set_idmap(self, DataArray darray): # <<<<<<<<<<<<<< @@ -39076,7 +39037,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_set_idmap(CYTHON_UNUSED int __pyx_clineno = 0; __Pyx_RefNannySetupContext("set_idmap", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":398 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":392 * cdef IntList idmap * * N = len(darray.id2word) # <<<<<<<<<<<<<< @@ -39085,30 +39046,30 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_set_idmap(CYTHON_UNUSED */ __pyx_t_1 = __pyx_v_darray->id2word; __Pyx_INCREF(__pyx_t_1); - __pyx_t_2 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 398; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_N = __pyx_t_2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":399 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":393 * * N = len(darray.id2word) * idmap = IntList(initial_len=N) # <<<<<<<<<<<<<< * for word_id from 0 <= word_id < N: * new_word_id = sym_fromstring(darray.id2word[word_id], True) */ - __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 393; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); - __pyx_t_3 = PyInt_FromLong(__pyx_v_N); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(__pyx_v_N); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 393; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_n_s__initial_len), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_1, ((PyObject *)__pyx_n_s__initial_len), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 393; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_1)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_1)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 393; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; __pyx_v_idmap = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":400 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":394 * N = len(darray.id2word) * idmap = IntList(initial_len=N) * for word_id from 0 <= word_id < N: # <<<<<<<<<<<<<< @@ -39118,20 +39079,20 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_set_idmap(CYTHON_UNUSED __pyx_t_4 = __pyx_v_N; for (__pyx_v_word_id = 0; __pyx_v_word_id < __pyx_t_4; __pyx_v_word_id++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":401 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":395 * idmap = IntList(initial_len=N) * for word_id from 0 <= word_id < N: * new_word_id = sym_fromstring(darray.id2word[word_id], True) # <<<<<<<<<<<<<< * idmap.arr[word_id] = new_word_id * return idmap */ - __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_darray->id2word, __pyx_v_word_id, sizeof(int), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 401; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_GetItemInt(__pyx_v_darray->id2word, __pyx_v_word_id, sizeof(int), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 395; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = PyBytes_AsString(__pyx_t_3); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 401; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyBytes_AsString(__pyx_t_3); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 395; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_new_word_id = __pyx_f_3_sa_sym_fromstring(__pyx_t_5, 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":402 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":396 * for word_id from 0 <= word_id < N: * new_word_id = sym_fromstring(darray.id2word[word_id], True) * idmap.arr[word_id] = new_word_id # <<<<<<<<<<<<<< @@ -39141,7 +39102,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_set_idmap(CYTHON_UNUSED (__pyx_v_idmap->arr[__pyx_v_word_id]) = __pyx_v_new_word_id; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":403 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":397 * new_word_id = sym_fromstring(darray.id2word[word_id], True) * idmap.arr[word_id] = new_word_id * return idmap # <<<<<<<<<<<<<< @@ -39178,7 +39139,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_5pattern2phrase(PyObjec return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":406 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":400 * * * def pattern2phrase(self, pattern): # <<<<<<<<<<<<<< @@ -39206,7 +39167,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_4pattern2phrase(struct int __pyx_clineno = 0; __Pyx_RefNannySetupContext("pattern2phrase", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":408 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":402 * def pattern2phrase(self, pattern): * # pattern is a tuple, which we must convert to a hiero Phrase * result = () # <<<<<<<<<<<<<< @@ -39216,7 +39177,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_4pattern2phrase(struct __Pyx_INCREF(((PyObject *)__pyx_empty_tuple)); __pyx_v_result = __pyx_empty_tuple; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":409 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":403 * # pattern is a tuple, which we must convert to a hiero Phrase * result = () * arity = 0 # <<<<<<<<<<<<<< @@ -39226,7 +39187,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_4pattern2phrase(struct __Pyx_INCREF(__pyx_int_0); __pyx_v_arity = __pyx_int_0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":410 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":404 * result = () * arity = 0 * for word_id in pattern: # <<<<<<<<<<<<<< @@ -39237,7 +39198,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_4pattern2phrase(struct __pyx_t_1 = __pyx_v_pattern; __Pyx_INCREF(__pyx_t_1); __pyx_t_2 = 0; __pyx_t_3 = NULL; } else { - __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_pattern); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 410; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_pattern); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_3 = Py_TYPE(__pyx_t_1)->tp_iternext; } @@ -39245,23 +39206,23 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_4pattern2phrase(struct if (!__pyx_t_3 && PyList_CheckExact(__pyx_t_1)) { if (__pyx_t_2 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_4 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 410; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 410; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_1)) { if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 410; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 410; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { __pyx_t_4 = __pyx_t_3(__pyx_t_1); if (unlikely(!__pyx_t_4)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 410; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -39271,74 +39232,74 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_4pattern2phrase(struct __pyx_v_word_id = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":411 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":405 * arity = 0 * for word_id in pattern: * if word_id == -1: # <<<<<<<<<<<<<< * arity = arity + 1 * new_id = sym_setindex(self.category, arity) */ - __pyx_t_4 = PyObject_RichCompare(__pyx_v_word_id, __pyx_int_neg_1, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 411; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 411; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_RichCompare(__pyx_v_word_id, __pyx_int_neg_1, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 405; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 405; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":412 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":406 * for word_id in pattern: * if word_id == -1: * arity = arity + 1 # <<<<<<<<<<<<<< * new_id = sym_setindex(self.category, arity) * else: */ - __pyx_t_4 = PyNumber_Add(__pyx_v_arity, __pyx_int_1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyNumber_Add(__pyx_v_arity, __pyx_int_1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 406; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_v_arity); __pyx_v_arity = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":413 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":407 * if word_id == -1: * arity = arity + 1 * new_id = sym_setindex(self.category, arity) # <<<<<<<<<<<<<< * else: * new_id = sym_fromstring(self.fda.id2word[word_id], True) */ - __pyx_t_6 = __Pyx_PyInt_AsInt(__pyx_v_arity); if (unlikely((__pyx_t_6 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 413; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyInt_AsInt(__pyx_v_arity); if (unlikely((__pyx_t_6 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_new_id = __pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_t_6); goto __pyx_L5; } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":415 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":409 * new_id = sym_setindex(self.category, arity) * else: * new_id = sym_fromstring(self.fda.id2word[word_id], True) # <<<<<<<<<<<<<< * result = result + (new_id,) * return Phrase(result) */ - __pyx_t_4 = PyObject_GetItem(__pyx_v_self->fda->id2word, __pyx_v_word_id); if (!__pyx_t_4) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 415; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_GetItem(__pyx_v_self->fda->id2word, __pyx_v_word_id); if (!__pyx_t_4) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_7 = PyBytes_AsString(__pyx_t_4); if (unlikely((!__pyx_t_7) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 415; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyBytes_AsString(__pyx_t_4); if (unlikely((!__pyx_t_7) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_v_new_id = __pyx_f_3_sa_sym_fromstring(__pyx_t_7, 1); } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":416 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":410 * else: * new_id = sym_fromstring(self.fda.id2word[word_id], True) * result = result + (new_id,) # <<<<<<<<<<<<<< * return Phrase(result) * */ - __pyx_t_4 = PyInt_FromLong(__pyx_v_new_id); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 416; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(__pyx_v_new_id); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 410; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_8 = PyTuple_New(1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 416; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyTuple_New(1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 410; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyNumber_Add(((PyObject *)__pyx_v_result), ((PyObject *)__pyx_t_8)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 416; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyNumber_Add(((PyObject *)__pyx_v_result), ((PyObject *)__pyx_t_8)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 410; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_4)); __Pyx_DECREF(((PyObject *)__pyx_t_8)); __pyx_t_8 = 0; __Pyx_DECREF(((PyObject *)__pyx_v_result)); @@ -39347,7 +39308,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_4pattern2phrase(struct } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":417 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":411 * new_id = sym_fromstring(self.fda.id2word[word_id], True) * result = result + (new_id,) * return Phrase(result) # <<<<<<<<<<<<<< @@ -39355,12 +39316,12 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_4pattern2phrase(struct * def pattern2phrase_plus(self, pattern): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 417; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 411; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(((PyObject *)__pyx_v_result)); PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_v_result)); __Pyx_GIVEREF(((PyObject *)__pyx_v_result)); - __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 417; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 411; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; __pyx_r = __pyx_t_4; @@ -39395,7 +39356,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_7pattern2phrase_plus(Py return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":419 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":413 * return Phrase(result) * * def pattern2phrase_plus(self, pattern): # <<<<<<<<<<<<<< @@ -39425,19 +39386,19 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_6pattern2phrase_plus(st int __pyx_clineno = 0; __Pyx_RefNannySetupContext("pattern2phrase_plus", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":422 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":416 * # returns a list containing both the pattern, and pattern * # suffixed/prefixed with the NT category. * patterns = [] # <<<<<<<<<<<<<< * result = () * arity = 0 */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 416; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_patterns = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":423 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":417 * # suffixed/prefixed with the NT category. * patterns = [] * result = () # <<<<<<<<<<<<<< @@ -39447,7 +39408,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_6pattern2phrase_plus(st __Pyx_INCREF(((PyObject *)__pyx_empty_tuple)); __pyx_v_result = __pyx_empty_tuple; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":424 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":418 * patterns = [] * result = () * arity = 0 # <<<<<<<<<<<<<< @@ -39457,7 +39418,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_6pattern2phrase_plus(st __Pyx_INCREF(__pyx_int_0); __pyx_v_arity = __pyx_int_0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":425 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":419 * result = () * arity = 0 * for word_id in pattern: # <<<<<<<<<<<<<< @@ -39468,7 +39429,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_6pattern2phrase_plus(st __pyx_t_1 = __pyx_v_pattern; __Pyx_INCREF(__pyx_t_1); __pyx_t_2 = 0; __pyx_t_3 = NULL; } else { - __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_pattern); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_pattern); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 419; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_3 = Py_TYPE(__pyx_t_1)->tp_iternext; } @@ -39476,23 +39437,23 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_6pattern2phrase_plus(st if (!__pyx_t_3 && PyList_CheckExact(__pyx_t_1)) { if (__pyx_t_2 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_4 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 419; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 419; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_1)) { if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 419; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 419; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { __pyx_t_4 = __pyx_t_3(__pyx_t_1); if (unlikely(!__pyx_t_4)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 419; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -39502,74 +39463,74 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_6pattern2phrase_plus(st __pyx_v_word_id = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":426 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":420 * arity = 0 * for word_id in pattern: * if word_id == -1: # <<<<<<<<<<<<<< * arity = arity + 1 * new_id = sym_setindex(self.category, arity) */ - __pyx_t_4 = PyObject_RichCompare(__pyx_v_word_id, __pyx_int_neg_1, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 426; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 426; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_RichCompare(__pyx_v_word_id, __pyx_int_neg_1, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 420; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 420; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":427 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":421 * for word_id in pattern: * if word_id == -1: * arity = arity + 1 # <<<<<<<<<<<<<< * new_id = sym_setindex(self.category, arity) * else: */ - __pyx_t_4 = PyNumber_Add(__pyx_v_arity, __pyx_int_1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 427; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyNumber_Add(__pyx_v_arity, __pyx_int_1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 421; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_v_arity); __pyx_v_arity = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":428 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":422 * if word_id == -1: * arity = arity + 1 * new_id = sym_setindex(self.category, arity) # <<<<<<<<<<<<<< * else: * new_id = sym_fromstring(self.fda.id2word[word_id], True) */ - __pyx_t_6 = __Pyx_PyInt_AsInt(__pyx_v_arity); if (unlikely((__pyx_t_6 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 428; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyInt_AsInt(__pyx_v_arity); if (unlikely((__pyx_t_6 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_new_id = __pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_t_6); goto __pyx_L5; } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":430 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":424 * new_id = sym_setindex(self.category, arity) * else: * new_id = sym_fromstring(self.fda.id2word[word_id], True) # <<<<<<<<<<<<<< * result = result + (new_id,) * patterns.append(Phrase(result)) */ - __pyx_t_4 = PyObject_GetItem(__pyx_v_self->fda->id2word, __pyx_v_word_id); if (!__pyx_t_4) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_GetItem(__pyx_v_self->fda->id2word, __pyx_v_word_id); if (!__pyx_t_4) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 424; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_7 = PyBytes_AsString(__pyx_t_4); if (unlikely((!__pyx_t_7) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyBytes_AsString(__pyx_t_4); if (unlikely((!__pyx_t_7) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 424; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_v_new_id = __pyx_f_3_sa_sym_fromstring(__pyx_t_7, 1); } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":431 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":425 * else: * new_id = sym_fromstring(self.fda.id2word[word_id], True) * result = result + (new_id,) # <<<<<<<<<<<<<< * patterns.append(Phrase(result)) * patterns.append(Phrase(result + (sym_setindex(self.category, 1),))) */ - __pyx_t_4 = PyInt_FromLong(__pyx_v_new_id); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 431; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(__pyx_v_new_id); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_8 = PyTuple_New(1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 431; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyTuple_New(1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyNumber_Add(((PyObject *)__pyx_v_result), ((PyObject *)__pyx_t_8)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 431; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyNumber_Add(((PyObject *)__pyx_v_result), ((PyObject *)__pyx_t_8)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_4)); __Pyx_DECREF(((PyObject *)__pyx_t_8)); __pyx_t_8 = 0; __Pyx_DECREF(((PyObject *)__pyx_v_result)); @@ -39578,81 +39539,81 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_6pattern2phrase_plus(st } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":432 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":426 * new_id = sym_fromstring(self.fda.id2word[word_id], True) * result = result + (new_id,) * patterns.append(Phrase(result)) # <<<<<<<<<<<<<< * patterns.append(Phrase(result + (sym_setindex(self.category, 1),))) * patterns.append(Phrase((sym_setindex(self.category, 1),) + result)) */ - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 426; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(((PyObject *)__pyx_v_result)); PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_v_result)); __Pyx_GIVEREF(((PyObject *)__pyx_v_result)); - __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 426; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - __pyx_t_9 = PyList_Append(__pyx_v_patterns, __pyx_t_4); if (unlikely(__pyx_t_9 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyList_Append(__pyx_v_patterns, __pyx_t_4); if (unlikely(__pyx_t_9 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 426; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":433 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":427 * result = result + (new_id,) * patterns.append(Phrase(result)) * patterns.append(Phrase(result + (sym_setindex(self.category, 1),))) # <<<<<<<<<<<<<< * patterns.append(Phrase((sym_setindex(self.category, 1),) + result)) * return patterns */ - __pyx_t_4 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, 1)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 433; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, 1)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 427; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 433; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 427; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyNumber_Add(((PyObject *)__pyx_v_result), ((PyObject *)__pyx_t_1)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 433; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyNumber_Add(((PyObject *)__pyx_v_result), ((PyObject *)__pyx_t_1)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 427; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_4)); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 433; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 427; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_t_4)); __Pyx_GIVEREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; - __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 433; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 427; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - __pyx_t_9 = PyList_Append(__pyx_v_patterns, __pyx_t_4); if (unlikely(__pyx_t_9 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 433; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyList_Append(__pyx_v_patterns, __pyx_t_4); if (unlikely(__pyx_t_9 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 427; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":434 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":428 * patterns.append(Phrase(result)) * patterns.append(Phrase(result + (sym_setindex(self.category, 1),))) * patterns.append(Phrase((sym_setindex(self.category, 1),) + result)) # <<<<<<<<<<<<<< * return patterns * */ - __pyx_t_4 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, 1)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 434; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, 1)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 428; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 434; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 428; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyNumber_Add(((PyObject *)__pyx_t_1), ((PyObject *)__pyx_v_result)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 434; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyNumber_Add(((PyObject *)__pyx_t_1), ((PyObject *)__pyx_v_result)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 428; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_4)); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 434; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 428; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_t_4)); __Pyx_GIVEREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; - __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 434; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 428; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - __pyx_t_9 = PyList_Append(__pyx_v_patterns, __pyx_t_4); if (unlikely(__pyx_t_9 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 434; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyList_Append(__pyx_v_patterns, __pyx_t_4); if (unlikely(__pyx_t_9 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 428; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":435 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":429 * patterns.append(Phrase(result + (sym_setindex(self.category, 1),))) * patterns.append(Phrase((sym_setindex(self.category, 1),) + result)) * return patterns # <<<<<<<<<<<<<< @@ -39693,7 +39654,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_9precompute(PyObject *_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":437 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":431 * return patterns * * def precompute(self): # <<<<<<<<<<<<<< @@ -39727,7 +39688,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py int __pyx_clineno = 0; __Pyx_RefNannySetupContext("precompute", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":440 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":434 * cdef Precomputation pre * * if self.precompute_file is not None: # <<<<<<<<<<<<<< @@ -39737,31 +39698,31 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __pyx_t_1 = (__pyx_v_self->precompute_file != Py_None); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":441 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":435 * * if self.precompute_file is not None: * start_time = monitor_cpu() # <<<<<<<<<<<<<< * logger.info("Reading precomputed data from file %s... ", self.precompute_file) * pre = Precomputation(from_binary=self.precompute_file) */ - __pyx_t_2 = PyFloat_FromDouble(__pyx_f_3_sa_monitor_cpu()); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyFloat_FromDouble(__pyx_f_3_sa_monitor_cpu()); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 435; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_v_start_time = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":442 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":436 * if self.precompute_file is not None: * start_time = monitor_cpu() * logger.info("Reading precomputed data from file %s... ", self.precompute_file) # <<<<<<<<<<<<<< * pre = Precomputation(from_binary=self.precompute_file) * # check parameters of precomputation -- some are critical and some are not */ - __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__logger); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__logger); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 436; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__info); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__info); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 436; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 436; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(((PyObject *)__pyx_kp_s_108)); PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_108)); @@ -39769,29 +39730,29 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __Pyx_INCREF(__pyx_v_self->precompute_file); PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_v_self->precompute_file); __Pyx_GIVEREF(__pyx_v_self->precompute_file); - __pyx_t_4 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 436; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":443 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":437 * start_time = monitor_cpu() * logger.info("Reading precomputed data from file %s... ", self.precompute_file) * pre = Precomputation(from_binary=self.precompute_file) # <<<<<<<<<<<<<< * # check parameters of precomputation -- some are critical and some are not * if pre.max_nonterminals != self.max_nonterminals: */ - __pyx_t_4 = PyDict_New(); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 443; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyDict_New(); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 437; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_4)); - if (PyDict_SetItem(__pyx_t_4, ((PyObject *)__pyx_n_s__from_binary), __pyx_v_self->precompute_file) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 443; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Precomputation)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 443; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_4, ((PyObject *)__pyx_n_s__from_binary), __pyx_v_self->precompute_file) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 437; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Precomputation)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 437; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; __pyx_v_pre = ((struct __pyx_obj_3_sa_Precomputation *)__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":445 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":439 * pre = Precomputation(from_binary=self.precompute_file) * # check parameters of precomputation -- some are critical and some are not * if pre.max_nonterminals != self.max_nonterminals: # <<<<<<<<<<<<<< @@ -39801,23 +39762,23 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __pyx_t_1 = (__pyx_v_pre->max_nonterminals != __pyx_v_self->max_nonterminals); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":446 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":440 * # check parameters of precomputation -- some are critical and some are not * if pre.max_nonterminals != self.max_nonterminals: * logger.warn("Precomputation done with max nonterminals %d, decoder uses %d", pre.max_nonterminals, self.max_nonterminals) # <<<<<<<<<<<<<< * if pre.max_length != self.max_length: * logger.warn("Precomputation done with max terminals %d, decoder uses %d", pre.max_length, self.max_length) */ - __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__logger); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetName(__pyx_m, __pyx_n_s__logger); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 440; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__warn); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__warn); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 440; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyInt_FromLong(__pyx_v_pre->max_nonterminals); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_v_pre->max_nonterminals); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 440; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyInt_FromLong(__pyx_v_self->max_nonterminals); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(__pyx_v_self->max_nonterminals); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 440; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = PyTuple_New(3); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyTuple_New(3); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 440; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_INCREF(((PyObject *)__pyx_kp_s_109)); PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_kp_s_109)); @@ -39828,7 +39789,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __Pyx_GIVEREF(__pyx_t_3); __pyx_t_2 = 0; __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 440; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; @@ -39837,7 +39798,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":447 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":441 * if pre.max_nonterminals != self.max_nonterminals: * logger.warn("Precomputation done with max nonterminals %d, decoder uses %d", pre.max_nonterminals, self.max_nonterminals) * if pre.max_length != self.max_length: # <<<<<<<<<<<<<< @@ -39847,23 +39808,23 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __pyx_t_1 = (__pyx_v_pre->max_length != __pyx_v_self->max_length); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":448 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":442 * logger.warn("Precomputation done with max nonterminals %d, decoder uses %d", pre.max_nonterminals, self.max_nonterminals) * if pre.max_length != self.max_length: * logger.warn("Precomputation done with max terminals %d, decoder uses %d", pre.max_length, self.max_length) # <<<<<<<<<<<<<< * if pre.train_max_initial_size != self.train_max_initial_size: * raise Exception("Precomputation done with max initial size %d, decoder uses %d" % (pre.train_max_initial_size, self.train_max_initial_size)) */ - __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s__logger); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s__logger); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__warn); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__warn); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyInt_FromLong(__pyx_v_pre->max_length); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(__pyx_v_pre->max_length); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyInt_FromLong(__pyx_v_self->max_length); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(__pyx_v_self->max_length); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_2 = PyTuple_New(3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(((PyObject *)__pyx_kp_s_110)); PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_110)); @@ -39874,7 +39835,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __Pyx_GIVEREF(__pyx_t_4); __pyx_t_3 = 0; __pyx_t_4 = 0; - __pyx_t_4 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; @@ -39883,7 +39844,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":449 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":443 * if pre.max_length != self.max_length: * logger.warn("Precomputation done with max terminals %d, decoder uses %d", pre.max_length, self.max_length) * if pre.train_max_initial_size != self.train_max_initial_size: # <<<<<<<<<<<<<< @@ -39893,18 +39854,18 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __pyx_t_1 = (__pyx_v_pre->train_max_initial_size != __pyx_v_self->train_max_initial_size); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":450 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":444 * logger.warn("Precomputation done with max terminals %d, decoder uses %d", pre.max_length, self.max_length) * if pre.train_max_initial_size != self.train_max_initial_size: * raise Exception("Precomputation done with max initial size %d, decoder uses %d" % (pre.train_max_initial_size, self.train_max_initial_size)) # <<<<<<<<<<<<<< * if pre.train_min_gap_size != self.train_min_gap_size: * raise Exception("Precomputation done with min gap size %d, decoder uses %d" % (pre.train_min_gap_size, self.train_min_gap_size)) */ - __pyx_t_4 = PyInt_FromLong(__pyx_v_pre->train_max_initial_size); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(__pyx_v_pre->train_max_initial_size); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_2 = PyInt_FromLong(__pyx_v_self->train_max_initial_size); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_v_self->train_max_initial_size); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); @@ -39912,25 +39873,25 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __Pyx_GIVEREF(__pyx_t_2); __pyx_t_4 = 0; __pyx_t_2 = 0; - __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_111), ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_111), ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; - __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_t_2)); __Pyx_GIVEREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; __Pyx_Raise(__pyx_t_2, 0, 0, 0); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} goto __pyx_L6; } __pyx_L6:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":451 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":445 * if pre.train_max_initial_size != self.train_max_initial_size: * raise Exception("Precomputation done with max initial size %d, decoder uses %d" % (pre.train_max_initial_size, self.train_max_initial_size)) * if pre.train_min_gap_size != self.train_min_gap_size: # <<<<<<<<<<<<<< @@ -39940,18 +39901,18 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __pyx_t_1 = (__pyx_v_pre->train_min_gap_size != __pyx_v_self->train_min_gap_size); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":452 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":446 * raise Exception("Precomputation done with max initial size %d, decoder uses %d" % (pre.train_max_initial_size, self.train_max_initial_size)) * if pre.train_min_gap_size != self.train_min_gap_size: * raise Exception("Precomputation done with min gap size %d, decoder uses %d" % (pre.train_min_gap_size, self.train_min_gap_size)) # <<<<<<<<<<<<<< * if self.use_index: * logger.info("Converting %d hash keys on precomputed inverted index... ", len(pre.precomputed_index)) */ - __pyx_t_2 = PyInt_FromLong(__pyx_v_pre->train_min_gap_size); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_v_pre->train_min_gap_size); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_5 = PyInt_FromLong(__pyx_v_self->train_min_gap_size); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyInt_FromLong(__pyx_v_self->train_min_gap_size); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); @@ -39959,25 +39920,25 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __Pyx_GIVEREF(__pyx_t_5); __pyx_t_2 = 0; __pyx_t_5 = 0; - __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_112), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_112), ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_5)); __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_t_5)); __Pyx_GIVEREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; - __pyx_t_5 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; __Pyx_Raise(__pyx_t_5, 0, 0, 0); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} goto __pyx_L7; } __pyx_L7:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":453 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":447 * if pre.train_min_gap_size != self.train_min_gap_size: * raise Exception("Precomputation done with min gap size %d, decoder uses %d" % (pre.train_min_gap_size, self.train_min_gap_size)) * if self.use_index: # <<<<<<<<<<<<<< @@ -39986,25 +39947,25 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py */ if (__pyx_v_self->use_index) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":454 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":448 * raise Exception("Precomputation done with min gap size %d, decoder uses %d" % (pre.train_min_gap_size, self.train_min_gap_size)) * if self.use_index: * logger.info("Converting %d hash keys on precomputed inverted index... ", len(pre.precomputed_index)) # <<<<<<<<<<<<<< * for pattern, arr in pre.precomputed_index.iteritems(): * phrases = self.pattern2phrase_plus(pattern) */ - __pyx_t_5 = __Pyx_GetName(__pyx_m, __pyx_n_s__logger); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_GetName(__pyx_m, __pyx_n_s__logger); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_4 = PyObject_GetAttr(__pyx_t_5, __pyx_n_s__info); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_GetAttr(__pyx_t_5, __pyx_n_s__info); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_5 = __pyx_v_pre->precomputed_index; __Pyx_INCREF(__pyx_t_5); - __pyx_t_6 = PyObject_Length(__pyx_t_5); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyObject_Length(__pyx_t_5); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyInt_FromSsize_t(__pyx_t_6); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyInt_FromSsize_t(__pyx_t_6); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(((PyObject *)__pyx_kp_s_113)); PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_113)); @@ -40012,13 +39973,13 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_5); __Pyx_GIVEREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":455 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":449 * if self.use_index: * logger.info("Converting %d hash keys on precomputed inverted index... ", len(pre.precomputed_index)) * for pattern, arr in pre.precomputed_index.iteritems(): # <<<<<<<<<<<<<< @@ -40028,9 +39989,9 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __pyx_t_6 = 0; if (unlikely(__pyx_v_pre->precomputed_index == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "iteritems"); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 455; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 449; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __pyx_t_2 = __Pyx_dict_iterator(__pyx_v_pre->precomputed_index, 0, ((PyObject *)__pyx_n_s__iteritems), (&__pyx_t_7), (&__pyx_t_8)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 455; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_dict_iterator(__pyx_v_pre->precomputed_index, 0, ((PyObject *)__pyx_n_s__iteritems), (&__pyx_t_7), (&__pyx_t_8)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 449; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = __pyx_t_2; @@ -40038,7 +39999,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py while (1) { __pyx_t_9 = __Pyx_dict_iter_next(__pyx_t_5, __pyx_t_7, &__pyx_t_6, &__pyx_t_2, &__pyx_t_4, NULL, __pyx_t_8); if (unlikely(__pyx_t_9 == 0)) break; - if (unlikely(__pyx_t_9 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 455; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__pyx_t_9 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 449; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_GOTREF(__pyx_t_4); __Pyx_XDECREF(__pyx_v_pattern); @@ -40048,21 +40009,21 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __pyx_v_arr = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":456 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":450 * logger.info("Converting %d hash keys on precomputed inverted index... ", len(pre.precomputed_index)) * for pattern, arr in pre.precomputed_index.iteritems(): * phrases = self.pattern2phrase_plus(pattern) # <<<<<<<<<<<<<< * for phrase in phrases: * self.precomputed_index[phrase] = arr */ - __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__pattern2phrase_plus); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 456; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__pattern2phrase_plus); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 456; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(__pyx_v_pattern); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_pattern); __Pyx_GIVEREF(__pyx_v_pattern); - __pyx_t_3 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 456; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; @@ -40070,7 +40031,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __pyx_v_phrases = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":457 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":451 * for pattern, arr in pre.precomputed_index.iteritems(): * phrases = self.pattern2phrase_plus(pattern) * for phrase in phrases: # <<<<<<<<<<<<<< @@ -40081,7 +40042,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __pyx_t_3 = __pyx_v_phrases; __Pyx_INCREF(__pyx_t_3); __pyx_t_10 = 0; __pyx_t_11 = NULL; } else { - __pyx_t_10 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_phrases); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_phrases); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 451; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __pyx_t_11 = Py_TYPE(__pyx_t_3)->tp_iternext; } @@ -40089,23 +40050,23 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py if (!__pyx_t_11 && PyList_CheckExact(__pyx_t_3)) { if (__pyx_t_10 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_2 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_10); __Pyx_INCREF(__pyx_t_2); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_10); __Pyx_INCREF(__pyx_t_2); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 451; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_2 = PySequence_ITEM(__pyx_t_3, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PySequence_ITEM(__pyx_t_3, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 451; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else if (!__pyx_t_11 && PyTuple_CheckExact(__pyx_t_3)) { if (__pyx_t_10 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_2 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_10); __Pyx_INCREF(__pyx_t_2); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_10); __Pyx_INCREF(__pyx_t_2); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 451; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_2 = PySequence_ITEM(__pyx_t_3, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PySequence_ITEM(__pyx_t_3, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 451; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { __pyx_t_2 = __pyx_t_11(__pyx_t_3); if (unlikely(!__pyx_t_2)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 451; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -40115,14 +40076,14 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __pyx_v_phrase = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":458 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":452 * phrases = self.pattern2phrase_plus(pattern) * for phrase in phrases: * self.precomputed_index[phrase] = arr # <<<<<<<<<<<<<< * if self.use_collocations: * logger.info("Converting %d hash keys on precomputed collocations... ", len(pre.precomputed_collocations)) */ - if (PyObject_SetItem(__pyx_v_self->precomputed_index, __pyx_v_phrase, __pyx_v_arr) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 458; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyObject_SetItem(__pyx_v_self->precomputed_index, __pyx_v_phrase, __pyx_v_arr) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } @@ -40131,7 +40092,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py } __pyx_L8:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":459 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":453 * for phrase in phrases: * self.precomputed_index[phrase] = arr * if self.use_collocations: # <<<<<<<<<<<<<< @@ -40140,25 +40101,25 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py */ if (__pyx_v_self->use_collocations) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":460 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":454 * self.precomputed_index[phrase] = arr * if self.use_collocations: * logger.info("Converting %d hash keys on precomputed collocations... ", len(pre.precomputed_collocations)) # <<<<<<<<<<<<<< * for pattern, arr in pre.precomputed_collocations.iteritems(): * phrase = self.pattern2phrase(pattern) */ - __pyx_t_5 = __Pyx_GetName(__pyx_m, __pyx_n_s__logger); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_GetName(__pyx_m, __pyx_n_s__logger); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_3 = PyObject_GetAttr(__pyx_t_5, __pyx_n_s__info); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(__pyx_t_5, __pyx_n_s__info); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_5 = __pyx_v_pre->precomputed_collocations; __Pyx_INCREF(__pyx_t_5); - __pyx_t_7 = PyObject_Length(__pyx_t_5); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_Length(__pyx_t_5); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyInt_FromSsize_t(__pyx_t_7); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyInt_FromSsize_t(__pyx_t_7); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(((PyObject *)__pyx_kp_s_114)); PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_114)); @@ -40166,13 +40127,13 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_5); __Pyx_GIVEREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":461 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":455 * if self.use_collocations: * logger.info("Converting %d hash keys on precomputed collocations... ", len(pre.precomputed_collocations)) * for pattern, arr in pre.precomputed_collocations.iteritems(): # <<<<<<<<<<<<<< @@ -40182,9 +40143,9 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __pyx_t_7 = 0; if (unlikely(__pyx_v_pre->precomputed_collocations == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "iteritems"); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 461; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 455; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __pyx_t_2 = __Pyx_dict_iterator(__pyx_v_pre->precomputed_collocations, 0, ((PyObject *)__pyx_n_s__iteritems), (&__pyx_t_6), (&__pyx_t_8)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 461; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_dict_iterator(__pyx_v_pre->precomputed_collocations, 0, ((PyObject *)__pyx_n_s__iteritems), (&__pyx_t_6), (&__pyx_t_8)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 455; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = __pyx_t_2; @@ -40192,7 +40153,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py while (1) { __pyx_t_9 = __Pyx_dict_iter_next(__pyx_t_5, __pyx_t_6, &__pyx_t_7, &__pyx_t_2, &__pyx_t_3, NULL, __pyx_t_8); if (unlikely(__pyx_t_9 == 0)) break; - if (unlikely(__pyx_t_9 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 461; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__pyx_t_9 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 455; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_GOTREF(__pyx_t_3); __Pyx_XDECREF(__pyx_v_pattern); @@ -40202,21 +40163,21 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __pyx_v_arr = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":462 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":456 * logger.info("Converting %d hash keys on precomputed collocations... ", len(pre.precomputed_collocations)) * for pattern, arr in pre.precomputed_collocations.iteritems(): * phrase = self.pattern2phrase(pattern) # <<<<<<<<<<<<<< * self.precomputed_collocations[phrase] = arr * stop_time = monitor_cpu() */ - __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__pattern2phrase); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__pattern2phrase); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 456; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 456; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(__pyx_v_pattern); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_pattern); __Pyx_GIVEREF(__pyx_v_pattern); - __pyx_t_4 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 456; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; @@ -40224,47 +40185,47 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py __pyx_v_phrase = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":463 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":457 * for pattern, arr in pre.precomputed_collocations.iteritems(): * phrase = self.pattern2phrase(pattern) * self.precomputed_collocations[phrase] = arr # <<<<<<<<<<<<<< * stop_time = monitor_cpu() * logger.info("Processing precomputations took %f seconds", stop_time - start_time) */ - if (PyObject_SetItem(__pyx_v_self->precomputed_collocations, __pyx_v_phrase, __pyx_v_arr) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyObject_SetItem(__pyx_v_self->precomputed_collocations, __pyx_v_phrase, __pyx_v_arr) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; goto __pyx_L13; } __pyx_L13:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":464 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":458 * phrase = self.pattern2phrase(pattern) * self.precomputed_collocations[phrase] = arr * stop_time = monitor_cpu() # <<<<<<<<<<<<<< * logger.info("Processing precomputations took %f seconds", stop_time - start_time) * */ - __pyx_t_5 = PyFloat_FromDouble(__pyx_f_3_sa_monitor_cpu()); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyFloat_FromDouble(__pyx_f_3_sa_monitor_cpu()); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 458; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __pyx_v_stop_time = __pyx_t_5; __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":465 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":459 * self.precomputed_collocations[phrase] = arr * stop_time = monitor_cpu() * logger.info("Processing precomputations took %f seconds", stop_time - start_time) # <<<<<<<<<<<<<< * * */ - __pyx_t_5 = __Pyx_GetName(__pyx_m, __pyx_n_s__logger); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_GetName(__pyx_m, __pyx_n_s__logger); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 459; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_4 = PyObject_GetAttr(__pyx_t_5, __pyx_n_s__info); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_GetAttr(__pyx_t_5, __pyx_n_s__info); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 459; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyNumber_Subtract(__pyx_v_stop_time, __pyx_v_start_time); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyNumber_Subtract(__pyx_v_stop_time, __pyx_v_start_time); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 459; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 459; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(((PyObject *)__pyx_kp_s_115)); PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_115)); @@ -40272,7 +40233,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_8precompute(struct __py PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_5); __Pyx_GIVEREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 459; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; @@ -40314,7 +40275,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_11get_precomputed_collo return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":468 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":462 * * * def get_precomputed_collocation(self, phrase): # <<<<<<<<<<<<<< @@ -40336,29 +40297,29 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_10get_precomputed_collo int __pyx_clineno = 0; __Pyx_RefNannySetupContext("get_precomputed_collocation", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":469 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":463 * * def get_precomputed_collocation(self, phrase): * if phrase in self.precomputed_collocations: # <<<<<<<<<<<<<< * arr = self.precomputed_collocations[phrase] * return PhraseLocation(arr=arr, arr_low=0, arr_high=len(arr), num_subpatterns=phrase.arity()+1) */ - __pyx_t_1 = (__Pyx_PySequence_Contains(__pyx_v_phrase, __pyx_v_self->precomputed_collocations, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 469; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = (__Pyx_PySequence_Contains(__pyx_v_phrase, __pyx_v_self->precomputed_collocations, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":470 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":464 * def get_precomputed_collocation(self, phrase): * if phrase in self.precomputed_collocations: * arr = self.precomputed_collocations[phrase] # <<<<<<<<<<<<<< * return PhraseLocation(arr=arr, arr_low=0, arr_high=len(arr), num_subpatterns=phrase.arity()+1) * return None */ - __pyx_t_2 = PyObject_GetItem(__pyx_v_self->precomputed_collocations, __pyx_v_phrase); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_GetItem(__pyx_v_self->precomputed_collocations, __pyx_v_phrase); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_v_arr = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":471 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":465 * if phrase in self.precomputed_collocations: * arr = self.precomputed_collocations[phrase] * return PhraseLocation(arr=arr, arr_low=0, arr_high=len(arr), num_subpatterns=phrase.arity()+1) # <<<<<<<<<<<<<< @@ -40366,26 +40327,26 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_10get_precomputed_collo * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 471; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__arr), __pyx_v_arr) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 471; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__arr_low), __pyx_int_0) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 471; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_3 = PyObject_Length(__pyx_v_arr); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 471; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_4 = PyInt_FromSsize_t(__pyx_t_3); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 471; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__arr), __pyx_v_arr) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__arr_low), __pyx_int_0) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Length(__pyx_v_arr); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromSsize_t(__pyx_t_3); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__arr_high), __pyx_t_4) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 471; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__arr_high), __pyx_t_4) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyObject_GetAttr(__pyx_v_phrase, __pyx_n_s__arity); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 471; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_GetAttr(__pyx_v_phrase, __pyx_n_s__arity); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 471; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyNumber_Add(__pyx_t_5, __pyx_int_1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 471; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyNumber_Add(__pyx_t_5, __pyx_int_1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__num_subpatterns), __pyx_t_4) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 471; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__num_subpatterns), __pyx_t_4) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_PhraseLocation)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_2)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 471; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_PhraseLocation)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_2)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; __pyx_r = __pyx_t_4; @@ -40395,7 +40356,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_10get_precomputed_collo } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":472 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":466 * arr = self.precomputed_collocations[phrase] * return PhraseLocation(arr=arr, arr_low=0, arr_high=len(arr), num_subpatterns=phrase.arity()+1) * return None # <<<<<<<<<<<<<< @@ -40422,7 +40383,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_10get_precomputed_collo return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":475 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":469 * * * cdef int* baeza_yates_helper(self, int low1, int high1, int* arr1, int step1, # <<<<<<<<<<<<<< @@ -40468,7 +40429,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("baeza_yates_helper", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":488 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":482 * cdef Matching loc1, loc2 * * result = malloc(0*sizeof(int*)) # <<<<<<<<<<<<<< @@ -40477,7 +40438,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_result = ((int *)malloc((0 * (sizeof(int *))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":490 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":484 * result = malloc(0*sizeof(int*)) * * d_first = 0 # <<<<<<<<<<<<<< @@ -40486,7 +40447,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_d_first = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":491 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":485 * * d_first = 0 * if high1 - low1 > high2 - low2: # <<<<<<<<<<<<<< @@ -40496,7 +40457,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_1 = ((__pyx_v_high1 - __pyx_v_low1) > (__pyx_v_high2 - __pyx_v_low2)); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":492 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":486 * d_first = 0 * if high1 - low1 > high2 - low2: * d_first = 1 # <<<<<<<<<<<<<< @@ -40508,7 +40469,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":496 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":490 * # First, check to see if we are at any of the recursive base cases * # Case 1: one of the sets is empty * if low1 >= high1 or low2 >= high2: # <<<<<<<<<<<<<< @@ -40524,7 +40485,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":497 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":491 * # Case 1: one of the sets is empty * if low1 >= high1 or low2 >= high2: * return result # <<<<<<<<<<<<<< @@ -40537,7 +40498,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":500 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":494 * * # Case 2: sets are non-overlapping * assign_matching(&loc1, arr1, high1-step1, step1, self.fda.sent_id.arr) # <<<<<<<<<<<<<< @@ -40546,7 +40507,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_f_3_sa_assign_matching((&__pyx_v_loc1), __pyx_v_arr1, (__pyx_v_high1 - __pyx_v_step1), __pyx_v_step1, __pyx_v_self->fda->sent_id->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":501 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":495 * # Case 2: sets are non-overlapping * assign_matching(&loc1, arr1, high1-step1, step1, self.fda.sent_id.arr) * assign_matching(&loc2, arr2, low2, step2, self.fda.sent_id.arr) # <<<<<<<<<<<<<< @@ -40555,7 +40516,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_f_3_sa_assign_matching((&__pyx_v_loc2), __pyx_v_arr2, __pyx_v_low2, __pyx_v_step2, __pyx_v_self->fda->sent_id->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":502 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":496 * assign_matching(&loc1, arr1, high1-step1, step1, self.fda.sent_id.arr) * assign_matching(&loc2, arr2, low2, step2, self.fda.sent_id.arr) * if self.compare_matchings(&loc1, &loc2, offset_by_one, len_last) == -1: # <<<<<<<<<<<<<< @@ -40565,7 +40526,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_3 = (((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->compare_matchings(__pyx_v_self, (&__pyx_v_loc1), (&__pyx_v_loc2), __pyx_v_offset_by_one, __pyx_v_len_last) == -1); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":503 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":497 * assign_matching(&loc2, arr2, low2, step2, self.fda.sent_id.arr) * if self.compare_matchings(&loc1, &loc2, offset_by_one, len_last) == -1: * return result # <<<<<<<<<<<<<< @@ -40578,7 +40539,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":505 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":499 * return result * * assign_matching(&loc1, arr1, low1, step1, self.fda.sent_id.arr) # <<<<<<<<<<<<<< @@ -40587,7 +40548,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_f_3_sa_assign_matching((&__pyx_v_loc1), __pyx_v_arr1, __pyx_v_low1, __pyx_v_step1, __pyx_v_self->fda->sent_id->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":506 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":500 * * assign_matching(&loc1, arr1, low1, step1, self.fda.sent_id.arr) * assign_matching(&loc2, arr2, high2-step2, step2, self.fda.sent_id.arr) # <<<<<<<<<<<<<< @@ -40596,7 +40557,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_f_3_sa_assign_matching((&__pyx_v_loc2), __pyx_v_arr2, (__pyx_v_high2 - __pyx_v_step2), __pyx_v_step2, __pyx_v_self->fda->sent_id->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":507 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":501 * assign_matching(&loc1, arr1, low1, step1, self.fda.sent_id.arr) * assign_matching(&loc2, arr2, high2-step2, step2, self.fda.sent_id.arr) * if self.compare_matchings(&loc1, &loc2, offset_by_one, len_last) == 1: # <<<<<<<<<<<<<< @@ -40606,7 +40567,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_3 = (((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->compare_matchings(__pyx_v_self, (&__pyx_v_loc1), (&__pyx_v_loc2), __pyx_v_offset_by_one, __pyx_v_len_last) == 1); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":508 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":502 * assign_matching(&loc2, arr2, high2-step2, step2, self.fda.sent_id.arr) * if self.compare_matchings(&loc1, &loc2, offset_by_one, len_last) == 1: * return result # <<<<<<<<<<<<<< @@ -40619,7 +40580,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } __pyx_L6:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":512 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":506 * # Case 3: query set and data set do not meet size mismatch constraints; * # We use mergesort instead in this case * qsetsize = (high1-low1) / step1 # <<<<<<<<<<<<<< @@ -40629,15 +40590,15 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_4 = (__pyx_v_high1 - __pyx_v_low1); if (unlikely(__pyx_v_step1 == 0)) { PyErr_Format(PyExc_ZeroDivisionError, "integer division or modulo by zero"); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 512; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 506; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } else if (sizeof(int) == sizeof(long) && unlikely(__pyx_v_step1 == -1) && unlikely(UNARY_NEG_WOULD_OVERFLOW(__pyx_t_4))) { PyErr_Format(PyExc_OverflowError, "value too large to perform division"); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 512; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 506; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_v_qsetsize = __Pyx_div_int(__pyx_t_4, __pyx_v_step1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":513 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":507 * # We use mergesort instead in this case * qsetsize = (high1-low1) / step1 * dsetsize = (high2-low2) / step2 # <<<<<<<<<<<<<< @@ -40647,15 +40608,15 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_4 = (__pyx_v_high2 - __pyx_v_low2); if (unlikely(__pyx_v_step2 == 0)) { PyErr_Format(PyExc_ZeroDivisionError, "integer division or modulo by zero"); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 513; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 507; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } else if (sizeof(int) == sizeof(long) && unlikely(__pyx_v_step2 == -1) && unlikely(UNARY_NEG_WOULD_OVERFLOW(__pyx_t_4))) { PyErr_Format(PyExc_OverflowError, "value too large to perform division"); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 513; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 507; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_v_dsetsize = __Pyx_div_int(__pyx_t_4, __pyx_v_step2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":514 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":508 * qsetsize = (high1-low1) / step1 * dsetsize = (high2-low2) / step2 * if d_first: # <<<<<<<<<<<<<< @@ -40664,7 +40625,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ if (__pyx_v_d_first) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":515 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":509 * dsetsize = (high2-low2) / step2 * if d_first: * tmp = qsetsize # <<<<<<<<<<<<<< @@ -40673,7 +40634,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_tmp = __pyx_v_qsetsize; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":516 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":510 * if d_first: * tmp = qsetsize * qsetsize = dsetsize # <<<<<<<<<<<<<< @@ -40682,7 +40643,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_qsetsize = __pyx_v_dsetsize; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":517 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":511 * tmp = qsetsize * qsetsize = dsetsize * dsetsize = tmp # <<<<<<<<<<<<<< @@ -40694,7 +40655,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } __pyx_L7:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":519 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":513 * dsetsize = tmp * * if self.by_slack_factor * qsetsize * log(dsetsize) / log(2) > dsetsize: # <<<<<<<<<<<<<< @@ -40705,12 +40666,12 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_6 = log(2.0); if (unlikely(__pyx_t_6 == 0)) { PyErr_Format(PyExc_ZeroDivisionError, "float division"); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 519; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 513; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_t_3 = ((__pyx_t_5 / __pyx_t_6) > __pyx_v_dsetsize); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":520 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":514 * * if self.by_slack_factor * qsetsize * log(dsetsize) / log(2) > dsetsize: * free(result) # <<<<<<<<<<<<<< @@ -40719,7 +40680,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ free(__pyx_v_result); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":521 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":515 * if self.by_slack_factor * qsetsize * log(dsetsize) / log(2) > dsetsize: * free(result) * return self.merge_helper(low1, high1, arr1, step1, low2, high2, arr2, step2, offset_by_one, len_last, num_subpatterns, result_len) # <<<<<<<<<<<<<< @@ -40732,7 +40693,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } __pyx_L8:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":525 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":519 * # binary search. There are two flavors, depending on * # whether the queryset or dataset is first * if d_first: # <<<<<<<<<<<<<< @@ -40741,7 +40702,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ if (__pyx_v_d_first) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":526 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":520 * # whether the queryset or dataset is first * if d_first: * med2 = median(low2, high2, step2) # <<<<<<<<<<<<<< @@ -40750,7 +40711,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_med2 = __pyx_f_3_sa_median(__pyx_v_low2, __pyx_v_high2, __pyx_v_step2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":527 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":521 * if d_first: * med2 = median(low2, high2, step2) * assign_matching(&loc2, arr2, med2, step2, self.fda.sent_id.arr) # <<<<<<<<<<<<<< @@ -40759,7 +40720,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_f_3_sa_assign_matching((&__pyx_v_loc2), __pyx_v_arr2, __pyx_v_med2, __pyx_v_step2, __pyx_v_self->fda->sent_id->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":529 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":523 * assign_matching(&loc2, arr2, med2, step2, self.fda.sent_id.arr) * * search_low = low1 # <<<<<<<<<<<<<< @@ -40768,7 +40729,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_search_low = __pyx_v_low1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":530 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":524 * * search_low = low1 * search_high = high1 # <<<<<<<<<<<<<< @@ -40777,7 +40738,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_search_high = __pyx_v_high1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":531 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":525 * search_low = low1 * search_high = high1 * while search_low < search_high: # <<<<<<<<<<<<<< @@ -40788,7 +40749,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_3 = (__pyx_v_search_low < __pyx_v_search_high); if (!__pyx_t_3) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":532 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":526 * search_high = high1 * while search_low < search_high: * med1 = median(search_low, search_high, step1) # <<<<<<<<<<<<<< @@ -40797,7 +40758,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_med1 = __pyx_f_3_sa_median(__pyx_v_search_low, __pyx_v_search_high, __pyx_v_step1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":533 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":527 * while search_low < search_high: * med1 = median(search_low, search_high, step1) * find_comparable_matchings(low1, high1, arr1, step1, med1, &med1_minus, &med1_plus) # <<<<<<<<<<<<<< @@ -40806,7 +40767,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_f_3_sa_find_comparable_matchings(__pyx_v_low1, __pyx_v_high1, __pyx_v_arr1, __pyx_v_step1, __pyx_v_med1, (&__pyx_v_med1_minus), (&__pyx_v_med1_plus)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":534 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":528 * med1 = median(search_low, search_high, step1) * find_comparable_matchings(low1, high1, arr1, step1, med1, &med1_minus, &med1_plus) * comparison = self.compare_matchings_set(med1_minus, med1_plus, arr1, step1, &loc2, offset_by_one, len_last) # <<<<<<<<<<<<<< @@ -40815,7 +40776,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_comparison = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->compare_matchings_set(__pyx_v_self, __pyx_v_med1_minus, __pyx_v_med1_plus, __pyx_v_arr1, __pyx_v_step1, (&__pyx_v_loc2), __pyx_v_offset_by_one, __pyx_v_len_last); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":537 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":531 * if comparison == -1: * search_low = med1_plus * elif comparison == 1: # <<<<<<<<<<<<<< @@ -40824,7 +40785,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ switch (__pyx_v_comparison) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":535 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":529 * find_comparable_matchings(low1, high1, arr1, step1, med1, &med1_minus, &med1_plus) * comparison = self.compare_matchings_set(med1_minus, med1_plus, arr1, step1, &loc2, offset_by_one, len_last) * if comparison == -1: # <<<<<<<<<<<<<< @@ -40833,7 +40794,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ case -1: - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":536 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":530 * comparison = self.compare_matchings_set(med1_minus, med1_plus, arr1, step1, &loc2, offset_by_one, len_last) * if comparison == -1: * search_low = med1_plus # <<<<<<<<<<<<<< @@ -40843,7 +40804,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_v_search_low = __pyx_v_med1_plus; break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":537 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":531 * if comparison == -1: * search_low = med1_plus * elif comparison == 1: # <<<<<<<<<<<<<< @@ -40852,7 +40813,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ case 1: - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":538 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":532 * search_low = med1_plus * elif comparison == 1: * search_high = med1_minus # <<<<<<<<<<<<<< @@ -40863,7 +40824,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p break; default: - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":540 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":534 * search_high = med1_minus * else: * break # <<<<<<<<<<<<<< @@ -40879,7 +40840,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":542 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":536 * break * else: * med1 = median(low1, high1, step1) # <<<<<<<<<<<<<< @@ -40888,7 +40849,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_med1 = __pyx_f_3_sa_median(__pyx_v_low1, __pyx_v_high1, __pyx_v_step1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":543 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":537 * else: * med1 = median(low1, high1, step1) * find_comparable_matchings(low1, high1, arr1, step1, med1, &med1_minus, &med1_plus) # <<<<<<<<<<<<<< @@ -40897,7 +40858,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_f_3_sa_find_comparable_matchings(__pyx_v_low1, __pyx_v_high1, __pyx_v_arr1, __pyx_v_step1, __pyx_v_med1, (&__pyx_v_med1_minus), (&__pyx_v_med1_plus)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":545 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":539 * find_comparable_matchings(low1, high1, arr1, step1, med1, &med1_minus, &med1_plus) * * search_low = low2 # <<<<<<<<<<<<<< @@ -40906,7 +40867,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_search_low = __pyx_v_low2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":546 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":540 * * search_low = low2 * search_high = high2 # <<<<<<<<<<<<<< @@ -40915,7 +40876,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_search_high = __pyx_v_high2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":547 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":541 * search_low = low2 * search_high = high2 * while search_low < search_high: # <<<<<<<<<<<<<< @@ -40926,7 +40887,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_3 = (__pyx_v_search_low < __pyx_v_search_high); if (!__pyx_t_3) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":548 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":542 * search_high = high2 * while search_low < search_high: * med2 = median(search_low, search_high, step2) # <<<<<<<<<<<<<< @@ -40935,7 +40896,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_med2 = __pyx_f_3_sa_median(__pyx_v_search_low, __pyx_v_search_high, __pyx_v_step2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":549 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":543 * while search_low < search_high: * med2 = median(search_low, search_high, step2) * assign_matching(&loc2, arr2, med2, step2, self.fda.sent_id.arr) # <<<<<<<<<<<<<< @@ -40944,7 +40905,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_f_3_sa_assign_matching((&__pyx_v_loc2), __pyx_v_arr2, __pyx_v_med2, __pyx_v_step2, __pyx_v_self->fda->sent_id->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":550 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":544 * med2 = median(search_low, search_high, step2) * assign_matching(&loc2, arr2, med2, step2, self.fda.sent_id.arr) * comparison = self.compare_matchings_set(med1_minus, med1_plus, arr1, step1, &loc2, offset_by_one, len_last) # <<<<<<<<<<<<<< @@ -40953,7 +40914,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_comparison = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->compare_matchings_set(__pyx_v_self, __pyx_v_med1_minus, __pyx_v_med1_plus, __pyx_v_arr1, __pyx_v_step1, (&__pyx_v_loc2), __pyx_v_offset_by_one, __pyx_v_len_last); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":553 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":547 * if comparison == -1: * search_high = med2 * elif comparison == 1: # <<<<<<<<<<<<<< @@ -40962,7 +40923,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ switch (__pyx_v_comparison) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":551 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":545 * assign_matching(&loc2, arr2, med2, step2, self.fda.sent_id.arr) * comparison = self.compare_matchings_set(med1_minus, med1_plus, arr1, step1, &loc2, offset_by_one, len_last) * if comparison == -1: # <<<<<<<<<<<<<< @@ -40971,7 +40932,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ case -1: - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":552 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":546 * comparison = self.compare_matchings_set(med1_minus, med1_plus, arr1, step1, &loc2, offset_by_one, len_last) * if comparison == -1: * search_high = med2 # <<<<<<<<<<<<<< @@ -40981,7 +40942,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_v_search_high = __pyx_v_med2; break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":553 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":547 * if comparison == -1: * search_high = med2 * elif comparison == 1: # <<<<<<<<<<<<<< @@ -40990,7 +40951,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ case 1: - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":554 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":548 * search_high = med2 * elif comparison == 1: * search_low = med2 + step2 # <<<<<<<<<<<<<< @@ -41001,7 +40962,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p break; default: - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":556 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":550 * search_low = med2 + step2 * else: * break # <<<<<<<<<<<<<< @@ -41016,7 +40977,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } __pyx_L9:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":558 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":552 * break * * med_result_len = 0 # <<<<<<<<<<<<<< @@ -41025,7 +40986,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_med_result_len = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":559 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":553 * * med_result_len = 0 * med_result = malloc(0*sizeof(int*)) # <<<<<<<<<<<<<< @@ -41034,7 +40995,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_med_result = ((int *)malloc((0 * (sizeof(int *))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":560 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":554 * med_result_len = 0 * med_result = malloc(0*sizeof(int*)) * if search_high > search_low: # <<<<<<<<<<<<<< @@ -41044,7 +41005,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_3 = (__pyx_v_search_high > __pyx_v_search_low); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":566 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":560 * # want to store the bindings for all of those elements. We can * # subsequently throw all of them away. * med2_minus = med2 # <<<<<<<<<<<<<< @@ -41053,7 +41014,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_med2_minus = __pyx_v_med2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":567 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":561 * # subsequently throw all of them away. * med2_minus = med2 * med2_plus = med2 + step2 # <<<<<<<<<<<<<< @@ -41062,7 +41023,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_med2_plus = (__pyx_v_med2 + __pyx_v_step2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":568 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":562 * med2_minus = med2 * med2_plus = med2 + step2 * i1 = med1_minus # <<<<<<<<<<<<<< @@ -41071,7 +41032,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_i1 = __pyx_v_med1_minus; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":569 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":563 * med2_plus = med2 + step2 * i1 = med1_minus * while i1 < med1_plus: # <<<<<<<<<<<<<< @@ -41082,7 +41043,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_3 = (__pyx_v_i1 < __pyx_v_med1_plus); if (!__pyx_t_3) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":570 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":564 * i1 = med1_minus * while i1 < med1_plus: * assign_matching(&loc1, arr1, i1, step1, self.fda.sent_id.arr) # <<<<<<<<<<<<<< @@ -41091,7 +41052,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_f_3_sa_assign_matching((&__pyx_v_loc1), __pyx_v_arr1, __pyx_v_i1, __pyx_v_step1, __pyx_v_self->fda->sent_id->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":571 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":565 * while i1 < med1_plus: * assign_matching(&loc1, arr1, i1, step1, self.fda.sent_id.arr) * while med2_minus-step2 >= low2: # <<<<<<<<<<<<<< @@ -41102,7 +41063,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_3 = ((__pyx_v_med2_minus - __pyx_v_step2) >= __pyx_v_low2); if (!__pyx_t_3) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":572 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":566 * assign_matching(&loc1, arr1, i1, step1, self.fda.sent_id.arr) * while med2_minus-step2 >= low2: * assign_matching(&loc2, arr2, med2_minus-step2, step2, self.fda.sent_id.arr) # <<<<<<<<<<<<<< @@ -41111,7 +41072,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_f_3_sa_assign_matching((&__pyx_v_loc2), __pyx_v_arr2, (__pyx_v_med2_minus - __pyx_v_step2), __pyx_v_step2, __pyx_v_self->fda->sent_id->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":573 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":567 * while med2_minus-step2 >= low2: * assign_matching(&loc2, arr2, med2_minus-step2, step2, self.fda.sent_id.arr) * if self.compare_matchings(&loc1, &loc2, offset_by_one, len_last) < 1: # <<<<<<<<<<<<<< @@ -41121,7 +41082,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_3 = (((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->compare_matchings(__pyx_v_self, (&__pyx_v_loc1), (&__pyx_v_loc2), __pyx_v_offset_by_one, __pyx_v_len_last) < 1); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":574 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":568 * assign_matching(&loc2, arr2, med2_minus-step2, step2, self.fda.sent_id.arr) * if self.compare_matchings(&loc1, &loc2, offset_by_one, len_last) < 1: * med2_minus = med2_minus - step2 # <<<<<<<<<<<<<< @@ -41133,7 +41094,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":576 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":570 * med2_minus = med2_minus - step2 * else: * break # <<<<<<<<<<<<<< @@ -41146,7 +41107,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } __pyx_L18_break:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":577 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":571 * else: * break * i2 = med2_minus # <<<<<<<<<<<<<< @@ -41155,7 +41116,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_i2 = __pyx_v_med2_minus; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":578 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":572 * break * i2 = med2_minus * while i2 < high2: # <<<<<<<<<<<<<< @@ -41166,7 +41127,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_3 = (__pyx_v_i2 < __pyx_v_high2); if (!__pyx_t_3) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":579 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":573 * i2 = med2_minus * while i2 < high2: * assign_matching(&loc2, arr2, i2, step2, self.fda.sent_id.arr) # <<<<<<<<<<<<<< @@ -41175,7 +41136,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_f_3_sa_assign_matching((&__pyx_v_loc2), __pyx_v_arr2, __pyx_v_i2, __pyx_v_step2, __pyx_v_self->fda->sent_id->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":580 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":574 * while i2 < high2: * assign_matching(&loc2, arr2, i2, step2, self.fda.sent_id.arr) * comparison = self.compare_matchings(&loc1, &loc2, offset_by_one, len_last) # <<<<<<<<<<<<<< @@ -41184,7 +41145,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_comparison = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->compare_matchings(__pyx_v_self, (&__pyx_v_loc1), (&__pyx_v_loc2), __pyx_v_offset_by_one, __pyx_v_len_last); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":581 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":575 * assign_matching(&loc2, arr2, i2, step2, self.fda.sent_id.arr) * comparison = self.compare_matchings(&loc1, &loc2, offset_by_one, len_last) * if comparison == 0: # <<<<<<<<<<<<<< @@ -41194,7 +41155,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_3 = (__pyx_v_comparison == 0); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":583 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":577 * if comparison == 0: * pass * med_result = append_combined_matching(med_result, &loc1, &loc2, offset_by_one, num_subpatterns, &med_result_len) # <<<<<<<<<<<<<< @@ -41206,7 +41167,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } __pyx_L22:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":584 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":578 * pass * med_result = append_combined_matching(med_result, &loc1, &loc2, offset_by_one, num_subpatterns, &med_result_len) * if comparison == -1: # <<<<<<<<<<<<<< @@ -41216,7 +41177,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_3 = (__pyx_v_comparison == -1); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":585 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":579 * med_result = append_combined_matching(med_result, &loc1, &loc2, offset_by_one, num_subpatterns, &med_result_len) * if comparison == -1: * break # <<<<<<<<<<<<<< @@ -41228,7 +41189,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } __pyx_L23:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":586 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":580 * if comparison == -1: * break * i2 = i2 + step2 # <<<<<<<<<<<<<< @@ -41239,7 +41200,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } __pyx_L21_break:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":587 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":581 * break * i2 = i2 + step2 * if i2 > med2_plus: # <<<<<<<<<<<<<< @@ -41249,7 +41210,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_3 = (__pyx_v_i2 > __pyx_v_med2_plus); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":588 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":582 * i2 = i2 + step2 * if i2 > med2_plus: * med2_plus = i2 # <<<<<<<<<<<<<< @@ -41261,7 +41222,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } __pyx_L24:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":589 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":583 * if i2 > med2_plus: * med2_plus = i2 * i1 = i1 + step1 # <<<<<<<<<<<<<< @@ -41271,7 +41232,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_v_i1 = (__pyx_v_i1 + __pyx_v_step1); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":591 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":585 * i1 = i1 + step1 * * tmp = med1_minus # <<<<<<<<<<<<<< @@ -41280,7 +41241,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_tmp = __pyx_v_med1_minus; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":592 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":586 * * tmp = med1_minus * med1_minus = med1_plus # <<<<<<<<<<<<<< @@ -41289,7 +41250,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_med1_minus = __pyx_v_med1_plus; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":593 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":587 * tmp = med1_minus * med1_minus = med1_plus * med1_plus = tmp # <<<<<<<<<<<<<< @@ -41301,7 +41262,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":596 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":590 * else: * # No match; need to figure out the point of division in D and Q * med2_minus = med2 # <<<<<<<<<<<<<< @@ -41310,7 +41271,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_med2_minus = __pyx_v_med2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":597 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":591 * # No match; need to figure out the point of division in D and Q * med2_minus = med2 * med2_plus = med2 # <<<<<<<<<<<<<< @@ -41319,7 +41280,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_med2_plus = __pyx_v_med2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":598 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":592 * med2_minus = med2 * med2_plus = med2 * if d_first: # <<<<<<<<<<<<<< @@ -41328,7 +41289,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ if (__pyx_v_d_first) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":599 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":593 * med2_plus = med2 * if d_first: * med2_minus = med2_minus + step2 # <<<<<<<<<<<<<< @@ -41337,7 +41298,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_med2_minus = (__pyx_v_med2_minus + __pyx_v_step2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":600 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":594 * if d_first: * med2_minus = med2_minus + step2 * if comparison == -1: # <<<<<<<<<<<<<< @@ -41347,7 +41308,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_3 = (__pyx_v_comparison == -1); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":601 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":595 * med2_minus = med2_minus + step2 * if comparison == -1: * med1_minus = med1_plus # <<<<<<<<<<<<<< @@ -41359,7 +41320,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } __pyx_L26:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":602 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":596 * if comparison == -1: * med1_minus = med1_plus * if comparison == 1: # <<<<<<<<<<<<<< @@ -41369,7 +41330,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_3 = (__pyx_v_comparison == 1); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":603 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":597 * med1_minus = med1_plus * if comparison == 1: * med1_plus = med1_minus # <<<<<<<<<<<<<< @@ -41384,7 +41345,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":605 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":599 * med1_plus = med1_minus * else: * tmp = med1_minus # <<<<<<<<<<<<<< @@ -41393,7 +41354,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_tmp = __pyx_v_med1_minus; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":606 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":600 * else: * tmp = med1_minus * med1_minus = med1_plus # <<<<<<<<<<<<<< @@ -41402,7 +41363,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_med1_minus = __pyx_v_med1_plus; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":607 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":601 * tmp = med1_minus * med1_minus = med1_plus * med1_plus = tmp # <<<<<<<<<<<<<< @@ -41411,7 +41372,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_med1_plus = __pyx_v_tmp; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":608 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":602 * med1_minus = med1_plus * med1_plus = tmp * if comparison == 1: # <<<<<<<<<<<<<< @@ -41421,7 +41382,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p __pyx_t_3 = (__pyx_v_comparison == 1); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":609 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":603 * med1_plus = tmp * if comparison == 1: * med2_minus = med2_minus + step2 # <<<<<<<<<<<<<< @@ -41430,7 +41391,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_med2_minus = (__pyx_v_med2_minus + __pyx_v_step2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":610 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":604 * if comparison == 1: * med2_minus = med2_minus + step2 * med2_plus = med2_plus + step2 # <<<<<<<<<<<<<< @@ -41446,7 +41407,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p } __pyx_L14:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":612 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":606 * med2_plus = med2_plus + step2 * * low_result_len = 0 # <<<<<<<<<<<<<< @@ -41455,7 +41416,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_low_result_len = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":613 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":607 * * low_result_len = 0 * low_result = self.baeza_yates_helper(low1, med1_plus, arr1, step1, low2, med2_plus, arr2, step2, offset_by_one, len_last, num_subpatterns, &low_result_len) # <<<<<<<<<<<<<< @@ -41464,7 +41425,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_low_result = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->baeza_yates_helper(__pyx_v_self, __pyx_v_low1, __pyx_v_med1_plus, __pyx_v_arr1, __pyx_v_step1, __pyx_v_low2, __pyx_v_med2_plus, __pyx_v_arr2, __pyx_v_step2, __pyx_v_offset_by_one, __pyx_v_len_last, __pyx_v_num_subpatterns, (&__pyx_v_low_result_len)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":614 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":608 * low_result_len = 0 * low_result = self.baeza_yates_helper(low1, med1_plus, arr1, step1, low2, med2_plus, arr2, step2, offset_by_one, len_last, num_subpatterns, &low_result_len) * high_result_len = 0 # <<<<<<<<<<<<<< @@ -41473,7 +41434,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_high_result_len = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":615 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":609 * low_result = self.baeza_yates_helper(low1, med1_plus, arr1, step1, low2, med2_plus, arr2, step2, offset_by_one, len_last, num_subpatterns, &low_result_len) * high_result_len = 0 * high_result = self.baeza_yates_helper(med1_minus, high1, arr1, step1, med2_minus, high2, arr2, step2, offset_by_one, len_last, num_subpatterns, &high_result_len) # <<<<<<<<<<<<<< @@ -41482,7 +41443,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_high_result = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->baeza_yates_helper(__pyx_v_self, __pyx_v_med1_minus, __pyx_v_high1, __pyx_v_arr1, __pyx_v_step1, __pyx_v_med2_minus, __pyx_v_high2, __pyx_v_arr2, __pyx_v_step2, __pyx_v_offset_by_one, __pyx_v_len_last, __pyx_v_num_subpatterns, (&__pyx_v_high_result_len)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":617 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":611 * high_result = self.baeza_yates_helper(med1_minus, high1, arr1, step1, med2_minus, high2, arr2, step2, offset_by_one, len_last, num_subpatterns, &high_result_len) * * result = extend_arr(result, result_len, low_result, low_result_len) # <<<<<<<<<<<<<< @@ -41491,7 +41452,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_result = __pyx_f_3_sa_extend_arr(__pyx_v_result, __pyx_v_result_len, __pyx_v_low_result, __pyx_v_low_result_len); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":618 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":612 * * result = extend_arr(result, result_len, low_result, low_result_len) * result = extend_arr(result, result_len, med_result, med_result_len) # <<<<<<<<<<<<<< @@ -41500,7 +41461,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_result = __pyx_f_3_sa_extend_arr(__pyx_v_result, __pyx_v_result_len, __pyx_v_med_result, __pyx_v_med_result_len); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":619 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":613 * result = extend_arr(result, result_len, low_result, low_result_len) * result = extend_arr(result, result_len, med_result, med_result_len) * result = extend_arr(result, result_len, high_result, high_result_len) # <<<<<<<<<<<<<< @@ -41509,7 +41470,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ __pyx_v_result = __pyx_f_3_sa_extend_arr(__pyx_v_result, __pyx_v_result_len, __pyx_v_high_result, __pyx_v_high_result_len); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":620 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":614 * result = extend_arr(result, result_len, med_result, med_result_len) * result = extend_arr(result, result_len, high_result, high_result_len) * free(low_result) # <<<<<<<<<<<<<< @@ -41518,7 +41479,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ free(__pyx_v_low_result); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":621 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":615 * result = extend_arr(result, result_len, high_result, high_result_len) * free(low_result) * free(med_result) # <<<<<<<<<<<<<< @@ -41527,7 +41488,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ free(__pyx_v_med_result); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":622 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":616 * free(low_result) * free(med_result) * free(high_result) # <<<<<<<<<<<<<< @@ -41536,7 +41497,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p */ free(__pyx_v_high_result); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":624 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":618 * free(high_result) * * return result # <<<<<<<<<<<<<< @@ -41556,7 +41517,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_baeza_yates_helper(struct __p return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":628 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":622 * * * cdef long compare_matchings_set(self, int i1_minus, int i1_plus, int* arr1, int step1, # <<<<<<<<<<<<<< @@ -41575,7 +41536,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings_set(struct int __pyx_t_1; __Pyx_RefNannySetupContext("compare_matchings_set", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":639 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":633 * cdef Matching* loc1 * * loc1 = &l1_stack # <<<<<<<<<<<<<< @@ -41584,7 +41545,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings_set(struct */ __pyx_v_loc1 = (&__pyx_v_l1_stack); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":641 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":635 * loc1 = &l1_stack * * i1 = i1_minus # <<<<<<<<<<<<<< @@ -41593,7 +41554,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings_set(struct */ __pyx_v_i1 = __pyx_v_i1_minus; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":642 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":636 * * i1 = i1_minus * while i1 < i1_plus: # <<<<<<<<<<<<<< @@ -41604,7 +41565,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings_set(struct __pyx_t_1 = (__pyx_v_i1 < __pyx_v_i1_plus); if (!__pyx_t_1) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":643 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":637 * i1 = i1_minus * while i1 < i1_plus: * assign_matching(loc1, arr1, i1, step1, self.fda.sent_id.arr) # <<<<<<<<<<<<<< @@ -41613,7 +41574,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings_set(struct */ __pyx_f_3_sa_assign_matching(__pyx_v_loc1, __pyx_v_arr1, __pyx_v_i1, __pyx_v_step1, __pyx_v_self->fda->sent_id->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":644 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":638 * while i1 < i1_plus: * assign_matching(loc1, arr1, i1, step1, self.fda.sent_id.arr) * comparison = self.compare_matchings(loc1, loc2, offset_by_one, len_last) # <<<<<<<<<<<<<< @@ -41622,7 +41583,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings_set(struct */ __pyx_v_comparison = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->compare_matchings(__pyx_v_self, __pyx_v_loc1, __pyx_v_loc2, __pyx_v_offset_by_one, __pyx_v_len_last); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":645 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":639 * assign_matching(loc1, arr1, i1, step1, self.fda.sent_id.arr) * comparison = self.compare_matchings(loc1, loc2, offset_by_one, len_last) * if comparison == 0: # <<<<<<<<<<<<<< @@ -41632,7 +41593,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings_set(struct __pyx_t_1 = (__pyx_v_comparison == 0); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":646 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":640 * comparison = self.compare_matchings(loc1, loc2, offset_by_one, len_last) * if comparison == 0: * prev_comparison = 0 # <<<<<<<<<<<<<< @@ -41641,7 +41602,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings_set(struct */ __pyx_v_prev_comparison = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":647 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":641 * if comparison == 0: * prev_comparison = 0 * break # <<<<<<<<<<<<<< @@ -41652,7 +41613,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings_set(struct goto __pyx_L5; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":648 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":642 * prev_comparison = 0 * break * elif i1 == i1_minus: # <<<<<<<<<<<<<< @@ -41662,7 +41623,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings_set(struct __pyx_t_1 = (__pyx_v_i1 == __pyx_v_i1_minus); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":649 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":643 * break * elif i1 == i1_minus: * prev_comparison = comparison # <<<<<<<<<<<<<< @@ -41674,7 +41635,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings_set(struct } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":651 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":645 * prev_comparison = comparison * else: * if comparison != prev_comparison: # <<<<<<<<<<<<<< @@ -41684,7 +41645,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings_set(struct __pyx_t_1 = (__pyx_v_comparison != __pyx_v_prev_comparison); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":652 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":646 * else: * if comparison != prev_comparison: * prev_comparison = 0 # <<<<<<<<<<<<<< @@ -41693,7 +41654,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings_set(struct */ __pyx_v_prev_comparison = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":653 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":647 * if comparison != prev_comparison: * prev_comparison = 0 * break # <<<<<<<<<<<<<< @@ -41707,7 +41668,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings_set(struct } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":654 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":648 * prev_comparison = 0 * break * i1 = i1 + step1 # <<<<<<<<<<<<<< @@ -41718,7 +41679,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings_set(struct } __pyx_L4_break:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":655 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":649 * break * i1 = i1 + step1 * return prev_comparison # <<<<<<<<<<<<<< @@ -41734,7 +41695,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings_set(struct return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":658 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":652 * * * cdef long compare_matchings(self, Matching* loc1, Matching* loc2, int offset_by_one, int len_last): # <<<<<<<<<<<<<< @@ -41752,7 +41713,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py int __pyx_t_4; __Pyx_RefNannySetupContext("compare_matchings", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":661 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":655 * cdef int i * * if loc1.sent_id > loc2.sent_id: # <<<<<<<<<<<<<< @@ -41762,7 +41723,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py __pyx_t_1 = (__pyx_v_loc1->sent_id > __pyx_v_loc2->sent_id); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":662 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":656 * * if loc1.sent_id > loc2.sent_id: * return 1 # <<<<<<<<<<<<<< @@ -41775,7 +41736,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":663 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":657 * if loc1.sent_id > loc2.sent_id: * return 1 * if loc2.sent_id > loc1.sent_id: # <<<<<<<<<<<<<< @@ -41785,7 +41746,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py __pyx_t_1 = (__pyx_v_loc2->sent_id > __pyx_v_loc1->sent_id); if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":664 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":658 * return 1 * if loc2.sent_id > loc1.sent_id: * return -1 # <<<<<<<<<<<<<< @@ -41798,7 +41759,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":666 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":660 * return -1 * * if loc1.size == 1 and loc2.size == 1: # <<<<<<<<<<<<<< @@ -41814,7 +41775,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py } if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":667 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":661 * * if loc1.size == 1 and loc2.size == 1: * if loc2.arr[loc2.start] - loc1.arr[loc1.start] <= self.train_min_gap_size: # <<<<<<<<<<<<<< @@ -41824,7 +41785,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py __pyx_t_3 = (((__pyx_v_loc2->arr[__pyx_v_loc2->start]) - (__pyx_v_loc1->arr[__pyx_v_loc1->start])) <= __pyx_v_self->train_min_gap_size); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":668 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":662 * if loc1.size == 1 and loc2.size == 1: * if loc2.arr[loc2.start] - loc1.arr[loc1.start] <= self.train_min_gap_size: * return 1 # <<<<<<<<<<<<<< @@ -41839,7 +41800,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py goto __pyx_L5; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":670 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":664 * return 1 * * elif offset_by_one: # <<<<<<<<<<<<<< @@ -41848,7 +41809,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py */ if (__pyx_v_offset_by_one) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":671 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":665 * * elif offset_by_one: * for i from 1 <= i < loc1.size: # <<<<<<<<<<<<<< @@ -41858,7 +41819,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py __pyx_t_4 = __pyx_v_loc1->size; for (__pyx_v_i = 1; __pyx_v_i < __pyx_t_4; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":672 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":666 * elif offset_by_one: * for i from 1 <= i < loc1.size: * if loc1.arr[loc1.start+i] > loc2.arr[loc2.start+i-1]: # <<<<<<<<<<<<<< @@ -41868,7 +41829,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py __pyx_t_3 = ((__pyx_v_loc1->arr[(__pyx_v_loc1->start + __pyx_v_i)]) > (__pyx_v_loc2->arr[((__pyx_v_loc2->start + __pyx_v_i) - 1)])); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":673 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":667 * for i from 1 <= i < loc1.size: * if loc1.arr[loc1.start+i] > loc2.arr[loc2.start+i-1]: * return 1 # <<<<<<<<<<<<<< @@ -41881,7 +41842,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py } __pyx_L9:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":674 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":668 * if loc1.arr[loc1.start+i] > loc2.arr[loc2.start+i-1]: * return 1 * if loc1.arr[loc1.start+i] < loc2.arr[loc2.start+i-1]: # <<<<<<<<<<<<<< @@ -41891,7 +41852,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py __pyx_t_3 = ((__pyx_v_loc1->arr[(__pyx_v_loc1->start + __pyx_v_i)]) < (__pyx_v_loc2->arr[((__pyx_v_loc2->start + __pyx_v_i) - 1)])); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":675 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":669 * return 1 * if loc1.arr[loc1.start+i] < loc2.arr[loc2.start+i-1]: * return -1 # <<<<<<<<<<<<<< @@ -41908,7 +41869,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":678 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":672 * * else: * if loc1.arr[loc1.start]+1 > loc2.arr[loc2.start]: # <<<<<<<<<<<<<< @@ -41918,7 +41879,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py __pyx_t_3 = (((__pyx_v_loc1->arr[__pyx_v_loc1->start]) + 1) > (__pyx_v_loc2->arr[__pyx_v_loc2->start])); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":679 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":673 * else: * if loc1.arr[loc1.start]+1 > loc2.arr[loc2.start]: * return 1 # <<<<<<<<<<<<<< @@ -41931,7 +41892,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py } __pyx_L11:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":680 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":674 * if loc1.arr[loc1.start]+1 > loc2.arr[loc2.start]: * return 1 * if loc1.arr[loc1.start]+1 < loc2.arr[loc2.start]: # <<<<<<<<<<<<<< @@ -41941,7 +41902,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py __pyx_t_3 = (((__pyx_v_loc1->arr[__pyx_v_loc1->start]) + 1) < (__pyx_v_loc2->arr[__pyx_v_loc2->start])); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":681 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":675 * return 1 * if loc1.arr[loc1.start]+1 < loc2.arr[loc2.start]: * return -1 # <<<<<<<<<<<<<< @@ -41954,7 +41915,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py } __pyx_L12:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":683 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":677 * return -1 * * for i from 1 <= i < loc1.size: # <<<<<<<<<<<<<< @@ -41964,7 +41925,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py __pyx_t_4 = __pyx_v_loc1->size; for (__pyx_v_i = 1; __pyx_v_i < __pyx_t_4; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":684 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":678 * * for i from 1 <= i < loc1.size: * if loc1.arr[loc1.start+i] > loc2.arr[loc2.start+i]: # <<<<<<<<<<<<<< @@ -41974,7 +41935,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py __pyx_t_3 = ((__pyx_v_loc1->arr[(__pyx_v_loc1->start + __pyx_v_i)]) > (__pyx_v_loc2->arr[(__pyx_v_loc2->start + __pyx_v_i)])); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":685 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":679 * for i from 1 <= i < loc1.size: * if loc1.arr[loc1.start+i] > loc2.arr[loc2.start+i]: * return 1 # <<<<<<<<<<<<<< @@ -41987,7 +41948,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py } __pyx_L15:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":686 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":680 * if loc1.arr[loc1.start+i] > loc2.arr[loc2.start+i]: * return 1 * if loc1.arr[loc1.start+i] < loc2.arr[loc2.start+i]: # <<<<<<<<<<<<<< @@ -41997,7 +41958,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py __pyx_t_3 = ((__pyx_v_loc1->arr[(__pyx_v_loc1->start + __pyx_v_i)]) < (__pyx_v_loc2->arr[(__pyx_v_loc2->start + __pyx_v_i)])); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":687 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":681 * return 1 * if loc1.arr[loc1.start+i] < loc2.arr[loc2.start+i]: * return -1 # <<<<<<<<<<<<<< @@ -42013,7 +41974,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":689 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":683 * return -1 * * if loc2.arr[loc2.end-1] + len_last - loc1.arr[loc1.start] > self.train_max_initial_size: # <<<<<<<<<<<<<< @@ -42023,7 +41984,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py __pyx_t_3 = ((((__pyx_v_loc2->arr[(__pyx_v_loc2->end - 1)]) + __pyx_v_len_last) - (__pyx_v_loc1->arr[__pyx_v_loc1->start])) > __pyx_v_self->train_max_initial_size); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":690 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":684 * * if loc2.arr[loc2.end-1] + len_last - loc1.arr[loc1.start] > self.train_max_initial_size: * return -1 # <<<<<<<<<<<<<< @@ -42036,7 +41997,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py } __pyx_L17:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":691 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":685 * if loc2.arr[loc2.end-1] + len_last - loc1.arr[loc1.start] > self.train_max_initial_size: * return -1 * return 0 # <<<<<<<<<<<<<< @@ -42052,7 +42013,7 @@ static long __pyx_f_3_sa_23HieroCachingRuleFactory_compare_matchings(struct __py return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":694 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":688 * * * cdef int* merge_helper(self, int low1, int high1, int* arr1, int step1, # <<<<<<<<<<<<<< @@ -42076,7 +42037,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj int __pyx_t_3; __Pyx_RefNannySetupContext("merge_helper", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":702 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":696 * cdef Matching loc1, loc2 * * result_len[0] = 0 # <<<<<<<<<<<<<< @@ -42085,7 +42046,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj */ (__pyx_v_result_len[0]) = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":703 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":697 * * result_len[0] = 0 * result = malloc(0*sizeof(int)) # <<<<<<<<<<<<<< @@ -42094,7 +42055,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj */ __pyx_v_result = ((int *)malloc((0 * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":705 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":699 * result = malloc(0*sizeof(int)) * * i1 = low1 # <<<<<<<<<<<<<< @@ -42103,7 +42064,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj */ __pyx_v_i1 = __pyx_v_low1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":706 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":700 * * i1 = low1 * i2 = low2 # <<<<<<<<<<<<<< @@ -42112,7 +42073,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj */ __pyx_v_i2 = __pyx_v_low2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":707 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":701 * i1 = low1 * i2 = low2 * while i1 < high1 and i2 < high2: # <<<<<<<<<<<<<< @@ -42129,7 +42090,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj } if (!__pyx_t_3) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":710 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":704 * * # First, pop all unneeded loc2's off the stack * assign_matching(&loc1, arr1, i1, step1, self.fda.sent_id.arr) # <<<<<<<<<<<<<< @@ -42138,7 +42099,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj */ __pyx_f_3_sa_assign_matching((&__pyx_v_loc1), __pyx_v_arr1, __pyx_v_i1, __pyx_v_step1, __pyx_v_self->fda->sent_id->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":711 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":705 * # First, pop all unneeded loc2's off the stack * assign_matching(&loc1, arr1, i1, step1, self.fda.sent_id.arr) * while i2 < high2: # <<<<<<<<<<<<<< @@ -42149,7 +42110,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj __pyx_t_3 = (__pyx_v_i2 < __pyx_v_high2); if (!__pyx_t_3) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":712 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":706 * assign_matching(&loc1, arr1, i1, step1, self.fda.sent_id.arr) * while i2 < high2: * assign_matching(&loc2, arr2, i2, step2, self.fda.sent_id.arr) # <<<<<<<<<<<<<< @@ -42158,7 +42119,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj */ __pyx_f_3_sa_assign_matching((&__pyx_v_loc2), __pyx_v_arr2, __pyx_v_i2, __pyx_v_step2, __pyx_v_self->fda->sent_id->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":713 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":707 * while i2 < high2: * assign_matching(&loc2, arr2, i2, step2, self.fda.sent_id.arr) * if self.compare_matchings(&loc1, &loc2, offset_by_one, len_last) == 1: # <<<<<<<<<<<<<< @@ -42168,7 +42129,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj __pyx_t_3 = (((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->compare_matchings(__pyx_v_self, (&__pyx_v_loc1), (&__pyx_v_loc2), __pyx_v_offset_by_one, __pyx_v_len_last) == 1); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":714 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":708 * assign_matching(&loc2, arr2, i2, step2, self.fda.sent_id.arr) * if self.compare_matchings(&loc1, &loc2, offset_by_one, len_last) == 1: * i2 = i2 + step2 # <<<<<<<<<<<<<< @@ -42180,7 +42141,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":716 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":710 * i2 = i2 + step2 * else: * break # <<<<<<<<<<<<<< @@ -42193,7 +42154,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj } __pyx_L6_break:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":719 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":713 * * # Next: process all loc1's with the same starting val * j1 = i1 # <<<<<<<<<<<<<< @@ -42202,7 +42163,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj */ __pyx_v_j1 = __pyx_v_i1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":720 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":714 * # Next: process all loc1's with the same starting val * j1 = i1 * while i1 < high1 and arr1[j1] == arr1[i1]: # <<<<<<<<<<<<<< @@ -42219,7 +42180,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj } if (!__pyx_t_2) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":721 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":715 * j1 = i1 * while i1 < high1 and arr1[j1] == arr1[i1]: * assign_matching(&loc1, arr1, i1, step1, self.fda.sent_id.arr) # <<<<<<<<<<<<<< @@ -42228,7 +42189,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj */ __pyx_f_3_sa_assign_matching((&__pyx_v_loc1), __pyx_v_arr1, __pyx_v_i1, __pyx_v_step1, __pyx_v_self->fda->sent_id->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":722 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":716 * while i1 < high1 and arr1[j1] == arr1[i1]: * assign_matching(&loc1, arr1, i1, step1, self.fda.sent_id.arr) * j2 = i2 # <<<<<<<<<<<<<< @@ -42237,7 +42198,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj */ __pyx_v_j2 = __pyx_v_i2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":723 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":717 * assign_matching(&loc1, arr1, i1, step1, self.fda.sent_id.arr) * j2 = i2 * while j2 < high2: # <<<<<<<<<<<<<< @@ -42248,7 +42209,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj __pyx_t_2 = (__pyx_v_j2 < __pyx_v_high2); if (!__pyx_t_2) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":724 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":718 * j2 = i2 * while j2 < high2: * assign_matching(&loc2, arr2, j2, step2, self.fda.sent_id.arr) # <<<<<<<<<<<<<< @@ -42257,7 +42218,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj */ __pyx_f_3_sa_assign_matching((&__pyx_v_loc2), __pyx_v_arr2, __pyx_v_j2, __pyx_v_step2, __pyx_v_self->fda->sent_id->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":725 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":719 * while j2 < high2: * assign_matching(&loc2, arr2, j2, step2, self.fda.sent_id.arr) * comparison = self.compare_matchings(&loc1, &loc2, offset_by_one, len_last) # <<<<<<<<<<<<<< @@ -42266,7 +42227,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj */ __pyx_v_comparison = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->compare_matchings(__pyx_v_self, (&__pyx_v_loc1), (&__pyx_v_loc2), __pyx_v_offset_by_one, __pyx_v_len_last); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":726 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":720 * assign_matching(&loc2, arr2, j2, step2, self.fda.sent_id.arr) * comparison = self.compare_matchings(&loc1, &loc2, offset_by_one, len_last) * if comparison == 0: # <<<<<<<<<<<<<< @@ -42276,7 +42237,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj __pyx_t_2 = (__pyx_v_comparison == 0); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":727 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":721 * comparison = self.compare_matchings(&loc1, &loc2, offset_by_one, len_last) * if comparison == 0: * result = append_combined_matching(result, &loc1, &loc2, offset_by_one, num_subpatterns, result_len) # <<<<<<<<<<<<<< @@ -42288,7 +42249,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj } __pyx_L12:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":728 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":722 * if comparison == 0: * result = append_combined_matching(result, &loc1, &loc2, offset_by_one, num_subpatterns, result_len) * if comparison == 1: # <<<<<<<<<<<<<< @@ -42301,7 +42262,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj } __pyx_L13:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":730 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":724 * if comparison == 1: * pass * if comparison == -1: # <<<<<<<<<<<<<< @@ -42311,7 +42272,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj __pyx_t_2 = (__pyx_v_comparison == -1); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":731 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":725 * pass * if comparison == -1: * break # <<<<<<<<<<<<<< @@ -42323,7 +42284,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":733 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":727 * break * else: * j2 = j2 + step2 # <<<<<<<<<<<<<< @@ -42336,7 +42297,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj } __pyx_L11_break:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":734 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":728 * else: * j2 = j2 + step2 * i1 = i1 + step1 # <<<<<<<<<<<<<< @@ -42347,7 +42308,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj } } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":735 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":729 * j2 = j2 + step2 * i1 = i1 + step1 * return result # <<<<<<<<<<<<<< @@ -42363,7 +42324,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_merge_helper(struct __pyx_obj return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":738 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":732 * * * cdef void sort_phrase_loc(self, IntList arr, PhraseLocation loc, Phrase phrase): # <<<<<<<<<<<<<< @@ -42385,26 +42346,26 @@ static void __pyx_f_3_sa_23HieroCachingRuleFactory_sort_phrase_loc(struct __pyx_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("sort_phrase_loc", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":743 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":737 * cdef IntList result * * if phrase in self.precomputed_index: # <<<<<<<<<<<<<< * loc.arr = self.precomputed_index[phrase] * else: */ - __pyx_t_1 = (__Pyx_PySequence_Contains(((PyObject *)__pyx_v_phrase), __pyx_v_self->precomputed_index, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 743; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = (__Pyx_PySequence_Contains(((PyObject *)__pyx_v_phrase), __pyx_v_self->precomputed_index, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 737; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_1) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":744 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":738 * * if phrase in self.precomputed_index: * loc.arr = self.precomputed_index[phrase] # <<<<<<<<<<<<<< * else: * loc.arr = IntList(initial_len=loc.sa_high-loc.sa_low) */ - __pyx_t_2 = PyObject_GetItem(__pyx_v_self->precomputed_index, ((PyObject *)__pyx_v_phrase)); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 744; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_GetItem(__pyx_v_self->precomputed_index, ((PyObject *)__pyx_v_phrase)); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 738; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_3_sa_IntList))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 744; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_3_sa_IntList))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 738; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GIVEREF(__pyx_t_2); __Pyx_GOTREF(__pyx_v_loc->arr); __Pyx_DECREF(((PyObject *)__pyx_v_loc->arr)); @@ -42414,20 +42375,20 @@ static void __pyx_f_3_sa_23HieroCachingRuleFactory_sort_phrase_loc(struct __pyx_ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":746 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":740 * loc.arr = self.precomputed_index[phrase] * else: * loc.arr = IntList(initial_len=loc.sa_high-loc.sa_low) # <<<<<<<<<<<<<< * veb = VEB(arr.len) * for i from loc.sa_low <= i < loc.sa_high: */ - __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 746; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 740; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); - __pyx_t_3 = PyInt_FromLong((__pyx_v_loc->sa_high - __pyx_v_loc->sa_low)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 746; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong((__pyx_v_loc->sa_high - __pyx_v_loc->sa_low)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 740; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__initial_len), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 746; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__initial_len), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 740; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_2)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 746; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_2)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 740; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; __Pyx_GIVEREF(__pyx_t_3); @@ -42436,27 +42397,27 @@ static void __pyx_f_3_sa_23HieroCachingRuleFactory_sort_phrase_loc(struct __pyx_ __pyx_v_loc->arr = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":747 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":741 * else: * loc.arr = IntList(initial_len=loc.sa_high-loc.sa_low) * veb = VEB(arr.len) # <<<<<<<<<<<<<< * for i from loc.sa_low <= i < loc.sa_high: * veb._insert(arr.arr[i]) */ - __pyx_t_3 = PyInt_FromLong(__pyx_v_arr->len); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 747; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(__pyx_v_arr->len); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 741; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 747; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 741; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_3); __Pyx_GIVEREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_VEB)), ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 747; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_VEB)), ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 741; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; __pyx_v_veb = ((struct __pyx_obj_3_sa_VEB *)__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":748 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":742 * loc.arr = IntList(initial_len=loc.sa_high-loc.sa_low) * veb = VEB(arr.len) * for i from loc.sa_low <= i < loc.sa_high: # <<<<<<<<<<<<<< @@ -42466,7 +42427,7 @@ static void __pyx_f_3_sa_23HieroCachingRuleFactory_sort_phrase_loc(struct __pyx_ __pyx_t_4 = __pyx_v_loc->sa_high; for (__pyx_v_i = __pyx_v_loc->sa_low; __pyx_v_i < __pyx_t_4; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":749 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":743 * veb = VEB(arr.len) * for i from loc.sa_low <= i < loc.sa_high: * veb._insert(arr.arr[i]) # <<<<<<<<<<<<<< @@ -42476,7 +42437,7 @@ static void __pyx_f_3_sa_23HieroCachingRuleFactory_sort_phrase_loc(struct __pyx_ ((struct __pyx_vtabstruct_3_sa_VEB *)__pyx_v_veb->__pyx_vtab)->_insert(__pyx_v_veb, (__pyx_v_arr->arr[__pyx_v_i])); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":750 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":744 * for i from loc.sa_low <= i < loc.sa_high: * veb._insert(arr.arr[i]) * i = veb.veb.min_val # <<<<<<<<<<<<<< @@ -42485,7 +42446,7 @@ static void __pyx_f_3_sa_23HieroCachingRuleFactory_sort_phrase_loc(struct __pyx_ */ __pyx_v_i = __pyx_v_veb->veb->min_val; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":751 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":745 * veb._insert(arr.arr[i]) * i = veb.veb.min_val * for j from 0 <= j < loc.sa_high-loc.sa_low: # <<<<<<<<<<<<<< @@ -42495,7 +42456,7 @@ static void __pyx_f_3_sa_23HieroCachingRuleFactory_sort_phrase_loc(struct __pyx_ __pyx_t_4 = (__pyx_v_loc->sa_high - __pyx_v_loc->sa_low); for (__pyx_v_j = 0; __pyx_v_j < __pyx_t_4; __pyx_v_j++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":752 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":746 * i = veb.veb.min_val * for j from 0 <= j < loc.sa_high-loc.sa_low: * loc.arr.arr[j] = i # <<<<<<<<<<<<<< @@ -42504,7 +42465,7 @@ static void __pyx_f_3_sa_23HieroCachingRuleFactory_sort_phrase_loc(struct __pyx_ */ (__pyx_v_loc->arr->arr[__pyx_v_j]) = __pyx_v_i; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":753 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":747 * for j from 0 <= j < loc.sa_high-loc.sa_low: * loc.arr.arr[j] = i * i = veb._findsucc(i) # <<<<<<<<<<<<<< @@ -42516,7 +42477,7 @@ static void __pyx_f_3_sa_23HieroCachingRuleFactory_sort_phrase_loc(struct __pyx_ } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":754 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":748 * loc.arr.arr[j] = i * i = veb._findsucc(i) * loc.arr_low = 0 # <<<<<<<<<<<<<< @@ -42525,7 +42486,7 @@ static void __pyx_f_3_sa_23HieroCachingRuleFactory_sort_phrase_loc(struct __pyx_ */ __pyx_v_loc->arr_low = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":755 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":749 * i = veb._findsucc(i) * loc.arr_low = 0 * loc.arr_high = loc.arr.len # <<<<<<<<<<<<<< @@ -42544,7 +42505,7 @@ static void __pyx_f_3_sa_23HieroCachingRuleFactory_sort_phrase_loc(struct __pyx_ __Pyx_RefNannyFinishContext(); } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":758 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":752 * * * cdef intersect_helper(self, Phrase prefix, Phrase suffix, # <<<<<<<<<<<<<< @@ -42581,7 +42542,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct int __pyx_clineno = 0; __Pyx_RefNannySetupContext("intersect_helper", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":765 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":759 * cdef int* result_ptr * * result_len = 0 # <<<<<<<<<<<<<< @@ -42590,21 +42551,21 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct */ __pyx_v_result_len = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":767 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":761 * result_len = 0 * * if sym_isvar(suffix[0]): # <<<<<<<<<<<<<< * offset_by_one = 1 * else: */ - __pyx_t_1 = __Pyx_GetItemInt(((PyObject *)__pyx_v_suffix), 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetItemInt(((PyObject *)__pyx_v_suffix), 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 761; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_AsInt(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_AsInt(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 761; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_3 = __pyx_f_3_sa_sym_isvar(__pyx_t_2); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":768 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":762 * * if sym_isvar(suffix[0]): * offset_by_one = 1 # <<<<<<<<<<<<<< @@ -42616,7 +42577,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":770 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":764 * offset_by_one = 1 * else: * offset_by_one = 0 # <<<<<<<<<<<<<< @@ -42627,34 +42588,34 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":772 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":766 * offset_by_one = 0 * * len_last = len(suffix.getchunk(suffix.arity())) # <<<<<<<<<<<<<< * * if prefix_loc.arr is None: */ - __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_suffix), __pyx_n_s__getchunk); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 772; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_suffix), __pyx_n_s__getchunk); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 766; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_suffix), __pyx_n_s__arity); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 772; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_suffix), __pyx_n_s__arity); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 766; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 772; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 766; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 772; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 766; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_5); __Pyx_GIVEREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 772; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 766; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; - __pyx_t_6 = PyObject_Length(__pyx_t_5); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 772; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyObject_Length(__pyx_t_5); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 766; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_v_len_last = __pyx_t_6; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":774 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":768 * len_last = len(suffix.getchunk(suffix.arity())) * * if prefix_loc.arr is None: # <<<<<<<<<<<<<< @@ -42664,7 +42625,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct __pyx_t_7 = (((PyObject *)__pyx_v_prefix_loc->arr) == Py_None); if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":775 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":769 * * if prefix_loc.arr is None: * self.sort_phrase_loc(self.fsa.sa, prefix_loc, prefix) # <<<<<<<<<<<<<< @@ -42679,7 +42640,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":776 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":770 * if prefix_loc.arr is None: * self.sort_phrase_loc(self.fsa.sa, prefix_loc, prefix) * arr1 = prefix_loc.arr # <<<<<<<<<<<<<< @@ -42689,7 +42650,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct __Pyx_INCREF(((PyObject *)__pyx_v_prefix_loc->arr)); __pyx_v_arr1 = __pyx_v_prefix_loc->arr; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":777 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":771 * self.sort_phrase_loc(self.fsa.sa, prefix_loc, prefix) * arr1 = prefix_loc.arr * low1 = prefix_loc.arr_low # <<<<<<<<<<<<<< @@ -42698,7 +42659,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct */ __pyx_v_low1 = __pyx_v_prefix_loc->arr_low; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":778 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":772 * arr1 = prefix_loc.arr * low1 = prefix_loc.arr_low * high1 = prefix_loc.arr_high # <<<<<<<<<<<<<< @@ -42707,7 +42668,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct */ __pyx_v_high1 = __pyx_v_prefix_loc->arr_high; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":779 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":773 * low1 = prefix_loc.arr_low * high1 = prefix_loc.arr_high * step1 = prefix_loc.num_subpatterns # <<<<<<<<<<<<<< @@ -42716,7 +42677,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct */ __pyx_v_step1 = __pyx_v_prefix_loc->num_subpatterns; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":781 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":775 * step1 = prefix_loc.num_subpatterns * * if suffix_loc.arr is None: # <<<<<<<<<<<<<< @@ -42726,7 +42687,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct __pyx_t_7 = (((PyObject *)__pyx_v_suffix_loc->arr) == Py_None); if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":782 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":776 * * if suffix_loc.arr is None: * self.sort_phrase_loc(self.fsa.sa, suffix_loc, suffix) # <<<<<<<<<<<<<< @@ -42741,7 +42702,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":783 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":777 * if suffix_loc.arr is None: * self.sort_phrase_loc(self.fsa.sa, suffix_loc, suffix) * arr2 = suffix_loc.arr # <<<<<<<<<<<<<< @@ -42751,7 +42712,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct __Pyx_INCREF(((PyObject *)__pyx_v_suffix_loc->arr)); __pyx_v_arr2 = __pyx_v_suffix_loc->arr; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":784 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":778 * self.sort_phrase_loc(self.fsa.sa, suffix_loc, suffix) * arr2 = suffix_loc.arr * low2 = suffix_loc.arr_low # <<<<<<<<<<<<<< @@ -42760,7 +42721,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct */ __pyx_v_low2 = __pyx_v_suffix_loc->arr_low; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":785 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":779 * arr2 = suffix_loc.arr * low2 = suffix_loc.arr_low * high2 = suffix_loc.arr_high # <<<<<<<<<<<<<< @@ -42769,7 +42730,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct */ __pyx_v_high2 = __pyx_v_suffix_loc->arr_high; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":786 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":780 * low2 = suffix_loc.arr_low * high2 = suffix_loc.arr_high * step2 = suffix_loc.num_subpatterns # <<<<<<<<<<<<<< @@ -42778,26 +42739,26 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct */ __pyx_v_step2 = __pyx_v_suffix_loc->num_subpatterns; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":788 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":782 * step2 = suffix_loc.num_subpatterns * * num_subpatterns = prefix.arity()+1 # <<<<<<<<<<<<<< * * if algorithm == MERGE: */ - __pyx_t_5 = PyObject_GetAttr(((PyObject *)__pyx_v_prefix), __pyx_n_s__arity); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 788; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_GetAttr(((PyObject *)__pyx_v_prefix), __pyx_n_s__arity); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 782; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_4 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 788; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 782; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyNumber_Add(__pyx_t_4, __pyx_int_1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 788; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyNumber_Add(__pyx_t_4, __pyx_int_1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 782; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_3 = __Pyx_PyInt_AsInt(__pyx_t_5); if (unlikely((__pyx_t_3 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 788; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyInt_AsInt(__pyx_t_5); if (unlikely((__pyx_t_3 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 782; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_v_num_subpatterns = __pyx_t_3; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":790 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":784 * num_subpatterns = prefix.arity()+1 * * if algorithm == MERGE: # <<<<<<<<<<<<<< @@ -42807,7 +42768,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct __pyx_t_7 = (__pyx_v_algorithm == __pyx_v_3_sa_MERGE); if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":793 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":787 * result_ptr = self.merge_helper(low1, high1, arr1.arr, step1, * low2, high2, arr2.arr, step2, * offset_by_one, len_last, num_subpatterns, &result_len) # <<<<<<<<<<<<<< @@ -42819,7 +42780,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":797 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":791 * result_ptr = self.baeza_yates_helper(low1, high1, arr1.arr, step1, * low2, high2, arr2.arr, step2, * offset_by_one, len_last, num_subpatterns, &result_len) # <<<<<<<<<<<<<< @@ -42830,7 +42791,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct } __pyx_L6:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":799 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":793 * offset_by_one, len_last, num_subpatterns, &result_len) * * if result_len == 0: # <<<<<<<<<<<<<< @@ -42840,7 +42801,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct __pyx_t_7 = (__pyx_v_result_len == 0); if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":800 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":794 * * if result_len == 0: * free(result_ptr) # <<<<<<<<<<<<<< @@ -42849,7 +42810,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct */ free(__pyx_v_result_ptr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":801 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":795 * if result_len == 0: * free(result_ptr) * return None # <<<<<<<<<<<<<< @@ -42864,19 +42825,19 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":803 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":797 * return None * else: * result = IntList() # <<<<<<<<<<<<<< * free(result.arr) * result.arr = result_ptr */ - __pyx_t_5 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 803; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 797; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __pyx_v_result = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_5); __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":804 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":798 * else: * result = IntList() * free(result.arr) # <<<<<<<<<<<<<< @@ -42885,7 +42846,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct */ free(__pyx_v_result->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":805 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":799 * result = IntList() * free(result.arr) * result.arr = result_ptr # <<<<<<<<<<<<<< @@ -42894,7 +42855,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct */ __pyx_v_result->arr = __pyx_v_result_ptr; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":806 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":800 * free(result.arr) * result.arr = result_ptr * result.len = result_len # <<<<<<<<<<<<<< @@ -42903,7 +42864,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct */ __pyx_v_result->len = __pyx_v_result_len; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":807 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":801 * result.arr = result_ptr * result.len = result_len * result.size = result_len # <<<<<<<<<<<<<< @@ -42912,7 +42873,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct */ __pyx_v_result->size = __pyx_v_result_len; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":808 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":802 * result.len = result_len * result.size = result_len * return PhraseLocation(arr_low=0, arr_high=result_len, arr=result, num_subpatterns=num_subpatterns) # <<<<<<<<<<<<<< @@ -42920,19 +42881,19 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct * cdef loc2str(self, PhraseLocation loc): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_5 = PyDict_New(); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 808; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyDict_New(); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_5)); - if (PyDict_SetItem(__pyx_t_5, ((PyObject *)__pyx_n_s__arr_low), __pyx_int_0) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 808; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_4 = PyInt_FromLong(__pyx_v_result_len); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 808; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_5, ((PyObject *)__pyx_n_s__arr_low), __pyx_int_0) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(__pyx_v_result_len); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_t_5, ((PyObject *)__pyx_n_s__arr_high), __pyx_t_4) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 808; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_5, ((PyObject *)__pyx_n_s__arr_high), __pyx_t_4) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (PyDict_SetItem(__pyx_t_5, ((PyObject *)__pyx_n_s__arr), ((PyObject *)__pyx_v_result)) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 808; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_4 = PyInt_FromLong(__pyx_v_num_subpatterns); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 808; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_5, ((PyObject *)__pyx_n_s__arr), ((PyObject *)__pyx_v_result)) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromLong(__pyx_v_num_subpatterns); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_t_5, ((PyObject *)__pyx_n_s__num_subpatterns), __pyx_t_4) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 808; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_5, ((PyObject *)__pyx_n_s__num_subpatterns), __pyx_t_4) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_PhraseLocation)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 808; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_PhraseLocation)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_5)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; __pyx_r = __pyx_t_4; @@ -42958,7 +42919,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_intersect_helper(struct return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":810 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":804 * return PhraseLocation(arr_low=0, arr_high=result_len, arr=result, num_subpatterns=num_subpatterns) * * cdef loc2str(self, PhraseLocation loc): # <<<<<<<<<<<<<< @@ -42981,7 +42942,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st int __pyx_clineno = 0; __Pyx_RefNannySetupContext("loc2str", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":812 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":806 * cdef loc2str(self, PhraseLocation loc): * cdef int i, j * result = "{" # <<<<<<<<<<<<<< @@ -42991,7 +42952,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st __Pyx_INCREF(((PyObject *)__pyx_kp_s_116)); __pyx_v_result = ((PyObject *)__pyx_kp_s_116); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":813 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":807 * cdef int i, j * result = "{" * i = 0 # <<<<<<<<<<<<<< @@ -43000,7 +42961,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st */ __pyx_v_i = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":814 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":808 * result = "{" * i = 0 * while i < loc.arr_high: # <<<<<<<<<<<<<< @@ -43011,20 +42972,20 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st __pyx_t_1 = (__pyx_v_i < __pyx_v_loc->arr_high); if (!__pyx_t_1) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":815 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":809 * i = 0 * while i < loc.arr_high: * result = result + "(" # <<<<<<<<<<<<<< * for j from i <= j < i + loc.num_subpatterns: * result = result + ("%d " %loc.arr[j]) */ - __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_117)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 815; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_117)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 809; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_v_result); __pyx_v_result = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":816 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":810 * while i < loc.arr_high: * result = result + "(" * for j from i <= j < i + loc.num_subpatterns: # <<<<<<<<<<<<<< @@ -43034,19 +42995,19 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st __pyx_t_3 = (__pyx_v_i + __pyx_v_loc->num_subpatterns); for (__pyx_v_j = __pyx_v_i; __pyx_v_j < __pyx_t_3; __pyx_v_j++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":817 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":811 * result = result + "(" * for j from i <= j < i + loc.num_subpatterns: * result = result + ("%d " %loc.arr[j]) # <<<<<<<<<<<<<< * result = result + ")" * i = i + loc.num_subpatterns */ - __pyx_t_2 = __Pyx_GetItemInt(((PyObject *)__pyx_v_loc->arr), __pyx_v_j, sizeof(int), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 817; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetItemInt(((PyObject *)__pyx_v_loc->arr), __pyx_v_j, sizeof(int), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 811; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_t_2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 817; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_21), __pyx_t_2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 811; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_4)); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 817; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 811; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_v_result); @@ -43054,20 +43015,20 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st __pyx_t_2 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":818 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":812 * for j from i <= j < i + loc.num_subpatterns: * result = result + ("%d " %loc.arr[j]) * result = result + ")" # <<<<<<<<<<<<<< * i = i + loc.num_subpatterns * result = result + "}" */ - __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_59)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 818; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_59)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_v_result); __pyx_v_result = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":819 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":813 * result = result + ("%d " %loc.arr[j]) * result = result + ")" * i = i + loc.num_subpatterns # <<<<<<<<<<<<<< @@ -43077,20 +43038,20 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st __pyx_v_i = (__pyx_v_i + __pyx_v_loc->num_subpatterns); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":820 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":814 * result = result + ")" * i = i + loc.num_subpatterns * result = result + "}" # <<<<<<<<<<<<<< * return result * */ - __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_118)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 820; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_v_result, ((PyObject *)__pyx_kp_s_118)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 814; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_v_result); __pyx_v_result = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":821 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":815 * i = i + loc.num_subpatterns * result = result + "}" * return result # <<<<<<<<<<<<<< @@ -43116,7 +43077,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_loc2str(CYTHON_UNUSED st return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":823 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":817 * return result * * cdef PhraseLocation intersect(self, prefix_node, suffix_node, Phrase phrase): # <<<<<<<<<<<<<< @@ -43142,81 +43103,81 @@ static struct __pyx_obj_3_sa_PhraseLocation *__pyx_f_3_sa_23HieroCachingRuleFact int __pyx_clineno = 0; __Pyx_RefNannySetupContext("intersect", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":827 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":821 * cdef PhraseLocation prefix_loc, suffix_loc, result * * prefix = prefix_node.phrase # <<<<<<<<<<<<<< * suffix = suffix_node.phrase * prefix_loc = prefix_node.phrase_location */ - __pyx_t_1 = PyObject_GetAttr(__pyx_v_prefix_node, __pyx_n_s__phrase); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 827; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(__pyx_v_prefix_node, __pyx_n_s__phrase); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 821; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_3_sa_Phrase))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 827; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_3_sa_Phrase))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 821; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_prefix = ((struct __pyx_obj_3_sa_Phrase *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":828 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":822 * * prefix = prefix_node.phrase * suffix = suffix_node.phrase # <<<<<<<<<<<<<< * prefix_loc = prefix_node.phrase_location * suffix_loc = suffix_node.phrase_location */ - __pyx_t_1 = PyObject_GetAttr(__pyx_v_suffix_node, __pyx_n_s__phrase); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 828; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(__pyx_v_suffix_node, __pyx_n_s__phrase); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 822; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_3_sa_Phrase))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 828; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_3_sa_Phrase))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 822; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_suffix = ((struct __pyx_obj_3_sa_Phrase *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":829 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":823 * prefix = prefix_node.phrase * suffix = suffix_node.phrase * prefix_loc = prefix_node.phrase_location # <<<<<<<<<<<<<< * suffix_loc = suffix_node.phrase_location * */ - __pyx_t_1 = PyObject_GetAttr(__pyx_v_prefix_node, __pyx_n_s__phrase_location); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 829; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(__pyx_v_prefix_node, __pyx_n_s__phrase_location); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 823; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_3_sa_PhraseLocation))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 829; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_3_sa_PhraseLocation))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 823; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_prefix_loc = ((struct __pyx_obj_3_sa_PhraseLocation *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":830 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":824 * suffix = suffix_node.phrase * prefix_loc = prefix_node.phrase_location * suffix_loc = suffix_node.phrase_location # <<<<<<<<<<<<<< * * result = self.get_precomputed_collocation(phrase) */ - __pyx_t_1 = PyObject_GetAttr(__pyx_v_suffix_node, __pyx_n_s__phrase_location); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 830; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(__pyx_v_suffix_node, __pyx_n_s__phrase_location); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 824; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_3_sa_PhraseLocation))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 830; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_3_sa_PhraseLocation))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 824; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_suffix_loc = ((struct __pyx_obj_3_sa_PhraseLocation *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":832 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":826 * suffix_loc = suffix_node.phrase_location * * result = self.get_precomputed_collocation(phrase) # <<<<<<<<<<<<<< * if result is not None: * intersect_method = "precomputed" */ - __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s_119); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s_119); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(((PyObject *)__pyx_v_phrase)); PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_v_phrase)); __Pyx_GIVEREF(((PyObject *)__pyx_v_phrase)); - __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_3_sa_PhraseLocation))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_3_sa_PhraseLocation))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_result = ((struct __pyx_obj_3_sa_PhraseLocation *)__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":833 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":827 * * result = self.get_precomputed_collocation(phrase) * if result is not None: # <<<<<<<<<<<<<< @@ -43226,7 +43187,7 @@ static struct __pyx_obj_3_sa_PhraseLocation *__pyx_f_3_sa_23HieroCachingRuleFact __pyx_t_4 = (((PyObject *)__pyx_v_result) != Py_None); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":834 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":828 * result = self.get_precomputed_collocation(phrase) * if result is not None: * intersect_method = "precomputed" # <<<<<<<<<<<<<< @@ -43239,7 +43200,7 @@ static struct __pyx_obj_3_sa_PhraseLocation *__pyx_f_3_sa_23HieroCachingRuleFact } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":836 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":830 * intersect_method = "precomputed" * * if result is None: # <<<<<<<<<<<<<< @@ -43249,7 +43210,7 @@ static struct __pyx_obj_3_sa_PhraseLocation *__pyx_f_3_sa_23HieroCachingRuleFact __pyx_t_4 = (((PyObject *)__pyx_v_result) == Py_None); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":837 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":831 * * if result is None: * if self.use_baeza_yates: # <<<<<<<<<<<<<< @@ -43258,21 +43219,21 @@ static struct __pyx_obj_3_sa_PhraseLocation *__pyx_f_3_sa_23HieroCachingRuleFact */ if (__pyx_v_self->use_baeza_yates) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":838 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":832 * if result is None: * if self.use_baeza_yates: * result = self.intersect_helper(prefix, suffix, prefix_loc, suffix_loc, BAEZA_YATES) # <<<<<<<<<<<<<< * intersect_method="double binary" * else: */ - __pyx_t_3 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->intersect_helper(__pyx_v_self, __pyx_v_prefix, __pyx_v_suffix, __pyx_v_prefix_loc, __pyx_v_suffix_loc, __pyx_v_3_sa_BAEZA_YATES); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 838; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->intersect_helper(__pyx_v_self, __pyx_v_prefix, __pyx_v_suffix, __pyx_v_prefix_loc, __pyx_v_suffix_loc, __pyx_v_3_sa_BAEZA_YATES); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_3_sa_PhraseLocation))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 838; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_3_sa_PhraseLocation))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 832; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(((PyObject *)__pyx_v_result)); __pyx_v_result = ((struct __pyx_obj_3_sa_PhraseLocation *)__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":839 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":833 * if self.use_baeza_yates: * result = self.intersect_helper(prefix, suffix, prefix_loc, suffix_loc, BAEZA_YATES) * intersect_method="double binary" # <<<<<<<<<<<<<< @@ -43286,21 +43247,21 @@ static struct __pyx_obj_3_sa_PhraseLocation *__pyx_f_3_sa_23HieroCachingRuleFact } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":841 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":835 * intersect_method="double binary" * else: * result = self.intersect_helper(prefix, suffix, prefix_loc, suffix_loc, MERGE) # <<<<<<<<<<<<<< * intersect_method="merge" * return result */ - __pyx_t_3 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->intersect_helper(__pyx_v_self, __pyx_v_prefix, __pyx_v_suffix, __pyx_v_prefix_loc, __pyx_v_suffix_loc, __pyx_v_3_sa_MERGE); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 841; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->intersect_helper(__pyx_v_self, __pyx_v_prefix, __pyx_v_suffix, __pyx_v_prefix_loc, __pyx_v_suffix_loc, __pyx_v_3_sa_MERGE); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 835; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_3_sa_PhraseLocation))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 841; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_3_sa_PhraseLocation))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 835; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(((PyObject *)__pyx_v_result)); __pyx_v_result = ((struct __pyx_obj_3_sa_PhraseLocation *)__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":842 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":836 * else: * result = self.intersect_helper(prefix, suffix, prefix_loc, suffix_loc, MERGE) * intersect_method="merge" # <<<<<<<<<<<<<< @@ -43316,7 +43277,7 @@ static struct __pyx_obj_3_sa_PhraseLocation *__pyx_f_3_sa_23HieroCachingRuleFact } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":843 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":837 * result = self.intersect_helper(prefix, suffix, prefix_loc, suffix_loc, MERGE) * intersect_method="merge" * return result # <<<<<<<<<<<<<< @@ -43378,16 +43339,16 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_13advance(PyObject *__p case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__res)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("advance", 1, 3, 3, 1); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 845; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("advance", 1, 3, 3, 1); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 839; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__fwords)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("advance", 1, 3, 3, 2); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 845; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("advance", 1, 3, 3, 2); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 839; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "advance") < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 845; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "advance") < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 839; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 3) { goto __pyx_L5_argtuple_error; @@ -43402,7 +43363,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_13advance(PyObject *__p } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("advance", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 845; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("advance", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 839; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_sa.HieroCachingRuleFactory.advance", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -43413,7 +43374,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_13advance(PyObject *__p return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":845 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":839 * return result * * def advance(self, frontier, res, fwords): # <<<<<<<<<<<<<< @@ -43454,19 +43415,19 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("advance", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":847 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":841 * def advance(self, frontier, res, fwords): * cdef unsigned na * nf = [] # <<<<<<<<<<<<<< * for (toskip, (i, alt, pathlen)) in frontier: * spanlen = fwords[i][alt][2] */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 847; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 841; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_nf = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":848 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":842 * cdef unsigned na * nf = [] * for (toskip, (i, alt, pathlen)) in frontier: # <<<<<<<<<<<<<< @@ -43477,7 +43438,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ __pyx_t_1 = __pyx_v_frontier; __Pyx_INCREF(__pyx_t_1); __pyx_t_2 = 0; __pyx_t_3 = NULL; } else { - __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_frontier); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_frontier); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_3 = Py_TYPE(__pyx_t_1)->tp_iternext; } @@ -43485,23 +43446,23 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ if (!__pyx_t_3 && PyList_CheckExact(__pyx_t_1)) { if (__pyx_t_2 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_4 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else if (!__pyx_t_3 && PyTuple_CheckExact(__pyx_t_1)) { if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_4); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { __pyx_t_4 = __pyx_t_3(__pyx_t_1); if (unlikely(!__pyx_t_4)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -43517,7 +43478,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ if (unlikely(size != 2)) { if (size > 2) __Pyx_RaiseTooManyValuesError(2); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyTuple_CheckExact(sequence))) { @@ -43530,14 +43491,14 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ __Pyx_INCREF(__pyx_t_5); __Pyx_INCREF(__pyx_t_6); #else - __pyx_t_5 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_6 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; } else { Py_ssize_t index = -1; - __pyx_t_7 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_8 = Py_TYPE(__pyx_t_7)->tp_iternext; @@ -43545,7 +43506,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ __Pyx_GOTREF(__pyx_t_5); index = 1; __pyx_t_6 = __pyx_t_8(__pyx_t_7); if (unlikely(!__pyx_t_6)) goto __pyx_L5_unpacking_failed; __Pyx_GOTREF(__pyx_t_6); - if (__Pyx_IternextUnpackEndCheck(__pyx_t_8(__pyx_t_7), 2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_IternextUnpackEndCheck(__pyx_t_8(__pyx_t_7), 2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_8 = NULL; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; goto __pyx_L6_unpacking_done; @@ -43553,7 +43514,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __pyx_t_8 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_L6_unpacking_done:; } __Pyx_XDECREF(__pyx_v_toskip); @@ -43569,7 +43530,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ if (unlikely(size != 3)) { if (size > 3) __Pyx_RaiseTooManyValuesError(3); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyTuple_CheckExact(sequence))) { @@ -43585,15 +43546,15 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ __Pyx_INCREF(__pyx_t_9); __Pyx_INCREF(__pyx_t_10); #else - __pyx_t_7 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_9 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_10 = PySequence_ITEM(sequence, 2); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PySequence_ITEM(sequence, 2); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; } else { Py_ssize_t index = -1; - __pyx_t_11 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_t_8 = Py_TYPE(__pyx_t_11)->tp_iternext; @@ -43603,7 +43564,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ __Pyx_GOTREF(__pyx_t_9); index = 2; __pyx_t_10 = __pyx_t_8(__pyx_t_11); if (unlikely(!__pyx_t_10)) goto __pyx_L7_unpacking_failed; __Pyx_GOTREF(__pyx_t_10); - if (__Pyx_IternextUnpackEndCheck(__pyx_t_8(__pyx_t_11), 3) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_IternextUnpackEndCheck(__pyx_t_8(__pyx_t_11), 3) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_8 = NULL; __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; goto __pyx_L8_unpacking_done; @@ -43611,7 +43572,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; __pyx_t_8 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_L8_unpacking_done:; } __Pyx_XDECREF(__pyx_v_i); @@ -43624,45 +43585,45 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ __pyx_v_pathlen = __pyx_t_10; __pyx_t_10 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":849 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":843 * nf = [] * for (toskip, (i, alt, pathlen)) in frontier: * spanlen = fwords[i][alt][2] # <<<<<<<<<<<<<< * if (toskip == 0): * res.append((i, alt, pathlen)) */ - __pyx_t_4 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_i); if (!__pyx_t_4) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 849; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_i); if (!__pyx_t_4) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 843; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_6 = PyObject_GetItem(__pyx_t_4, __pyx_v_alt); if (!__pyx_t_6) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 849; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyObject_GetItem(__pyx_t_4, __pyx_v_alt); if (!__pyx_t_6) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 843; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_GetItemInt(__pyx_t_6, 2, sizeof(long), PyInt_FromLong); if (!__pyx_t_4) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 849; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_GetItemInt(__pyx_t_6, 2, sizeof(long), PyInt_FromLong); if (!__pyx_t_4) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 843; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_XDECREF(__pyx_v_spanlen); __pyx_v_spanlen = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":850 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":844 * for (toskip, (i, alt, pathlen)) in frontier: * spanlen = fwords[i][alt][2] * if (toskip == 0): # <<<<<<<<<<<<<< * res.append((i, alt, pathlen)) * ni = i + spanlen */ - __pyx_t_4 = PyObject_RichCompare(__pyx_v_toskip, __pyx_int_0, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 850; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_12 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_12 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 850; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_RichCompare(__pyx_v_toskip, __pyx_int_0, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 844; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_12 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 844; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_12) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":851 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":845 * spanlen = fwords[i][alt][2] * if (toskip == 0): * res.append((i, alt, pathlen)) # <<<<<<<<<<<<<< * ni = i + spanlen * if (ni < len(fwords) and (pathlen + 1) < self.max_initial_size): */ - __pyx_t_4 = PyTuple_New(3); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 851; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(3); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 845; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_INCREF(__pyx_v_i); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_v_i); @@ -43673,7 +43634,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ __Pyx_INCREF(__pyx_v_pathlen); PyTuple_SET_ITEM(__pyx_t_4, 2, __pyx_v_pathlen); __Pyx_GIVEREF(__pyx_v_pathlen); - __pyx_t_6 = __Pyx_PyObject_Append(__pyx_v_res, ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 851; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Append(__pyx_v_res, ((PyObject *)__pyx_t_4)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 845; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; @@ -43681,42 +43642,42 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ } __pyx_L9:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":852 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":846 * if (toskip == 0): * res.append((i, alt, pathlen)) * ni = i + spanlen # <<<<<<<<<<<<<< * if (ni < len(fwords) and (pathlen + 1) < self.max_initial_size): * for na in range(len(fwords[ni])): */ - __pyx_t_6 = PyNumber_Add(__pyx_v_i, __pyx_v_spanlen); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 852; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyNumber_Add(__pyx_v_i, __pyx_v_spanlen); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 846; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_XDECREF(__pyx_v_ni); __pyx_v_ni = __pyx_t_6; __pyx_t_6 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":853 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":847 * res.append((i, alt, pathlen)) * ni = i + spanlen * if (ni < len(fwords) and (pathlen + 1) < self.max_initial_size): # <<<<<<<<<<<<<< * for na in range(len(fwords[ni])): * nf.append((toskip - 1, (ni, na, pathlen + 1))) */ - __pyx_t_13 = PyObject_Length(__pyx_v_fwords); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 853; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_6 = PyInt_FromSsize_t(__pyx_t_13); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 853; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyObject_Length(__pyx_v_fwords); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 847; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyInt_FromSsize_t(__pyx_t_13); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 847; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_4 = PyObject_RichCompare(__pyx_v_ni, __pyx_t_6, Py_LT); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 853; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_RichCompare(__pyx_v_ni, __pyx_t_6, Py_LT); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 847; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_12 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_12 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 853; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_12 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 847; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_12) { - __pyx_t_4 = PyNumber_Add(__pyx_v_pathlen, __pyx_int_1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 853; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyNumber_Add(__pyx_v_pathlen, __pyx_int_1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 847; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_6 = PyInt_FromLong(__pyx_v_self->max_initial_size); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 853; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyInt_FromLong(__pyx_v_self->max_initial_size); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 847; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_5 = PyObject_RichCompare(__pyx_t_4, __pyx_t_6, Py_LT); __Pyx_XGOTREF(__pyx_t_5); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 853; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_RichCompare(__pyx_t_4, __pyx_t_6, Py_LT); __Pyx_XGOTREF(__pyx_t_5); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 847; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_14 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_14 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 853; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_14 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 847; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_15 = __pyx_t_14; } else { @@ -43724,34 +43685,34 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ } if (__pyx_t_15) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":854 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":848 * ni = i + spanlen * if (ni < len(fwords) and (pathlen + 1) < self.max_initial_size): * for na in range(len(fwords[ni])): # <<<<<<<<<<<<<< * nf.append((toskip - 1, (ni, na, pathlen + 1))) * if (len(nf) > 0): */ - __pyx_t_5 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_ni); if (!__pyx_t_5) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 854; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_ni); if (!__pyx_t_5) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_13 = PyObject_Length(__pyx_t_5); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 854; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyObject_Length(__pyx_t_5); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 848; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_13; __pyx_t_16+=1) { __pyx_v_na = __pyx_t_16; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":855 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":849 * if (ni < len(fwords) and (pathlen + 1) < self.max_initial_size): * for na in range(len(fwords[ni])): * nf.append((toskip - 1, (ni, na, pathlen + 1))) # <<<<<<<<<<<<<< * if (len(nf) > 0): * return self.advance(nf, res, fwords) */ - __pyx_t_5 = PyNumber_Subtract(__pyx_v_toskip, __pyx_int_1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyNumber_Subtract(__pyx_v_toskip, __pyx_int_1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 849; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = PyLong_FromUnsignedLong(__pyx_v_na); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyLong_FromUnsignedLong(__pyx_v_na); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 849; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_4 = PyNumber_Add(__pyx_v_pathlen, __pyx_int_1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyNumber_Add(__pyx_v_pathlen, __pyx_int_1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 849; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_10 = PyTuple_New(3); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyTuple_New(3); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 849; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_INCREF(__pyx_v_ni); PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_v_ni); @@ -43762,7 +43723,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ __Pyx_GIVEREF(__pyx_t_4); __pyx_t_6 = 0; __pyx_t_4 = 0; - __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 849; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_5); __Pyx_GIVEREF(__pyx_t_5); @@ -43770,7 +43731,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ __Pyx_GIVEREF(((PyObject *)__pyx_t_10)); __pyx_t_5 = 0; __pyx_t_10 = 0; - __pyx_t_17 = PyList_Append(__pyx_v_nf, ((PyObject *)__pyx_t_4)); if (unlikely(__pyx_t_17 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_17 = PyList_Append(__pyx_v_nf, ((PyObject *)__pyx_t_4)); if (unlikely(__pyx_t_17 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 849; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; } goto __pyx_L10; @@ -43779,18 +43740,18 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":856 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":850 * for na in range(len(fwords[ni])): * nf.append((toskip - 1, (ni, na, pathlen + 1))) * if (len(nf) > 0): # <<<<<<<<<<<<<< * return self.advance(nf, res, fwords) * else: */ - __pyx_t_2 = PyList_GET_SIZE(((PyObject *)__pyx_v_nf)); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 856; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyList_GET_SIZE(((PyObject *)__pyx_v_nf)); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 850; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_15 = (__pyx_t_2 > 0); if (__pyx_t_15) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":857 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":851 * nf.append((toskip - 1, (ni, na, pathlen + 1))) * if (len(nf) > 0): * return self.advance(nf, res, fwords) # <<<<<<<<<<<<<< @@ -43798,9 +43759,9 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ * return res */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__advance); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 857; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__advance); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 851; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyTuple_New(3); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 857; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(3); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 851; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_INCREF(((PyObject *)__pyx_v_nf)); PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_v_nf)); @@ -43811,7 +43772,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ __Pyx_INCREF(__pyx_v_fwords); PyTuple_SET_ITEM(__pyx_t_4, 2, __pyx_v_fwords); __Pyx_GIVEREF(__pyx_v_fwords); - __pyx_t_10 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 857; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 851; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; @@ -43822,7 +43783,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_12advance(struct __pyx_ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":859 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":853 * return self.advance(nf, res, fwords) * else: * return res # <<<<<<<<<<<<<< @@ -43900,36 +43861,36 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_15get_all_nodes_isteps_ case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__i)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("get_all_nodes_isteps_away", 1, 7, 7, 1); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 861; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("get_all_nodes_isteps_away", 1, 7, 7, 1); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__spanlen)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("get_all_nodes_isteps_away", 1, 7, 7, 2); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 861; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("get_all_nodes_isteps_away", 1, 7, 7, 2); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__pathlen)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("get_all_nodes_isteps_away", 1, 7, 7, 3); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 861; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("get_all_nodes_isteps_away", 1, 7, 7, 3); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__fwords)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("get_all_nodes_isteps_away", 1, 7, 7, 4); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 861; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("get_all_nodes_isteps_away", 1, 7, 7, 4); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 5: if (likely((values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__next_states)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("get_all_nodes_isteps_away", 1, 7, 7, 5); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 861; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("get_all_nodes_isteps_away", 1, 7, 7, 5); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 6: if (likely((values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__reachable_buffer)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("get_all_nodes_isteps_away", 1, 7, 7, 6); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 861; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("get_all_nodes_isteps_away", 1, 7, 7, 6); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "get_all_nodes_isteps_away") < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 861; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "get_all_nodes_isteps_away") < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 7) { goto __pyx_L5_argtuple_error; @@ -43952,7 +43913,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_15get_all_nodes_isteps_ } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("get_all_nodes_isteps_away", 1, 7, 7, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 861; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("get_all_nodes_isteps_away", 1, 7, 7, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 855; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_sa.HieroCachingRuleFactory.get_all_nodes_isteps_away", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -43963,7 +43924,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_15get_all_nodes_isteps_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":861 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":855 * return res * * def get_all_nodes_isteps_away(self, skip, i, spanlen, pathlen, fwords, next_states, reachable_buffer): # <<<<<<<<<<<<<< @@ -44001,41 +43962,41 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("get_all_nodes_isteps_away", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":863 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":857 * def get_all_nodes_isteps_away(self, skip, i, spanlen, pathlen, fwords, next_states, reachable_buffer): * cdef unsigned alt_it * frontier = [] # <<<<<<<<<<<<<< * if (i+spanlen+skip >= len(next_states)): * return frontier */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 863; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 857; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_frontier = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":864 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":858 * cdef unsigned alt_it * frontier = [] * if (i+spanlen+skip >= len(next_states)): # <<<<<<<<<<<<<< * return frontier * key = tuple([i,spanlen]) */ - __pyx_t_1 = PyNumber_Add(__pyx_v_i, __pyx_v_spanlen); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 864; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Add(__pyx_v_i, __pyx_v_spanlen); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 858; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyNumber_Add(__pyx_t_1, __pyx_v_skip); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 864; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_t_1, __pyx_v_skip); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 858; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_3 = PyObject_Length(__pyx_v_next_states); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 864; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_1 = PyInt_FromSsize_t(__pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 864; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Length(__pyx_v_next_states); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 858; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromSsize_t(__pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 858; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyObject_RichCompare(__pyx_t_2, __pyx_t_1, Py_GE); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 864; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_RichCompare(__pyx_t_2, __pyx_t_1, Py_GE); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 858; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 864; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 858; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":865 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":859 * frontier = [] * if (i+spanlen+skip >= len(next_states)): * return frontier # <<<<<<<<<<<<<< @@ -44050,14 +44011,14 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":866 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":860 * if (i+spanlen+skip >= len(next_states)): * return frontier * key = tuple([i,spanlen]) # <<<<<<<<<<<<<< * reachable = [] * if (key in reachable_buffer): */ - __pyx_t_4 = PyList_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 866; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyList_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 860; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_INCREF(__pyx_v_i); PyList_SET_ITEM(__pyx_t_4, 0, __pyx_v_i); @@ -44065,42 +44026,42 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ __Pyx_INCREF(__pyx_v_spanlen); PyList_SET_ITEM(__pyx_t_4, 1, __pyx_v_spanlen); __Pyx_GIVEREF(__pyx_v_spanlen); - __pyx_t_1 = ((PyObject *)PyList_AsTuple(__pyx_t_4)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 866; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = ((PyObject *)PyList_AsTuple(__pyx_t_4)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 860; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; __pyx_v_key = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":867 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":861 * return frontier * key = tuple([i,spanlen]) * reachable = [] # <<<<<<<<<<<<<< * if (key in reachable_buffer): * reachable = reachable_buffer[key] */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 867; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 861; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_reachable = ((PyObject *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":868 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":862 * key = tuple([i,spanlen]) * reachable = [] * if (key in reachable_buffer): # <<<<<<<<<<<<<< * reachable = reachable_buffer[key] * else: */ - __pyx_t_5 = (__Pyx_PySequence_Contains(((PyObject *)__pyx_v_key), __pyx_v_reachable_buffer, Py_EQ)); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 868; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = (__Pyx_PySequence_Contains(((PyObject *)__pyx_v_key), __pyx_v_reachable_buffer, Py_EQ)); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 862; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":869 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":863 * reachable = [] * if (key in reachable_buffer): * reachable = reachable_buffer[key] # <<<<<<<<<<<<<< * else: * reachable = self.reachable(fwords, i, spanlen) */ - __pyx_t_1 = PyObject_GetItem(__pyx_v_reachable_buffer, ((PyObject *)__pyx_v_key)); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 869; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetItem(__pyx_v_reachable_buffer, ((PyObject *)__pyx_v_key)); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 863; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_v_reachable); __pyx_v_reachable = __pyx_t_1; @@ -44109,16 +44070,16 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":871 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":865 * reachable = reachable_buffer[key] * else: * reachable = self.reachable(fwords, i, spanlen) # <<<<<<<<<<<<<< * reachable_buffer[key] = reachable * for nextreachable in reachable: */ - __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__reachable); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 871; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__reachable); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 865; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyTuple_New(3); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 871; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(3); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 865; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_INCREF(__pyx_v_fwords); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_v_fwords); @@ -44129,7 +44090,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ __Pyx_INCREF(__pyx_v_spanlen); PyTuple_SET_ITEM(__pyx_t_4, 2, __pyx_v_spanlen); __Pyx_GIVEREF(__pyx_v_spanlen); - __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 871; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 865; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0; @@ -44137,18 +44098,18 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ __pyx_v_reachable = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":872 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":866 * else: * reachable = self.reachable(fwords, i, spanlen) * reachable_buffer[key] = reachable # <<<<<<<<<<<<<< * for nextreachable in reachable: * for next_id in next_states[nextreachable]: */ - if (PyObject_SetItem(__pyx_v_reachable_buffer, ((PyObject *)__pyx_v_key), __pyx_v_reachable) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 872; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyObject_SetItem(__pyx_v_reachable_buffer, ((PyObject *)__pyx_v_key), __pyx_v_reachable) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 866; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":873 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":867 * reachable = self.reachable(fwords, i, spanlen) * reachable_buffer[key] = reachable * for nextreachable in reachable: # <<<<<<<<<<<<<< @@ -44159,7 +44120,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ __pyx_t_2 = __pyx_v_reachable; __Pyx_INCREF(__pyx_t_2); __pyx_t_3 = 0; __pyx_t_6 = NULL; } else { - __pyx_t_3 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_reachable); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_reachable); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 867; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_t_6 = Py_TYPE(__pyx_t_2)->tp_iternext; } @@ -44167,23 +44128,23 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ if (!__pyx_t_6 && PyList_CheckExact(__pyx_t_2)) { if (__pyx_t_3 >= PyList_GET_SIZE(__pyx_t_2)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_4 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_4); __pyx_t_3++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_4); __pyx_t_3++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 867; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 867; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else if (!__pyx_t_6 && PyTuple_CheckExact(__pyx_t_2)) { if (__pyx_t_3 >= PyTuple_GET_SIZE(__pyx_t_2)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_4); __pyx_t_3++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_4); __pyx_t_3++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 867; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 867; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { __pyx_t_4 = __pyx_t_6(__pyx_t_2); if (unlikely(!__pyx_t_4)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 867; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -44193,20 +44154,20 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ __pyx_v_nextreachable = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":874 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":868 * reachable_buffer[key] = reachable * for nextreachable in reachable: * for next_id in next_states[nextreachable]: # <<<<<<<<<<<<<< * jump = self.shortest(fwords,i,next_id) * if jump < skip: */ - __pyx_t_4 = PyObject_GetItem(__pyx_v_next_states, __pyx_v_nextreachable); if (!__pyx_t_4) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 874; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_GetItem(__pyx_v_next_states, __pyx_v_nextreachable); if (!__pyx_t_4) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 868; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); if (PyList_CheckExact(__pyx_t_4) || PyTuple_CheckExact(__pyx_t_4)) { __pyx_t_1 = __pyx_t_4; __Pyx_INCREF(__pyx_t_1); __pyx_t_7 = 0; __pyx_t_8 = NULL; } else { - __pyx_t_7 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 874; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 868; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_8 = Py_TYPE(__pyx_t_1)->tp_iternext; } @@ -44215,23 +44176,23 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ if (!__pyx_t_8 && PyList_CheckExact(__pyx_t_1)) { if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_4 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_7); __Pyx_INCREF(__pyx_t_4); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 874; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_7); __Pyx_INCREF(__pyx_t_4); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 868; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 874; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 868; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else if (!__pyx_t_8 && PyTuple_CheckExact(__pyx_t_1)) { if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_7); __Pyx_INCREF(__pyx_t_4); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 874; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_7); __Pyx_INCREF(__pyx_t_4); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 868; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 874; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PySequence_ITEM(__pyx_t_1, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 868; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { __pyx_t_4 = __pyx_t_8(__pyx_t_1); if (unlikely(!__pyx_t_4)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 874; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 868; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -44241,16 +44202,16 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ __pyx_v_next_id = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":875 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":869 * for nextreachable in reachable: * for next_id in next_states[nextreachable]: * jump = self.shortest(fwords,i,next_id) # <<<<<<<<<<<<<< * if jump < skip: * continue */ - __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__shortest); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 875; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__shortest); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 869; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_9 = PyTuple_New(3); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 875; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyTuple_New(3); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 869; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_INCREF(__pyx_v_fwords); PyTuple_SET_ITEM(__pyx_t_9, 0, __pyx_v_fwords); @@ -44261,7 +44222,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ __Pyx_INCREF(__pyx_v_next_id); PyTuple_SET_ITEM(__pyx_t_9, 2, __pyx_v_next_id); __Pyx_GIVEREF(__pyx_v_next_id); - __pyx_t_10 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_9), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 875; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_9), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 869; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_9)); __pyx_t_9 = 0; @@ -44269,19 +44230,19 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ __pyx_v_jump = __pyx_t_10; __pyx_t_10 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":876 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":870 * for next_id in next_states[nextreachable]: * jump = self.shortest(fwords,i,next_id) * if jump < skip: # <<<<<<<<<<<<<< * continue * if pathlen+jump <= self.max_initial_size: */ - __pyx_t_10 = PyObject_RichCompare(__pyx_v_jump, __pyx_v_skip, Py_LT); __Pyx_XGOTREF(__pyx_t_10); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 876; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_10); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 876; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_RichCompare(__pyx_v_jump, __pyx_v_skip, Py_LT); __Pyx_XGOTREF(__pyx_t_10); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 870; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_10); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 870; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":877 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":871 * jump = self.shortest(fwords,i,next_id) * if jump < skip: * continue # <<<<<<<<<<<<<< @@ -44293,50 +44254,50 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ } __pyx_L9:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":878 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":872 * if jump < skip: * continue * if pathlen+jump <= self.max_initial_size: # <<<<<<<<<<<<<< * for alt_id in range(len(fwords[next_id])): * if (fwords[next_id][alt_id][0] != EPSILON): */ - __pyx_t_10 = PyNumber_Add(__pyx_v_pathlen, __pyx_v_jump); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 878; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyNumber_Add(__pyx_v_pathlen, __pyx_v_jump); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 872; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_9 = PyInt_FromLong(__pyx_v_self->max_initial_size); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 878; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyInt_FromLong(__pyx_v_self->max_initial_size); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 872; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - __pyx_t_4 = PyObject_RichCompare(__pyx_t_10, __pyx_t_9, Py_LE); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 878; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_RichCompare(__pyx_t_10, __pyx_t_9, Py_LE); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 872; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 878; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 872; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":879 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":873 * continue * if pathlen+jump <= self.max_initial_size: * for alt_id in range(len(fwords[next_id])): # <<<<<<<<<<<<<< * if (fwords[next_id][alt_id][0] != EPSILON): * newel = (next_id,alt_id,pathlen+jump) */ - __pyx_t_4 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_next_id); if (!__pyx_t_4) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 879; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_next_id); if (!__pyx_t_4) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_11 = PyObject_Length(__pyx_t_4); if (unlikely(__pyx_t_11 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 879; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyObject_Length(__pyx_t_4); if (unlikely(__pyx_t_11 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyInt_FromSsize_t(__pyx_t_11); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 879; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyInt_FromSsize_t(__pyx_t_11); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_9 = PyTuple_New(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 879; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyTuple_New(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); PyTuple_SET_ITEM(__pyx_t_9, 0, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyObject_Call(__pyx_builtin_range, ((PyObject *)__pyx_t_9), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 879; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_Call(__pyx_builtin_range, ((PyObject *)__pyx_t_9), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(((PyObject *)__pyx_t_9)); __pyx_t_9 = 0; if (PyList_CheckExact(__pyx_t_4) || PyTuple_CheckExact(__pyx_t_4)) { __pyx_t_9 = __pyx_t_4; __Pyx_INCREF(__pyx_t_9); __pyx_t_11 = 0; __pyx_t_12 = NULL; } else { - __pyx_t_11 = -1; __pyx_t_9 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 879; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = -1; __pyx_t_9 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __pyx_t_12 = Py_TYPE(__pyx_t_9)->tp_iternext; } @@ -44345,23 +44306,23 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ if (!__pyx_t_12 && PyList_CheckExact(__pyx_t_9)) { if (__pyx_t_11 >= PyList_GET_SIZE(__pyx_t_9)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_4 = PyList_GET_ITEM(__pyx_t_9, __pyx_t_11); __Pyx_INCREF(__pyx_t_4); __pyx_t_11++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 879; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyList_GET_ITEM(__pyx_t_9, __pyx_t_11); __Pyx_INCREF(__pyx_t_4); __pyx_t_11++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_9, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 879; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PySequence_ITEM(__pyx_t_9, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else if (!__pyx_t_12 && PyTuple_CheckExact(__pyx_t_9)) { if (__pyx_t_11 >= PyTuple_GET_SIZE(__pyx_t_9)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_9, __pyx_t_11); __Pyx_INCREF(__pyx_t_4); __pyx_t_11++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 879; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_9, __pyx_t_11); __Pyx_INCREF(__pyx_t_4); __pyx_t_11++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_9, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 879; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PySequence_ITEM(__pyx_t_9, __pyx_t_11); __pyx_t_11++; if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { __pyx_t_4 = __pyx_t_12(__pyx_t_9); if (unlikely(!__pyx_t_4)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 879; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 873; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -44371,40 +44332,40 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ __pyx_v_alt_id = __pyx_t_4; __pyx_t_4 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":880 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":874 * if pathlen+jump <= self.max_initial_size: * for alt_id in range(len(fwords[next_id])): * if (fwords[next_id][alt_id][0] != EPSILON): # <<<<<<<<<<<<<< * newel = (next_id,alt_id,pathlen+jump) * if newel not in frontier: */ - __pyx_t_4 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_next_id); if (!__pyx_t_4) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 880; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_next_id); if (!__pyx_t_4) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 874; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_10 = PyObject_GetItem(__pyx_t_4, __pyx_v_alt_id); if (!__pyx_t_10) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 880; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_GetItem(__pyx_t_4, __pyx_v_alt_id); if (!__pyx_t_10) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 874; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_GetItemInt(__pyx_t_10, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_4) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 880; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_GetItemInt(__pyx_t_10, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_4) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 874; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = PyInt_FromLong(__pyx_v_3_sa_EPSILON); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 880; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyInt_FromLong(__pyx_v_3_sa_EPSILON); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 874; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_13 = PyObject_RichCompare(__pyx_t_4, __pyx_t_10, Py_NE); __Pyx_XGOTREF(__pyx_t_13); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 880; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyObject_RichCompare(__pyx_t_4, __pyx_t_10, Py_NE); __Pyx_XGOTREF(__pyx_t_13); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 874; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_13); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 880; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_13); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 874; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":881 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":875 * for alt_id in range(len(fwords[next_id])): * if (fwords[next_id][alt_id][0] != EPSILON): * newel = (next_id,alt_id,pathlen+jump) # <<<<<<<<<<<<<< * if newel not in frontier: * frontier.append((next_id,alt_id,pathlen+jump)) */ - __pyx_t_13 = PyNumber_Add(__pyx_v_pathlen, __pyx_v_jump); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 881; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyNumber_Add(__pyx_v_pathlen, __pyx_v_jump); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 875; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_13); - __pyx_t_10 = PyTuple_New(3); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 881; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyTuple_New(3); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 875; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_INCREF(__pyx_v_next_id); PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_v_next_id); @@ -44419,26 +44380,26 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ __pyx_v_newel = __pyx_t_10; __pyx_t_10 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":882 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":876 * if (fwords[next_id][alt_id][0] != EPSILON): * newel = (next_id,alt_id,pathlen+jump) * if newel not in frontier: # <<<<<<<<<<<<<< * frontier.append((next_id,alt_id,pathlen+jump)) * return frontier */ - __pyx_t_5 = (__Pyx_PySequence_Contains(((PyObject *)__pyx_v_newel), ((PyObject *)__pyx_v_frontier), Py_NE)); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 882; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = (__Pyx_PySequence_Contains(((PyObject *)__pyx_v_newel), ((PyObject *)__pyx_v_frontier), Py_NE)); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 876; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":883 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":877 * newel = (next_id,alt_id,pathlen+jump) * if newel not in frontier: * frontier.append((next_id,alt_id,pathlen+jump)) # <<<<<<<<<<<<<< * return frontier * */ - __pyx_t_10 = PyNumber_Add(__pyx_v_pathlen, __pyx_v_jump); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 883; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyNumber_Add(__pyx_v_pathlen, __pyx_v_jump); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 877; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_13 = PyTuple_New(3); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 883; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyTuple_New(3); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 877; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_13); __Pyx_INCREF(__pyx_v_next_id); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_v_next_id); @@ -44449,7 +44410,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ PyTuple_SET_ITEM(__pyx_t_13, 2, __pyx_t_10); __Pyx_GIVEREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_14 = PyList_Append(__pyx_v_frontier, ((PyObject *)__pyx_t_13)); if (unlikely(__pyx_t_14 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 883; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyList_Append(__pyx_v_frontier, ((PyObject *)__pyx_t_13)); if (unlikely(__pyx_t_14 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 877; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(((PyObject *)__pyx_t_13)); __pyx_t_13 = 0; goto __pyx_L14; } @@ -44468,7 +44429,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_14get_all_nodes_isteps_ } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":884 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":878 * if newel not in frontier: * frontier.append((next_id,alt_id,pathlen+jump)) * return frontier # <<<<<<<<<<<<<< @@ -44535,16 +44496,16 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_17reachable(PyObject *_ case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__ifrom)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("reachable", 1, 3, 3, 1); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 886; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("reachable", 1, 3, 3, 1); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 880; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__dist)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("reachable", 1, 3, 3, 2); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 886; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("reachable", 1, 3, 3, 2); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 880; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "reachable") < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 886; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "reachable") < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 880; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 3) { goto __pyx_L5_argtuple_error; @@ -44559,7 +44520,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_17reachable(PyObject *_ } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("reachable", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 886; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("reachable", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 880; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_sa.HieroCachingRuleFactory.reachable", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -44570,7 +44531,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_17reachable(PyObject *_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":886 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":880 * return frontier * * def reachable(self, fwords, ifrom, dist): # <<<<<<<<<<<<<< @@ -44600,35 +44561,35 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_16reachable(struct __py int __pyx_clineno = 0; __Pyx_RefNannySetupContext("reachable", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":887 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":881 * * def reachable(self, fwords, ifrom, dist): * ret = [] # <<<<<<<<<<<<<< * if (ifrom >= len(fwords)): * return ret */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 887; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 881; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_ret = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":888 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":882 * def reachable(self, fwords, ifrom, dist): * ret = [] * if (ifrom >= len(fwords)): # <<<<<<<<<<<<<< * return ret * for alt_id in range(len(fwords[ifrom])): */ - __pyx_t_2 = PyObject_Length(__pyx_v_fwords); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 888; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_1 = PyInt_FromSsize_t(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 888; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Length(__pyx_v_fwords); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 882; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromSsize_t(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 882; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyObject_RichCompare(__pyx_v_ifrom, __pyx_t_1, Py_GE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 888; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_RichCompare(__pyx_v_ifrom, __pyx_t_1, Py_GE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 882; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 888; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 882; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":889 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":883 * ret = [] * if (ifrom >= len(fwords)): * return ret # <<<<<<<<<<<<<< @@ -44643,32 +44604,32 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_16reachable(struct __py } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":890 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":884 * if (ifrom >= len(fwords)): * return ret * for alt_id in range(len(fwords[ifrom])): # <<<<<<<<<<<<<< * if (fwords[ifrom][alt_id][0] == EPSILON): * ret.extend(self.reachable(fwords,ifrom+fwords[ifrom][alt_id][2],dist)) */ - __pyx_t_3 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_ifrom); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 890; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_ifrom); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 884; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 890; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 884; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyInt_FromSsize_t(__pyx_t_2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 890; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromSsize_t(__pyx_t_2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 884; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 890; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 884; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_3); __Pyx_GIVEREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(__pyx_builtin_range, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 890; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(__pyx_builtin_range, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 884; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; if (PyList_CheckExact(__pyx_t_3) || PyTuple_CheckExact(__pyx_t_3)) { __pyx_t_1 = __pyx_t_3; __Pyx_INCREF(__pyx_t_1); __pyx_t_2 = 0; __pyx_t_5 = NULL; } else { - __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 890; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 884; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_5 = Py_TYPE(__pyx_t_1)->tp_iternext; } @@ -44677,23 +44638,23 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_16reachable(struct __py if (!__pyx_t_5 && PyList_CheckExact(__pyx_t_1)) { if (__pyx_t_2 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_3); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 890; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_3); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 884; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 890; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 884; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else if (!__pyx_t_5 && PyTuple_CheckExact(__pyx_t_1)) { if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_3); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 890; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_3); __pyx_t_2++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 884; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 890; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 884; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { __pyx_t_3 = __pyx_t_5(__pyx_t_1); if (unlikely(!__pyx_t_3)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 890; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 884; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -44703,53 +44664,53 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_16reachable(struct __py __pyx_v_alt_id = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":891 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":885 * return ret * for alt_id in range(len(fwords[ifrom])): * if (fwords[ifrom][alt_id][0] == EPSILON): # <<<<<<<<<<<<<< * ret.extend(self.reachable(fwords,ifrom+fwords[ifrom][alt_id][2],dist)) * else: */ - __pyx_t_3 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_ifrom); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 891; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_ifrom); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 885; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_6 = PyObject_GetItem(__pyx_t_3, __pyx_v_alt_id); if (!__pyx_t_6) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 891; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyObject_GetItem(__pyx_t_3, __pyx_v_alt_id); if (!__pyx_t_6) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 885; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetItemInt(__pyx_t_6, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 891; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_GetItemInt(__pyx_t_6, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 885; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyInt_FromLong(__pyx_v_3_sa_EPSILON); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 891; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyInt_FromLong(__pyx_v_3_sa_EPSILON); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 885; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = PyObject_RichCompare(__pyx_t_3, __pyx_t_6, Py_EQ); __Pyx_XGOTREF(__pyx_t_7); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 891; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_RichCompare(__pyx_t_3, __pyx_t_6, Py_EQ); __Pyx_XGOTREF(__pyx_t_7); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 885; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 891; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 885; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":892 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":886 * for alt_id in range(len(fwords[ifrom])): * if (fwords[ifrom][alt_id][0] == EPSILON): * ret.extend(self.reachable(fwords,ifrom+fwords[ifrom][alt_id][2],dist)) # <<<<<<<<<<<<<< * else: * if (dist==0): */ - __pyx_t_7 = PyObject_GetAttr(((PyObject *)__pyx_v_ret), __pyx_n_s__extend); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_GetAttr(((PyObject *)__pyx_v_ret), __pyx_n_s__extend); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 886; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_6 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__reachable); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__reachable); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 886; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_3 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_ifrom); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_ifrom); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 886; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = PyObject_GetItem(__pyx_t_3, __pyx_v_alt_id); if (!__pyx_t_8) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyObject_GetItem(__pyx_t_3, __pyx_v_alt_id); if (!__pyx_t_8) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 886; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetItemInt(__pyx_t_8, 2, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_GetItemInt(__pyx_t_8, 2, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 886; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Add(__pyx_v_ifrom, __pyx_t_3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyNumber_Add(__pyx_v_ifrom, __pyx_t_3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 886; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 886; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(__pyx_v_fwords); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_fwords); @@ -44760,16 +44721,16 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_16reachable(struct __py PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_v_dist); __Pyx_GIVEREF(__pyx_v_dist); __pyx_t_8 = 0; - __pyx_t_8 = PyObject_Call(__pyx_t_6, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyObject_Call(__pyx_t_6, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 886; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 886; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_8); __Pyx_GIVEREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyObject_Call(__pyx_t_7, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyObject_Call(__pyx_t_7, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 886; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; @@ -44778,36 +44739,36 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_16reachable(struct __py } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":894 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":888 * ret.extend(self.reachable(fwords,ifrom+fwords[ifrom][alt_id][2],dist)) * else: * if (dist==0): # <<<<<<<<<<<<<< * if (ifrom not in ret): * ret.append(ifrom) */ - __pyx_t_8 = PyObject_RichCompare(__pyx_v_dist, __pyx_int_0, Py_EQ); __Pyx_XGOTREF(__pyx_t_8); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 894; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_8); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 894; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyObject_RichCompare(__pyx_v_dist, __pyx_int_0, Py_EQ); __Pyx_XGOTREF(__pyx_t_8); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 888; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_8); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 888; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":895 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":889 * else: * if (dist==0): * if (ifrom not in ret): # <<<<<<<<<<<<<< * ret.append(ifrom) * else: */ - __pyx_t_4 = (__Pyx_PySequence_Contains(__pyx_v_ifrom, ((PyObject *)__pyx_v_ret), Py_NE)); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 895; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = (__Pyx_PySequence_Contains(__pyx_v_ifrom, ((PyObject *)__pyx_v_ret), Py_NE)); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 889; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":896 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":890 * if (dist==0): * if (ifrom not in ret): * ret.append(ifrom) # <<<<<<<<<<<<<< * else: * for ifromchild in self.reachable(fwords,ifrom+fwords[ifrom][alt_id][2],dist-1): */ - __pyx_t_9 = PyList_Append(__pyx_v_ret, __pyx_v_ifrom); if (unlikely(__pyx_t_9 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 896; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyList_Append(__pyx_v_ret, __pyx_v_ifrom); if (unlikely(__pyx_t_9 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 890; __pyx_clineno = __LINE__; goto __pyx_L1_error;} goto __pyx_L8; } __pyx_L8:; @@ -44815,29 +44776,29 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_16reachable(struct __py } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":898 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":892 * ret.append(ifrom) * else: * for ifromchild in self.reachable(fwords,ifrom+fwords[ifrom][alt_id][2],dist-1): # <<<<<<<<<<<<<< * if (ifromchild not in ret): * ret.append(ifromchild) */ - __pyx_t_8 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__reachable); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__reachable); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_3 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_ifrom); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_ifrom); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_7 = PyObject_GetItem(__pyx_t_3, __pyx_v_alt_id); if (!__pyx_t_7) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_GetItem(__pyx_t_3, __pyx_v_alt_id); if (!__pyx_t_7) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetItemInt(__pyx_t_7, 2, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_GetItemInt(__pyx_t_7, 2, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = PyNumber_Add(__pyx_v_ifrom, __pyx_t_3); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyNumber_Add(__pyx_v_ifrom, __pyx_t_3); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyNumber_Subtract(__pyx_v_dist, __pyx_int_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyNumber_Subtract(__pyx_v_dist, __pyx_int_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_6 = PyTuple_New(3); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyTuple_New(3); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_INCREF(__pyx_v_fwords); PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_v_fwords); @@ -44848,7 +44809,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_16reachable(struct __py __Pyx_GIVEREF(__pyx_t_3); __pyx_t_7 = 0; __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(__pyx_t_8, ((PyObject *)__pyx_t_6), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(__pyx_t_8, ((PyObject *)__pyx_t_6), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0; @@ -44856,7 +44817,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_16reachable(struct __py __pyx_t_6 = __pyx_t_3; __Pyx_INCREF(__pyx_t_6); __pyx_t_10 = 0; __pyx_t_11 = NULL; } else { - __pyx_t_10 = -1; __pyx_t_6 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = -1; __pyx_t_6 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __pyx_t_11 = Py_TYPE(__pyx_t_6)->tp_iternext; } @@ -44865,23 +44826,23 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_16reachable(struct __py if (!__pyx_t_11 && PyList_CheckExact(__pyx_t_6)) { if (__pyx_t_10 >= PyList_GET_SIZE(__pyx_t_6)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_6, __pyx_t_10); __Pyx_INCREF(__pyx_t_3); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_6, __pyx_t_10); __Pyx_INCREF(__pyx_t_3); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_6, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PySequence_ITEM(__pyx_t_6, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else if (!__pyx_t_11 && PyTuple_CheckExact(__pyx_t_6)) { if (__pyx_t_10 >= PyTuple_GET_SIZE(__pyx_t_6)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_6, __pyx_t_10); __Pyx_INCREF(__pyx_t_3); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_6, __pyx_t_10); __Pyx_INCREF(__pyx_t_3); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_6, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PySequence_ITEM(__pyx_t_6, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { __pyx_t_3 = __pyx_t_11(__pyx_t_6); if (unlikely(!__pyx_t_3)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 892; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -44891,24 +44852,24 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_16reachable(struct __py __pyx_v_ifromchild = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":899 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":893 * else: * for ifromchild in self.reachable(fwords,ifrom+fwords[ifrom][alt_id][2],dist-1): * if (ifromchild not in ret): # <<<<<<<<<<<<<< * ret.append(ifromchild) * */ - __pyx_t_4 = (__Pyx_PySequence_Contains(__pyx_v_ifromchild, ((PyObject *)__pyx_v_ret), Py_NE)); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 899; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = (__Pyx_PySequence_Contains(__pyx_v_ifromchild, ((PyObject *)__pyx_v_ret), Py_NE)); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 893; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":900 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":894 * for ifromchild in self.reachable(fwords,ifrom+fwords[ifrom][alt_id][2],dist-1): * if (ifromchild not in ret): * ret.append(ifromchild) # <<<<<<<<<<<<<< * * return ret */ - __pyx_t_9 = PyList_Append(__pyx_v_ret, __pyx_v_ifromchild); if (unlikely(__pyx_t_9 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 900; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyList_Append(__pyx_v_ret, __pyx_v_ifromchild); if (unlikely(__pyx_t_9 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 894; __pyx_clineno = __LINE__; goto __pyx_L1_error;} goto __pyx_L11; } __pyx_L11:; @@ -44921,7 +44882,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_16reachable(struct __py } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":902 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":896 * ret.append(ifromchild) * * return ret # <<<<<<<<<<<<<< @@ -44982,16 +44943,16 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_19shortest(PyObject *__ case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__ifrom)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("shortest", 1, 3, 3, 1); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 904; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("shortest", 1, 3, 3, 1); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__ito)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("shortest", 1, 3, 3, 2); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 904; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("shortest", 1, 3, 3, 2); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "shortest") < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 904; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "shortest") < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 3) { goto __pyx_L5_argtuple_error; @@ -45006,7 +44967,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_19shortest(PyObject *__ } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("shortest", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 904; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("shortest", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 898; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_sa.HieroCachingRuleFactory.shortest", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -45017,7 +44978,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_19shortest(PyObject *__ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":904 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":898 * return ret * * def shortest(self, fwords, ifrom, ito): # <<<<<<<<<<<<<< @@ -45042,7 +45003,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_18shortest(struct __pyx int __pyx_clineno = 0; __Pyx_RefNannySetupContext("shortest", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":906 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":900 * def shortest(self, fwords, ifrom, ito): * cdef unsigned alt_id * min = 1000 # <<<<<<<<<<<<<< @@ -45052,19 +45013,19 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_18shortest(struct __pyx __Pyx_INCREF(__pyx_int_1000); __pyx_v_min = __pyx_int_1000; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":907 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":901 * cdef unsigned alt_id * min = 1000 * if (ifrom > ito): # <<<<<<<<<<<<<< * return min * if (ifrom == ito): */ - __pyx_t_1 = PyObject_RichCompare(__pyx_v_ifrom, __pyx_v_ito, Py_GT); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 907; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 907; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_RichCompare(__pyx_v_ifrom, __pyx_v_ito, Py_GT); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 901; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 901; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":908 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":902 * min = 1000 * if (ifrom > ito): * return min # <<<<<<<<<<<<<< @@ -45079,19 +45040,19 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_18shortest(struct __pyx } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":909 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":903 * if (ifrom > ito): * return min * if (ifrom == ito): # <<<<<<<<<<<<<< * return 0 * for alt_id in range(len(fwords[ifrom])): */ - __pyx_t_1 = PyObject_RichCompare(__pyx_v_ifrom, __pyx_v_ito, Py_EQ); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 909; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 909; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_RichCompare(__pyx_v_ifrom, __pyx_v_ito, Py_EQ); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 903; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 903; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":910 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":904 * return min * if (ifrom == ito): * return 0 # <<<<<<<<<<<<<< @@ -45106,41 +45067,41 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_18shortest(struct __pyx } __pyx_L4:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":911 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":905 * if (ifrom == ito): * return 0 * for alt_id in range(len(fwords[ifrom])): # <<<<<<<<<<<<<< * currmin = self.shortest(fwords,ifrom+fwords[ifrom][alt_id][2],ito) * if (fwords[ifrom][alt_id][0] != EPSILON): */ - __pyx_t_1 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_ifrom); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 911; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_ifrom); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 905; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 911; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 905; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { __pyx_v_alt_id = __pyx_t_4; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":912 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":906 * return 0 * for alt_id in range(len(fwords[ifrom])): * currmin = self.shortest(fwords,ifrom+fwords[ifrom][alt_id][2],ito) # <<<<<<<<<<<<<< * if (fwords[ifrom][alt_id][0] != EPSILON): * currmin += 1 */ - __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__shortest); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 912; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self), __pyx_n_s__shortest); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 906; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_ifrom); if (!__pyx_t_5) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 912; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_GetItem(__pyx_v_fwords, __pyx_v_ifrom); if (!__pyx_t_5) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 906; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = __Pyx_GetItemInt(__pyx_t_5, __pyx_v_alt_id, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_6) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 912; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_GetItemInt(__pyx_t_5, __pyx_v_alt_id, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_6) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 906; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = __Pyx_GetItemInt(__pyx_t_6, 2, sizeof(long), PyInt_FromLong); if (!__pyx_t_5) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 912; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_GetItemInt(__pyx_t_6, 2, sizeof(long), PyInt_FromLong); if (!__pyx_t_5) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 906; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Add(__pyx_v_ifrom, __pyx_t_5); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 912; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyNumber_Add(__pyx_v_ifrom, __pyx_t_5); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 906; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyTuple_New(3); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 912; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyTuple_New(3); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 906; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_INCREF(__pyx_v_fwords); PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_v_fwords); @@ -45151,7 +45112,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_18shortest(struct __pyx PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_v_ito); __Pyx_GIVEREF(__pyx_v_ito); __pyx_t_6 = 0; - __pyx_t_6 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 912; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 906; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; @@ -45159,38 +45120,38 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_18shortest(struct __pyx __pyx_v_currmin = __pyx_t_6; __pyx_t_6 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":913 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":907 * for alt_id in range(len(fwords[ifrom])): * currmin = self.shortest(fwords,ifrom+fwords[ifrom][alt_id][2],ito) * if (fwords[ifrom][alt_id][0] != EPSILON): # <<<<<<<<<<<<<< * currmin += 1 * if (currmin 0) { @@ -45294,7 +45255,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_21get_next_states(PyObj } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "get_next_states") < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 919; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "get_next_states") < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 913; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -45311,7 +45272,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_21get_next_states(PyObj } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("get_next_states", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 919; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("get_next_states", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 913; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_sa.HieroCachingRuleFactory.get_next_states", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -45322,7 +45283,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_21get_next_states(PyObj return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":919 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":913 * return min * * def get_next_states(self, _columns, curr_idx, min_dist=2): # <<<<<<<<<<<<<< @@ -45355,26 +45316,26 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_20get_next_states(struc int __pyx_clineno = 0; __Pyx_RefNannySetupContext("get_next_states", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":920 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":914 * * def get_next_states(self, _columns, curr_idx, min_dist=2): * result = [] # <<<<<<<<<<<<<< * candidate = [[curr_idx,0]] * */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 920; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 914; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_result = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":921 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":915 * def get_next_states(self, _columns, curr_idx, min_dist=2): * result = [] * candidate = [[curr_idx,0]] # <<<<<<<<<<<<<< * * while len(candidate) > 0: */ - __pyx_t_1 = PyList_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 921; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 915; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(__pyx_v_curr_idx); PyList_SET_ITEM(__pyx_t_1, 0, __pyx_v_curr_idx); @@ -45382,7 +45343,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_20get_next_states(struc __Pyx_INCREF(__pyx_int_0); PyList_SET_ITEM(__pyx_t_1, 1, __pyx_int_0); __Pyx_GIVEREF(__pyx_int_0); - __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 921; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 915; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); PyList_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_t_1)); __Pyx_GIVEREF(((PyObject *)__pyx_t_1)); @@ -45390,7 +45351,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_20get_next_states(struc __pyx_v_candidate = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":923 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":917 * candidate = [[curr_idx,0]] * * while len(candidate) > 0: # <<<<<<<<<<<<<< @@ -45398,43 +45359,43 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_20get_next_states(struc * if curr[0] >= len(_columns): */ while (1) { - __pyx_t_3 = PyList_GET_SIZE(((PyObject *)__pyx_v_candidate)); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 923; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyList_GET_SIZE(((PyObject *)__pyx_v_candidate)); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 917; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_4 = (__pyx_t_3 > 0); if (!__pyx_t_4) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":924 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":918 * * while len(candidate) > 0: * curr = candidate.pop() # <<<<<<<<<<<<<< * if curr[0] >= len(_columns): * continue */ - __pyx_t_2 = __Pyx_PyObject_Pop(((PyObject *)__pyx_v_candidate)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 924; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_Pop(((PyObject *)__pyx_v_candidate)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 918; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_XDECREF(__pyx_v_curr); __pyx_v_curr = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":925 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":919 * while len(candidate) > 0: * curr = candidate.pop() * if curr[0] >= len(_columns): # <<<<<<<<<<<<<< * continue * if curr[0] not in result and min_dist <= curr[1] <= self.max_initial_size: */ - __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_curr, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 925; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_curr, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 919; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_Length(__pyx_v__columns); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 925; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_1 = PyInt_FromSsize_t(__pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 925; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Length(__pyx_v__columns); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 919; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromSsize_t(__pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 919; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_RichCompare(__pyx_t_2, __pyx_t_1, Py_GE); __Pyx_XGOTREF(__pyx_t_5); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 925; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_RichCompare(__pyx_t_2, __pyx_t_1, Py_GE); __Pyx_XGOTREF(__pyx_t_5); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 919; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 925; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 919; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":926 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":920 * curr = candidate.pop() * if curr[0] >= len(_columns): * continue # <<<<<<<<<<<<<< @@ -45446,30 +45407,30 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_20get_next_states(struc } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":927 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":921 * if curr[0] >= len(_columns): * continue * if curr[0] not in result and min_dist <= curr[1] <= self.max_initial_size: # <<<<<<<<<<<<<< * result.append(curr[0]); * curr_col = _columns[curr[0]] */ - __pyx_t_5 = __Pyx_GetItemInt(__pyx_v_curr, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_5) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 927; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_GetItemInt(__pyx_v_curr, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_5) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 921; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_4 = (__Pyx_PySequence_Contains(__pyx_t_5, ((PyObject *)__pyx_v_result), Py_NE)); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 927; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = (__Pyx_PySequence_Contains(__pyx_t_5, ((PyObject *)__pyx_v_result), Py_NE)); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 921; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_4) { - __pyx_t_5 = __Pyx_GetItemInt(__pyx_v_curr, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_5) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 927; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_GetItemInt(__pyx_v_curr, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_5) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 921; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_1 = PyObject_RichCompare(__pyx_v_min_dist, __pyx_t_5, Py_LE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 927; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_RichCompare(__pyx_v_min_dist, __pyx_t_5, Py_LE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 921; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_PyObject_IsTrue(__pyx_t_1)) { __Pyx_DECREF(__pyx_t_1); - __pyx_t_2 = PyInt_FromLong(__pyx_v_self->max_initial_size); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 927; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_v_self->max_initial_size); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 921; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyObject_RichCompare(__pyx_t_5, __pyx_t_2, Py_LE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 927; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_RichCompare(__pyx_t_5, __pyx_t_2, Py_LE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 921; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 927; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 921; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_7 = __pyx_t_6; } else { @@ -45477,38 +45438,38 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_20get_next_states(struc } if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":928 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":922 * continue * if curr[0] not in result and min_dist <= curr[1] <= self.max_initial_size: * result.append(curr[0]); # <<<<<<<<<<<<<< * curr_col = _columns[curr[0]] * for alt in curr_col: */ - __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_curr, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 928; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_curr, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 922; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_8 = PyList_Append(__pyx_v_result, __pyx_t_1); if (unlikely(__pyx_t_8 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 928; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyList_Append(__pyx_v_result, __pyx_t_1); if (unlikely(__pyx_t_8 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 922; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L6; } __pyx_L6:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":929 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":923 * if curr[0] not in result and min_dist <= curr[1] <= self.max_initial_size: * result.append(curr[0]); * curr_col = _columns[curr[0]] # <<<<<<<<<<<<<< * for alt in curr_col: * next_id = curr[0]+alt[2] */ - __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_curr, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 929; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_curr, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 923; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_GetItem(__pyx_v__columns, __pyx_t_1); if (!__pyx_t_5) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 929; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_GetItem(__pyx_v__columns, __pyx_t_1); if (!__pyx_t_5) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 923; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_XDECREF(__pyx_v_curr_col); __pyx_v_curr_col = __pyx_t_5; __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":930 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":924 * result.append(curr[0]); * curr_col = _columns[curr[0]] * for alt in curr_col: # <<<<<<<<<<<<<< @@ -45519,7 +45480,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_20get_next_states(struc __pyx_t_5 = __pyx_v_curr_col; __Pyx_INCREF(__pyx_t_5); __pyx_t_3 = 0; __pyx_t_9 = NULL; } else { - __pyx_t_3 = -1; __pyx_t_5 = PyObject_GetIter(__pyx_v_curr_col); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 930; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = -1; __pyx_t_5 = PyObject_GetIter(__pyx_v_curr_col); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 924; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __pyx_t_9 = Py_TYPE(__pyx_t_5)->tp_iternext; } @@ -45527,23 +45488,23 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_20get_next_states(struc if (!__pyx_t_9 && PyList_CheckExact(__pyx_t_5)) { if (__pyx_t_3 >= PyList_GET_SIZE(__pyx_t_5)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_3); __Pyx_INCREF(__pyx_t_1); __pyx_t_3++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 930; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_3); __Pyx_INCREF(__pyx_t_1); __pyx_t_3++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 924; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 930; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 924; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else if (!__pyx_t_9 && PyTuple_CheckExact(__pyx_t_5)) { if (__pyx_t_3 >= PyTuple_GET_SIZE(__pyx_t_5)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_5, __pyx_t_3); __Pyx_INCREF(__pyx_t_1); __pyx_t_3++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 930; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_5, __pyx_t_3); __Pyx_INCREF(__pyx_t_1); __pyx_t_3++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 924; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 930; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 924; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { __pyx_t_1 = __pyx_t_9(__pyx_t_5); if (unlikely(!__pyx_t_1)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 930; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 924; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -45553,18 +45514,18 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_20get_next_states(struc __pyx_v_alt = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":931 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":925 * curr_col = _columns[curr[0]] * for alt in curr_col: * next_id = curr[0]+alt[2] # <<<<<<<<<<<<<< * jump = 1 * if (alt[0] == EPSILON): */ - __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_curr, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 931; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_curr, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 925; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_alt, 2, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 931; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_alt, 2, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 925; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_10 = PyNumber_Add(__pyx_t_1, __pyx_t_2); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 931; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyNumber_Add(__pyx_t_1, __pyx_t_2); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 925; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; @@ -45572,7 +45533,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_20get_next_states(struc __pyx_v_next_id = __pyx_t_10; __pyx_t_10 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":932 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":926 * for alt in curr_col: * next_id = curr[0]+alt[2] * jump = 1 # <<<<<<<<<<<<<< @@ -45583,25 +45544,25 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_20get_next_states(struc __Pyx_XDECREF(__pyx_v_jump); __pyx_v_jump = __pyx_int_1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":933 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":927 * next_id = curr[0]+alt[2] * jump = 1 * if (alt[0] == EPSILON): # <<<<<<<<<<<<<< * jump = 0 * if next_id not in result and min_dist <= curr[1]+jump <= self.max_initial_size+1: */ - __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_alt, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_10) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 933; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_alt, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_10) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 927; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_2 = PyInt_FromLong(__pyx_v_3_sa_EPSILON); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 933; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_v_3_sa_EPSILON); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 927; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyObject_RichCompare(__pyx_t_10, __pyx_t_2, Py_EQ); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 933; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_RichCompare(__pyx_t_10, __pyx_t_2, Py_EQ); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 927; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 933; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 927; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":934 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":928 * jump = 1 * if (alt[0] == EPSILON): * jump = 0 # <<<<<<<<<<<<<< @@ -45615,30 +45576,30 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_20get_next_states(struc } __pyx_L9:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":935 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":929 * if (alt[0] == EPSILON): * jump = 0 * if next_id not in result and min_dist <= curr[1]+jump <= self.max_initial_size+1: # <<<<<<<<<<<<<< * candidate.append([next_id,curr[1]+jump]) * return sorted(result); */ - __pyx_t_7 = (__Pyx_PySequence_Contains(__pyx_v_next_id, ((PyObject *)__pyx_v_result), Py_NE)); if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 935; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = (__Pyx_PySequence_Contains(__pyx_v_next_id, ((PyObject *)__pyx_v_result), Py_NE)); if (unlikely(__pyx_t_7 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 929; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_7) { - __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_curr, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 935; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_curr, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 929; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyNumber_Add(__pyx_t_1, __pyx_v_jump); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 935; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_t_1, __pyx_v_jump); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 929; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyObject_RichCompare(__pyx_v_min_dist, __pyx_t_2, Py_LE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 935; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_RichCompare(__pyx_v_min_dist, __pyx_t_2, Py_LE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 929; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_PyObject_IsTrue(__pyx_t_1)) { __Pyx_DECREF(__pyx_t_1); - __pyx_t_10 = PyInt_FromLong((__pyx_v_self->max_initial_size + 1)); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 935; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyInt_FromLong((__pyx_v_self->max_initial_size + 1)); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 929; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_1 = PyObject_RichCompare(__pyx_t_2, __pyx_t_10, Py_LE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 935; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_RichCompare(__pyx_t_2, __pyx_t_10, Py_LE); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 929; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 935; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 929; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_6 = __pyx_t_4; } else { @@ -45646,19 +45607,19 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_20get_next_states(struc } if (__pyx_t_6) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":936 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":930 * jump = 0 * if next_id not in result and min_dist <= curr[1]+jump <= self.max_initial_size+1: * candidate.append([next_id,curr[1]+jump]) # <<<<<<<<<<<<<< * return sorted(result); * */ - __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_curr, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 936; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_curr, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 930; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyNumber_Add(__pyx_t_1, __pyx_v_jump); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 936; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_t_1, __pyx_v_jump); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 930; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyList_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 936; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 930; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(__pyx_v_next_id); PyList_SET_ITEM(__pyx_t_1, 0, __pyx_v_next_id); @@ -45666,7 +45627,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_20get_next_states(struc PyList_SET_ITEM(__pyx_t_1, 1, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_8 = PyList_Append(__pyx_v_candidate, ((PyObject *)__pyx_t_1)); if (unlikely(__pyx_t_8 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 936; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyList_Append(__pyx_v_candidate, ((PyObject *)__pyx_t_1)); if (unlikely(__pyx_t_8 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 930; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; goto __pyx_L10; } @@ -45676,7 +45637,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_20get_next_states(struc __pyx_L3_continue:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":937 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":931 * if next_id not in result and min_dist <= curr[1]+jump <= self.max_initial_size+1: * candidate.append([next_id,curr[1]+jump]) * return sorted(result); # <<<<<<<<<<<<<< @@ -45684,12 +45645,12 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_20get_next_states(struc * def input(self, fwords, meta): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 937; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 931; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_INCREF(((PyObject *)__pyx_v_result)); PyTuple_SET_ITEM(__pyx_t_5, 0, ((PyObject *)__pyx_v_result)); __Pyx_GIVEREF(((PyObject *)__pyx_v_result)); - __pyx_t_1 = PyObject_Call(__pyx_builtin_sorted, ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 937; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(__pyx_builtin_sorted, ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 931; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0; __pyx_r = __pyx_t_1; @@ -45748,11 +45709,11 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_23input(PyObject *__pyx case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__meta)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("input", 1, 2, 2, 1); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 939; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("input", 1, 2, 2, 1); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 933; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "input") < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 939; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "input") < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 933; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 2) { goto __pyx_L5_argtuple_error; @@ -45765,7 +45726,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_23input(PyObject *__pyx } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("input", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 939; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("input", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[8]; __pyx_lineno = 933; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("_sa.HieroCachingRuleFactory.input", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -45800,7 +45761,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_5input_7lambda1_lambda2 return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1099 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1093 * if len(extracts) > 0: * fcount = Counter() * fphrases = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) # <<<<<<<<<<<<<< @@ -45819,14 +45780,14 @@ static PyObject *__pyx_lambda_funcdef_lambda2(CYTHON_UNUSED PyObject *__pyx_self int __pyx_clineno = 0; __Pyx_RefNannySetupContext("lambda2", 0); __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__defaultdict); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__defaultdict); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1093; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1093; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(((PyObject *)((PyObject*)(&PyList_Type)))); PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)((PyObject*)(&PyList_Type)))); __Pyx_GIVEREF(((PyObject *)((PyObject*)(&PyList_Type)))); - __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1093; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; @@ -45859,16 +45820,16 @@ static PyObject *__pyx_lambda_funcdef_lambda1(CYTHON_UNUSED PyObject *__pyx_self int __pyx_clineno = 0; __Pyx_RefNannySetupContext("lambda1", 0); __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__defaultdict); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__defaultdict); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1093; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_CyFunction_NewEx(&__pyx_mdef_3_sa_23HieroCachingRuleFactory_5input_7lambda1_lambda2, 0, NULL, __pyx_n_s___sa, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_CyFunction_NewEx(&__pyx_mdef_3_sa_23HieroCachingRuleFactory_5input_7lambda1_lambda2, 0, NULL, __pyx_n_s___sa, NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1093; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1093; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1093; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; @@ -45902,7 +45863,7 @@ static PyObject *__pyx_pw_3_sa_23HieroCachingRuleFactory_5input_1lambda3(PyObjec return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1105 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1099 * for f, elist in fphrases.iteritems(): * for e, alslist in elist.iteritems(): * alignment, max_locs = max(alslist.iteritems(), key=lambda x: len(x[1])) # <<<<<<<<<<<<<< @@ -45920,11 +45881,11 @@ static PyObject *__pyx_lambda_funcdef_lambda3(CYTHON_UNUSED PyObject *__pyx_self int __pyx_clineno = 0; __Pyx_RefNannySetupContext("lambda3", 0); __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_x, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_x, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyInt_FromSsize_t(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromSsize_t(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; @@ -45942,7 +45903,7 @@ static PyObject *__pyx_lambda_funcdef_lambda3(CYTHON_UNUSED PyObject *__pyx_self return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":939 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":933 * return sorted(result); * * def input(self, fwords, meta): # <<<<<<<<<<<<<< @@ -45974,7 +45935,7 @@ static PyObject *__pyx_pf_3_sa_23HieroCachingRuleFactory_22input(struct __pyx_ob __Pyx_INCREF(__pyx_cur_scope->__pyx_v_meta); __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_meta); { - __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 939; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_GeneratorObject *gen = __Pyx_Generator_New((__pyx_generator_body_t) __pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4, (PyObject *) __pyx_cur_scope); if (unlikely(!gen)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 933; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_cur_scope); __Pyx_RefNannyFinishContext(); return (PyObject *) gen; @@ -46034,19 +45995,19 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene return NULL; } __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 939; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 933; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":950 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":944 * cdef Phrase hiero_phrase * * flen = len(fwords) # <<<<<<<<<<<<<< * start_time = monitor_cpu() * self.extract_time = 0.0 */ - __pyx_t_1 = PyObject_Length(__pyx_cur_scope->__pyx_v_fwords); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 950; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Length(__pyx_cur_scope->__pyx_v_fwords); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 944; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_cur_scope->__pyx_v_flen = __pyx_t_1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":951 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":945 * * flen = len(fwords) * start_time = monitor_cpu() # <<<<<<<<<<<<<< @@ -46055,7 +46016,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene */ __pyx_cur_scope->__pyx_v_start_time = __pyx_f_3_sa_monitor_cpu(); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":952 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":946 * flen = len(fwords) * start_time = monitor_cpu() * self.extract_time = 0.0 # <<<<<<<<<<<<<< @@ -46064,20 +46025,20 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene */ __pyx_cur_scope->__pyx_v_self->extract_time = 0.0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":953 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":947 * start_time = monitor_cpu() * self.extract_time = 0.0 * nodes_isteps_away_buffer = {} # <<<<<<<<<<<<<< * hit = 0 * reachable_buffer = {} */ - __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 953; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 947; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); __Pyx_GIVEREF(((PyObject *)__pyx_t_2)); __pyx_cur_scope->__pyx_v_nodes_isteps_away_buffer = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":954 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":948 * self.extract_time = 0.0 * nodes_isteps_away_buffer = {} * hit = 0 # <<<<<<<<<<<<<< @@ -46086,33 +46047,33 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene */ __pyx_cur_scope->__pyx_v_hit = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":955 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":949 * nodes_isteps_away_buffer = {} * hit = 0 * reachable_buffer = {} # <<<<<<<<<<<<<< * * # Do not cache between sentences */ - __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 955; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 949; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); __Pyx_GIVEREF(((PyObject *)__pyx_t_2)); __pyx_cur_scope->__pyx_v_reachable_buffer = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":958 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":952 * * # Do not cache between sentences * self.rules.root = ExtendedTrieNode(phrase_location=PhraseLocation()) # <<<<<<<<<<<<<< * * frontier = [] */ - __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 958; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 952; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_PhraseLocation)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 958; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_PhraseLocation)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 952; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__phrase_location), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 958; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_2, ((PyObject *)__pyx_n_s__phrase_location), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 952; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_ExtendedTrieNode)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_2)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 958; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_ExtendedTrieNode)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_2)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 952; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; __Pyx_GIVEREF(__pyx_t_3); @@ -46121,91 +46082,91 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_self->rules->root = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":960 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":954 * self.rules.root = ExtendedTrieNode(phrase_location=PhraseLocation()) * * frontier = [] # <<<<<<<<<<<<<< * for i in range(len(fwords)): * for alt in range(0, len(fwords[i])): */ - __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 960; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 954; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_GIVEREF(((PyObject *)__pyx_t_3)); __pyx_cur_scope->__pyx_v_frontier = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":961 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":955 * * frontier = [] * for i in range(len(fwords)): # <<<<<<<<<<<<<< * for alt in range(0, len(fwords[i])): * if fwords[i][alt][0] != EPSILON: */ - __pyx_t_1 = PyObject_Length(__pyx_cur_scope->__pyx_v_fwords); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 961; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Length(__pyx_cur_scope->__pyx_v_fwords); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 955; __pyx_clineno = __LINE__; goto __pyx_L1_error;} for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_1; __pyx_t_4+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_4; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":962 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":956 * frontier = [] * for i in range(len(fwords)): * for alt in range(0, len(fwords[i])): # <<<<<<<<<<<<<< * if fwords[i][alt][0] != EPSILON: * frontier.append((i, i, (i,), alt, 0, self.rules.root, (), False)) */ - __pyx_t_3 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_fwords, __pyx_cur_scope->__pyx_v_i, sizeof(int), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 962; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_fwords, __pyx_cur_scope->__pyx_v_i, sizeof(int), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 956; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 962; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 956; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) { __pyx_cur_scope->__pyx_v_alt = __pyx_t_6; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":963 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":957 * for i in range(len(fwords)): * for alt in range(0, len(fwords[i])): * if fwords[i][alt][0] != EPSILON: # <<<<<<<<<<<<<< * frontier.append((i, i, (i,), alt, 0, self.rules.root, (), False)) * */ - __pyx_t_3 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_fwords, __pyx_cur_scope->__pyx_v_i, sizeof(int), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 963; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_fwords, __pyx_cur_scope->__pyx_v_i, sizeof(int), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 957; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_GetItemInt(__pyx_t_3, __pyx_cur_scope->__pyx_v_alt, sizeof(int), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 963; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetItemInt(__pyx_t_3, __pyx_cur_scope->__pyx_v_alt, sizeof(int), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 957; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_GetItemInt(__pyx_t_2, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 963; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_GetItemInt(__pyx_t_2, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 957; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyInt_FromLong(__pyx_v_3_sa_EPSILON); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 963; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_v_3_sa_EPSILON); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 957; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_7 = PyObject_RichCompare(__pyx_t_3, __pyx_t_2, Py_NE); __Pyx_XGOTREF(__pyx_t_7); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 963; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_RichCompare(__pyx_t_3, __pyx_t_2, Py_NE); __Pyx_XGOTREF(__pyx_t_7); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 957; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 963; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 957; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":964 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":958 * for alt in range(0, len(fwords[i])): * if fwords[i][alt][0] != EPSILON: * frontier.append((i, i, (i,), alt, 0, self.rules.root, (), False)) # <<<<<<<<<<<<<< * * xroot = None */ - __pyx_t_7 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 964; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 958; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_2 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 964; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 958; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 964; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 958; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_9 = PyTuple_New(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 964; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyTuple_New(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 958; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); PyTuple_SET_ITEM(__pyx_t_9, 0, __pyx_t_3); __Pyx_GIVEREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_alt); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 964; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_alt); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 958; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = __Pyx_PyBool_FromLong(0); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 964; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyBool_FromLong(0); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 958; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_11 = PyTuple_New(8); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 964; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyTuple_New(8); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 958; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); PyTuple_SET_ITEM(__pyx_t_11, 0, __pyx_t_7); __Pyx_GIVEREF(__pyx_t_7); @@ -46231,7 +46192,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_9 = 0; __pyx_t_3 = 0; __pyx_t_10 = 0; - __pyx_t_12 = PyList_Append(__pyx_cur_scope->__pyx_v_frontier, ((PyObject *)__pyx_t_11)); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 964; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = PyList_Append(__pyx_cur_scope->__pyx_v_frontier, ((PyObject *)__pyx_t_11)); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 958; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(((PyObject *)__pyx_t_11)); __pyx_t_11 = 0; goto __pyx_L8; } @@ -46239,7 +46200,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":966 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":960 * frontier.append((i, i, (i,), alt, 0, self.rules.root, (), False)) * * xroot = None # <<<<<<<<<<<<<< @@ -46250,7 +46211,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_GIVEREF(Py_None); __pyx_cur_scope->__pyx_v_xroot = Py_None; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":967 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":961 * * xroot = None * x1 = sym_setindex(self.category, 1) # <<<<<<<<<<<<<< @@ -46259,32 +46220,32 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene */ __pyx_cur_scope->__pyx_v_x1 = __pyx_f_3_sa_sym_setindex(__pyx_cur_scope->__pyx_v_self->category, 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":968 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":962 * xroot = None * x1 = sym_setindex(self.category, 1) * if x1 in self.rules.root.children: # <<<<<<<<<<<<<< * xroot = self.rules.root.children[x1] * else: */ - __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_x1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 968; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_x1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 962; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_10 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_self->rules->root, __pyx_n_s__children); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 968; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_self->rules->root, __pyx_n_s__children); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 962; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_8 = (__Pyx_PySequence_Contains(__pyx_t_11, __pyx_t_10, Py_EQ)); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 968; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = (__Pyx_PySequence_Contains(__pyx_t_11, __pyx_t_10, Py_EQ)); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 962; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":969 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":963 * x1 = sym_setindex(self.category, 1) * if x1 in self.rules.root.children: * xroot = self.rules.root.children[x1] # <<<<<<<<<<<<<< * else: * xroot = ExtendedTrieNode(suffix_link=self.rules.root, phrase_location=PhraseLocation()) */ - __pyx_t_10 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_self->rules->root, __pyx_n_s__children); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 969; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_self->rules->root, __pyx_n_s__children); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 963; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_11 = __Pyx_GetItemInt(__pyx_t_10, __pyx_cur_scope->__pyx_v_x1, sizeof(int), PyInt_FromLong); if (!__pyx_t_11) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 969; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = __Pyx_GetItemInt(__pyx_t_10, __pyx_cur_scope->__pyx_v_x1, sizeof(int), PyInt_FromLong); if (!__pyx_t_11) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 963; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_GOTREF(__pyx_cur_scope->__pyx_v_xroot); @@ -46296,21 +46257,21 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":971 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":965 * xroot = self.rules.root.children[x1] * else: * xroot = ExtendedTrieNode(suffix_link=self.rules.root, phrase_location=PhraseLocation()) # <<<<<<<<<<<<<< * self.rules.root.children[x1] = xroot * */ - __pyx_t_11 = PyDict_New(); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 971; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyDict_New(); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 965; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_11)); - if (PyDict_SetItem(__pyx_t_11, ((PyObject *)__pyx_n_s__suffix_link), __pyx_cur_scope->__pyx_v_self->rules->root) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 971; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_10 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_PhraseLocation)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 971; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_11, ((PyObject *)__pyx_n_s__suffix_link), __pyx_cur_scope->__pyx_v_self->rules->root) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 965; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_PhraseLocation)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 965; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - if (PyDict_SetItem(__pyx_t_11, ((PyObject *)__pyx_n_s__phrase_location), __pyx_t_10) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 971; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_11, ((PyObject *)__pyx_n_s__phrase_location), __pyx_t_10) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 965; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_ExtendedTrieNode)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_11)); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 971; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_ExtendedTrieNode)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_11)); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 965; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(((PyObject *)__pyx_t_11)); __pyx_t_11 = 0; __Pyx_GOTREF(__pyx_cur_scope->__pyx_v_xroot); @@ -46319,101 +46280,101 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_xroot = __pyx_t_10; __pyx_t_10 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":972 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":966 * else: * xroot = ExtendedTrieNode(suffix_link=self.rules.root, phrase_location=PhraseLocation()) * self.rules.root.children[x1] = xroot # <<<<<<<<<<<<<< * * for i in range(self.min_gap_size, len(fwords)): */ - __pyx_t_10 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_self->rules->root, __pyx_n_s__children); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 972; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_self->rules->root, __pyx_n_s__children); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 966; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - if (__Pyx_SetItemInt(__pyx_t_10, __pyx_cur_scope->__pyx_v_x1, __pyx_cur_scope->__pyx_v_xroot, sizeof(int), PyInt_FromLong) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 972; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_SetItemInt(__pyx_t_10, __pyx_cur_scope->__pyx_v_x1, __pyx_cur_scope->__pyx_v_xroot, sizeof(int), PyInt_FromLong) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 966; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; } __pyx_L9:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":974 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":968 * self.rules.root.children[x1] = xroot * * for i in range(self.min_gap_size, len(fwords)): # <<<<<<<<<<<<<< * for alt in range(0, len(fwords[i])): * if fwords[i][alt][0] != EPSILON: */ - __pyx_t_1 = PyObject_Length(__pyx_cur_scope->__pyx_v_fwords); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 974; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Length(__pyx_cur_scope->__pyx_v_fwords); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 968; __pyx_clineno = __LINE__; goto __pyx_L1_error;} for (__pyx_t_4 = __pyx_cur_scope->__pyx_v_self->min_gap_size; __pyx_t_4 < __pyx_t_1; __pyx_t_4+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_4; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":975 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":969 * * for i in range(self.min_gap_size, len(fwords)): * for alt in range(0, len(fwords[i])): # <<<<<<<<<<<<<< * if fwords[i][alt][0] != EPSILON: * frontier.append((i-self.min_gap_size, i, (i,), alt, self.min_gap_size, xroot, (x1,), True)) */ - __pyx_t_10 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_fwords, __pyx_cur_scope->__pyx_v_i, sizeof(int), PyInt_FromLong); if (!__pyx_t_10) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 975; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_fwords, __pyx_cur_scope->__pyx_v_i, sizeof(int), PyInt_FromLong); if (!__pyx_t_10) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 969; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_5 = PyObject_Length(__pyx_t_10); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 975; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_10); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 969; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) { __pyx_cur_scope->__pyx_v_alt = __pyx_t_6; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":976 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":970 * for i in range(self.min_gap_size, len(fwords)): * for alt in range(0, len(fwords[i])): * if fwords[i][alt][0] != EPSILON: # <<<<<<<<<<<<<< * frontier.append((i-self.min_gap_size, i, (i,), alt, self.min_gap_size, xroot, (x1,), True)) * */ - __pyx_t_10 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_fwords, __pyx_cur_scope->__pyx_v_i, sizeof(int), PyInt_FromLong); if (!__pyx_t_10) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 976; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_fwords, __pyx_cur_scope->__pyx_v_i, sizeof(int), PyInt_FromLong); if (!__pyx_t_10) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 970; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_11 = __Pyx_GetItemInt(__pyx_t_10, __pyx_cur_scope->__pyx_v_alt, sizeof(int), PyInt_FromLong); if (!__pyx_t_11) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 976; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = __Pyx_GetItemInt(__pyx_t_10, __pyx_cur_scope->__pyx_v_alt, sizeof(int), PyInt_FromLong); if (!__pyx_t_11) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 970; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = __Pyx_GetItemInt(__pyx_t_11, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_10) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 976; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_GetItemInt(__pyx_t_11, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_10) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 970; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - __pyx_t_11 = PyInt_FromLong(__pyx_v_3_sa_EPSILON); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 976; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyInt_FromLong(__pyx_v_3_sa_EPSILON); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 970; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_3 = PyObject_RichCompare(__pyx_t_10, __pyx_t_11, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 976; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_RichCompare(__pyx_t_10, __pyx_t_11, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 970; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 976; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 970; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":977 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":971 * for alt in range(0, len(fwords[i])): * if fwords[i][alt][0] != EPSILON: * frontier.append((i-self.min_gap_size, i, (i,), alt, self.min_gap_size, xroot, (x1,), True)) # <<<<<<<<<<<<<< * * next_states = [] */ - __pyx_t_3 = PyInt_FromLong((__pyx_cur_scope->__pyx_v_i - __pyx_cur_scope->__pyx_v_self->min_gap_size)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 977; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong((__pyx_cur_scope->__pyx_v_i - __pyx_cur_scope->__pyx_v_self->min_gap_size)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 971; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 977; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 971; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_10 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 977; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 971; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_9 = PyTuple_New(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 977; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyTuple_New(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 971; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); PyTuple_SET_ITEM(__pyx_t_9, 0, __pyx_t_10); __Pyx_GIVEREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_alt); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 977; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_alt); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 971; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_2 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_self->min_gap_size); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 977; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_self->min_gap_size); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 971; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_7 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_x1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 977; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_x1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 971; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 977; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyTuple_New(1); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 971; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_13); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_7); __Pyx_GIVEREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyBool_FromLong(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 977; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyBool_FromLong(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 971; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_14 = PyTuple_New(8); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 977; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyTuple_New(8); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 971; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_t_3); __Pyx_GIVEREF(__pyx_t_3); @@ -46439,7 +46400,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_2 = 0; __pyx_t_13 = 0; __pyx_t_7 = 0; - __pyx_t_12 = PyList_Append(__pyx_cur_scope->__pyx_v_frontier, ((PyObject *)__pyx_t_14)); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 977; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = PyList_Append(__pyx_cur_scope->__pyx_v_frontier, ((PyObject *)__pyx_t_14)); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 971; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(((PyObject *)__pyx_t_14)); __pyx_t_14 = 0; goto __pyx_L14; } @@ -46447,44 +46408,44 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":979 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":973 * frontier.append((i-self.min_gap_size, i, (i,), alt, self.min_gap_size, xroot, (x1,), True)) * * next_states = [] # <<<<<<<<<<<<<< * for i in range(len(fwords)): * next_states.append(self.get_next_states(fwords,i,self.min_gap_size)) */ - __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 979; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyList_New(0); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 973; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_GIVEREF(((PyObject *)__pyx_t_14)); __pyx_cur_scope->__pyx_v_next_states = __pyx_t_14; __pyx_t_14 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":980 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":974 * * next_states = [] * for i in range(len(fwords)): # <<<<<<<<<<<<<< * next_states.append(self.get_next_states(fwords,i,self.min_gap_size)) * */ - __pyx_t_1 = PyObject_Length(__pyx_cur_scope->__pyx_v_fwords); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 980; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Length(__pyx_cur_scope->__pyx_v_fwords); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 974; __pyx_clineno = __LINE__; goto __pyx_L1_error;} for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_1; __pyx_t_4+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_4; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":981 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":975 * next_states = [] * for i in range(len(fwords)): * next_states.append(self.get_next_states(fwords,i,self.min_gap_size)) # <<<<<<<<<<<<<< * * while len(frontier) > 0: */ - __pyx_t_14 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_self), __pyx_n_s__get_next_states); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 981; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_self), __pyx_n_s__get_next_states); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 975; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); - __pyx_t_7 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 981; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 975; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_13 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_self->min_gap_size); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 981; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_self->min_gap_size); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 975; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_13); - __pyx_t_2 = PyTuple_New(3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 981; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 975; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(__pyx_cur_scope->__pyx_v_fwords); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_cur_scope->__pyx_v_fwords); @@ -46495,15 +46456,15 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_GIVEREF(__pyx_t_13); __pyx_t_7 = 0; __pyx_t_13 = 0; - __pyx_t_13 = PyObject_Call(__pyx_t_14, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 981; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyObject_Call(__pyx_t_14, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 975; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; - __pyx_t_12 = PyList_Append(__pyx_cur_scope->__pyx_v_next_states, __pyx_t_13); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 981; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = PyList_Append(__pyx_cur_scope->__pyx_v_next_states, __pyx_t_13); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 975; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":983 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":977 * next_states.append(self.get_next_states(fwords,i,self.min_gap_size)) * * while len(frontier) > 0: # <<<<<<<<<<<<<< @@ -46511,18 +46472,18 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene * for k, i, input_match, alt, pathlen, node, prefix, is_shadow_path in frontier: */ while (1) { - __pyx_t_1 = PyList_GET_SIZE(((PyObject *)__pyx_cur_scope->__pyx_v_frontier)); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 983; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_GET_SIZE(((PyObject *)__pyx_cur_scope->__pyx_v_frontier)); if (unlikely(__pyx_t_1 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 977; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_8 = (__pyx_t_1 > 0); if (!__pyx_t_8) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":984 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":978 * * while len(frontier) > 0: * new_frontier = [] # <<<<<<<<<<<<<< * for k, i, input_match, alt, pathlen, node, prefix, is_shadow_path in frontier: * word_id = fwords[i][alt][0] */ - __pyx_t_13 = PyList_New(0); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 984; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyList_New(0); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 978; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_13); __Pyx_XGOTREF(((PyObject *)__pyx_cur_scope->__pyx_v_new_frontier)); __Pyx_XDECREF(((PyObject *)__pyx_cur_scope->__pyx_v_new_frontier)); @@ -46530,7 +46491,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_new_frontier = __pyx_t_13; __pyx_t_13 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":985 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":979 * while len(frontier) > 0: * new_frontier = [] * for k, i, input_match, alt, pathlen, node, prefix, is_shadow_path in frontier: # <<<<<<<<<<<<<< @@ -46541,9 +46502,9 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene for (;;) { if (__pyx_t_1 >= PyList_GET_SIZE(__pyx_t_13)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_2 = PyList_GET_ITEM(__pyx_t_13, __pyx_t_1); __Pyx_INCREF(__pyx_t_2); __pyx_t_1++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 985; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyList_GET_ITEM(__pyx_t_13, __pyx_t_1); __Pyx_INCREF(__pyx_t_2); __pyx_t_1++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 979; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_2 = PySequence_ITEM(__pyx_t_13, __pyx_t_1); __pyx_t_1++; if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 985; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PySequence_ITEM(__pyx_t_13, __pyx_t_1); __pyx_t_1++; if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 979; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif if ((likely(PyTuple_CheckExact(__pyx_t_2))) || (PyList_CheckExact(__pyx_t_2))) { PyObject* sequence = __pyx_t_2; @@ -46555,7 +46516,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene if (unlikely(size != 8)) { if (size > 8) __Pyx_RaiseTooManyValuesError(8); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 985; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 979; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyTuple_CheckExact(sequence))) { @@ -46589,7 +46550,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene Py_ssize_t i; PyObject** temps[8] = {&__pyx_t_14,&__pyx_t_7,&__pyx_t_10,&__pyx_t_9,&__pyx_t_11,&__pyx_t_3,&__pyx_t_15,&__pyx_t_16}; for (i=0; i < 8; i++) { - PyObject* item = PySequence_ITEM(sequence, i); if (unlikely(!item)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 985; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + PyObject* item = PySequence_ITEM(sequence, i); if (unlikely(!item)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 979; __pyx_clineno = __LINE__; goto __pyx_L1_error;} *(temps[i]) = item; } #endif @@ -46598,7 +46559,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene { Py_ssize_t index = -1; PyObject** temps[8] = {&__pyx_t_14,&__pyx_t_7,&__pyx_t_10,&__pyx_t_9,&__pyx_t_11,&__pyx_t_3,&__pyx_t_15,&__pyx_t_16}; - __pyx_t_17 = PyObject_GetIter(__pyx_t_2); if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 985; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_17 = PyObject_GetIter(__pyx_t_2); if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 979; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_18 = Py_TYPE(__pyx_t_17)->tp_iternext; @@ -46607,7 +46568,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_GOTREF(item); *(temps[index]) = item; } - if (__Pyx_IternextUnpackEndCheck(__pyx_t_18(__pyx_t_17), 8) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 985; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_IternextUnpackEndCheck(__pyx_t_18(__pyx_t_17), 8) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 979; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_18 = NULL; __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; goto __pyx_L22_unpacking_done; @@ -46615,14 +46576,14 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __pyx_t_18 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 985; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 979; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_L22_unpacking_done:; } - __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_14); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 985; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_14); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 979; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_6 = __Pyx_PyInt_AsInt(__pyx_t_7); if (unlikely((__pyx_t_6 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 985; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyInt_AsInt(__pyx_t_7); if (unlikely((__pyx_t_6 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 979; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_19 = __Pyx_PyInt_AsInt(__pyx_t_9); if (unlikely((__pyx_t_19 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 985; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_19 = __Pyx_PyInt_AsInt(__pyx_t_9); if (unlikely((__pyx_t_19 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 979; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __pyx_cur_scope->__pyx_v_k = __pyx_t_4; __pyx_cur_scope->__pyx_v_i = __pyx_t_6; @@ -46653,19 +46614,19 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_is_shadow_path = __pyx_t_16; __pyx_t_16 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":986 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":980 * new_frontier = [] * for k, i, input_match, alt, pathlen, node, prefix, is_shadow_path in frontier: * word_id = fwords[i][alt][0] # <<<<<<<<<<<<<< * spanlen = fwords[i][alt][2] * # TODO get rid of k -- pathlen is replacing it */ - __pyx_t_2 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_fwords, __pyx_cur_scope->__pyx_v_i, sizeof(int), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 986; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_fwords, __pyx_cur_scope->__pyx_v_i, sizeof(int), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 980; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_16 = __Pyx_GetItemInt(__pyx_t_2, __pyx_cur_scope->__pyx_v_alt, sizeof(int), PyInt_FromLong); if (!__pyx_t_16) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 986; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = __Pyx_GetItemInt(__pyx_t_2, __pyx_cur_scope->__pyx_v_alt, sizeof(int), PyInt_FromLong); if (!__pyx_t_16) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 980; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = __Pyx_GetItemInt(__pyx_t_16, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 986; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetItemInt(__pyx_t_16, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 980; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_word_id); @@ -46674,19 +46635,19 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_word_id = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":987 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":981 * for k, i, input_match, alt, pathlen, node, prefix, is_shadow_path in frontier: * word_id = fwords[i][alt][0] * spanlen = fwords[i][alt][2] # <<<<<<<<<<<<<< * # TODO get rid of k -- pathlen is replacing it * if word_id == EPSILON: */ - __pyx_t_2 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_fwords, __pyx_cur_scope->__pyx_v_i, sizeof(int), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 987; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_fwords, __pyx_cur_scope->__pyx_v_i, sizeof(int), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 981; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_16 = __Pyx_GetItemInt(__pyx_t_2, __pyx_cur_scope->__pyx_v_alt, sizeof(int), PyInt_FromLong); if (!__pyx_t_16) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 987; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = __Pyx_GetItemInt(__pyx_t_2, __pyx_cur_scope->__pyx_v_alt, sizeof(int), PyInt_FromLong); if (!__pyx_t_16) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 981; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = __Pyx_GetItemInt(__pyx_t_16, 2, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 987; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetItemInt(__pyx_t_16, 2, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 981; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_spanlen); @@ -46695,44 +46656,44 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_spanlen = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":989 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":983 * spanlen = fwords[i][alt][2] * # TODO get rid of k -- pathlen is replacing it * if word_id == EPSILON: # <<<<<<<<<<<<<< * # skipping because word_id is epsilon * if i+spanlen >= len(fwords): */ - __pyx_t_2 = PyInt_FromLong(__pyx_v_3_sa_EPSILON); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 989; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_v_3_sa_EPSILON); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 983; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_16 = PyObject_RichCompare(__pyx_cur_scope->__pyx_v_word_id, __pyx_t_2, Py_EQ); __Pyx_XGOTREF(__pyx_t_16); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 989; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyObject_RichCompare(__pyx_cur_scope->__pyx_v_word_id, __pyx_t_2, Py_EQ); __Pyx_XGOTREF(__pyx_t_16); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 983; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_16); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 989; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_16); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 983; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":991 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":985 * if word_id == EPSILON: * # skipping because word_id is epsilon * if i+spanlen >= len(fwords): # <<<<<<<<<<<<<< * continue * for nualt in range(0,len(fwords[i+spanlen])): */ - __pyx_t_16 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 991; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 985; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); - __pyx_t_2 = PyNumber_Add(__pyx_t_16, __pyx_cur_scope->__pyx_v_spanlen); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 991; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_t_16, __pyx_cur_scope->__pyx_v_spanlen); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 985; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_5 = PyObject_Length(__pyx_cur_scope->__pyx_v_fwords); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 991; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_16 = PyInt_FromSsize_t(__pyx_t_5); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 991; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_cur_scope->__pyx_v_fwords); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 985; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyInt_FromSsize_t(__pyx_t_5); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 985; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); - __pyx_t_15 = PyObject_RichCompare(__pyx_t_2, __pyx_t_16, Py_GE); __Pyx_XGOTREF(__pyx_t_15); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 991; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyObject_RichCompare(__pyx_t_2, __pyx_t_16, Py_GE); __Pyx_XGOTREF(__pyx_t_15); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 985; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_15); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 991; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_15); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 985; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":992 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":986 * # skipping because word_id is epsilon * if i+spanlen >= len(fwords): * continue # <<<<<<<<<<<<<< @@ -46744,43 +46705,43 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } __pyx_L24:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":993 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":987 * if i+spanlen >= len(fwords): * continue * for nualt in range(0,len(fwords[i+spanlen])): # <<<<<<<<<<<<<< * frontier.append((k, i+spanlen, input_match, nualt, pathlen, node, prefix, is_shadow_path)) * continue */ - __pyx_t_15 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 993; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 987; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_16 = PyNumber_Add(__pyx_t_15, __pyx_cur_scope->__pyx_v_spanlen); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 993; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyNumber_Add(__pyx_t_15, __pyx_cur_scope->__pyx_v_spanlen); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 987; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - __pyx_t_15 = PyObject_GetItem(__pyx_cur_scope->__pyx_v_fwords, __pyx_t_16); if (!__pyx_t_15) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 993; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyObject_GetItem(__pyx_cur_scope->__pyx_v_fwords, __pyx_t_16); if (!__pyx_t_15) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 987; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_5 = PyObject_Length(__pyx_t_15); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 993; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_15); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 987; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_5; __pyx_t_19+=1) { __pyx_cur_scope->__pyx_v_nualt = __pyx_t_19; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":994 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":988 * continue * for nualt in range(0,len(fwords[i+spanlen])): * frontier.append((k, i+spanlen, input_match, nualt, pathlen, node, prefix, is_shadow_path)) # <<<<<<<<<<<<<< * continue * */ - __pyx_t_15 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_k); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 994; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_k); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 988; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_16 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 994; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 988; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); - __pyx_t_2 = PyNumber_Add(__pyx_t_16, __pyx_cur_scope->__pyx_v_spanlen); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 994; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_t_16, __pyx_cur_scope->__pyx_v_spanlen); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 988; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_16 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_nualt); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 994; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_nualt); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 988; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); - __pyx_t_3 = PyTuple_New(8); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 994; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(8); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 988; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); @@ -46806,11 +46767,11 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_15 = 0; __pyx_t_2 = 0; __pyx_t_16 = 0; - __pyx_t_12 = PyList_Append(__pyx_cur_scope->__pyx_v_frontier, ((PyObject *)__pyx_t_3)); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 994; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = PyList_Append(__pyx_cur_scope->__pyx_v_frontier, ((PyObject *)__pyx_t_3)); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 988; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":995 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":989 * for nualt in range(0,len(fwords[i+spanlen])): * frontier.append((k, i+spanlen, input_match, nualt, pathlen, node, prefix, is_shadow_path)) * continue # <<<<<<<<<<<<<< @@ -46822,19 +46783,19 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } __pyx_L23:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":997 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":991 * continue * * phrase = prefix + (word_id,) # <<<<<<<<<<<<<< * hiero_phrase = Phrase(phrase) * arity = hiero_phrase.arity() */ - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 997; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 991; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(__pyx_cur_scope->__pyx_v_word_id); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_cur_scope->__pyx_v_word_id); __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_word_id); - __pyx_t_16 = PyNumber_Add(__pyx_cur_scope->__pyx_v_prefix, ((PyObject *)__pyx_t_3)); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 997; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyNumber_Add(__pyx_cur_scope->__pyx_v_prefix, ((PyObject *)__pyx_t_3)); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 991; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_phrase); @@ -46843,19 +46804,19 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_phrase = __pyx_t_16; __pyx_t_16 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":998 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":992 * * phrase = prefix + (word_id,) * hiero_phrase = Phrase(phrase) # <<<<<<<<<<<<<< * arity = hiero_phrase.arity() * */ - __pyx_t_16 = PyTuple_New(1); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 998; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(1); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 992; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __Pyx_INCREF(__pyx_cur_scope->__pyx_v_phrase); PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_cur_scope->__pyx_v_phrase); __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_phrase); - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_16), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 998; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_16), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 992; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(((PyObject *)__pyx_t_16)); __pyx_t_16 = 0; __Pyx_XGOTREF(((PyObject *)__pyx_cur_scope->__pyx_v_hiero_phrase)); @@ -46864,23 +46825,23 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_hiero_phrase = ((struct __pyx_obj_3_sa_Phrase *)__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":999 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":993 * phrase = prefix + (word_id,) * hiero_phrase = Phrase(phrase) * arity = hiero_phrase.arity() # <<<<<<<<<<<<<< * * lookup_required = False */ - __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_hiero_phrase), __pyx_n_s__arity); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 999; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_hiero_phrase), __pyx_n_s__arity); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 993; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_16 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 999; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 993; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_19 = __Pyx_PyInt_AsInt(__pyx_t_16); if (unlikely((__pyx_t_19 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 999; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_19 = __Pyx_PyInt_AsInt(__pyx_t_16); if (unlikely((__pyx_t_19 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 993; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __pyx_cur_scope->__pyx_v_arity = __pyx_t_19; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1001 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":995 * arity = hiero_phrase.arity() * * lookup_required = False # <<<<<<<<<<<<<< @@ -46889,36 +46850,36 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene */ __pyx_cur_scope->__pyx_v_lookup_required = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1002 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":996 * * lookup_required = False * if word_id in node.children: # <<<<<<<<<<<<<< * if node.children[word_id] is None: * # Path dead-ends at this node */ - __pyx_t_16 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__children); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1002; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__children); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 996; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); - __pyx_t_8 = (__Pyx_PySequence_Contains(__pyx_cur_scope->__pyx_v_word_id, __pyx_t_16, Py_EQ)); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1002; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = (__Pyx_PySequence_Contains(__pyx_cur_scope->__pyx_v_word_id, __pyx_t_16, Py_EQ)); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 996; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1003 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":997 * lookup_required = False * if word_id in node.children: * if node.children[word_id] is None: # <<<<<<<<<<<<<< * # Path dead-ends at this node * continue */ - __pyx_t_16 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__children); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1003; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__children); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 997; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); - __pyx_t_3 = PyObject_GetItem(__pyx_t_16, __pyx_cur_scope->__pyx_v_word_id); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1003; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetItem(__pyx_t_16, __pyx_cur_scope->__pyx_v_word_id); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 997; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __pyx_t_8 = (__pyx_t_3 == Py_None); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1005 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":999 * if node.children[word_id] is None: * # Path dead-ends at this node * continue # <<<<<<<<<<<<<< @@ -46930,16 +46891,16 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1008 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1002 * else: * # Path continues at this node * node = node.children[word_id] # <<<<<<<<<<<<<< * else: * if node.suffix_link is None: */ - __pyx_t_3 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__children); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1008; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__children); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1002; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_16 = PyObject_GetItem(__pyx_t_3, __pyx_cur_scope->__pyx_v_word_id); if (!__pyx_t_16) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1008; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyObject_GetItem(__pyx_t_3, __pyx_cur_scope->__pyx_v_word_id); if (!__pyx_t_16) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1002; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_GOTREF(__pyx_cur_scope->__pyx_v_node); @@ -46953,20 +46914,20 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1010 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1004 * node = node.children[word_id] * else: * if node.suffix_link is None: # <<<<<<<<<<<<<< * # Current node is root; lookup required * lookup_required = True */ - __pyx_t_16 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1010; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1004; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __pyx_t_8 = (__pyx_t_16 == Py_None); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1012 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1006 * if node.suffix_link is None: * # Current node is root; lookup required * lookup_required = True # <<<<<<<<<<<<<< @@ -46978,54 +46939,54 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1014 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1008 * lookup_required = True * else: * if word_id in node.suffix_link.children: # <<<<<<<<<<<<<< * if node.suffix_link.children[word_id] is None: * # Suffix link reports path is dead end */ - __pyx_t_16 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1014; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1008; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); - __pyx_t_3 = PyObject_GetAttr(__pyx_t_16, __pyx_n_s__children); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1014; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(__pyx_t_16, __pyx_n_s__children); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1008; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_8 = (__Pyx_PySequence_Contains(__pyx_cur_scope->__pyx_v_word_id, __pyx_t_3, Py_EQ)); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1014; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = (__Pyx_PySequence_Contains(__pyx_cur_scope->__pyx_v_word_id, __pyx_t_3, Py_EQ)); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1008; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1015 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1009 * else: * if word_id in node.suffix_link.children: * if node.suffix_link.children[word_id] is None: # <<<<<<<<<<<<<< * # Suffix link reports path is dead end * node.children[word_id] = None */ - __pyx_t_3 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1015; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1009; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_16 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__children); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1015; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__children); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1009; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetItem(__pyx_t_16, __pyx_cur_scope->__pyx_v_word_id); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1015; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetItem(__pyx_t_16, __pyx_cur_scope->__pyx_v_word_id); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1009; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __pyx_t_8 = (__pyx_t_3 == Py_None); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1017 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1011 * if node.suffix_link.children[word_id] is None: * # Suffix link reports path is dead end * node.children[word_id] = None # <<<<<<<<<<<<<< * continue * else: */ - __pyx_t_3 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__children); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1017; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__children); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1011; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - if (PyObject_SetItem(__pyx_t_3, __pyx_cur_scope->__pyx_v_word_id, Py_None) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1017; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyObject_SetItem(__pyx_t_3, __pyx_cur_scope->__pyx_v_word_id, Py_None) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1011; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1018 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1012 * # Suffix link reports path is dead end * node.children[word_id] = None * continue # <<<<<<<<<<<<<< @@ -47037,7 +46998,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1021 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1015 * else: * # Suffix link indicates lookup is reqired * lookup_required = True # <<<<<<<<<<<<<< @@ -47051,18 +47012,18 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1024 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1018 * else: * #ERROR: We never get here * raise Exception("Keyword trie error") # <<<<<<<<<<<<<< * # checking whether lookup_required * if lookup_required: */ - __pyx_t_3 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_k_tuple_122), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1024; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(__pyx_builtin_Exception, ((PyObject *)__pyx_k_tuple_122), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1018; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1024; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1018; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_L30:; } @@ -47070,7 +47031,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } __pyx_L27:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1026 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1020 * raise Exception("Keyword trie error") * # checking whether lookup_required * if lookup_required: # <<<<<<<<<<<<<< @@ -47079,7 +47040,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene */ if (__pyx_cur_scope->__pyx_v_lookup_required) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1027 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1021 * # checking whether lookup_required * if lookup_required: * new_node = None # <<<<<<<<<<<<<< @@ -47092,66 +47053,66 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_GIVEREF(Py_None); __pyx_cur_scope->__pyx_v_new_node = Py_None; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1028 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1022 * if lookup_required: * new_node = None * if is_shadow_path: # <<<<<<<<<<<<<< * # Extending shadow path * # on the shadow path we don't do any search, we just use info from suffix link */ - __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_cur_scope->__pyx_v_is_shadow_path); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1028; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_cur_scope->__pyx_v_is_shadow_path); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1022; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1031 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1025 * # Extending shadow path * # on the shadow path we don't do any search, we just use info from suffix link * new_node = ExtendedTrieNode(phrase_location=node.suffix_link.children[word_id].phrase_location, # <<<<<<<<<<<<<< * suffix_link=node.suffix_link.children[word_id], * phrase=hiero_phrase) */ - __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1031; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1025; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_3)); - __pyx_t_16 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1031; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1025; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_16, __pyx_n_s__children); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1031; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_GetAttr(__pyx_t_16, __pyx_n_s__children); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1025; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __pyx_t_16 = PyObject_GetItem(__pyx_t_2, __pyx_cur_scope->__pyx_v_word_id); if (!__pyx_t_16) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1031; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyObject_GetItem(__pyx_t_2, __pyx_cur_scope->__pyx_v_word_id); if (!__pyx_t_16) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1025; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_GetAttr(__pyx_t_16, __pyx_n_s__phrase_location); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1031; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_GetAttr(__pyx_t_16, __pyx_n_s__phrase_location); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1025; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - if (PyDict_SetItem(__pyx_t_3, ((PyObject *)__pyx_n_s__phrase_location), __pyx_t_2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1031; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_3, ((PyObject *)__pyx_n_s__phrase_location), __pyx_t_2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1025; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1032 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1026 * # on the shadow path we don't do any search, we just use info from suffix link * new_node = ExtendedTrieNode(phrase_location=node.suffix_link.children[word_id].phrase_location, * suffix_link=node.suffix_link.children[word_id], # <<<<<<<<<<<<<< * phrase=hiero_phrase) * else: */ - __pyx_t_2 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1032; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1026; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_16 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__children); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1032; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__children); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1026; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_GetItem(__pyx_t_16, __pyx_cur_scope->__pyx_v_word_id); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1032; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_GetItem(__pyx_t_16, __pyx_cur_scope->__pyx_v_word_id); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1026; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - if (PyDict_SetItem(__pyx_t_3, ((PyObject *)__pyx_n_s__suffix_link), __pyx_t_2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1031; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_3, ((PyObject *)__pyx_n_s__suffix_link), __pyx_t_2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1025; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1033 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1027 * new_node = ExtendedTrieNode(phrase_location=node.suffix_link.children[word_id].phrase_location, * suffix_link=node.suffix_link.children[word_id], * phrase=hiero_phrase) # <<<<<<<<<<<<<< * else: * if arity > 0: */ - if (PyDict_SetItem(__pyx_t_3, ((PyObject *)__pyx_n_s__phrase), ((PyObject *)__pyx_cur_scope->__pyx_v_hiero_phrase)) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1031; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_ExtendedTrieNode)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_3)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1031; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_3, ((PyObject *)__pyx_n_s__phrase), ((PyObject *)__pyx_cur_scope->__pyx_v_hiero_phrase)) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1025; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_ExtendedTrieNode)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_3)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1025; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; __Pyx_GOTREF(__pyx_cur_scope->__pyx_v_new_node); @@ -47163,7 +47124,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1035 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1029 * phrase=hiero_phrase) * else: * if arity > 0: # <<<<<<<<<<<<<< @@ -47173,22 +47134,22 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_8 = (__pyx_cur_scope->__pyx_v_arity > 0); if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1037 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1031 * if arity > 0: * # Intersecting because of arity > 0 * phrase_location = self.intersect(node, node.suffix_link.children[word_id], hiero_phrase) # <<<<<<<<<<<<<< * else: * # Suffix array search */ - __pyx_t_2 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1037; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1031; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__children); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1037; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__children); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1031; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyObject_GetItem(__pyx_t_3, __pyx_cur_scope->__pyx_v_word_id); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1037; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_GetItem(__pyx_t_3, __pyx_cur_scope->__pyx_v_word_id); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1031; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = ((PyObject *)((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_cur_scope->__pyx_v_self->__pyx_vtab)->intersect(__pyx_cur_scope->__pyx_v_self, __pyx_cur_scope->__pyx_v_node, __pyx_t_2, __pyx_cur_scope->__pyx_v_hiero_phrase)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1037; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = ((PyObject *)((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_cur_scope->__pyx_v_self->__pyx_vtab)->intersect(__pyx_cur_scope->__pyx_v_self, __pyx_cur_scope->__pyx_v_node, __pyx_t_2, __pyx_cur_scope->__pyx_v_hiero_phrase)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1031; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_XGOTREF(((PyObject *)__pyx_cur_scope->__pyx_v_phrase_location)); @@ -47200,45 +47161,45 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1040 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1034 * else: * # Suffix array search * phrase_location = node.phrase_location # <<<<<<<<<<<<<< * sa_range = self.fsa.lookup(sym_tostring(phrase[-1]), len(phrase)-1, phrase_location.sa_low, phrase_location.sa_high) * if sa_range is not None: */ - __pyx_t_3 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__phrase_location); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1040; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__phrase_location); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1034; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_3_sa_PhraseLocation))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1040; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_3_sa_PhraseLocation))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1034; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_XGOTREF(((PyObject *)__pyx_cur_scope->__pyx_v_phrase_location)); __Pyx_XDECREF(((PyObject *)__pyx_cur_scope->__pyx_v_phrase_location)); __Pyx_GIVEREF(__pyx_t_3); __pyx_cur_scope->__pyx_v_phrase_location = ((struct __pyx_obj_3_sa_PhraseLocation *)__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1041 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1035 * # Suffix array search * phrase_location = node.phrase_location * sa_range = self.fsa.lookup(sym_tostring(phrase[-1]), len(phrase)-1, phrase_location.sa_low, phrase_location.sa_high) # <<<<<<<<<<<<<< * if sa_range is not None: * phrase_location = PhraseLocation(sa_low=sa_range[0], sa_high=sa_range[1]) */ - __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_self->fsa), __pyx_n_s__lookup); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1041; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_self->fsa), __pyx_n_s__lookup); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_phrase, -1, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1041; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_phrase, -1, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_19 = __Pyx_PyInt_AsInt(__pyx_t_2); if (unlikely((__pyx_t_19 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1041; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_19 = __Pyx_PyInt_AsInt(__pyx_t_2); if (unlikely((__pyx_t_19 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyBytes_FromString(__pyx_f_3_sa_sym_tostring(__pyx_t_19)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1041; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyBytes_FromString(__pyx_f_3_sa_sym_tostring(__pyx_t_19)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_2)); - __pyx_t_5 = PyObject_Length(__pyx_cur_scope->__pyx_v_phrase); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1041; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_16 = PyInt_FromSsize_t((__pyx_t_5 - 1)); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1041; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_cur_scope->__pyx_v_phrase); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyInt_FromSsize_t((__pyx_t_5 - 1)); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); - __pyx_t_15 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_phrase_location->sa_low); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1041; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_phrase_location->sa_low); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_phrase_location->sa_high); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1041; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_phrase_location->sa_high); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_9 = PyTuple_New(4); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1041; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyTuple_New(4); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); PyTuple_SET_ITEM(__pyx_t_9, 0, ((PyObject *)__pyx_t_2)); __Pyx_GIVEREF(((PyObject *)__pyx_t_2)); @@ -47252,7 +47213,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_16 = 0; __pyx_t_15 = 0; __pyx_t_11 = 0; - __pyx_t_11 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_9), NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1041; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_9), NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1035; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_9)); __pyx_t_9 = 0; @@ -47262,7 +47223,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_sa_range = __pyx_t_11; __pyx_t_11 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1042 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1036 * phrase_location = node.phrase_location * sa_range = self.fsa.lookup(sym_tostring(phrase[-1]), len(phrase)-1, phrase_location.sa_low, phrase_location.sa_high) * if sa_range is not None: # <<<<<<<<<<<<<< @@ -47272,24 +47233,24 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_8 = (__pyx_cur_scope->__pyx_v_sa_range != Py_None); if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1043 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1037 * sa_range = self.fsa.lookup(sym_tostring(phrase[-1]), len(phrase)-1, phrase_location.sa_low, phrase_location.sa_high) * if sa_range is not None: * phrase_location = PhraseLocation(sa_low=sa_range[0], sa_high=sa_range[1]) # <<<<<<<<<<<<<< * else: * phrase_location = None */ - __pyx_t_11 = PyDict_New(); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1043; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyDict_New(); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1037; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_11)); - __pyx_t_9 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_sa_range, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_9) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1043; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_sa_range, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_9) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1037; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_t_11, ((PyObject *)__pyx_n_s__sa_low), __pyx_t_9) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1043; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_11, ((PyObject *)__pyx_n_s__sa_low), __pyx_t_9) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1037; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_sa_range, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_9) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1043; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __Pyx_GetItemInt(__pyx_cur_scope->__pyx_v_sa_range, 1, sizeof(long), PyInt_FromLong); if (!__pyx_t_9) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1037; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_t_11, ((PyObject *)__pyx_n_s__sa_high), __pyx_t_9) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1043; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_11, ((PyObject *)__pyx_n_s__sa_high), __pyx_t_9) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1037; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_PhraseLocation)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_11)); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1043; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_PhraseLocation)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_11)); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1037; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(((PyObject *)__pyx_t_11)); __pyx_t_11 = 0; __Pyx_GOTREF(((PyObject *)__pyx_cur_scope->__pyx_v_phrase_location)); @@ -47301,7 +47262,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1045 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1039 * phrase_location = PhraseLocation(sa_low=sa_range[0], sa_high=sa_range[1]) * else: * phrase_location = None # <<<<<<<<<<<<<< @@ -47318,7 +47279,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } __pyx_L34:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1047 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1041 * phrase_location = None * * if phrase_location is None: # <<<<<<<<<<<<<< @@ -47328,19 +47289,19 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_8 = (((PyObject *)__pyx_cur_scope->__pyx_v_phrase_location) == Py_None); if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1048 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1042 * * if phrase_location is None: * node.children[word_id] = None # <<<<<<<<<<<<<< * # Search failed * continue */ - __pyx_t_9 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__children); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1048; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__children); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1042; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - if (PyObject_SetItem(__pyx_t_9, __pyx_cur_scope->__pyx_v_word_id, Py_None) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1048; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyObject_SetItem(__pyx_t_9, __pyx_cur_scope->__pyx_v_word_id, Py_None) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1042; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1050 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1044 * node.children[word_id] = None * # Search failed * continue # <<<<<<<<<<<<<< @@ -47352,7 +47313,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } __pyx_L36:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1052 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1046 * continue * # Search succeeded * suffix_link = self.rules.root # <<<<<<<<<<<<<< @@ -47365,32 +47326,32 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_self->rules->root); __pyx_cur_scope->__pyx_v_suffix_link = __pyx_cur_scope->__pyx_v_self->rules->root; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1053 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1047 * # Search succeeded * suffix_link = self.rules.root * if node.suffix_link is not None: # <<<<<<<<<<<<<< * suffix_link = node.suffix_link.children[word_id] * new_node = ExtendedTrieNode(phrase_location=phrase_location, */ - __pyx_t_9 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1053; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1047; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __pyx_t_8 = (__pyx_t_9 != Py_None); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1054 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1048 * suffix_link = self.rules.root * if node.suffix_link is not None: * suffix_link = node.suffix_link.children[word_id] # <<<<<<<<<<<<<< * new_node = ExtendedTrieNode(phrase_location=phrase_location, * suffix_link=suffix_link, */ - __pyx_t_9 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1054; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1048; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - __pyx_t_11 = PyObject_GetAttr(__pyx_t_9, __pyx_n_s__children); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1054; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyObject_GetAttr(__pyx_t_9, __pyx_n_s__children); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1048; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = PyObject_GetItem(__pyx_t_11, __pyx_cur_scope->__pyx_v_word_id); if (!__pyx_t_9) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1054; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_GetItem(__pyx_t_11, __pyx_cur_scope->__pyx_v_word_id); if (!__pyx_t_9) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1048; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; __Pyx_GOTREF(__pyx_cur_scope->__pyx_v_suffix_link); @@ -47402,35 +47363,35 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } __pyx_L37:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1055 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1049 * if node.suffix_link is not None: * suffix_link = node.suffix_link.children[word_id] * new_node = ExtendedTrieNode(phrase_location=phrase_location, # <<<<<<<<<<<<<< * suffix_link=suffix_link, * phrase=hiero_phrase) */ - __pyx_t_9 = PyDict_New(); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1055; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyDict_New(); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1049; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_9)); - if (PyDict_SetItem(__pyx_t_9, ((PyObject *)__pyx_n_s__phrase_location), ((PyObject *)__pyx_cur_scope->__pyx_v_phrase_location)) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1055; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_9, ((PyObject *)__pyx_n_s__phrase_location), ((PyObject *)__pyx_cur_scope->__pyx_v_phrase_location)) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1049; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1056 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1050 * suffix_link = node.suffix_link.children[word_id] * new_node = ExtendedTrieNode(phrase_location=phrase_location, * suffix_link=suffix_link, # <<<<<<<<<<<<<< * phrase=hiero_phrase) * node.children[word_id] = new_node */ - if (PyDict_SetItem(__pyx_t_9, ((PyObject *)__pyx_n_s__suffix_link), __pyx_cur_scope->__pyx_v_suffix_link) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1055; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_9, ((PyObject *)__pyx_n_s__suffix_link), __pyx_cur_scope->__pyx_v_suffix_link) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1049; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1057 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1051 * new_node = ExtendedTrieNode(phrase_location=phrase_location, * suffix_link=suffix_link, * phrase=hiero_phrase) # <<<<<<<<<<<<<< * node.children[word_id] = new_node * node = new_node */ - if (PyDict_SetItem(__pyx_t_9, ((PyObject *)__pyx_n_s__phrase), ((PyObject *)__pyx_cur_scope->__pyx_v_hiero_phrase)) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1055; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_11 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_ExtendedTrieNode)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_9)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1055; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_9, ((PyObject *)__pyx_n_s__phrase), ((PyObject *)__pyx_cur_scope->__pyx_v_hiero_phrase)) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1049; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_ExtendedTrieNode)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_9)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1049; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(((PyObject *)__pyx_t_9)); __pyx_t_9 = 0; __Pyx_GOTREF(__pyx_cur_scope->__pyx_v_new_node); @@ -47441,19 +47402,19 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } __pyx_L33:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1058 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1052 * suffix_link=suffix_link, * phrase=hiero_phrase) * node.children[word_id] = new_node # <<<<<<<<<<<<<< * node = new_node * */ - __pyx_t_11 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__children); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1058; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__children); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1052; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - if (PyObject_SetItem(__pyx_t_11, __pyx_cur_scope->__pyx_v_word_id, __pyx_cur_scope->__pyx_v_new_node) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1058; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyObject_SetItem(__pyx_t_11, __pyx_cur_scope->__pyx_v_word_id, __pyx_cur_scope->__pyx_v_new_node) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1052; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1059 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1053 * phrase=hiero_phrase) * node.children[word_id] = new_node * node = new_node # <<<<<<<<<<<<<< @@ -47466,7 +47427,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_new_node); __pyx_cur_scope->__pyx_v_node = __pyx_cur_scope->__pyx_v_new_node; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1064 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1058 * This should happen before we get to extraction (so that * the node will exist if needed)''' * if arity < self.max_nonterminals: # <<<<<<<<<<<<<< @@ -47476,14 +47437,14 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_8 = (__pyx_cur_scope->__pyx_v_arity < __pyx_cur_scope->__pyx_v_self->max_nonterminals); if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1065 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1059 * the node will exist if needed)''' * if arity < self.max_nonterminals: * xcat_index = arity+1 # <<<<<<<<<<<<<< * xcat = sym_setindex(self.category, xcat_index) * suffix_link_xcat_index = xcat_index */ - __pyx_t_11 = PyInt_FromLong((__pyx_cur_scope->__pyx_v_arity + 1)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1065; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyInt_FromLong((__pyx_cur_scope->__pyx_v_arity + 1)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1059; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_xcat_index); __Pyx_XDECREF(__pyx_cur_scope->__pyx_v_xcat_index); @@ -47491,17 +47452,17 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_xcat_index = __pyx_t_11; __pyx_t_11 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1066 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1060 * if arity < self.max_nonterminals: * xcat_index = arity+1 * xcat = sym_setindex(self.category, xcat_index) # <<<<<<<<<<<<<< * suffix_link_xcat_index = xcat_index * if is_shadow_path: */ - __pyx_t_19 = __Pyx_PyInt_AsInt(__pyx_cur_scope->__pyx_v_xcat_index); if (unlikely((__pyx_t_19 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1066; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_19 = __Pyx_PyInt_AsInt(__pyx_cur_scope->__pyx_v_xcat_index); if (unlikely((__pyx_t_19 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1060; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_cur_scope->__pyx_v_xcat = __pyx_f_3_sa_sym_setindex(__pyx_cur_scope->__pyx_v_self->category, __pyx_t_19); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1067 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1061 * xcat_index = arity+1 * xcat = sym_setindex(self.category, xcat_index) * suffix_link_xcat_index = xcat_index # <<<<<<<<<<<<<< @@ -47514,24 +47475,24 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_xcat_index); __pyx_cur_scope->__pyx_v_suffix_link_xcat_index = __pyx_cur_scope->__pyx_v_xcat_index; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1068 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1062 * xcat = sym_setindex(self.category, xcat_index) * suffix_link_xcat_index = xcat_index * if is_shadow_path: # <<<<<<<<<<<<<< * suffix_link_xcat_index = xcat_index-1 * suffix_link_xcat = sym_setindex(self.category, suffix_link_xcat_index) */ - __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_cur_scope->__pyx_v_is_shadow_path); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1068; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_cur_scope->__pyx_v_is_shadow_path); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1062; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1069 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1063 * suffix_link_xcat_index = xcat_index * if is_shadow_path: * suffix_link_xcat_index = xcat_index-1 # <<<<<<<<<<<<<< * suffix_link_xcat = sym_setindex(self.category, suffix_link_xcat_index) * node.children[xcat] = ExtendedTrieNode(phrase_location=node.phrase_location, */ - __pyx_t_11 = PyNumber_Subtract(__pyx_cur_scope->__pyx_v_xcat_index, __pyx_int_1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1069; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyNumber_Subtract(__pyx_cur_scope->__pyx_v_xcat_index, __pyx_int_1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1063; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_GOTREF(__pyx_cur_scope->__pyx_v_suffix_link_xcat_index); __Pyx_DECREF(__pyx_cur_scope->__pyx_v_suffix_link_xcat_index); @@ -47542,159 +47503,159 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } __pyx_L39:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1070 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1064 * if is_shadow_path: * suffix_link_xcat_index = xcat_index-1 * suffix_link_xcat = sym_setindex(self.category, suffix_link_xcat_index) # <<<<<<<<<<<<<< * node.children[xcat] = ExtendedTrieNode(phrase_location=node.phrase_location, * suffix_link=node.suffix_link.children[suffix_link_xcat], */ - __pyx_t_19 = __Pyx_PyInt_AsInt(__pyx_cur_scope->__pyx_v_suffix_link_xcat_index); if (unlikely((__pyx_t_19 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1070; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_19 = __Pyx_PyInt_AsInt(__pyx_cur_scope->__pyx_v_suffix_link_xcat_index); if (unlikely((__pyx_t_19 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1064; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_cur_scope->__pyx_v_suffix_link_xcat = __pyx_f_3_sa_sym_setindex(__pyx_cur_scope->__pyx_v_self->category, __pyx_t_19); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1071 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1065 * suffix_link_xcat_index = xcat_index-1 * suffix_link_xcat = sym_setindex(self.category, suffix_link_xcat_index) * node.children[xcat] = ExtendedTrieNode(phrase_location=node.phrase_location, # <<<<<<<<<<<<<< * suffix_link=node.suffix_link.children[suffix_link_xcat], * phrase= Phrase(phrase + (xcat,))) */ - __pyx_t_11 = PyDict_New(); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1071; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyDict_New(); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1065; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_11)); - __pyx_t_9 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__phrase_location); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1071; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__phrase_location); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1065; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - if (PyDict_SetItem(__pyx_t_11, ((PyObject *)__pyx_n_s__phrase_location), __pyx_t_9) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1071; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_11, ((PyObject *)__pyx_n_s__phrase_location), __pyx_t_9) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1065; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1072 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1066 * suffix_link_xcat = sym_setindex(self.category, suffix_link_xcat_index) * node.children[xcat] = ExtendedTrieNode(phrase_location=node.phrase_location, * suffix_link=node.suffix_link.children[suffix_link_xcat], # <<<<<<<<<<<<<< * phrase= Phrase(phrase + (xcat,))) * */ - __pyx_t_9 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1072; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__suffix_link); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1066; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - __pyx_t_3 = PyObject_GetAttr(__pyx_t_9, __pyx_n_s__children); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1072; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(__pyx_t_9, __pyx_n_s__children); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1066; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = __Pyx_GetItemInt(__pyx_t_3, __pyx_cur_scope->__pyx_v_suffix_link_xcat, sizeof(int), PyInt_FromLong); if (!__pyx_t_9) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1072; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __Pyx_GetItemInt(__pyx_t_3, __pyx_cur_scope->__pyx_v_suffix_link_xcat, sizeof(int), PyInt_FromLong); if (!__pyx_t_9) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1066; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (PyDict_SetItem(__pyx_t_11, ((PyObject *)__pyx_n_s__suffix_link), __pyx_t_9) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1071; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_11, ((PyObject *)__pyx_n_s__suffix_link), __pyx_t_9) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1065; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1073 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1067 * node.children[xcat] = ExtendedTrieNode(phrase_location=node.phrase_location, * suffix_link=node.suffix_link.children[suffix_link_xcat], * phrase= Phrase(phrase + (xcat,))) # <<<<<<<<<<<<<< * * # sample from range */ - __pyx_t_9 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_xcat); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1073; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_xcat); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1067; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1073; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1067; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_9); __Pyx_GIVEREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = PyNumber_Add(__pyx_cur_scope->__pyx_v_phrase, ((PyObject *)__pyx_t_3)); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1073; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyNumber_Add(__pyx_cur_scope->__pyx_v_phrase, ((PyObject *)__pyx_t_3)); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1067; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1073; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1067; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_9); __Pyx_GIVEREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1073; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1067; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - if (PyDict_SetItem(__pyx_t_11, ((PyObject *)__pyx_n_s__phrase), __pyx_t_9) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1071; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_11, ((PyObject *)__pyx_n_s__phrase), __pyx_t_9) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1065; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_ExtendedTrieNode)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_11)); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1071; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_ExtendedTrieNode)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_11)); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1065; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(((PyObject *)__pyx_t_11)); __pyx_t_11 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1071 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1065 * suffix_link_xcat_index = xcat_index-1 * suffix_link_xcat = sym_setindex(self.category, suffix_link_xcat_index) * node.children[xcat] = ExtendedTrieNode(phrase_location=node.phrase_location, # <<<<<<<<<<<<<< * suffix_link=node.suffix_link.children[suffix_link_xcat], * phrase= Phrase(phrase + (xcat,))) */ - __pyx_t_11 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__children); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1071; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__children); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1065; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - if (__Pyx_SetItemInt(__pyx_t_11, __pyx_cur_scope->__pyx_v_xcat, __pyx_t_9, sizeof(int), PyInt_FromLong) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1071; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_SetItemInt(__pyx_t_11, __pyx_cur_scope->__pyx_v_xcat, __pyx_t_9, sizeof(int), PyInt_FromLong) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1065; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; goto __pyx_L38; } __pyx_L38:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1076 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1070 * * # sample from range * if not is_shadow_path: # <<<<<<<<<<<<<< * sample = self.sampler.sample(node.phrase_location) * num_subpatterns = ( node.phrase_location).num_subpatterns */ - __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_cur_scope->__pyx_v_is_shadow_path); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1076; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_cur_scope->__pyx_v_is_shadow_path); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1070; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_20 = (!__pyx_t_8); if (__pyx_t_20) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1077 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1071 * # sample from range * if not is_shadow_path: * sample = self.sampler.sample(node.phrase_location) # <<<<<<<<<<<<<< * num_subpatterns = ( node.phrase_location).num_subpatterns * chunklen = IntList(initial_len=num_subpatterns) */ - __pyx_t_9 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_self->sampler), __pyx_n_s__sample); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1077; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_self->sampler), __pyx_n_s__sample); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1071; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - __pyx_t_11 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__phrase_location); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1077; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__phrase_location); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1071; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1077; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1071; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_11); __Pyx_GIVEREF(__pyx_t_11); __pyx_t_11 = 0; - __pyx_t_11 = PyObject_Call(__pyx_t_9, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1077; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyObject_Call(__pyx_t_9, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1071; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; - if (!(likely(((__pyx_t_11) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_11, __pyx_ptype_3_sa_IntList))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1077; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_11) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_11, __pyx_ptype_3_sa_IntList))))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1071; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_XGOTREF(((PyObject *)__pyx_cur_scope->__pyx_v_sample)); __Pyx_XDECREF(((PyObject *)__pyx_cur_scope->__pyx_v_sample)); __Pyx_GIVEREF(__pyx_t_11); __pyx_cur_scope->__pyx_v_sample = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_11); __pyx_t_11 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1078 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1072 * if not is_shadow_path: * sample = self.sampler.sample(node.phrase_location) * num_subpatterns = ( node.phrase_location).num_subpatterns # <<<<<<<<<<<<<< * chunklen = IntList(initial_len=num_subpatterns) * for j from 0 <= j < num_subpatterns: */ - __pyx_t_11 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__phrase_location); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1078; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__phrase_location); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1072; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __pyx_cur_scope->__pyx_v_num_subpatterns = ((struct __pyx_obj_3_sa_PhraseLocation *)__pyx_t_11)->num_subpatterns; __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1079 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1073 * sample = self.sampler.sample(node.phrase_location) * num_subpatterns = ( node.phrase_location).num_subpatterns * chunklen = IntList(initial_len=num_subpatterns) # <<<<<<<<<<<<<< * for j from 0 <= j < num_subpatterns: * chunklen.arr[j] = hiero_phrase.chunklen(j) */ - __pyx_t_11 = PyDict_New(); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1079; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyDict_New(); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1073; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_11)); - __pyx_t_3 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_num_subpatterns); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1079; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_num_subpatterns); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1073; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_t_11, ((PyObject *)__pyx_n_s__initial_len), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1079; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_11, ((PyObject *)__pyx_n_s__initial_len), __pyx_t_3) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1073; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_11)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1079; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_11)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1073; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(((PyObject *)__pyx_t_11)); __pyx_t_11 = 0; __Pyx_XGOTREF(((PyObject *)__pyx_cur_scope->__pyx_v_chunklen)); @@ -47703,7 +47664,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_chunklen = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1080 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1074 * num_subpatterns = ( node.phrase_location).num_subpatterns * chunklen = IntList(initial_len=num_subpatterns) * for j from 0 <= j < num_subpatterns: # <<<<<<<<<<<<<< @@ -47713,7 +47674,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_19 = __pyx_cur_scope->__pyx_v_num_subpatterns; for (__pyx_cur_scope->__pyx_v_j = 0; __pyx_cur_scope->__pyx_v_j < __pyx_t_19; __pyx_cur_scope->__pyx_v_j++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1081 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1075 * chunklen = IntList(initial_len=num_subpatterns) * for j from 0 <= j < num_subpatterns: * chunklen.arr[j] = hiero_phrase.chunklen(j) # <<<<<<<<<<<<<< @@ -47723,14 +47684,14 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene (__pyx_cur_scope->__pyx_v_chunklen->arr[__pyx_cur_scope->__pyx_v_j]) = ((struct __pyx_vtabstruct_3_sa_Phrase *)__pyx_cur_scope->__pyx_v_hiero_phrase->__pyx_vtab)->chunklen(__pyx_cur_scope->__pyx_v_hiero_phrase, __pyx_cur_scope->__pyx_v_j); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1082 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1076 * for j from 0 <= j < num_subpatterns: * chunklen.arr[j] = hiero_phrase.chunklen(j) * extracts = [] # <<<<<<<<<<<<<< * j = 0 * extract_start = monitor_cpu() */ - __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1082; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1076; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_XGOTREF(((PyObject *)__pyx_cur_scope->__pyx_v_extracts)); __Pyx_XDECREF(((PyObject *)__pyx_cur_scope->__pyx_v_extracts)); @@ -47738,7 +47699,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_extracts = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1083 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1077 * chunklen.arr[j] = hiero_phrase.chunklen(j) * extracts = [] * j = 0 # <<<<<<<<<<<<<< @@ -47747,14 +47708,14 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene */ __pyx_cur_scope->__pyx_v_j = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1084 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1078 * extracts = [] * j = 0 * extract_start = monitor_cpu() # <<<<<<<<<<<<<< * while j < sample.len: * extract = [] */ - __pyx_t_3 = PyFloat_FromDouble(__pyx_f_3_sa_monitor_cpu()); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1084; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyFloat_FromDouble(__pyx_f_3_sa_monitor_cpu()); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1078; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_extract_start); __Pyx_XDECREF(__pyx_cur_scope->__pyx_v_extract_start); @@ -47762,7 +47723,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_extract_start = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1085 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1079 * j = 0 * extract_start = monitor_cpu() * while j < sample.len: # <<<<<<<<<<<<<< @@ -47773,14 +47734,14 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_20 = (__pyx_cur_scope->__pyx_v_j < __pyx_cur_scope->__pyx_v_sample->len); if (!__pyx_t_20) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1086 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1080 * extract_start = monitor_cpu() * while j < sample.len: * extract = [] # <<<<<<<<<<<<<< * * assign_matching(&matching, sample.arr, j, num_subpatterns, self.fda.sent_id.arr) */ - __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1086; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1080; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_extract); __Pyx_XDECREF(__pyx_cur_scope->__pyx_v_extract); @@ -47788,7 +47749,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_extract = ((PyObject *)__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1088 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1082 * extract = [] * * assign_matching(&matching, sample.arr, j, num_subpatterns, self.fda.sent_id.arr) # <<<<<<<<<<<<<< @@ -47797,21 +47758,21 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene */ __pyx_f_3_sa_assign_matching((&__pyx_cur_scope->__pyx_v_matching), __pyx_cur_scope->__pyx_v_sample->arr, __pyx_cur_scope->__pyx_v_j, __pyx_cur_scope->__pyx_v_num_subpatterns, __pyx_cur_scope->__pyx_v_self->fda->sent_id->arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1089 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1083 * * assign_matching(&matching, sample.arr, j, num_subpatterns, self.fda.sent_id.arr) * loc = tuple(sample[j:j+num_subpatterns]) # <<<<<<<<<<<<<< * extract = self.extract(hiero_phrase, &matching, chunklen.arr, num_subpatterns) * extracts.extend([(e, loc) for e in extract]) */ - __pyx_t_3 = __Pyx_PySequence_GetSlice(((PyObject *)__pyx_cur_scope->__pyx_v_sample), __pyx_cur_scope->__pyx_v_j, (__pyx_cur_scope->__pyx_v_j + __pyx_cur_scope->__pyx_v_num_subpatterns)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1089; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PySequence_GetSlice(((PyObject *)__pyx_cur_scope->__pyx_v_sample), __pyx_cur_scope->__pyx_v_j, (__pyx_cur_scope->__pyx_v_j + __pyx_cur_scope->__pyx_v_num_subpatterns)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1083; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_11 = PyTuple_New(1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1089; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyTuple_New(1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1083; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); PyTuple_SET_ITEM(__pyx_t_11, 0, __pyx_t_3); __Pyx_GIVEREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)(&PyTuple_Type))), ((PyObject *)__pyx_t_11), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1089; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Call(((PyObject *)((PyObject*)(&PyTuple_Type))), ((PyObject *)__pyx_t_11), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1083; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(((PyObject *)__pyx_t_11)); __pyx_t_11 = 0; __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_loc); @@ -47820,14 +47781,14 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_loc = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1090 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1084 * assign_matching(&matching, sample.arr, j, num_subpatterns, self.fda.sent_id.arr) * loc = tuple(sample[j:j+num_subpatterns]) * extract = self.extract(hiero_phrase, &matching, chunklen.arr, num_subpatterns) # <<<<<<<<<<<<<< * extracts.extend([(e, loc) for e in extract]) * j = j + num_subpatterns */ - __pyx_t_3 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_cur_scope->__pyx_v_self->__pyx_vtab)->extract(__pyx_cur_scope->__pyx_v_self, __pyx_cur_scope->__pyx_v_hiero_phrase, (&__pyx_cur_scope->__pyx_v_matching), __pyx_cur_scope->__pyx_v_chunklen->arr, __pyx_cur_scope->__pyx_v_num_subpatterns); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1090; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_cur_scope->__pyx_v_self->__pyx_vtab)->extract(__pyx_cur_scope->__pyx_v_self, __pyx_cur_scope->__pyx_v_hiero_phrase, (&__pyx_cur_scope->__pyx_v_matching), __pyx_cur_scope->__pyx_v_chunklen->arr, __pyx_cur_scope->__pyx_v_num_subpatterns); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1084; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_GOTREF(__pyx_cur_scope->__pyx_v_extract); __Pyx_DECREF(__pyx_cur_scope->__pyx_v_extract); @@ -47835,22 +47796,22 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_extract = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1091 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1085 * loc = tuple(sample[j:j+num_subpatterns]) * extract = self.extract(hiero_phrase, &matching, chunklen.arr, num_subpatterns) * extracts.extend([(e, loc) for e in extract]) # <<<<<<<<<<<<<< * j = j + num_subpatterns * */ - __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_extracts), __pyx_n_s__extend); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1091; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_extracts), __pyx_n_s__extend); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1085; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_11 = PyList_New(0); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1091; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyList_New(0); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1085; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); if (PyList_CheckExact(__pyx_cur_scope->__pyx_v_extract) || PyTuple_CheckExact(__pyx_cur_scope->__pyx_v_extract)) { __pyx_t_9 = __pyx_cur_scope->__pyx_v_extract; __Pyx_INCREF(__pyx_t_9); __pyx_t_5 = 0; __pyx_t_21 = NULL; } else { - __pyx_t_5 = -1; __pyx_t_9 = PyObject_GetIter(__pyx_cur_scope->__pyx_v_extract); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1091; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = -1; __pyx_t_9 = PyObject_GetIter(__pyx_cur_scope->__pyx_v_extract); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1085; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __pyx_t_21 = Py_TYPE(__pyx_t_9)->tp_iternext; } @@ -47858,23 +47819,23 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene if (!__pyx_t_21 && PyList_CheckExact(__pyx_t_9)) { if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_9)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_15 = PyList_GET_ITEM(__pyx_t_9, __pyx_t_5); __Pyx_INCREF(__pyx_t_15); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1091; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyList_GET_ITEM(__pyx_t_9, __pyx_t_5); __Pyx_INCREF(__pyx_t_15); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1085; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_15 = PySequence_ITEM(__pyx_t_9, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1091; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PySequence_ITEM(__pyx_t_9, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1085; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else if (!__pyx_t_21 && PyTuple_CheckExact(__pyx_t_9)) { if (__pyx_t_5 >= PyTuple_GET_SIZE(__pyx_t_9)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_15 = PyTuple_GET_ITEM(__pyx_t_9, __pyx_t_5); __Pyx_INCREF(__pyx_t_15); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1091; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyTuple_GET_ITEM(__pyx_t_9, __pyx_t_5); __Pyx_INCREF(__pyx_t_15); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1085; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_15 = PySequence_ITEM(__pyx_t_9, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1091; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PySequence_ITEM(__pyx_t_9, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1085; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { __pyx_t_15 = __pyx_t_21(__pyx_t_9); if (unlikely(!__pyx_t_15)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1091; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1085; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -47885,7 +47846,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_GIVEREF(__pyx_t_15); __pyx_cur_scope->__pyx_v_e = __pyx_t_15; __pyx_t_15 = 0; - __pyx_t_15 = PyTuple_New(2); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1091; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyTuple_New(2); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1085; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_INCREF(__pyx_cur_scope->__pyx_v_e); PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_cur_scope->__pyx_v_e); @@ -47893,23 +47854,23 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_INCREF(__pyx_cur_scope->__pyx_v_loc); PyTuple_SET_ITEM(__pyx_t_15, 1, __pyx_cur_scope->__pyx_v_loc); __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_loc); - if (unlikely(__Pyx_PyList_Append(__pyx_t_11, (PyObject*)__pyx_t_15))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1091; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__Pyx_PyList_Append(__pyx_t_11, (PyObject*)__pyx_t_15))) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1085; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(((PyObject *)__pyx_t_15)); __pyx_t_15 = 0; } __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = PyTuple_New(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1091; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyTuple_New(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1085; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_INCREF(((PyObject *)__pyx_t_11)); PyTuple_SET_ITEM(__pyx_t_9, 0, ((PyObject *)__pyx_t_11)); __Pyx_GIVEREF(((PyObject *)__pyx_t_11)); __Pyx_DECREF(((PyObject *)__pyx_t_11)); __pyx_t_11 = 0; - __pyx_t_11 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_9), NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1091; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_9), NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1085; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_9)); __pyx_t_9 = 0; __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1092 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1086 * extract = self.extract(hiero_phrase, &matching, chunklen.arr, num_subpatterns) * extracts.extend([(e, loc) for e in extract]) * j = j + num_subpatterns # <<<<<<<<<<<<<< @@ -47919,7 +47880,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_j = (__pyx_cur_scope->__pyx_v_j + __pyx_cur_scope->__pyx_v_num_subpatterns); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1094 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1088 * j = j + num_subpatterns * * num_samples = sample.len/num_subpatterns # <<<<<<<<<<<<<< @@ -47928,22 +47889,22 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene */ if (unlikely(__pyx_cur_scope->__pyx_v_num_subpatterns == 0)) { PyErr_Format(PyExc_ZeroDivisionError, "integer division or modulo by zero"); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1094; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1088; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } else if (sizeof(int) == sizeof(long) && unlikely(__pyx_cur_scope->__pyx_v_num_subpatterns == -1) && unlikely(UNARY_NEG_WOULD_OVERFLOW(__pyx_cur_scope->__pyx_v_sample->len))) { PyErr_Format(PyExc_OverflowError, "value too large to perform division"); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1094; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1088; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_cur_scope->__pyx_v_num_samples = __Pyx_div_int(__pyx_cur_scope->__pyx_v_sample->len, __pyx_cur_scope->__pyx_v_num_subpatterns); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1095 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1089 * * num_samples = sample.len/num_subpatterns * extract_stop = monitor_cpu() # <<<<<<<<<<<<<< * self.extract_time = self.extract_time + extract_stop - extract_start * if len(extracts) > 0: */ - __pyx_t_11 = PyFloat_FromDouble(__pyx_f_3_sa_monitor_cpu()); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1095; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyFloat_FromDouble(__pyx_f_3_sa_monitor_cpu()); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1089; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_extract_stop); __Pyx_XDECREF(__pyx_cur_scope->__pyx_v_extract_stop); @@ -47951,46 +47912,46 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_extract_stop = __pyx_t_11; __pyx_t_11 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1096 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1090 * num_samples = sample.len/num_subpatterns * extract_stop = monitor_cpu() * self.extract_time = self.extract_time + extract_stop - extract_start # <<<<<<<<<<<<<< * if len(extracts) > 0: * fcount = Counter() */ - __pyx_t_11 = PyFloat_FromDouble(__pyx_cur_scope->__pyx_v_self->extract_time); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1096; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyFloat_FromDouble(__pyx_cur_scope->__pyx_v_self->extract_time); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1090; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_9 = PyNumber_Add(__pyx_t_11, __pyx_cur_scope->__pyx_v_extract_stop); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1096; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyNumber_Add(__pyx_t_11, __pyx_cur_scope->__pyx_v_extract_stop); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1090; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - __pyx_t_11 = PyNumber_Subtract(__pyx_t_9, __pyx_cur_scope->__pyx_v_extract_start); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1096; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyNumber_Subtract(__pyx_t_9, __pyx_cur_scope->__pyx_v_extract_start); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1090; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_22 = __pyx_PyFloat_AsFloat(__pyx_t_11); if (unlikely((__pyx_t_22 == (float)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1096; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_22 = __pyx_PyFloat_AsFloat(__pyx_t_11); if (unlikely((__pyx_t_22 == (float)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1090; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; __pyx_cur_scope->__pyx_v_self->extract_time = __pyx_t_22; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1097 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1091 * extract_stop = monitor_cpu() * self.extract_time = self.extract_time + extract_stop - extract_start * if len(extracts) > 0: # <<<<<<<<<<<<<< * fcount = Counter() * fphrases = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) */ - __pyx_t_5 = PyList_GET_SIZE(((PyObject *)__pyx_cur_scope->__pyx_v_extracts)); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1097; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyList_GET_SIZE(((PyObject *)__pyx_cur_scope->__pyx_v_extracts)); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1091; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_20 = (__pyx_t_5 > 0); if (__pyx_t_20) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1098 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1092 * self.extract_time = self.extract_time + extract_stop - extract_start * if len(extracts) > 0: * fcount = Counter() # <<<<<<<<<<<<<< * fphrases = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) * for (f, e, count, als), loc in extracts: */ - __pyx_t_11 = __Pyx_GetName(__pyx_m, __pyx_n_s__Counter); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1098; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = __Pyx_GetName(__pyx_m, __pyx_n_s__Counter); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1092; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_9 = PyObject_Call(__pyx_t_11, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1098; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_Call(__pyx_t_11, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1092; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_fcount); @@ -47999,23 +47960,23 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_fcount = __pyx_t_9; __pyx_t_9 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1099 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1093 * if len(extracts) > 0: * fcount = Counter() * fphrases = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) # <<<<<<<<<<<<<< * for (f, e, count, als), loc in extracts: * fcount[f] += count */ - __pyx_t_9 = __Pyx_GetName(__pyx_m, __pyx_n_s__defaultdict); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __Pyx_GetName(__pyx_m, __pyx_n_s__defaultdict); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1093; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - __pyx_t_11 = __Pyx_CyFunction_NewEx(&__pyx_mdef_3_sa_23HieroCachingRuleFactory_5input_lambda1, 0, NULL, __pyx_n_s___sa, NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = __Pyx_CyFunction_NewEx(&__pyx_mdef_3_sa_23HieroCachingRuleFactory_5input_lambda1, 0, NULL, __pyx_n_s___sa, NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1093; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1093; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_11); __Pyx_GIVEREF(__pyx_t_11); __pyx_t_11 = 0; - __pyx_t_11 = PyObject_Call(__pyx_t_9, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyObject_Call(__pyx_t_9, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1093; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0; @@ -48025,7 +47986,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_fphrases = __pyx_t_11; __pyx_t_11 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1100 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1094 * fcount = Counter() * fphrases = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) * for (f, e, count, als), loc in extracts: # <<<<<<<<<<<<<< @@ -48036,9 +47997,9 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene for (;;) { if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_11)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_3 = PyList_GET_ITEM(__pyx_t_11, __pyx_t_5); __Pyx_INCREF(__pyx_t_3); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_11, __pyx_t_5); __Pyx_INCREF(__pyx_t_3); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1094; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_11, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PySequence_ITEM(__pyx_t_11, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1094; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif if ((likely(PyTuple_CheckExact(__pyx_t_3))) || (PyList_CheckExact(__pyx_t_3))) { PyObject* sequence = __pyx_t_3; @@ -48050,7 +48011,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene if (unlikely(size != 2)) { if (size > 2) __Pyx_RaiseTooManyValuesError(2); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1094; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyTuple_CheckExact(sequence))) { @@ -48063,14 +48024,14 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_INCREF(__pyx_t_9); __Pyx_INCREF(__pyx_t_15); #else - __pyx_t_9 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_15 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1094; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1094; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { Py_ssize_t index = -1; - __pyx_t_16 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1094; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_18 = Py_TYPE(__pyx_t_16)->tp_iternext; @@ -48078,7 +48039,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_GOTREF(__pyx_t_9); index = 1; __pyx_t_15 = __pyx_t_18(__pyx_t_16); if (unlikely(!__pyx_t_15)) goto __pyx_L50_unpacking_failed; __Pyx_GOTREF(__pyx_t_15); - if (__Pyx_IternextUnpackEndCheck(__pyx_t_18(__pyx_t_16), 2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_IternextUnpackEndCheck(__pyx_t_18(__pyx_t_16), 2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1094; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_18 = NULL; __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; goto __pyx_L51_unpacking_done; @@ -48086,7 +48047,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __pyx_t_18 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1094; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_L51_unpacking_done:; } if ((likely(PyTuple_CheckExact(__pyx_t_9))) || (PyList_CheckExact(__pyx_t_9))) { @@ -48099,7 +48060,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene if (unlikely(size != 4)) { if (size > 4) __Pyx_RaiseTooManyValuesError(4); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1094; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyTuple_CheckExact(sequence))) { @@ -48121,7 +48082,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene Py_ssize_t i; PyObject** temps[4] = {&__pyx_t_16,&__pyx_t_2,&__pyx_t_10,&__pyx_t_7}; for (i=0; i < 4; i++) { - PyObject* item = PySequence_ITEM(sequence, i); if (unlikely(!item)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + PyObject* item = PySequence_ITEM(sequence, i); if (unlikely(!item)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1094; __pyx_clineno = __LINE__; goto __pyx_L1_error;} *(temps[i]) = item; } #endif @@ -48130,7 +48091,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene { Py_ssize_t index = -1; PyObject** temps[4] = {&__pyx_t_16,&__pyx_t_2,&__pyx_t_10,&__pyx_t_7}; - __pyx_t_14 = PyObject_GetIter(__pyx_t_9); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyObject_GetIter(__pyx_t_9); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1094; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __pyx_t_18 = Py_TYPE(__pyx_t_14)->tp_iternext; @@ -48139,7 +48100,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_GOTREF(item); *(temps[index]) = item; } - if (__Pyx_IternextUnpackEndCheck(__pyx_t_18(__pyx_t_14), 4) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_IternextUnpackEndCheck(__pyx_t_18(__pyx_t_14), 4) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1094; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_18 = NULL; __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; goto __pyx_L53_unpacking_done; @@ -48147,7 +48108,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_18 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1094; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_L53_unpacking_done:; } __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_f); @@ -48176,7 +48137,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_loc = __pyx_t_15; __pyx_t_15 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1101 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1095 * fphrases = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) * for (f, e, count, als), loc in extracts: * fcount[f] += count # <<<<<<<<<<<<<< @@ -48185,38 +48146,38 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene */ __Pyx_INCREF(__pyx_cur_scope->__pyx_v_f); __pyx_t_3 = __pyx_cur_scope->__pyx_v_f; - __pyx_t_15 = PyObject_GetItem(__pyx_cur_scope->__pyx_v_fcount, __pyx_t_3); if (!__pyx_t_15) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1101; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyObject_GetItem(__pyx_cur_scope->__pyx_v_fcount, __pyx_t_3); if (!__pyx_t_15) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1095; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_9 = PyNumber_InPlaceAdd(__pyx_t_15, __pyx_cur_scope->__pyx_v_count); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1101; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyNumber_InPlaceAdd(__pyx_t_15, __pyx_cur_scope->__pyx_v_count); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1095; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - if (PyObject_SetItem(__pyx_cur_scope->__pyx_v_fcount, __pyx_t_3, __pyx_t_9) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1101; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyObject_SetItem(__pyx_cur_scope->__pyx_v_fcount, __pyx_t_3, __pyx_t_9) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1095; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1102 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1096 * for (f, e, count, als), loc in extracts: * fcount[f] += count * fphrases[f][e][als].append(loc) # <<<<<<<<<<<<<< * for f, elist in fphrases.iteritems(): * for e, alslist in elist.iteritems(): */ - __pyx_t_3 = PyObject_GetItem(__pyx_cur_scope->__pyx_v_fphrases, __pyx_cur_scope->__pyx_v_f); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetItem(__pyx_cur_scope->__pyx_v_fphrases, __pyx_cur_scope->__pyx_v_f); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1096; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_9 = PyObject_GetItem(__pyx_t_3, __pyx_cur_scope->__pyx_v_e); if (!__pyx_t_9) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_GetItem(__pyx_t_3, __pyx_cur_scope->__pyx_v_e); if (!__pyx_t_9) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1096; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetItem(__pyx_t_9, __pyx_cur_scope->__pyx_v_als); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetItem(__pyx_t_9, __pyx_cur_scope->__pyx_v_als); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1096; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = __Pyx_PyObject_Append(__pyx_t_3, __pyx_cur_scope->__pyx_v_loc); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __Pyx_PyObject_Append(__pyx_t_3, __pyx_cur_scope->__pyx_v_loc); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1096; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; } __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1103 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1097 * fcount[f] += count * fphrases[f][e][als].append(loc) * for f, elist in fphrases.iteritems(): # <<<<<<<<<<<<<< @@ -48226,9 +48187,9 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_5 = 0; if (unlikely(__pyx_cur_scope->__pyx_v_fphrases == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "iteritems"); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1097; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __pyx_t_9 = __Pyx_dict_iterator(__pyx_cur_scope->__pyx_v_fphrases, 0, ((PyObject *)__pyx_n_s__iteritems), (&__pyx_t_23), (&__pyx_t_19)); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __Pyx_dict_iterator(__pyx_cur_scope->__pyx_v_fphrases, 0, ((PyObject *)__pyx_n_s__iteritems), (&__pyx_t_23), (&__pyx_t_19)); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1097; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_XDECREF(__pyx_t_11); __pyx_t_11 = __pyx_t_9; @@ -48236,7 +48197,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene while (1) { __pyx_t_6 = __Pyx_dict_iter_next(__pyx_t_11, __pyx_t_23, &__pyx_t_5, &__pyx_t_9, &__pyx_t_3, NULL, __pyx_t_19); if (unlikely(__pyx_t_6 == 0)) break; - if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1097; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_GOTREF(__pyx_t_3); __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_f); @@ -48250,7 +48211,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_elist = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1104 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1098 * fphrases[f][e][als].append(loc) * for f, elist in fphrases.iteritems(): * for e, alslist in elist.iteritems(): # <<<<<<<<<<<<<< @@ -48260,9 +48221,9 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_24 = 0; if (unlikely(__pyx_cur_scope->__pyx_v_elist == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%s'", "iteritems"); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1098; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __pyx_t_9 = __Pyx_dict_iterator(__pyx_cur_scope->__pyx_v_elist, 0, ((PyObject *)__pyx_n_s__iteritems), (&__pyx_t_25), (&__pyx_t_6)); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __Pyx_dict_iterator(__pyx_cur_scope->__pyx_v_elist, 0, ((PyObject *)__pyx_n_s__iteritems), (&__pyx_t_25), (&__pyx_t_6)); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1098; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = __pyx_t_9; @@ -48270,7 +48231,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene while (1) { __pyx_t_4 = __Pyx_dict_iter_next(__pyx_t_3, __pyx_t_25, &__pyx_t_24, &__pyx_t_9, &__pyx_t_15, NULL, __pyx_t_6); if (unlikely(__pyx_t_4 == 0)) break; - if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(__pyx_t_4 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1098; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_GOTREF(__pyx_t_15); __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_e); @@ -48284,30 +48245,30 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_alslist = __pyx_t_15; __pyx_t_15 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1105 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1099 * for f, elist in fphrases.iteritems(): * for e, alslist in elist.iteritems(): * alignment, max_locs = max(alslist.iteritems(), key=lambda x: len(x[1])) # <<<<<<<<<<<<<< * locs = tuple(itertools.chain.from_iterable(alslist.itervalues())) * count = len(locs) */ - __pyx_t_15 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_alslist, __pyx_n_s__iteritems); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_alslist, __pyx_n_s__iteritems); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_9 = PyObject_Call(__pyx_t_15, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_Call(__pyx_t_15, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - __pyx_t_15 = PyTuple_New(1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyTuple_New(1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_t_9); __Pyx_GIVEREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = PyDict_New(); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyDict_New(); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_9)); - __pyx_t_7 = __Pyx_CyFunction_NewEx(&__pyx_mdef_3_sa_23HieroCachingRuleFactory_5input_1lambda3, 0, NULL, __pyx_n_s___sa, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_CyFunction_NewEx(&__pyx_mdef_3_sa_23HieroCachingRuleFactory_5input_1lambda3, 0, NULL, __pyx_n_s___sa, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - if (PyDict_SetItem(__pyx_t_9, ((PyObject *)__pyx_n_s__key), __pyx_t_7) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_9, ((PyObject *)__pyx_n_s__key), __pyx_t_7) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = PyObject_Call(__pyx_builtin_max, ((PyObject *)__pyx_t_15), ((PyObject *)__pyx_t_9)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_Call(__pyx_builtin_max, ((PyObject *)__pyx_t_15), ((PyObject *)__pyx_t_9)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(((PyObject *)__pyx_t_15)); __pyx_t_15 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_9)); __pyx_t_9 = 0; @@ -48321,7 +48282,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene if (unlikely(size != 2)) { if (size > 2) __Pyx_RaiseTooManyValuesError(2); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyTuple_CheckExact(sequence))) { @@ -48334,14 +48295,14 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_INCREF(__pyx_t_9); __Pyx_INCREF(__pyx_t_15); #else - __pyx_t_9 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_15 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; } else { Py_ssize_t index = -1; - __pyx_t_10 = PyObject_GetIter(__pyx_t_7); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_GetIter(__pyx_t_7); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __pyx_t_18 = Py_TYPE(__pyx_t_10)->tp_iternext; @@ -48349,7 +48310,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_GOTREF(__pyx_t_9); index = 1; __pyx_t_15 = __pyx_t_18(__pyx_t_10); if (unlikely(!__pyx_t_15)) goto __pyx_L58_unpacking_failed; __Pyx_GOTREF(__pyx_t_15); - if (__Pyx_IternextUnpackEndCheck(__pyx_t_18(__pyx_t_10), 2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_IternextUnpackEndCheck(__pyx_t_18(__pyx_t_10), 2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_18 = NULL; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; goto __pyx_L59_unpacking_done; @@ -48357,7 +48318,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __pyx_t_18 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_L59_unpacking_done:; } __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_alignment); @@ -48371,41 +48332,41 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_max_locs = __pyx_t_15; __pyx_t_15 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1106 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1100 * for e, alslist in elist.iteritems(): * alignment, max_locs = max(alslist.iteritems(), key=lambda x: len(x[1])) * locs = tuple(itertools.chain.from_iterable(alslist.itervalues())) # <<<<<<<<<<<<<< * count = len(locs) * scores = self.scorer.score(FeatureContext( */ - __pyx_t_7 = __Pyx_GetName(__pyx_m, __pyx_n_s__itertools); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1106; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_GetName(__pyx_m, __pyx_n_s__itertools); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_15 = PyObject_GetAttr(__pyx_t_7, __pyx_n_s__chain); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1106; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyObject_GetAttr(__pyx_t_7, __pyx_n_s__chain); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = PyObject_GetAttr(__pyx_t_15, __pyx_n_s__from_iterable); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1106; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_GetAttr(__pyx_t_15, __pyx_n_s__from_iterable); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - __pyx_t_15 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_alslist, __pyx_n_s__itervalues); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1106; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_alslist, __pyx_n_s__itervalues); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_9 = PyObject_Call(__pyx_t_15, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1106; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_Call(__pyx_t_15, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - __pyx_t_15 = PyTuple_New(1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1106; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyTuple_New(1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_t_9); __Pyx_GIVEREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = PyObject_Call(__pyx_t_7, ((PyObject *)__pyx_t_15), NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1106; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_Call(__pyx_t_7, ((PyObject *)__pyx_t_15), NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_15)); __pyx_t_15 = 0; - __pyx_t_15 = PyTuple_New(1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1106; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyTuple_New(1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_t_9); __Pyx_GIVEREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = PyObject_Call(((PyObject *)((PyObject*)(&PyTuple_Type))), ((PyObject *)__pyx_t_15), NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1106; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_Call(((PyObject *)((PyObject*)(&PyTuple_Type))), ((PyObject *)__pyx_t_15), NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1100; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(((PyObject *)__pyx_t_15)); __pyx_t_15 = 0; __Pyx_XGOTREF(((PyObject *)__pyx_cur_scope->__pyx_v_locs)); @@ -48414,15 +48375,15 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_locs = ((PyObject*)__pyx_t_9); __pyx_t_9 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1107 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1101 * alignment, max_locs = max(alslist.iteritems(), key=lambda x: len(x[1])) * locs = tuple(itertools.chain.from_iterable(alslist.itervalues())) * count = len(locs) # <<<<<<<<<<<<<< * scores = self.scorer.score(FeatureContext( * f, e, count, fcount[f], num_samples, */ - __pyx_t_26 = PyTuple_GET_SIZE(((PyObject *)__pyx_cur_scope->__pyx_v_locs)); if (unlikely(__pyx_t_26 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1107; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_9 = PyInt_FromSsize_t(__pyx_t_26); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1107; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_26 = PyTuple_GET_SIZE(((PyObject *)__pyx_cur_scope->__pyx_v_locs)); if (unlikely(__pyx_t_26 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1101; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyInt_FromSsize_t(__pyx_t_26); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1101; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_count); __Pyx_XDECREF(__pyx_cur_scope->__pyx_v_count); @@ -48430,43 +48391,43 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_count = __pyx_t_9; __pyx_t_9 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1108 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1102 * locs = tuple(itertools.chain.from_iterable(alslist.itervalues())) * count = len(locs) * scores = self.scorer.score(FeatureContext( # <<<<<<<<<<<<<< * f, e, count, fcount[f], num_samples, * (k,i+spanlen), locs, input_match, */ - __pyx_t_9 = __Pyx_GetName(__pyx_m, __pyx_n_s__FeatureContext); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1108; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __Pyx_GetName(__pyx_m, __pyx_n_s__FeatureContext); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1109 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1103 * count = len(locs) * scores = self.scorer.score(FeatureContext( * f, e, count, fcount[f], num_samples, # <<<<<<<<<<<<<< * (k,i+spanlen), locs, input_match, * fwords, self.fda, self.eda, */ - __pyx_t_15 = PyObject_GetItem(__pyx_cur_scope->__pyx_v_fcount, __pyx_cur_scope->__pyx_v_f); if (!__pyx_t_15) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyObject_GetItem(__pyx_cur_scope->__pyx_v_fcount, __pyx_cur_scope->__pyx_v_f); if (!__pyx_t_15) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_7 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_num_samples); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_num_samples); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1110 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1104 * scores = self.scorer.score(FeatureContext( * f, e, count, fcount[f], num_samples, * (k,i+spanlen), locs, input_match, # <<<<<<<<<<<<<< * fwords, self.fda, self.eda, * meta)) */ - __pyx_t_10 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_k); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1110; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_k); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_2 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1110; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_16 = PyNumber_Add(__pyx_t_2, __pyx_cur_scope->__pyx_v_spanlen); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1110; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyNumber_Add(__pyx_t_2, __pyx_cur_scope->__pyx_v_spanlen); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1110; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1104; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_10); __Pyx_GIVEREF(__pyx_t_10); @@ -48475,14 +48436,14 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_10 = 0; __pyx_t_16 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1112 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1106 * (k,i+spanlen), locs, input_match, * fwords, self.fda, self.eda, * meta)) # <<<<<<<<<<<<<< * yield Rule(self.category, f, e, scores, alignment) * */ - __pyx_t_16 = PyTuple_New(12); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1108; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(12); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __Pyx_INCREF(__pyx_cur_scope->__pyx_v_f); PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_cur_scope->__pyx_v_f); @@ -48520,11 +48481,11 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_15 = 0; __pyx_t_7 = 0; __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_t_9, ((PyObject *)__pyx_t_16), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1108; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(__pyx_t_9, ((PyObject *)__pyx_t_16), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_16)); __pyx_t_16 = 0; - __pyx_t_16 = ((PyObject *)((struct __pyx_vtabstruct_3_sa_Scorer *)__pyx_cur_scope->__pyx_v_self->scorer->__pyx_vtab)->score(__pyx_cur_scope->__pyx_v_self->scorer, __pyx_t_2)); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1108; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = ((PyObject *)((struct __pyx_vtabstruct_3_sa_Scorer *)__pyx_cur_scope->__pyx_v_self->scorer->__pyx_vtab)->score(__pyx_cur_scope->__pyx_v_self->scorer, __pyx_t_2)); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1102; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_XGOTREF(((PyObject *)__pyx_cur_scope->__pyx_v_scores)); @@ -48533,16 +48494,16 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_scores = ((struct __pyx_obj_3_sa_FeatureVector *)__pyx_t_16); __pyx_t_16 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1113 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1107 * fwords, self.fda, self.eda, * meta)) * yield Rule(self.category, f, e, scores, alignment) # <<<<<<<<<<<<<< * * if len(phrase) < self.max_length and i+spanlen < len(fwords) and pathlen+1 <= self.max_initial_size: */ - __pyx_t_16 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_self->category); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1113; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_self->category); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1107; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); - __pyx_t_2 = PyTuple_New(5); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1113; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(5); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1107; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_16); __Pyx_GIVEREF(__pyx_t_16); @@ -48559,7 +48520,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene PyTuple_SET_ITEM(__pyx_t_2, 4, __pyx_cur_scope->__pyx_v_alignment); __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_alignment); __pyx_t_16 = 0; - __pyx_t_16 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Rule)), ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1113; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Rule)), ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1107; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; __pyx_r = __pyx_t_16; @@ -48599,7 +48560,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_23 = __pyx_cur_scope->__pyx_t_7; __pyx_t_24 = __pyx_cur_scope->__pyx_t_8; __pyx_t_25 = __pyx_cur_scope->__pyx_t_9; - if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1113; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1107; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } @@ -48614,38 +48575,38 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } __pyx_L32:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1115 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1109 * yield Rule(self.category, f, e, scores, alignment) * * if len(phrase) < self.max_length and i+spanlen < len(fwords) and pathlen+1 <= self.max_initial_size: # <<<<<<<<<<<<<< * for alt_id in range(len(fwords[i+spanlen])): * new_frontier.append((k, i+spanlen, input_match, alt_id, pathlen + 1, node, phrase, is_shadow_path)) */ - __pyx_t_23 = PyObject_Length(__pyx_cur_scope->__pyx_v_phrase); if (unlikely(__pyx_t_23 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_23 = PyObject_Length(__pyx_cur_scope->__pyx_v_phrase); if (unlikely(__pyx_t_23 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_20 = (__pyx_t_23 < __pyx_cur_scope->__pyx_v_self->max_length); if (__pyx_t_20) { - __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_3 = PyNumber_Add(__pyx_t_11, __pyx_cur_scope->__pyx_v_spanlen); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyNumber_Add(__pyx_t_11, __pyx_cur_scope->__pyx_v_spanlen); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - __pyx_t_23 = PyObject_Length(__pyx_cur_scope->__pyx_v_fwords); if (unlikely(__pyx_t_23 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_11 = PyInt_FromSsize_t(__pyx_t_23); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_23 = PyObject_Length(__pyx_cur_scope->__pyx_v_fwords); if (unlikely(__pyx_t_23 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyInt_FromSsize_t(__pyx_t_23); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_16 = PyObject_RichCompare(__pyx_t_3, __pyx_t_11, Py_LT); __Pyx_XGOTREF(__pyx_t_16); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyObject_RichCompare(__pyx_t_3, __pyx_t_11, Py_LT); __Pyx_XGOTREF(__pyx_t_16); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_16); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_16); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; if (__pyx_t_8) { - __pyx_t_16 = PyNumber_Add(__pyx_cur_scope->__pyx_v_pathlen, __pyx_int_1); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyNumber_Add(__pyx_cur_scope->__pyx_v_pathlen, __pyx_int_1); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); - __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_self->max_initial_size); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_self->max_initial_size); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_3 = PyObject_RichCompare(__pyx_t_16, __pyx_t_11, Py_LE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_RichCompare(__pyx_t_16, __pyx_t_11, Py_LE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - __pyx_t_27 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_27 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_27 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_27 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1109; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_t_28 = __pyx_t_27; } else { @@ -48657,45 +48618,45 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1116 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1110 * * if len(phrase) < self.max_length and i+spanlen < len(fwords) and pathlen+1 <= self.max_initial_size: * for alt_id in range(len(fwords[i+spanlen])): # <<<<<<<<<<<<<< * new_frontier.append((k, i+spanlen, input_match, alt_id, pathlen + 1, node, phrase, is_shadow_path)) * num_subpatterns = arity */ - __pyx_t_3 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1116; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1110; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_11 = PyNumber_Add(__pyx_t_3, __pyx_cur_scope->__pyx_v_spanlen); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1116; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyNumber_Add(__pyx_t_3, __pyx_cur_scope->__pyx_v_spanlen); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1110; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = PyObject_GetItem(__pyx_cur_scope->__pyx_v_fwords, __pyx_t_11); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1116; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_GetItem(__pyx_cur_scope->__pyx_v_fwords, __pyx_t_11); if (!__pyx_t_3) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1110; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - __pyx_t_23 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_23 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1116; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_23 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_23 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1110; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_23; __pyx_t_19+=1) { __pyx_cur_scope->__pyx_v_alt_id = __pyx_t_19; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1117 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1111 * if len(phrase) < self.max_length and i+spanlen < len(fwords) and pathlen+1 <= self.max_initial_size: * for alt_id in range(len(fwords[i+spanlen])): * new_frontier.append((k, i+spanlen, input_match, alt_id, pathlen + 1, node, phrase, is_shadow_path)) # <<<<<<<<<<<<<< * num_subpatterns = arity * if not is_shadow_path: */ - __pyx_t_3 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_k); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_k); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1111; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1111; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_16 = PyNumber_Add(__pyx_t_11, __pyx_cur_scope->__pyx_v_spanlen); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyNumber_Add(__pyx_t_11, __pyx_cur_scope->__pyx_v_spanlen); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1111; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_alt_id); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_alt_id); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1111; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_2 = PyNumber_Add(__pyx_cur_scope->__pyx_v_pathlen, __pyx_int_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Add(__pyx_cur_scope->__pyx_v_pathlen, __pyx_int_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1111; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_9 = PyTuple_New(8); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyTuple_New(8); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1111; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); PyTuple_SET_ITEM(__pyx_t_9, 0, __pyx_t_3); __Pyx_GIVEREF(__pyx_t_3); @@ -48721,11 +48682,11 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_16 = 0; __pyx_t_11 = 0; __pyx_t_2 = 0; - __pyx_t_12 = PyList_Append(__pyx_cur_scope->__pyx_v_new_frontier, ((PyObject *)__pyx_t_9)); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = PyList_Append(__pyx_cur_scope->__pyx_v_new_frontier, ((PyObject *)__pyx_t_9)); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1111; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(((PyObject *)__pyx_t_9)); __pyx_t_9 = 0; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1118 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1112 * for alt_id in range(len(fwords[i+spanlen])): * new_frontier.append((k, i+spanlen, input_match, alt_id, pathlen + 1, node, phrase, is_shadow_path)) * num_subpatterns = arity # <<<<<<<<<<<<<< @@ -48734,18 +48695,18 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene */ __pyx_cur_scope->__pyx_v_num_subpatterns = __pyx_cur_scope->__pyx_v_arity; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1119 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1113 * new_frontier.append((k, i+spanlen, input_match, alt_id, pathlen + 1, node, phrase, is_shadow_path)) * num_subpatterns = arity * if not is_shadow_path: # <<<<<<<<<<<<<< * num_subpatterns = num_subpatterns + 1 * if len(phrase)+1 < self.max_length and arity < self.max_nonterminals and num_subpatterns < self.max_chunks: */ - __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_cur_scope->__pyx_v_is_shadow_path); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1119; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_cur_scope->__pyx_v_is_shadow_path); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1113; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_20 = (!__pyx_t_8); if (__pyx_t_20) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1120 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1114 * num_subpatterns = arity * if not is_shadow_path: * num_subpatterns = num_subpatterns + 1 # <<<<<<<<<<<<<< @@ -48757,14 +48718,14 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } __pyx_L64:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1121 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1115 * if not is_shadow_path: * num_subpatterns = num_subpatterns + 1 * if len(phrase)+1 < self.max_length and arity < self.max_nonterminals and num_subpatterns < self.max_chunks: # <<<<<<<<<<<<<< * xcat = sym_setindex(self.category, arity+1) * xnode = node.children[xcat] */ - __pyx_t_23 = PyObject_Length(__pyx_cur_scope->__pyx_v_phrase); if (unlikely(__pyx_t_23 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1121; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_23 = PyObject_Length(__pyx_cur_scope->__pyx_v_phrase); if (unlikely(__pyx_t_23 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_20 = ((__pyx_t_23 + 1) < __pyx_cur_scope->__pyx_v_self->max_length); if (__pyx_t_20) { __pyx_t_8 = (__pyx_cur_scope->__pyx_v_arity < __pyx_cur_scope->__pyx_v_self->max_nonterminals); @@ -48780,7 +48741,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1122 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1116 * num_subpatterns = num_subpatterns + 1 * if len(phrase)+1 < self.max_length and arity < self.max_nonterminals and num_subpatterns < self.max_chunks: * xcat = sym_setindex(self.category, arity+1) # <<<<<<<<<<<<<< @@ -48789,16 +48750,16 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene */ __pyx_cur_scope->__pyx_v_xcat = __pyx_f_3_sa_sym_setindex(__pyx_cur_scope->__pyx_v_self->category, (__pyx_cur_scope->__pyx_v_arity + 1)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1123 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1117 * if len(phrase)+1 < self.max_length and arity < self.max_nonterminals and num_subpatterns < self.max_chunks: * xcat = sym_setindex(self.category, arity+1) * xnode = node.children[xcat] # <<<<<<<<<<<<<< * # I put spanlen=1 below * key = tuple([self.min_gap_size, i, 1, pathlen]) */ - __pyx_t_9 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__children); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1123; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_GetAttr(__pyx_cur_scope->__pyx_v_node, __pyx_n_s__children); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - __pyx_t_2 = __Pyx_GetItemInt(__pyx_t_9, __pyx_cur_scope->__pyx_v_xcat, sizeof(int), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1123; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetItemInt(__pyx_t_9, __pyx_cur_scope->__pyx_v_xcat, sizeof(int), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1117; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_xnode); @@ -48807,18 +48768,18 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_xnode = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1125 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1119 * xnode = node.children[xcat] * # I put spanlen=1 below * key = tuple([self.min_gap_size, i, 1, pathlen]) # <<<<<<<<<<<<<< * frontier_nodes = [] * if (key in nodes_isteps_away_buffer): */ - __pyx_t_2 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_self->min_gap_size); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1125; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_self->min_gap_size); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1119; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_9 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1125; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1119; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - __pyx_t_11 = PyList_New(4); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1125; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyList_New(4); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1119; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); PyList_SET_ITEM(__pyx_t_11, 0, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); @@ -48832,7 +48793,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_pathlen); __pyx_t_2 = 0; __pyx_t_9 = 0; - __pyx_t_9 = ((PyObject *)PyList_AsTuple(__pyx_t_11)); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1125; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = ((PyObject *)PyList_AsTuple(__pyx_t_11)); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1119; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_9)); __Pyx_DECREF(((PyObject *)__pyx_t_11)); __pyx_t_11 = 0; __Pyx_XGOTREF(((PyObject *)__pyx_cur_scope->__pyx_v_key)); @@ -48841,14 +48802,14 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_key = __pyx_t_9; __pyx_t_9 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1126 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1120 * # I put spanlen=1 below * key = tuple([self.min_gap_size, i, 1, pathlen]) * frontier_nodes = [] # <<<<<<<<<<<<<< * if (key in nodes_isteps_away_buffer): * frontier_nodes = nodes_isteps_away_buffer[key] */ - __pyx_t_9 = PyList_New(0); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1126; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyList_New(0); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1120; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_XGOTREF(__pyx_cur_scope->__pyx_v_frontier_nodes); __Pyx_XDECREF(__pyx_cur_scope->__pyx_v_frontier_nodes); @@ -48856,24 +48817,24 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_frontier_nodes = ((PyObject *)__pyx_t_9); __pyx_t_9 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1127 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1121 * key = tuple([self.min_gap_size, i, 1, pathlen]) * frontier_nodes = [] * if (key in nodes_isteps_away_buffer): # <<<<<<<<<<<<<< * frontier_nodes = nodes_isteps_away_buffer[key] * else: */ - __pyx_t_8 = (__Pyx_PyDict_Contains(((PyObject *)__pyx_cur_scope->__pyx_v_key), ((PyObject *)__pyx_cur_scope->__pyx_v_nodes_isteps_away_buffer), Py_EQ)); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = (__Pyx_PyDict_Contains(((PyObject *)__pyx_cur_scope->__pyx_v_key), ((PyObject *)__pyx_cur_scope->__pyx_v_nodes_isteps_away_buffer), Py_EQ)); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1121; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1128 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1122 * frontier_nodes = [] * if (key in nodes_isteps_away_buffer): * frontier_nodes = nodes_isteps_away_buffer[key] # <<<<<<<<<<<<<< * else: * frontier_nodes = self.get_all_nodes_isteps_away(self.min_gap_size, i, 1, pathlen, fwords, next_states, reachable_buffer) */ - __pyx_t_9 = __Pyx_PyDict_GetItem(((PyObject *)__pyx_cur_scope->__pyx_v_nodes_isteps_away_buffer), ((PyObject *)__pyx_cur_scope->__pyx_v_key)); if (!__pyx_t_9) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1128; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __Pyx_PyDict_GetItem(((PyObject *)__pyx_cur_scope->__pyx_v_nodes_isteps_away_buffer), ((PyObject *)__pyx_cur_scope->__pyx_v_key)); if (!__pyx_t_9) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1122; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_GOTREF(__pyx_cur_scope->__pyx_v_frontier_nodes); __Pyx_DECREF(__pyx_cur_scope->__pyx_v_frontier_nodes); @@ -48884,20 +48845,20 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1130 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1124 * frontier_nodes = nodes_isteps_away_buffer[key] * else: * frontier_nodes = self.get_all_nodes_isteps_away(self.min_gap_size, i, 1, pathlen, fwords, next_states, reachable_buffer) # <<<<<<<<<<<<<< * nodes_isteps_away_buffer[key] = frontier_nodes * */ - __pyx_t_9 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_self), __pyx_n_s_123); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1130; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_GetAttr(((PyObject *)__pyx_cur_scope->__pyx_v_self), __pyx_n_s_123); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1124; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_self->min_gap_size); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1130; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_self->min_gap_size); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1124; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_2 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1130; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1124; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_16 = PyTuple_New(7); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1130; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(7); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1124; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_11); __Pyx_GIVEREF(__pyx_t_11); @@ -48920,7 +48881,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_GIVEREF(((PyObject *)__pyx_cur_scope->__pyx_v_reachable_buffer)); __pyx_t_11 = 0; __pyx_t_2 = 0; - __pyx_t_2 = PyObject_Call(__pyx_t_9, ((PyObject *)__pyx_t_16), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1130; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(__pyx_t_9, ((PyObject *)__pyx_t_16), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1124; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_16)); __pyx_t_16 = 0; @@ -48930,18 +48891,18 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_frontier_nodes = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1131 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1125 * else: * frontier_nodes = self.get_all_nodes_isteps_away(self.min_gap_size, i, 1, pathlen, fwords, next_states, reachable_buffer) * nodes_isteps_away_buffer[key] = frontier_nodes # <<<<<<<<<<<<<< * * for (i, alt, pathlen) in frontier_nodes: */ - if (PyDict_SetItem(((PyObject *)__pyx_cur_scope->__pyx_v_nodes_isteps_away_buffer), ((PyObject *)__pyx_cur_scope->__pyx_v_key), __pyx_cur_scope->__pyx_v_frontier_nodes) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1131; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(((PyObject *)__pyx_cur_scope->__pyx_v_nodes_isteps_away_buffer), ((PyObject *)__pyx_cur_scope->__pyx_v_key), __pyx_cur_scope->__pyx_v_frontier_nodes) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1125; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_L66:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1133 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1127 * nodes_isteps_away_buffer[key] = frontier_nodes * * for (i, alt, pathlen) in frontier_nodes: # <<<<<<<<<<<<<< @@ -48952,7 +48913,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_2 = __pyx_cur_scope->__pyx_v_frontier_nodes; __Pyx_INCREF(__pyx_t_2); __pyx_t_23 = 0; __pyx_t_21 = NULL; } else { - __pyx_t_23 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_cur_scope->__pyx_v_frontier_nodes); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_23 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_cur_scope->__pyx_v_frontier_nodes); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_t_21 = Py_TYPE(__pyx_t_2)->tp_iternext; } @@ -48960,23 +48921,23 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene if (!__pyx_t_21 && PyList_CheckExact(__pyx_t_2)) { if (__pyx_t_23 >= PyList_GET_SIZE(__pyx_t_2)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_16 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_23); __Pyx_INCREF(__pyx_t_16); __pyx_t_23++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_23); __Pyx_INCREF(__pyx_t_16); __pyx_t_23++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_16 = PySequence_ITEM(__pyx_t_2, __pyx_t_23); __pyx_t_23++; if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PySequence_ITEM(__pyx_t_2, __pyx_t_23); __pyx_t_23++; if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else if (!__pyx_t_21 && PyTuple_CheckExact(__pyx_t_2)) { if (__pyx_t_23 >= PyTuple_GET_SIZE(__pyx_t_2)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_16 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_23); __Pyx_INCREF(__pyx_t_16); __pyx_t_23++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_23); __Pyx_INCREF(__pyx_t_16); __pyx_t_23++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_16 = PySequence_ITEM(__pyx_t_2, __pyx_t_23); __pyx_t_23++; if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PySequence_ITEM(__pyx_t_2, __pyx_t_23); __pyx_t_23++; if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { __pyx_t_16 = __pyx_t_21(__pyx_t_2); if (unlikely(!__pyx_t_16)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -48992,7 +48953,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene if (unlikely(size != 3)) { if (size > 3) __Pyx_RaiseTooManyValuesError(3); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyTuple_CheckExact(sequence))) { @@ -49008,15 +48969,15 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_INCREF(__pyx_t_11); __Pyx_INCREF(__pyx_t_3); #else - __pyx_t_9 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_11 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_3 = PySequence_ITEM(sequence, 2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PySequence_ITEM(sequence, 2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; } else { Py_ssize_t index = -1; - __pyx_t_7 = PyObject_GetIter(__pyx_t_16); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_GetIter(__pyx_t_16); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __pyx_t_18 = Py_TYPE(__pyx_t_7)->tp_iternext; @@ -49026,7 +48987,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_GOTREF(__pyx_t_11); index = 2; __pyx_t_3 = __pyx_t_18(__pyx_t_7); if (unlikely(!__pyx_t_3)) goto __pyx_L69_unpacking_failed; __Pyx_GOTREF(__pyx_t_3); - if (__Pyx_IternextUnpackEndCheck(__pyx_t_18(__pyx_t_7), 3) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_IternextUnpackEndCheck(__pyx_t_18(__pyx_t_7), 3) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_18 = NULL; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; goto __pyx_L70_unpacking_done; @@ -49034,12 +48995,12 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __pyx_t_18 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_L70_unpacking_done:; } - __pyx_t_19 = __Pyx_PyInt_AsInt(__pyx_t_9); if (unlikely((__pyx_t_19 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_19 = __Pyx_PyInt_AsInt(__pyx_t_9); if (unlikely((__pyx_t_19 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_6 = __Pyx_PyInt_AsInt(__pyx_t_11); if (unlikely((__pyx_t_6 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyInt_AsInt(__pyx_t_11); if (unlikely((__pyx_t_6 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1127; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; __pyx_cur_scope->__pyx_v_i = __pyx_t_19; __pyx_cur_scope->__pyx_v_alt = __pyx_t_6; @@ -49049,40 +49010,40 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_pathlen = __pyx_t_3; __pyx_t_3 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1134 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1128 * * for (i, alt, pathlen) in frontier_nodes: * new_frontier.append((k, i, input_match + (i,), alt, pathlen, xnode, phrase +(xcat,), is_shadow_path)) # <<<<<<<<<<<<<< * frontier = new_frontier * */ - __pyx_t_16 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_k); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_k); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1128; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); - __pyx_t_3 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1128; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); - __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_i); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1128; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_9 = PyTuple_New(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyTuple_New(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1128; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); PyTuple_SET_ITEM(__pyx_t_9, 0, __pyx_t_11); __Pyx_GIVEREF(__pyx_t_11); __pyx_t_11 = 0; - __pyx_t_11 = PyNumber_Add(__pyx_cur_scope->__pyx_v_input_match, ((PyObject *)__pyx_t_9)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyNumber_Add(__pyx_cur_scope->__pyx_v_input_match, ((PyObject *)__pyx_t_9)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1128; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(((PyObject *)__pyx_t_9)); __pyx_t_9 = 0; - __pyx_t_9 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_alt); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_alt); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1128; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); - __pyx_t_7 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_xcat); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyInt_FromLong(__pyx_cur_scope->__pyx_v_xcat); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1128; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_15 = PyTuple_New(1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyTuple_New(1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1128; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_t_7); __Pyx_GIVEREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = PyNumber_Add(__pyx_cur_scope->__pyx_v_phrase, ((PyObject *)__pyx_t_15)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyNumber_Add(__pyx_cur_scope->__pyx_v_phrase, ((PyObject *)__pyx_t_15)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1128; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(((PyObject *)__pyx_t_15)); __pyx_t_15 = 0; - __pyx_t_15 = PyTuple_New(8); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyTuple_New(8); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1128; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_t_16); __Pyx_GIVEREF(__pyx_t_16); @@ -49108,7 +49069,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_t_11 = 0; __pyx_t_9 = 0; __pyx_t_7 = 0; - __pyx_t_12 = PyList_Append(__pyx_cur_scope->__pyx_v_new_frontier, ((PyObject *)__pyx_t_15)); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = PyList_Append(__pyx_cur_scope->__pyx_v_new_frontier, ((PyObject *)__pyx_t_15)); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1128; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(((PyObject *)__pyx_t_15)); __pyx_t_15 = 0; } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; @@ -49122,7 +49083,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene } __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1135 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1129 * for (i, alt, pathlen) in frontier_nodes: * new_frontier.append((k, i, input_match + (i,), alt, pathlen, xnode, phrase +(xcat,), is_shadow_path)) * frontier = new_frontier # <<<<<<<<<<<<<< @@ -49136,37 +49097,37 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene __pyx_cur_scope->__pyx_v_frontier = __pyx_cur_scope->__pyx_v_new_frontier; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1137 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1131 * frontier = new_frontier * * stop_time = monitor_cpu() # <<<<<<<<<<<<<< * logger.info("Total time for rule lookup, extraction, and scoring = %f seconds", (stop_time - start_time)) * gc.collect() */ - __pyx_t_13 = PyFloat_FromDouble(__pyx_f_3_sa_monitor_cpu()); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1137; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyFloat_FromDouble(__pyx_f_3_sa_monitor_cpu()); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1131; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_13); __Pyx_GIVEREF(__pyx_t_13); __pyx_cur_scope->__pyx_v_stop_time = __pyx_t_13; __pyx_t_13 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1138 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1132 * * stop_time = monitor_cpu() * logger.info("Total time for rule lookup, extraction, and scoring = %f seconds", (stop_time - start_time)) # <<<<<<<<<<<<<< * gc.collect() * logger.info(" Extract time = %f seconds", self.extract_time) */ - __pyx_t_13 = __Pyx_GetName(__pyx_m, __pyx_n_s__logger); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1138; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_GetName(__pyx_m, __pyx_n_s__logger); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1132; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_13); - __pyx_t_2 = PyObject_GetAttr(__pyx_t_13, __pyx_n_s__info); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1138; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_GetAttr(__pyx_t_13, __pyx_n_s__info); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1132; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyFloat_FromDouble(__pyx_cur_scope->__pyx_v_start_time); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1138; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyFloat_FromDouble(__pyx_cur_scope->__pyx_v_start_time); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1132; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_13); - __pyx_t_15 = PyNumber_Subtract(__pyx_cur_scope->__pyx_v_stop_time, __pyx_t_13); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1138; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyNumber_Subtract(__pyx_cur_scope->__pyx_v_stop_time, __pyx_t_13); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1132; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyTuple_New(2); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1138; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyTuple_New(2); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1132; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_13); __Pyx_INCREF(((PyObject *)__pyx_kp_s_124)); PyTuple_SET_ITEM(__pyx_t_13, 0, ((PyObject *)__pyx_kp_s_124)); @@ -49174,44 +49135,44 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene PyTuple_SET_ITEM(__pyx_t_13, 1, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = 0; - __pyx_t_15 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_t_13), NULL); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1138; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_t_13), NULL); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1132; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_13)); __pyx_t_13 = 0; __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1139 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1133 * stop_time = monitor_cpu() * logger.info("Total time for rule lookup, extraction, and scoring = %f seconds", (stop_time - start_time)) * gc.collect() # <<<<<<<<<<<<<< * logger.info(" Extract time = %f seconds", self.extract_time) * */ - __pyx_t_15 = __Pyx_GetName(__pyx_m, __pyx_n_s__gc); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1139; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = __Pyx_GetName(__pyx_m, __pyx_n_s__gc); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_13 = PyObject_GetAttr(__pyx_t_15, __pyx_n_s__collect); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1139; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyObject_GetAttr(__pyx_t_15, __pyx_n_s__collect); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - __pyx_t_15 = PyObject_Call(__pyx_t_13, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1139; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyObject_Call(__pyx_t_13, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1133; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1140 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1134 * logger.info("Total time for rule lookup, extraction, and scoring = %f seconds", (stop_time - start_time)) * gc.collect() * logger.info(" Extract time = %f seconds", self.extract_time) # <<<<<<<<<<<<<< * * */ - __pyx_t_15 = __Pyx_GetName(__pyx_m, __pyx_n_s__logger); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = __Pyx_GetName(__pyx_m, __pyx_n_s__logger); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_13 = PyObject_GetAttr(__pyx_t_15, __pyx_n_s__info); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyObject_GetAttr(__pyx_t_15, __pyx_n_s__info); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - __pyx_t_15 = PyFloat_FromDouble(__pyx_cur_scope->__pyx_v_self->extract_time); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyFloat_FromDouble(__pyx_cur_scope->__pyx_v_self->extract_time); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(((PyObject *)__pyx_kp_s_125)); PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_kp_s_125)); @@ -49219,7 +49180,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = 0; - __pyx_t_15 = PyObject_Call(__pyx_t_13, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1140; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyObject_Call(__pyx_t_13, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1134; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; @@ -49247,7 +49208,7 @@ static PyObject *__pyx_gb_3_sa_23HieroCachingRuleFactory_24generator4(__pyx_Gene return NULL; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1143 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1137 * * * cdef int find_fixpoint(self, # <<<<<<<<<<<<<< @@ -49277,7 +49238,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj int __pyx_clineno = 0; __Pyx_RefNannySetupContext("find_fixpoint", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1158 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1152 * cdef int e_low_prev, e_high_prev, f_low_prev, f_high_prev, new_x, new_low_x, new_high_x * * e_low[0] = e_in_low # <<<<<<<<<<<<<< @@ -49286,7 +49247,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ (__pyx_v_e_low[0]) = __pyx_v_e_in_low; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1159 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1153 * * e_low[0] = e_in_low * e_high[0] = e_in_high # <<<<<<<<<<<<<< @@ -49295,19 +49256,19 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ (__pyx_v_e_high[0]) = __pyx_v_e_in_high; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1160 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1154 * e_low[0] = e_in_low * e_high[0] = e_in_high * self.find_projection(f_low, f_high, f_links_low, f_links_high, e_low, e_high) # <<<<<<<<<<<<<< * if e_low[0] == -1: * # low-priority corner case: if phrase w is unaligned, */ - __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_v_f_high); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1160; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_projection(__pyx_v_self, __pyx_v_f_low, __pyx_t_1, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_low, __pyx_v_e_high); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1160; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_v_f_high); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_projection(__pyx_v_self, __pyx_v_f_low, __pyx_t_1, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_low, __pyx_v_e_high); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1154; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1161 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1155 * e_high[0] = e_in_high * self.find_projection(f_low, f_high, f_links_low, f_links_high, e_low, e_high) * if e_low[0] == -1: # <<<<<<<<<<<<<< @@ -49317,7 +49278,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_3 = ((__pyx_v_e_low[0]) == -1); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1167 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1161 * # rule X -> X_1 w X_2 / X_1 X_2. This is probably * # not worth the bother, though. * return 0 # <<<<<<<<<<<<<< @@ -49329,7 +49290,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj goto __pyx_L3; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1168 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1162 * # not worth the bother, though. * return 0 * elif e_in_low != -1 and e_low[0] != e_in_low: # <<<<<<<<<<<<<< @@ -49345,7 +49306,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1169 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1163 * return 0 * elif e_in_low != -1 and e_low[0] != e_in_low: * if e_in_low - e_low[0] < min_ex_size: # <<<<<<<<<<<<<< @@ -49355,7 +49316,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_5 = ((__pyx_v_e_in_low - (__pyx_v_e_low[0])) < __pyx_v_min_ex_size); if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1170 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1164 * elif e_in_low != -1 and e_low[0] != e_in_low: * if e_in_low - e_low[0] < min_ex_size: * e_low[0] = e_in_low - min_ex_size # <<<<<<<<<<<<<< @@ -49364,7 +49325,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ (__pyx_v_e_low[0]) = (__pyx_v_e_in_low - __pyx_v_min_ex_size); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1171 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1165 * if e_in_low - e_low[0] < min_ex_size: * e_low[0] = e_in_low - min_ex_size * if e_low[0] < 0: # <<<<<<<<<<<<<< @@ -49374,7 +49335,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_5 = ((__pyx_v_e_low[0]) < 0); if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1172 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1166 * e_low[0] = e_in_low - min_ex_size * if e_low[0] < 0: * return 0 # <<<<<<<<<<<<<< @@ -49393,7 +49354,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1174 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1168 * return 0 * * if e_high[0] - e_low[0] > max_e_len: # <<<<<<<<<<<<<< @@ -49403,7 +49364,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_5 = (((__pyx_v_e_high[0]) - (__pyx_v_e_low[0])) > __pyx_v_max_e_len); if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1175 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1169 * * if e_high[0] - e_low[0] > max_e_len: * return 0 # <<<<<<<<<<<<<< @@ -49415,7 +49376,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj goto __pyx_L6; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1176 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1170 * if e_high[0] - e_low[0] > max_e_len: * return 0 * elif e_in_high != -1 and e_high[0] != e_in_high: # <<<<<<<<<<<<<< @@ -49431,7 +49392,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1177 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1171 * return 0 * elif e_in_high != -1 and e_high[0] != e_in_high: * if e_high[0] - e_in_high < min_ex_size: # <<<<<<<<<<<<<< @@ -49441,7 +49402,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_4 = (((__pyx_v_e_high[0]) - __pyx_v_e_in_high) < __pyx_v_min_ex_size); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1178 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1172 * elif e_in_high != -1 and e_high[0] != e_in_high: * if e_high[0] - e_in_high < min_ex_size: * e_high[0] = e_in_high + min_ex_size # <<<<<<<<<<<<<< @@ -49450,7 +49411,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ (__pyx_v_e_high[0]) = (__pyx_v_e_in_high + __pyx_v_min_ex_size); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1179 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1173 * if e_high[0] - e_in_high < min_ex_size: * e_high[0] = e_in_high + min_ex_size * if e_high[0] > e_sent_len: # <<<<<<<<<<<<<< @@ -49460,7 +49421,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_4 = ((__pyx_v_e_high[0]) > __pyx_v_e_sent_len); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1180 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1174 * e_high[0] = e_in_high + min_ex_size * if e_high[0] > e_sent_len: * return 0 # <<<<<<<<<<<<<< @@ -49479,7 +49440,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } __pyx_L6:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1182 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1176 * return 0 * * f_back_low[0] = -1 # <<<<<<<<<<<<<< @@ -49488,7 +49449,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ (__pyx_v_f_back_low[0]) = -1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1183 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1177 * * f_back_low[0] = -1 * f_back_high[0] = -1 # <<<<<<<<<<<<<< @@ -49497,7 +49458,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ (__pyx_v_f_back_high[0]) = -1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1184 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1178 * f_back_low[0] = -1 * f_back_high[0] = -1 * f_low_prev = f_low # <<<<<<<<<<<<<< @@ -49506,17 +49467,17 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ __pyx_v_f_low_prev = __pyx_v_f_low; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1185 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1179 * f_back_high[0] = -1 * f_low_prev = f_low * f_high_prev = f_high # <<<<<<<<<<<<<< * new_x = 0 * new_low_x = 0 */ - __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_v_f_high); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1185; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_v_f_high); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1179; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_f_high_prev = __pyx_t_1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1186 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1180 * f_low_prev = f_low * f_high_prev = f_high * new_x = 0 # <<<<<<<<<<<<<< @@ -49525,7 +49486,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ __pyx_v_new_x = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1187 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1181 * f_high_prev = f_high * new_x = 0 * new_low_x = 0 # <<<<<<<<<<<<<< @@ -49534,7 +49495,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ __pyx_v_new_low_x = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1188 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1182 * new_x = 0 * new_low_x = 0 * new_high_x = 0 # <<<<<<<<<<<<<< @@ -49543,7 +49504,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ __pyx_v_new_high_x = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1190 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1184 * new_high_x = 0 * * while True: # <<<<<<<<<<<<<< @@ -49553,7 +49514,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj while (1) { if (!1) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1192 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1186 * while True: * * if f_back_low[0] == -1: # <<<<<<<<<<<<<< @@ -49563,45 +49524,45 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_4 = ((__pyx_v_f_back_low[0]) == -1); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1193 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1187 * * if f_back_low[0] == -1: * self.find_projection(e_low[0], e_high[0], e_links_low, e_links_high, f_back_low, f_back_high) # <<<<<<<<<<<<<< * else: * self.find_projection(e_low[0], e_low_prev, e_links_low, e_links_high, f_back_low, f_back_high) */ - __pyx_t_2 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_projection(__pyx_v_self, (__pyx_v_e_low[0]), (__pyx_v_e_high[0]), __pyx_v_e_links_low, __pyx_v_e_links_high, __pyx_v_f_back_low, __pyx_v_f_back_high); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1193; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_projection(__pyx_v_self, (__pyx_v_e_low[0]), (__pyx_v_e_high[0]), __pyx_v_e_links_low, __pyx_v_e_links_high, __pyx_v_f_back_low, __pyx_v_f_back_high); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1187; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; goto __pyx_L11; } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1195 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1189 * self.find_projection(e_low[0], e_high[0], e_links_low, e_links_high, f_back_low, f_back_high) * else: * self.find_projection(e_low[0], e_low_prev, e_links_low, e_links_high, f_back_low, f_back_high) # <<<<<<<<<<<<<< * self.find_projection(e_high_prev, e_high[0], e_links_low, e_links_high, f_back_low, f_back_high) * */ - __pyx_t_2 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_projection(__pyx_v_self, (__pyx_v_e_low[0]), __pyx_v_e_low_prev, __pyx_v_e_links_low, __pyx_v_e_links_high, __pyx_v_f_back_low, __pyx_v_f_back_high); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1195; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_projection(__pyx_v_self, (__pyx_v_e_low[0]), __pyx_v_e_low_prev, __pyx_v_e_links_low, __pyx_v_e_links_high, __pyx_v_f_back_low, __pyx_v_f_back_high); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1189; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1196 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1190 * else: * self.find_projection(e_low[0], e_low_prev, e_links_low, e_links_high, f_back_low, f_back_high) * self.find_projection(e_high_prev, e_high[0], e_links_low, e_links_high, f_back_low, f_back_high) # <<<<<<<<<<<<<< * * if f_back_low[0] > f_low: */ - __pyx_t_2 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_projection(__pyx_v_self, __pyx_v_e_high_prev, (__pyx_v_e_high[0]), __pyx_v_e_links_low, __pyx_v_e_links_high, __pyx_v_f_back_low, __pyx_v_f_back_high); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1196; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_projection(__pyx_v_self, __pyx_v_e_high_prev, (__pyx_v_e_high[0]), __pyx_v_e_links_low, __pyx_v_e_links_high, __pyx_v_f_back_low, __pyx_v_f_back_high); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; } __pyx_L11:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1198 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1192 * self.find_projection(e_high_prev, e_high[0], e_links_low, e_links_high, f_back_low, f_back_high) * * if f_back_low[0] > f_low: # <<<<<<<<<<<<<< @@ -49611,7 +49572,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_4 = ((__pyx_v_f_back_low[0]) > __pyx_v_f_low); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1199 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1193 * * if f_back_low[0] > f_low: * f_back_low[0] = f_low # <<<<<<<<<<<<<< @@ -49623,35 +49584,35 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } __pyx_L12:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1201 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1195 * f_back_low[0] = f_low * * if f_back_high[0] < f_high: # <<<<<<<<<<<<<< * f_back_high[0] = f_high * */ - __pyx_t_2 = PyInt_FromLong((__pyx_v_f_back_high[0])); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1201; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong((__pyx_v_f_back_high[0])); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1195; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_6 = PyObject_RichCompare(__pyx_t_2, __pyx_v_f_high, Py_LT); __Pyx_XGOTREF(__pyx_t_6); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1201; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyObject_RichCompare(__pyx_t_2, __pyx_v_f_high, Py_LT); __Pyx_XGOTREF(__pyx_t_6); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1195; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1201; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1195; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1202 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1196 * * if f_back_high[0] < f_high: * f_back_high[0] = f_high # <<<<<<<<<<<<<< * * if f_back_low[0] == f_low_prev and f_back_high[0] == f_high_prev: */ - __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_v_f_high); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1202; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_v_f_high); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1196; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_f_back_high[0]) = __pyx_t_1; goto __pyx_L13; } __pyx_L13:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1204 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1198 * f_back_high[0] = f_high * * if f_back_low[0] == f_low_prev and f_back_high[0] == f_high_prev: # <<<<<<<<<<<<<< @@ -49667,7 +49628,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1205 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1199 * * if f_back_low[0] == f_low_prev and f_back_high[0] == f_high_prev: * return 1 # <<<<<<<<<<<<<< @@ -49680,7 +49641,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } __pyx_L14:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1207 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1201 * return 1 * * if allow_low_x == 0 and f_back_low[0] < f_low: # <<<<<<<<<<<<<< @@ -49696,7 +49657,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1209 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1203 * if allow_low_x == 0 and f_back_low[0] < f_low: * # FAIL: f phrase is not tight * return 0 # <<<<<<<<<<<<<< @@ -49709,7 +49670,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } __pyx_L15:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1211 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1205 * return 0 * * if f_back_high[0] - f_back_low[0] > max_f_len: # <<<<<<<<<<<<<< @@ -49719,7 +49680,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_5 = (((__pyx_v_f_back_high[0]) - (__pyx_v_f_back_low[0])) > __pyx_v_max_f_len); if (__pyx_t_5) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1213 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1207 * if f_back_high[0] - f_back_low[0] > max_f_len: * # FAIL: f back projection is too wide * return 0 # <<<<<<<<<<<<<< @@ -49732,7 +49693,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } __pyx_L16:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1215 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1209 * return 0 * * if allow_high_x == 0 and f_back_high[0] > f_high: # <<<<<<<<<<<<<< @@ -49741,11 +49702,11 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ __pyx_t_5 = (__pyx_v_allow_high_x == 0); if (__pyx_t_5) { - __pyx_t_6 = PyInt_FromLong((__pyx_v_f_back_high[0])); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1215; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyInt_FromLong((__pyx_v_f_back_high[0])); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1209; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_2 = PyObject_RichCompare(__pyx_t_6, __pyx_v_f_high, Py_GT); __Pyx_XGOTREF(__pyx_t_2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1215; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_RichCompare(__pyx_t_6, __pyx_v_f_high, Py_GT); __Pyx_XGOTREF(__pyx_t_2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1209; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_3 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1215; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_3 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1209; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_4 = __pyx_t_3; } else { @@ -49753,7 +49714,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1217 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1211 * if allow_high_x == 0 and f_back_high[0] > f_high: * # FAIL: extension on high side not allowed * return 0 # <<<<<<<<<<<<<< @@ -49766,7 +49727,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } __pyx_L17:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1219 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1213 * return 0 * * if f_low != f_back_low[0]: # <<<<<<<<<<<<<< @@ -49776,7 +49737,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_4 = (__pyx_v_f_low != (__pyx_v_f_back_low[0])); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1220 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1214 * * if f_low != f_back_low[0]: * if new_low_x == 0: # <<<<<<<<<<<<<< @@ -49786,7 +49747,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_4 = (__pyx_v_new_low_x == 0); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1221 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1215 * if f_low != f_back_low[0]: * if new_low_x == 0: * if new_x >= max_new_x: # <<<<<<<<<<<<<< @@ -49796,7 +49757,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_4 = (__pyx_v_new_x >= __pyx_v_max_new_x); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1223 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1217 * if new_x >= max_new_x: * # FAIL: extension required on low side violates max # of gaps * return 0 # <<<<<<<<<<<<<< @@ -49809,7 +49770,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1225 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1219 * return 0 * else: * new_x = new_x + 1 # <<<<<<<<<<<<<< @@ -49818,7 +49779,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ __pyx_v_new_x = (__pyx_v_new_x + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1226 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1220 * else: * new_x = new_x + 1 * new_low_x = 1 # <<<<<<<<<<<<<< @@ -49832,7 +49793,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } __pyx_L19:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1227 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1221 * new_x = new_x + 1 * new_low_x = 1 * if f_low - f_back_low[0] < min_fx_size: # <<<<<<<<<<<<<< @@ -49842,7 +49803,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_4 = ((__pyx_v_f_low - (__pyx_v_f_back_low[0])) < __pyx_v_min_fx_size); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1228 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1222 * new_low_x = 1 * if f_low - f_back_low[0] < min_fx_size: * f_back_low[0] = f_low - min_fx_size # <<<<<<<<<<<<<< @@ -49851,7 +49812,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ (__pyx_v_f_back_low[0]) = (__pyx_v_f_low - __pyx_v_min_fx_size); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1229 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1223 * if f_low - f_back_low[0] < min_fx_size: * f_back_low[0] = f_low - min_fx_size * if f_back_high[0] - f_back_low[0] > max_f_len: # <<<<<<<<<<<<<< @@ -49861,7 +49822,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_4 = (((__pyx_v_f_back_high[0]) - (__pyx_v_f_back_low[0])) > __pyx_v_max_f_len); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1231 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1225 * if f_back_high[0] - f_back_low[0] > max_f_len: * # FAIL: extension required on low side violates max initial length * return 0 # <<<<<<<<<<<<<< @@ -49874,7 +49835,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } __pyx_L22:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1232 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1226 * # FAIL: extension required on low side violates max initial length * return 0 * if f_back_low[0] < 0: # <<<<<<<<<<<<<< @@ -49884,7 +49845,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_4 = ((__pyx_v_f_back_low[0]) < 0); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1234 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1228 * if f_back_low[0] < 0: * # FAIL: extension required on low side violates sentence boundary * return 0 # <<<<<<<<<<<<<< @@ -49903,22 +49864,22 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } __pyx_L18:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1236 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1230 * return 0 * * if f_high != f_back_high[0]: # <<<<<<<<<<<<<< * if new_high_x == 0: * if new_x >= max_new_x: */ - __pyx_t_2 = PyInt_FromLong((__pyx_v_f_back_high[0])); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1236; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong((__pyx_v_f_back_high[0])); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1230; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_6 = PyObject_RichCompare(__pyx_v_f_high, __pyx_t_2, Py_NE); __Pyx_XGOTREF(__pyx_t_6); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1236; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyObject_RichCompare(__pyx_v_f_high, __pyx_t_2, Py_NE); __Pyx_XGOTREF(__pyx_t_6); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1230; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1236; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1230; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1237 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1231 * * if f_high != f_back_high[0]: * if new_high_x == 0: # <<<<<<<<<<<<<< @@ -49928,7 +49889,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_4 = (__pyx_v_new_high_x == 0); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1238 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1232 * if f_high != f_back_high[0]: * if new_high_x == 0: * if new_x >= max_new_x: # <<<<<<<<<<<<<< @@ -49938,7 +49899,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_4 = (__pyx_v_new_x >= __pyx_v_max_new_x); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1240 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1234 * if new_x >= max_new_x: * # FAIL: extension required on high side violates max # of gaps * return 0 # <<<<<<<<<<<<<< @@ -49951,7 +49912,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1242 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1236 * return 0 * else: * new_x = new_x + 1 # <<<<<<<<<<<<<< @@ -49960,7 +49921,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ __pyx_v_new_x = (__pyx_v_new_x + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1243 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1237 * else: * new_x = new_x + 1 * new_high_x = 1 # <<<<<<<<<<<<<< @@ -49974,44 +49935,44 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } __pyx_L25:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1244 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1238 * new_x = new_x + 1 * new_high_x = 1 * if f_back_high[0] - f_high < min_fx_size: # <<<<<<<<<<<<<< * f_back_high[0] = f_high + min_fx_size * if f_back_high[0] - f_back_low[0] > max_f_len: */ - __pyx_t_6 = PyInt_FromLong((__pyx_v_f_back_high[0])); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyInt_FromLong((__pyx_v_f_back_high[0])); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1238; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_2 = PyNumber_Subtract(__pyx_t_6, __pyx_v_f_high); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyNumber_Subtract(__pyx_t_6, __pyx_v_f_high); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1238; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyInt_FromLong(__pyx_v_min_fx_size); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyInt_FromLong(__pyx_v_min_fx_size); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1238; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = PyObject_RichCompare(__pyx_t_2, __pyx_t_6, Py_LT); __Pyx_XGOTREF(__pyx_t_7); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_RichCompare(__pyx_t_2, __pyx_t_6, Py_LT); __Pyx_XGOTREF(__pyx_t_7); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1238; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1238; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1245 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1239 * new_high_x = 1 * if f_back_high[0] - f_high < min_fx_size: * f_back_high[0] = f_high + min_fx_size # <<<<<<<<<<<<<< * if f_back_high[0] - f_back_low[0] > max_f_len: * # FAIL: extension required on high side violates max initial length */ - __pyx_t_7 = PyInt_FromLong(__pyx_v_min_fx_size); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1245; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyInt_FromLong(__pyx_v_min_fx_size); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1239; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_6 = PyNumber_Add(__pyx_v_f_high, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1245; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyNumber_Add(__pyx_v_f_high, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1239; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_t_6); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1245; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_AsInt(__pyx_t_6); if (unlikely((__pyx_t_1 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1239; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_f_back_high[0]) = __pyx_t_1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1246 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1240 * if f_back_high[0] - f_high < min_fx_size: * f_back_high[0] = f_high + min_fx_size * if f_back_high[0] - f_back_low[0] > max_f_len: # <<<<<<<<<<<<<< @@ -50021,7 +49982,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_4 = (((__pyx_v_f_back_high[0]) - (__pyx_v_f_back_low[0])) > __pyx_v_max_f_len); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1248 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1242 * if f_back_high[0] - f_back_low[0] > max_f_len: * # FAIL: extension required on high side violates max initial length * return 0 # <<<<<<<<<<<<<< @@ -50034,7 +49995,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } __pyx_L28:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1249 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1243 * # FAIL: extension required on high side violates max initial length * return 0 * if f_back_high[0] > f_sent_len: # <<<<<<<<<<<<<< @@ -50044,7 +50005,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_4 = ((__pyx_v_f_back_high[0]) > __pyx_v_f_sent_len); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1251 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1245 * if f_back_high[0] > f_sent_len: * # FAIL: extension required on high side violates sentence boundary * return 0 # <<<<<<<<<<<<<< @@ -50063,7 +50024,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } __pyx_L24:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1253 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1247 * return 0 * * e_low_prev = e_low[0] # <<<<<<<<<<<<<< @@ -50072,7 +50033,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ __pyx_v_e_low_prev = (__pyx_v_e_low[0]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1254 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1248 * * e_low_prev = e_low[0] * e_high_prev = e_high[0] # <<<<<<<<<<<<<< @@ -50081,29 +50042,29 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ __pyx_v_e_high_prev = (__pyx_v_e_high[0]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1256 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1250 * e_high_prev = e_high[0] * * self.find_projection(f_back_low[0], f_low_prev, f_links_low, f_links_high, e_low, e_high) # <<<<<<<<<<<<<< * self.find_projection(f_high_prev, f_back_high[0], f_links_low, f_links_high, e_low, e_high) * if e_low[0] == e_low_prev and e_high[0] == e_high_prev: */ - __pyx_t_6 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_projection(__pyx_v_self, (__pyx_v_f_back_low[0]), __pyx_v_f_low_prev, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_low, __pyx_v_e_high); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1256; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_projection(__pyx_v_self, (__pyx_v_f_back_low[0]), __pyx_v_f_low_prev, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_low, __pyx_v_e_high); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1257 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1251 * * self.find_projection(f_back_low[0], f_low_prev, f_links_low, f_links_high, e_low, e_high) * self.find_projection(f_high_prev, f_back_high[0], f_links_low, f_links_high, e_low, e_high) # <<<<<<<<<<<<<< * if e_low[0] == e_low_prev and e_high[0] == e_high_prev: * return 1 */ - __pyx_t_6 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_projection(__pyx_v_self, __pyx_v_f_high_prev, (__pyx_v_f_back_high[0]), __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_low, __pyx_v_e_high); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1257; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_projection(__pyx_v_self, __pyx_v_f_high_prev, (__pyx_v_f_back_high[0]), __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_low, __pyx_v_e_high); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1258 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1252 * self.find_projection(f_back_low[0], f_low_prev, f_links_low, f_links_high, e_low, e_high) * self.find_projection(f_high_prev, f_back_high[0], f_links_low, f_links_high, e_low, e_high) * if e_low[0] == e_low_prev and e_high[0] == e_high_prev: # <<<<<<<<<<<<<< @@ -50119,7 +50080,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1259 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1253 * self.find_projection(f_high_prev, f_back_high[0], f_links_low, f_links_high, e_low, e_high) * if e_low[0] == e_low_prev and e_high[0] == e_high_prev: * return 1 # <<<<<<<<<<<<<< @@ -50132,7 +50093,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } __pyx_L30:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1260 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1254 * if e_low[0] == e_low_prev and e_high[0] == e_high_prev: * return 1 * if allow_arbitrary_x == 0: # <<<<<<<<<<<<<< @@ -50142,7 +50103,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_3 = (__pyx_v_allow_arbitrary_x == 0); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1262 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1256 * if allow_arbitrary_x == 0: * # FAIL: arbitrary expansion not permitted * return 0 # <<<<<<<<<<<<<< @@ -50155,7 +50116,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } __pyx_L31:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1263 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1257 * # FAIL: arbitrary expansion not permitted * return 0 * if e_high[0] - e_low[0] > max_e_len: # <<<<<<<<<<<<<< @@ -50165,7 +50126,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj __pyx_t_3 = (((__pyx_v_e_high[0]) - (__pyx_v_e_low[0])) > __pyx_v_max_e_len); if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1265 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1259 * if e_high[0] - e_low[0] > max_e_len: * # FAIL: re-projection violates sentence max phrase length * return 0 # <<<<<<<<<<<<<< @@ -50178,7 +50139,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj } __pyx_L32:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1266 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1260 * # FAIL: re-projection violates sentence max phrase length * return 0 * f_low_prev = f_back_low[0] # <<<<<<<<<<<<<< @@ -50187,7 +50148,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj */ __pyx_v_f_low_prev = (__pyx_v_f_back_low[0]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1267 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1261 * return 0 * f_low_prev = f_back_low[0] * f_high_prev = f_back_high[0] # <<<<<<<<<<<<<< @@ -50210,7 +50171,7 @@ static int __pyx_f_3_sa_23HieroCachingRuleFactory_find_fixpoint(struct __pyx_obj return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1270 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1264 * * * cdef find_projection(self, int in_low, int in_high, int* in_links_low, int* in_links_high, # <<<<<<<<<<<<<< @@ -50228,7 +50189,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_find_projection(CYTHON_U int __pyx_t_4; __Pyx_RefNannySetupContext("find_projection", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1273 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1267 * int* out_low, int* out_high): * cdef int i * for i from in_low <= i < in_high: # <<<<<<<<<<<<<< @@ -50238,7 +50199,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_find_projection(CYTHON_U __pyx_t_1 = __pyx_v_in_high; for (__pyx_v_i = __pyx_v_in_low; __pyx_v_i < __pyx_t_1; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1274 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1268 * cdef int i * for i from in_low <= i < in_high: * if in_links_low[i] != -1: # <<<<<<<<<<<<<< @@ -50248,7 +50209,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_find_projection(CYTHON_U __pyx_t_2 = ((__pyx_v_in_links_low[__pyx_v_i]) != -1); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1275 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1269 * for i from in_low <= i < in_high: * if in_links_low[i] != -1: * if out_low[0] == -1 or in_links_low[i] < out_low[0]: # <<<<<<<<<<<<<< @@ -50264,7 +50225,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_find_projection(CYTHON_U } if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1276 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1270 * if in_links_low[i] != -1: * if out_low[0] == -1 or in_links_low[i] < out_low[0]: * out_low[0] = in_links_low[i] # <<<<<<<<<<<<<< @@ -50276,7 +50237,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_find_projection(CYTHON_U } __pyx_L6:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1277 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1271 * if out_low[0] == -1 or in_links_low[i] < out_low[0]: * out_low[0] = in_links_low[i] * if out_high[0] == -1 or in_links_high[i] > out_high[0]: # <<<<<<<<<<<<<< @@ -50292,7 +50253,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_find_projection(CYTHON_U } if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1278 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1272 * out_low[0] = in_links_low[i] * if out_high[0] == -1 or in_links_high[i] > out_high[0]: * out_high[0] = in_links_high[i] # <<<<<<<<<<<<<< @@ -50314,7 +50275,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_find_projection(CYTHON_U return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1281 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1275 * * * cdef int* int_arr_extend(self, int* arr, int* arr_len, int* data, int data_len): # <<<<<<<<<<<<<< @@ -50328,7 +50289,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_int_arr_extend(CYTHON_UNUSED __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("int_arr_extend", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1283 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1277 * cdef int* int_arr_extend(self, int* arr, int* arr_len, int* data, int data_len): * cdef int new_len * new_len = arr_len[0] + data_len # <<<<<<<<<<<<<< @@ -50337,7 +50298,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_int_arr_extend(CYTHON_UNUSED */ __pyx_v_new_len = ((__pyx_v_arr_len[0]) + __pyx_v_data_len); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1284 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1278 * cdef int new_len * new_len = arr_len[0] + data_len * arr = realloc(arr, new_len*sizeof(int)) # <<<<<<<<<<<<<< @@ -50346,7 +50307,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_int_arr_extend(CYTHON_UNUSED */ __pyx_v_arr = ((int *)realloc(__pyx_v_arr, (__pyx_v_new_len * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1285 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1279 * new_len = arr_len[0] + data_len * arr = realloc(arr, new_len*sizeof(int)) * memcpy(arr+arr_len[0], data, data_len*sizeof(int)) # <<<<<<<<<<<<<< @@ -50355,7 +50316,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_int_arr_extend(CYTHON_UNUSED */ memcpy((__pyx_v_arr + (__pyx_v_arr_len[0])), __pyx_v_data, (__pyx_v_data_len * (sizeof(int)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1286 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1280 * arr = realloc(arr, new_len*sizeof(int)) * memcpy(arr+arr_len[0], data, data_len*sizeof(int)) * arr_len[0] = new_len # <<<<<<<<<<<<<< @@ -50364,7 +50325,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_int_arr_extend(CYTHON_UNUSED */ (__pyx_v_arr_len[0]) = __pyx_v_new_len; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1287 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1281 * memcpy(arr+arr_len[0], data, data_len*sizeof(int)) * arr_len[0] = new_len * return arr # <<<<<<<<<<<<<< @@ -50380,7 +50341,7 @@ static int *__pyx_f_3_sa_23HieroCachingRuleFactory_int_arr_extend(CYTHON_UNUSED return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1290 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1284 * * * cdef extract_phrases(self, int e_low, int e_high, int* e_gap_low, int* e_gap_high, int* e_links_low, int num_gaps, # <<<<<<<<<<<<<< @@ -50426,19 +50387,19 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("extract_phrases", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1298 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1292 * cdef result * * result = [] # <<<<<<<<<<<<<< * len1 = 0 * e_gaps1 = malloc(0) */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1292; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_result = ((PyObject *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1299 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1293 * * result = [] * len1 = 0 # <<<<<<<<<<<<<< @@ -50447,7 +50408,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_len1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1300 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1294 * result = [] * len1 = 0 * e_gaps1 = malloc(0) # <<<<<<<<<<<<<< @@ -50456,19 +50417,19 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_e_gaps1 = ((int *)malloc(0)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1301 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1295 * len1 = 0 * e_gaps1 = malloc(0) * ephr_arr = IntList() # <<<<<<<<<<<<<< * * e_gap_order = malloc(num_gaps*sizeof(int)) */ - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1301; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1295; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_ephr_arr = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1303 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1297 * ephr_arr = IntList() * * e_gap_order = malloc(num_gaps*sizeof(int)) # <<<<<<<<<<<<<< @@ -50477,7 +50438,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_e_gap_order = ((int *)malloc((__pyx_v_num_gaps * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1304 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1298 * * e_gap_order = malloc(num_gaps*sizeof(int)) * if num_gaps > 0: # <<<<<<<<<<<<<< @@ -50487,7 +50448,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_2 = (__pyx_v_num_gaps > 0); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1305 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1299 * e_gap_order = malloc(num_gaps*sizeof(int)) * if num_gaps > 0: * e_gap_order[0] = 0 # <<<<<<<<<<<<<< @@ -50496,7 +50457,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ (__pyx_v_e_gap_order[0]) = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1306 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1300 * if num_gaps > 0: * e_gap_order[0] = 0 * for i from 1 <= i < num_gaps: # <<<<<<<<<<<<<< @@ -50506,7 +50467,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_3 = __pyx_v_num_gaps; for (__pyx_v_i = 1; __pyx_v_i < __pyx_t_3; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1307 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1301 * e_gap_order[0] = 0 * for i from 1 <= i < num_gaps: * for j from 0 <= j < i: # <<<<<<<<<<<<<< @@ -50516,7 +50477,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_4 = __pyx_v_i; for (__pyx_v_j = 0; __pyx_v_j < __pyx_t_4; __pyx_v_j++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1308 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1302 * for i from 1 <= i < num_gaps: * for j from 0 <= j < i: * if e_gap_low[i] < e_gap_low[j]: # <<<<<<<<<<<<<< @@ -50526,7 +50487,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_2 = ((__pyx_v_e_gap_low[__pyx_v_i]) < (__pyx_v_e_gap_low[__pyx_v_j])); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1309 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1303 * for j from 0 <= j < i: * if e_gap_low[i] < e_gap_low[j]: * for k from j <= k < i: # <<<<<<<<<<<<<< @@ -50536,7 +50497,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_5 = __pyx_v_i; for (__pyx_v_k = __pyx_v_j; __pyx_v_k < __pyx_t_5; __pyx_v_k++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1310 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1304 * if e_gap_low[i] < e_gap_low[j]: * for k from j <= k < i: * e_gap_order[k+1] = e_gap_order[k] # <<<<<<<<<<<<<< @@ -50546,7 +50507,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ (__pyx_v_e_gap_order[(__pyx_v_k + 1)]) = (__pyx_v_e_gap_order[__pyx_v_k]); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1311 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1305 * for k from j <= k < i: * e_gap_order[k+1] = e_gap_order[k] * e_gap_order[j] = i # <<<<<<<<<<<<<< @@ -50555,7 +50516,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ (__pyx_v_e_gap_order[__pyx_v_j]) = __pyx_v_i; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1312 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1306 * e_gap_order[k+1] = e_gap_order[k] * e_gap_order[j] = i * break # <<<<<<<<<<<<<< @@ -50569,7 +50530,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1314 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1308 * break * else: * e_gap_order[i] = i # <<<<<<<<<<<<<< @@ -50584,37 +50545,37 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ } __pyx_L3:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1316 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1310 * e_gap_order[i] = i * * e_x_low = e_low # <<<<<<<<<<<<<< * e_x_high = e_high - * if self.tight_phrases == 0: + * if not self.tight_phrases: */ __pyx_v_e_x_low = __pyx_v_e_low; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1317 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1311 * * e_x_low = e_low * e_x_high = e_high # <<<<<<<<<<<<<< - * if self.tight_phrases == 0: + * if not self.tight_phrases: * while e_x_low > 0 and e_high - e_x_low < self.train_max_initial_size and e_links_low[e_x_low-1] == -1: */ __pyx_v_e_x_high = __pyx_v_e_high; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1318 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1312 * e_x_low = e_low * e_x_high = e_high - * if self.tight_phrases == 0: # <<<<<<<<<<<<<< + * if not self.tight_phrases: # <<<<<<<<<<<<<< * while e_x_low > 0 and e_high - e_x_low < self.train_max_initial_size and e_links_low[e_x_low-1] == -1: * e_x_low = e_x_low - 1 */ - __pyx_t_2 = (__pyx_v_self->tight_phrases == 0); + __pyx_t_2 = (!__pyx_v_self->tight_phrases); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1319 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1313 * e_x_high = e_high - * if self.tight_phrases == 0: + * if not self.tight_phrases: * while e_x_low > 0 and e_high - e_x_low < self.train_max_initial_size and e_links_low[e_x_low-1] == -1: # <<<<<<<<<<<<<< * e_x_low = e_x_low - 1 * while e_x_high < e_sent_len and e_x_high - e_low < self.train_max_initial_size and e_links_low[e_x_high] == -1: @@ -50635,8 +50596,8 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ } if (!__pyx_t_6) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1320 - * if self.tight_phrases == 0: + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1314 + * if not self.tight_phrases: * while e_x_low > 0 and e_high - e_x_low < self.train_max_initial_size and e_links_low[e_x_low-1] == -1: * e_x_low = e_x_low - 1 # <<<<<<<<<<<<<< * while e_x_high < e_sent_len and e_x_high - e_low < self.train_max_initial_size and e_links_low[e_x_high] == -1: @@ -50645,7 +50606,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_v_e_x_low = (__pyx_v_e_x_low - 1); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1321 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1315 * while e_x_low > 0 and e_high - e_x_low < self.train_max_initial_size and e_links_low[e_x_low-1] == -1: * e_x_low = e_x_low - 1 * while e_x_high < e_sent_len and e_x_high - e_low < self.train_max_initial_size and e_links_low[e_x_high] == -1: # <<<<<<<<<<<<<< @@ -50668,7 +50629,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ } if (!__pyx_t_2) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1322 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1316 * e_x_low = e_x_low - 1 * while e_x_high < e_sent_len and e_x_high - e_low < self.train_max_initial_size and e_links_low[e_x_high] == -1: * e_x_high = e_x_high + 1 # <<<<<<<<<<<<<< @@ -50681,7 +50642,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ } __pyx_L11:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1324 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1318 * e_x_high = e_x_high + 1 * * for i from e_x_low <= i <= e_low: # <<<<<<<<<<<<<< @@ -50691,7 +50652,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_3 = __pyx_v_e_low; for (__pyx_v_i = __pyx_v_e_x_low; __pyx_v_i <= __pyx_t_3; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1325 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1319 * * for i from e_x_low <= i <= e_low: * e_gaps1 = self.int_arr_extend(e_gaps1, &len1, &i, 1) # <<<<<<<<<<<<<< @@ -50701,7 +50662,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_v_e_gaps1 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->int_arr_extend(__pyx_v_self, __pyx_v_e_gaps1, (&__pyx_v_len1), (&__pyx_v_i), 1); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1327 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1321 * e_gaps1 = self.int_arr_extend(e_gaps1, &len1, &i, 1) * * for i from 0 <= i < num_gaps: # <<<<<<<<<<<<<< @@ -50711,7 +50672,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_3 = __pyx_v_num_gaps; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_3; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1328 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1322 * * for i from 0 <= i < num_gaps: * e_gaps2 = malloc(0) # <<<<<<<<<<<<<< @@ -50720,7 +50681,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_e_gaps2 = ((int *)malloc(0)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1329 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1323 * for i from 0 <= i < num_gaps: * e_gaps2 = malloc(0) * len2 = 0 # <<<<<<<<<<<<<< @@ -50729,7 +50690,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_len2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1331 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1325 * len2 = 0 * * j = e_gap_order[i] # <<<<<<<<<<<<<< @@ -50738,37 +50699,37 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_j = (__pyx_v_e_gap_order[__pyx_v_i]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1332 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1326 * * j = e_gap_order[i] * e_x_gap_low = e_gap_low[j] # <<<<<<<<<<<<<< * e_x_gap_high = e_gap_high[j] - * if self.tight_phrases == 0: + * if not self.tight_phrases: */ __pyx_v_e_x_gap_low = (__pyx_v_e_gap_low[__pyx_v_j]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1333 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1327 * j = e_gap_order[i] * e_x_gap_low = e_gap_low[j] * e_x_gap_high = e_gap_high[j] # <<<<<<<<<<<<<< - * if self.tight_phrases == 0: + * if not self.tight_phrases: * while e_x_gap_low > e_x_low and e_links_low[e_x_gap_low-1] == -1: */ __pyx_v_e_x_gap_high = (__pyx_v_e_gap_high[__pyx_v_j]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1334 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1328 * e_x_gap_low = e_gap_low[j] * e_x_gap_high = e_gap_high[j] - * if self.tight_phrases == 0: # <<<<<<<<<<<<<< + * if not self.tight_phrases: # <<<<<<<<<<<<<< * while e_x_gap_low > e_x_low and e_links_low[e_x_gap_low-1] == -1: * e_x_gap_low = e_x_gap_low - 1 */ - __pyx_t_2 = (__pyx_v_self->tight_phrases == 0); + __pyx_t_2 = (!__pyx_v_self->tight_phrases); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1335 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1329 * e_x_gap_high = e_gap_high[j] - * if self.tight_phrases == 0: + * if not self.tight_phrases: * while e_x_gap_low > e_x_low and e_links_low[e_x_gap_low-1] == -1: # <<<<<<<<<<<<<< * e_x_gap_low = e_x_gap_low - 1 * while e_x_gap_high < e_x_high and e_links_low[e_x_gap_high] == -1: @@ -50783,8 +50744,8 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ } if (!__pyx_t_7) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1336 - * if self.tight_phrases == 0: + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1330 + * if not self.tight_phrases: * while e_x_gap_low > e_x_low and e_links_low[e_x_gap_low-1] == -1: * e_x_gap_low = e_x_gap_low - 1 # <<<<<<<<<<<<<< * while e_x_gap_high < e_x_high and e_links_low[e_x_gap_high] == -1: @@ -50793,7 +50754,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_v_e_x_gap_low = (__pyx_v_e_x_gap_low - 1); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1337 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1331 * while e_x_gap_low > e_x_low and e_links_low[e_x_gap_low-1] == -1: * e_x_gap_low = e_x_gap_low - 1 * while e_x_gap_high < e_x_high and e_links_low[e_x_gap_high] == -1: # <<<<<<<<<<<<<< @@ -50810,7 +50771,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ } if (!__pyx_t_6) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1338 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1332 * e_x_gap_low = e_x_gap_low - 1 * while e_x_gap_high < e_x_high and e_links_low[e_x_gap_high] == -1: * e_x_gap_high = e_x_gap_high + 1 # <<<<<<<<<<<<<< @@ -50823,7 +50784,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ } __pyx_L20:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1340 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1334 * e_x_gap_high = e_x_gap_high + 1 * * k = 0 # <<<<<<<<<<<<<< @@ -50832,7 +50793,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_k = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1341 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1335 * * k = 0 * step = 1+(i*2) # <<<<<<<<<<<<<< @@ -50841,7 +50802,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_step = (1 + (__pyx_v_i * 2)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1342 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1336 * k = 0 * step = 1+(i*2) * while k < len1: # <<<<<<<<<<<<<< @@ -50852,7 +50813,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_6 = (__pyx_v_k < __pyx_v_len1); if (!__pyx_t_6) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1343 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1337 * step = 1+(i*2) * while k < len1: * for m from e_x_gap_low <= m <= e_gap_low[j]: # <<<<<<<<<<<<<< @@ -50862,7 +50823,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_4 = (__pyx_v_e_gap_low[__pyx_v_j]); for (__pyx_v_m = __pyx_v_e_x_gap_low; __pyx_v_m <= __pyx_t_4; __pyx_v_m++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1344 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1338 * while k < len1: * for m from e_x_gap_low <= m <= e_gap_low[j]: * if m >= e_gaps1[k+step-1]: # <<<<<<<<<<<<<< @@ -50872,7 +50833,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_6 = (__pyx_v_m >= (__pyx_v_e_gaps1[((__pyx_v_k + __pyx_v_step) - 1)])); if (__pyx_t_6) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1345 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1339 * for m from e_x_gap_low <= m <= e_gap_low[j]: * if m >= e_gaps1[k+step-1]: * for n from e_gap_high[j] <= n <= e_x_gap_high: # <<<<<<<<<<<<<< @@ -50882,7 +50843,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_5 = __pyx_v_e_x_gap_high; for (__pyx_v_n = (__pyx_v_e_gap_high[__pyx_v_j]); __pyx_v_n <= __pyx_t_5; __pyx_v_n++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1346 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1340 * if m >= e_gaps1[k+step-1]: * for n from e_gap_high[j] <= n <= e_x_gap_high: * if n-m >= 1: # extractor.py doesn't restrict target-side gap length # <<<<<<<<<<<<<< @@ -50892,7 +50853,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_6 = ((__pyx_v_n - __pyx_v_m) >= 1); if (__pyx_t_6) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1347 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1341 * for n from e_gap_high[j] <= n <= e_x_gap_high: * if n-m >= 1: # extractor.py doesn't restrict target-side gap length * e_gaps2 = self.int_arr_extend(e_gaps2, &len2, e_gaps1+k, step) # <<<<<<<<<<<<<< @@ -50901,7 +50862,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_e_gaps2 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->int_arr_extend(__pyx_v_self, __pyx_v_e_gaps2, (&__pyx_v_len2), (__pyx_v_e_gaps1 + __pyx_v_k), __pyx_v_step); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1348 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1342 * if n-m >= 1: # extractor.py doesn't restrict target-side gap length * e_gaps2 = self.int_arr_extend(e_gaps2, &len2, e_gaps1+k, step) * e_gaps2 = self.int_arr_extend(e_gaps2, &len2, &m, 1) # <<<<<<<<<<<<<< @@ -50910,7 +50871,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_e_gaps2 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->int_arr_extend(__pyx_v_self, __pyx_v_e_gaps2, (&__pyx_v_len2), (&__pyx_v_m), 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1349 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1343 * e_gaps2 = self.int_arr_extend(e_gaps2, &len2, e_gaps1+k, step) * e_gaps2 = self.int_arr_extend(e_gaps2, &len2, &m, 1) * e_gaps2 = self.int_arr_extend(e_gaps2, &len2, &n, 1) # <<<<<<<<<<<<<< @@ -50927,7 +50888,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_L29:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1350 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1344 * e_gaps2 = self.int_arr_extend(e_gaps2, &len2, &m, 1) * e_gaps2 = self.int_arr_extend(e_gaps2, &len2, &n, 1) * k = k + step # <<<<<<<<<<<<<< @@ -50937,7 +50898,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_v_k = (__pyx_v_k + __pyx_v_step); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1351 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1345 * e_gaps2 = self.int_arr_extend(e_gaps2, &len2, &n, 1) * k = k + step * free(e_gaps1) # <<<<<<<<<<<<<< @@ -50946,7 +50907,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ free(__pyx_v_e_gaps1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1352 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1346 * k = k + step * free(e_gaps1) * e_gaps1 = e_gaps2 # <<<<<<<<<<<<<< @@ -50955,7 +50916,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_e_gaps1 = __pyx_v_e_gaps2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1353 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1347 * free(e_gaps1) * e_gaps1 = e_gaps2 * len1 = len2 # <<<<<<<<<<<<<< @@ -50965,7 +50926,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_v_len1 = __pyx_v_len2; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1355 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1349 * len1 = len2 * * step = 1+(num_gaps*2) # <<<<<<<<<<<<<< @@ -50974,7 +50935,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_step = (1 + (__pyx_v_num_gaps * 2)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1356 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1350 * * step = 1+(num_gaps*2) * e_gaps2 = malloc(0) # <<<<<<<<<<<<<< @@ -50983,7 +50944,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_e_gaps2 = ((int *)malloc(0)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1357 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1351 * step = 1+(num_gaps*2) * e_gaps2 = malloc(0) * len2 = 0 # <<<<<<<<<<<<<< @@ -50992,7 +50953,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_len2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1358 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1352 * e_gaps2 = malloc(0) * len2 = 0 * for i from e_high <= i <= e_x_high: # <<<<<<<<<<<<<< @@ -51002,7 +50963,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_3 = __pyx_v_e_x_high; for (__pyx_v_i = __pyx_v_e_high; __pyx_v_i <= __pyx_t_3; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1359 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1353 * len2 = 0 * for i from e_high <= i <= e_x_high: * j = 0 # <<<<<<<<<<<<<< @@ -51011,7 +50972,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_j = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1360 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1354 * for i from e_high <= i <= e_x_high: * j = 0 * while j < len1: # <<<<<<<<<<<<<< @@ -51022,7 +50983,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_6 = (__pyx_v_j < __pyx_v_len1); if (!__pyx_t_6) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1361 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1355 * j = 0 * while j < len1: * if i - e_gaps1[j] <= self.train_max_initial_size and i >= e_gaps1[j+step-1]: # <<<<<<<<<<<<<< @@ -51038,7 +50999,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ } if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1362 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1356 * while j < len1: * if i - e_gaps1[j] <= self.train_max_initial_size and i >= e_gaps1[j+step-1]: * e_gaps2 = self.int_arr_extend(e_gaps2, &len2, e_gaps1+j, step) # <<<<<<<<<<<<<< @@ -51047,7 +51008,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_e_gaps2 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->int_arr_extend(__pyx_v_self, __pyx_v_e_gaps2, (&__pyx_v_len2), (__pyx_v_e_gaps1 + __pyx_v_j), __pyx_v_step); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1363 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1357 * if i - e_gaps1[j] <= self.train_max_initial_size and i >= e_gaps1[j+step-1]: * e_gaps2 = self.int_arr_extend(e_gaps2, &len2, e_gaps1+j, step) * e_gaps2 = self.int_arr_extend(e_gaps2, &len2, &i, 1) # <<<<<<<<<<<<<< @@ -51059,7 +51020,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ } __pyx_L37:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1364 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1358 * e_gaps2 = self.int_arr_extend(e_gaps2, &len2, e_gaps1+j, step) * e_gaps2 = self.int_arr_extend(e_gaps2, &len2, &i, 1) * j = j + step # <<<<<<<<<<<<<< @@ -51070,7 +51031,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ } } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1365 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1359 * e_gaps2 = self.int_arr_extend(e_gaps2, &len2, &i, 1) * j = j + step * free(e_gaps1) # <<<<<<<<<<<<<< @@ -51079,7 +51040,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ free(__pyx_v_e_gaps1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1366 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1360 * j = j + step * free(e_gaps1) * e_gaps1 = e_gaps2 # <<<<<<<<<<<<<< @@ -51088,7 +51049,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_e_gaps1 = __pyx_v_e_gaps2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1367 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1361 * free(e_gaps1) * e_gaps1 = e_gaps2 * len1 = len2 # <<<<<<<<<<<<<< @@ -51097,7 +51058,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_len1 = __pyx_v_len2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1369 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1363 * len1 = len2 * * step = (num_gaps+1)*2 # <<<<<<<<<<<<<< @@ -51106,7 +51067,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_step = ((__pyx_v_num_gaps + 1) * 2); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1370 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1364 * * step = (num_gaps+1)*2 * i = 0 # <<<<<<<<<<<<<< @@ -51115,7 +51076,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_i = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1372 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1366 * i = 0 * * while i < len1: # <<<<<<<<<<<<<< @@ -51126,7 +51087,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_2 = (__pyx_v_i < __pyx_v_len1); if (!__pyx_t_2) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1373 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1367 * * while i < len1: * ephr_arr._clear() # <<<<<<<<<<<<<< @@ -51135,7 +51096,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_ephr_arr->__pyx_vtab)->_clear(__pyx_v_ephr_arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1374 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1368 * while i < len1: * ephr_arr._clear() * num_chunks = 0 # <<<<<<<<<<<<<< @@ -51144,20 +51105,20 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_num_chunks = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1375 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1369 * ephr_arr._clear() * num_chunks = 0 * indexes = [] # <<<<<<<<<<<<<< * for j from 0 <= j < num_gaps+1: * if e_gaps1[i+2*j] < e_gaps1[i+(2*j)+1]: */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1375; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_XDECREF(((PyObject *)__pyx_v_indexes)); __pyx_v_indexes = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1376 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1370 * num_chunks = 0 * indexes = [] * for j from 0 <= j < num_gaps+1: # <<<<<<<<<<<<<< @@ -51167,7 +51128,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_9 = (__pyx_v_num_gaps + 1); for (__pyx_v_j = 0; __pyx_v_j < __pyx_t_9; __pyx_v_j++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1377 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1371 * indexes = [] * for j from 0 <= j < num_gaps+1: * if e_gaps1[i+2*j] < e_gaps1[i+(2*j)+1]: # <<<<<<<<<<<<<< @@ -51177,7 +51138,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_2 = ((__pyx_v_e_gaps1[(__pyx_v_i + (2 * __pyx_v_j))]) < (__pyx_v_e_gaps1[((__pyx_v_i + (2 * __pyx_v_j)) + 1)])); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1378 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1372 * for j from 0 <= j < num_gaps+1: * if e_gaps1[i+2*j] < e_gaps1[i+(2*j)+1]: * num_chunks = num_chunks + 1 # <<<<<<<<<<<<<< @@ -51189,7 +51150,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ } __pyx_L42:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1379 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1373 * if e_gaps1[i+2*j] < e_gaps1[i+(2*j)+1]: * num_chunks = num_chunks + 1 * for k from e_gaps1[i+2*j] <= k < e_gaps1[i+(2*j)+1]: # <<<<<<<<<<<<<< @@ -51199,19 +51160,19 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_3 = (__pyx_v_e_gaps1[((__pyx_v_i + (2 * __pyx_v_j)) + 1)]); for (__pyx_v_k = (__pyx_v_e_gaps1[(__pyx_v_i + (2 * __pyx_v_j))]); __pyx_v_k < __pyx_t_3; __pyx_v_k++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1380 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1374 * num_chunks = num_chunks + 1 * for k from e_gaps1[i+2*j] <= k < e_gaps1[i+(2*j)+1]: * indexes.append(k) # <<<<<<<<<<<<<< * ephr_arr._append(self.eid2symid[self.eda.data.arr[e_sent_start+k]]) * if j < num_gaps: */ - __pyx_t_1 = PyInt_FromLong(__pyx_v_k); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1380; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromLong(__pyx_v_k); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1374; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = PyList_Append(__pyx_v_indexes, __pyx_t_1); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1380; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyList_Append(__pyx_v_indexes, __pyx_t_1); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1374; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1381 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1375 * for k from e_gaps1[i+2*j] <= k < e_gaps1[i+(2*j)+1]: * indexes.append(k) * ephr_arr._append(self.eid2symid[self.eda.data.arr[e_sent_start+k]]) # <<<<<<<<<<<<<< @@ -51219,14 +51180,14 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ * indexes.append(sym_setindex(self.category, e_gap_order[j]+1)) */ __pyx_t_4 = (__pyx_v_self->eda->data->arr[(__pyx_v_e_sent_start + __pyx_v_k)]); - __pyx_t_1 = __Pyx_GetItemInt(((PyObject *)__pyx_v_self->eid2symid), __pyx_t_4, sizeof(int), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetItemInt(((PyObject *)__pyx_v_self->eid2symid), __pyx_t_4, sizeof(int), PyInt_FromLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1375; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_1); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyInt_AsInt(__pyx_t_1); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1375; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_ephr_arr->__pyx_vtab)->_append(__pyx_v_ephr_arr, __pyx_t_4); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1382 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1376 * indexes.append(k) * ephr_arr._append(self.eid2symid[self.eda.data.arr[e_sent_start+k]]) * if j < num_gaps: # <<<<<<<<<<<<<< @@ -51236,19 +51197,19 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_t_2 = (__pyx_v_j < __pyx_v_num_gaps); if (__pyx_t_2) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1383 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1377 * ephr_arr._append(self.eid2symid[self.eda.data.arr[e_sent_start+k]]) * if j < num_gaps: * indexes.append(sym_setindex(self.category, e_gap_order[j]+1)) # <<<<<<<<<<<<<< * ephr_arr._append(sym_setindex(self.category, e_gap_order[j]+1)) * i = i + step */ - __pyx_t_1 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, ((__pyx_v_e_gap_order[__pyx_v_j]) + 1))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, ((__pyx_v_e_gap_order[__pyx_v_j]) + 1))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1377; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = PyList_Append(__pyx_v_indexes, __pyx_t_1); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyList_Append(__pyx_v_indexes, __pyx_t_1); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1377; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1384 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1378 * if j < num_gaps: * indexes.append(sym_setindex(self.category, e_gap_order[j]+1)) * ephr_arr._append(sym_setindex(self.category, e_gap_order[j]+1)) # <<<<<<<<<<<<<< @@ -51261,7 +51222,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_L45:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1385 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1379 * indexes.append(sym_setindex(self.category, e_gap_order[j]+1)) * ephr_arr._append(sym_setindex(self.category, e_gap_order[j]+1)) * i = i + step # <<<<<<<<<<<<<< @@ -51270,7 +51231,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ __pyx_v_i = (__pyx_v_i + __pyx_v_step); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1386 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1380 * ephr_arr._append(sym_setindex(self.category, e_gap_order[j]+1)) * i = i + step * if ephr_arr.len <= self.max_target_length and num_chunks <= self.max_target_chunks: # <<<<<<<<<<<<<< @@ -51286,22 +51247,22 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ } if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1387 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1381 * i = i + step * if ephr_arr.len <= self.max_target_length and num_chunks <= self.max_target_chunks: * result.append((Phrase(ephr_arr),indexes)) # <<<<<<<<<<<<<< * * free(e_gaps1) */ - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(((PyObject *)__pyx_v_ephr_arr)); PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_v_ephr_arr)); __Pyx_GIVEREF(((PyObject *)__pyx_v_ephr_arr)); - __pyx_t_11 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_11); __Pyx_GIVEREF(__pyx_t_11); @@ -51309,7 +51270,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ PyTuple_SET_ITEM(__pyx_t_1, 1, ((PyObject *)__pyx_v_indexes)); __Pyx_GIVEREF(((PyObject *)__pyx_v_indexes)); __pyx_t_11 = 0; - __pyx_t_11 = __Pyx_PyObject_Append(__pyx_v_result, ((PyObject *)__pyx_t_1)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = __Pyx_PyObject_Append(__pyx_v_result, ((PyObject *)__pyx_t_1)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; @@ -51318,7 +51279,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ __pyx_L46:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1389 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1383 * result.append((Phrase(ephr_arr),indexes)) * * free(e_gaps1) # <<<<<<<<<<<<<< @@ -51327,7 +51288,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ free(__pyx_v_e_gaps1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1390 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1384 * * free(e_gaps1) * free(e_gap_order) # <<<<<<<<<<<<<< @@ -51336,7 +51297,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ */ free(__pyx_v_e_gap_order); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1391 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1385 * free(e_gaps1) * free(e_gap_order) * return result # <<<<<<<<<<<<<< @@ -51364,7 +51325,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract_phrases(struct _ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1393 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1387 * return result * * cdef IntList create_alignments(self, int* sent_links, int num_links, findexes, eindexes): # <<<<<<<<<<<<<< @@ -51392,55 +51353,55 @@ static struct __pyx_obj_3_sa_IntList *__pyx_f_3_sa_23HieroCachingRuleFactory_cre int __pyx_clineno = 0; __Pyx_RefNannySetupContext("create_alignments", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1395 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1389 * cdef IntList create_alignments(self, int* sent_links, int num_links, findexes, eindexes): * cdef unsigned i * cdef IntList ret = IntList() # <<<<<<<<<<<<<< * for i in range(len(findexes)): * s = findexes[i] */ - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1395; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_ret = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1396 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1390 * cdef unsigned i * cdef IntList ret = IntList() * for i in range(len(findexes)): # <<<<<<<<<<<<<< * s = findexes[i] * if (s<0): */ - __pyx_t_2 = PyObject_Length(__pyx_v_findexes); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1396; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Length(__pyx_v_findexes); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1397 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1391 * cdef IntList ret = IntList() * for i in range(len(findexes)): * s = findexes[i] # <<<<<<<<<<<<<< * if (s<0): * continue */ - __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_findexes, __pyx_v_i, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_findexes, __pyx_v_i, sizeof(unsigned int)+1, PyLong_FromUnsignedLong); if (!__pyx_t_1) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1391; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_XDECREF(__pyx_v_s); __pyx_v_s = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1398 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1392 * for i in range(len(findexes)): * s = findexes[i] * if (s<0): # <<<<<<<<<<<<<< * continue * idx = 0 */ - __pyx_t_1 = PyObject_RichCompare(__pyx_v_s, __pyx_int_0, Py_LT); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1398; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1398; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_RichCompare(__pyx_v_s, __pyx_int_0, Py_LT); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1399 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1393 * s = findexes[i] * if (s<0): * continue # <<<<<<<<<<<<<< @@ -51452,7 +51413,7 @@ static struct __pyx_obj_3_sa_IntList *__pyx_f_3_sa_23HieroCachingRuleFactory_cre } __pyx_L5:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1400 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1394 * if (s<0): * continue * idx = 0 # <<<<<<<<<<<<<< @@ -51463,7 +51424,7 @@ static struct __pyx_obj_3_sa_IntList *__pyx_f_3_sa_23HieroCachingRuleFactory_cre __Pyx_XDECREF(__pyx_v_idx); __pyx_v_idx = __pyx_int_0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1401 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1395 * continue * idx = 0 * while (idx < num_links*2): # <<<<<<<<<<<<<< @@ -51471,51 +51432,51 @@ static struct __pyx_obj_3_sa_IntList *__pyx_f_3_sa_23HieroCachingRuleFactory_cre * j = eindexes.index(sent_links[idx+1]) */ while (1) { - __pyx_t_1 = PyInt_FromLong((__pyx_v_num_links * 2)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1401; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromLong((__pyx_v_num_links * 2)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1395; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_RichCompare(__pyx_v_idx, __pyx_t_1, Py_LT); __Pyx_XGOTREF(__pyx_t_5); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1401; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_RichCompare(__pyx_v_idx, __pyx_t_1, Py_LT); __Pyx_XGOTREF(__pyx_t_5); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1395; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1401; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1395; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (!__pyx_t_4) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1402 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1396 * idx = 0 * while (idx < num_links*2): * if (sent_links[idx] == s): # <<<<<<<<<<<<<< * j = eindexes.index(sent_links[idx+1]) * ret.append(i*65536+j) */ - __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_v_idx); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1402; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_5 = PyInt_FromLong((__pyx_v_sent_links[__pyx_t_6])); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1402; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_v_idx); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1396; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyInt_FromLong((__pyx_v_sent_links[__pyx_t_6])); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1396; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_1 = PyObject_RichCompare(__pyx_t_5, __pyx_v_s, Py_EQ); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1402; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_RichCompare(__pyx_t_5, __pyx_v_s, Py_EQ); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1396; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1402; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1396; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1403 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1397 * while (idx < num_links*2): * if (sent_links[idx] == s): * j = eindexes.index(sent_links[idx+1]) # <<<<<<<<<<<<<< * ret.append(i*65536+j) * idx += 2 */ - __pyx_t_1 = PyObject_GetAttr(__pyx_v_eindexes, __pyx_n_s__index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(__pyx_v_eindexes, __pyx_n_s__index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyNumber_Add(__pyx_v_idx, __pyx_int_1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyNumber_Add(__pyx_v_idx, __pyx_int_1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_t_5); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_t_5); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyInt_FromLong((__pyx_v_sent_links[__pyx_t_6])); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyInt_FromLong((__pyx_v_sent_links[__pyx_t_6])); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_5); __Pyx_GIVEREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_7), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_7), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_7)); __pyx_t_7 = 0; @@ -51523,19 +51484,19 @@ static struct __pyx_obj_3_sa_IntList *__pyx_f_3_sa_23HieroCachingRuleFactory_cre __pyx_v_j = __pyx_t_5; __pyx_t_5 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1404 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1398 * if (sent_links[idx] == s): * j = eindexes.index(sent_links[idx+1]) * ret.append(i*65536+j) # <<<<<<<<<<<<<< * idx += 2 * return ret */ - __pyx_t_5 = PyInt_FromLong((__pyx_v_i * 65536)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyInt_FromLong((__pyx_v_i * 65536)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1398; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_7 = PyNumber_Add(__pyx_t_5, __pyx_v_j); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyNumber_Add(__pyx_t_5, __pyx_v_j); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1398; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_ret), __pyx_t_7); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_ret), __pyx_t_7); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1398; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; @@ -51543,14 +51504,14 @@ static struct __pyx_obj_3_sa_IntList *__pyx_f_3_sa_23HieroCachingRuleFactory_cre } __pyx_L8:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1405 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1399 * j = eindexes.index(sent_links[idx+1]) * ret.append(i*65536+j) * idx += 2 # <<<<<<<<<<<<<< * return ret * */ - __pyx_t_5 = PyNumber_InPlaceAdd(__pyx_v_idx, __pyx_int_2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1405; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyNumber_InPlaceAdd(__pyx_v_idx, __pyx_int_2); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_v_idx); __pyx_v_idx = __pyx_t_5; @@ -51559,7 +51520,7 @@ static struct __pyx_obj_3_sa_IntList *__pyx_f_3_sa_23HieroCachingRuleFactory_cre __pyx_L3_continue:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1406 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1400 * ret.append(i*65536+j) * idx += 2 * return ret # <<<<<<<<<<<<<< @@ -51589,7 +51550,7 @@ static struct __pyx_obj_3_sa_IntList *__pyx_f_3_sa_23HieroCachingRuleFactory_cre return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1408 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1402 * return ret * * cdef extract(self, Phrase phrase, Matching* matching, int* chunklen, int num_chunks): # <<<<<<<<<<<<<< @@ -51683,19 +51644,19 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj int __pyx_clineno = 0; __Pyx_RefNannySetupContext("extract", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1421 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1415 * cdef reason_for_failure * * fphr_arr = IntList() # <<<<<<<<<<<<<< * phrase_len = phrase.n * extracts = [] */ - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1421; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_IntList)), ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1415; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_fphr_arr = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1422 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1416 * * fphr_arr = IntList() * phrase_len = phrase.n # <<<<<<<<<<<<<< @@ -51704,19 +51665,19 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_phrase_len = __pyx_v_phrase->n; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1423 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1417 * fphr_arr = IntList() * phrase_len = phrase.n * extracts = [] # <<<<<<<<<<<<<< * sent_links = self.alignment._get_sent_links(matching.sent_id, &num_links) * */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1417; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_v_extracts = ((PyObject *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1424 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1418 * phrase_len = phrase.n * extracts = [] * sent_links = self.alignment._get_sent_links(matching.sent_id, &num_links) # <<<<<<<<<<<<<< @@ -51725,7 +51686,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_sent_links = ((struct __pyx_vtabstruct_3_sa_Alignment *)__pyx_v_self->alignment->__pyx_vtab)->_get_sent_links(__pyx_v_self->alignment, __pyx_v_matching->sent_id, (&__pyx_v_num_links)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1426 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1420 * sent_links = self.alignment._get_sent_links(matching.sent_id, &num_links) * * e_sent_start = self.eda.sent_index.arr[matching.sent_id] # <<<<<<<<<<<<<< @@ -51734,7 +51695,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_e_sent_start = (__pyx_v_self->eda->sent_index->arr[__pyx_v_matching->sent_id]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1427 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1421 * * e_sent_start = self.eda.sent_index.arr[matching.sent_id] * e_sent_end = self.eda.sent_index.arr[matching.sent_id+1] # <<<<<<<<<<<<<< @@ -51743,7 +51704,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_e_sent_end = (__pyx_v_self->eda->sent_index->arr[(__pyx_v_matching->sent_id + 1)]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1428 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1422 * e_sent_start = self.eda.sent_index.arr[matching.sent_id] * e_sent_end = self.eda.sent_index.arr[matching.sent_id+1] * e_sent_len = e_sent_end - e_sent_start - 1 # <<<<<<<<<<<<<< @@ -51752,7 +51713,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_e_sent_len = ((__pyx_v_e_sent_end - __pyx_v_e_sent_start) - 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1429 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1423 * e_sent_end = self.eda.sent_index.arr[matching.sent_id+1] * e_sent_len = e_sent_end - e_sent_start - 1 * f_sent_start = self.fda.sent_index.arr[matching.sent_id] # <<<<<<<<<<<<<< @@ -51761,7 +51722,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_f_sent_start = (__pyx_v_self->fda->sent_index->arr[__pyx_v_matching->sent_id]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1430 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1424 * e_sent_len = e_sent_end - e_sent_start - 1 * f_sent_start = self.fda.sent_index.arr[matching.sent_id] * f_sent_end = self.fda.sent_index.arr[matching.sent_id+1] # <<<<<<<<<<<<<< @@ -51770,7 +51731,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_f_sent_end = (__pyx_v_self->fda->sent_index->arr[(__pyx_v_matching->sent_id + 1)]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1431 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1425 * f_sent_start = self.fda.sent_index.arr[matching.sent_id] * f_sent_end = self.fda.sent_index.arr[matching.sent_id+1] * f_sent_len = f_sent_end - f_sent_start - 1 # <<<<<<<<<<<<<< @@ -51779,21 +51740,21 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_f_sent_len = ((__pyx_v_f_sent_end - __pyx_v_f_sent_start) - 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1433 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1427 * f_sent_len = f_sent_end - f_sent_start - 1 * * self.findexes1.reset() # <<<<<<<<<<<<<< * sofar = 0 * for i in range(num_chunks): */ - __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes1), __pyx_n_s__reset); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1433; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes1), __pyx_n_s__reset); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1427; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1433; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1427; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1434 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1428 * * self.findexes1.reset() * sofar = 0 # <<<<<<<<<<<<<< @@ -51803,7 +51764,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_INCREF(__pyx_int_0); __pyx_v_sofar = __pyx_int_0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1435 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1429 * self.findexes1.reset() * sofar = 0 * for i in range(num_chunks): # <<<<<<<<<<<<<< @@ -51814,7 +51775,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { __pyx_v_i = __pyx_t_4; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1436 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1430 * sofar = 0 * for i in range(num_chunks): * for j in range(chunklen[i]): # <<<<<<<<<<<<<< @@ -51825,35 +51786,35 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) { __pyx_v_j = __pyx_t_6; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1437 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1431 * for i in range(num_chunks): * for j in range(chunklen[i]): * self.findexes1.append(matching.arr[matching.start+i]+j-f_sent_start); # <<<<<<<<<<<<<< * sofar += 1 * if (i+1arr[(__pyx_v_matching->start + __pyx_v_i)]) + __pyx_v_j) - __pyx_v_f_sent_start)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1437; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong((((__pyx_v_matching->arr[(__pyx_v_matching->start + __pyx_v_i)]) + __pyx_v_j) - __pyx_v_f_sent_start)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1431; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes1), __pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1437; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes1), __pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1431; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1438 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1432 * for j in range(chunklen[i]): * self.findexes1.append(matching.arr[matching.start+i]+j-f_sent_start); * sofar += 1 # <<<<<<<<<<<<<< * if (i+1findexes1), __pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1440; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes1), __pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1434; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1441 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1435 * if (i+1 malloc(e_sent_len*sizeof(int)) # <<<<<<<<<<<<<< @@ -51903,7 +51864,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_e_links_low = ((int *)malloc((__pyx_v_e_sent_len * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1445 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1439 * * e_links_low = malloc(e_sent_len*sizeof(int)) * e_links_high = malloc(e_sent_len*sizeof(int)) # <<<<<<<<<<<<<< @@ -51912,7 +51873,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_e_links_high = ((int *)malloc((__pyx_v_e_sent_len * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1446 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1440 * e_links_low = malloc(e_sent_len*sizeof(int)) * e_links_high = malloc(e_sent_len*sizeof(int)) * f_links_low = malloc(f_sent_len*sizeof(int)) # <<<<<<<<<<<<<< @@ -51921,7 +51882,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_f_links_low = ((int *)malloc((__pyx_v_f_sent_len * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1447 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1441 * e_links_high = malloc(e_sent_len*sizeof(int)) * f_links_low = malloc(f_sent_len*sizeof(int)) * f_links_high = malloc(f_sent_len*sizeof(int)) # <<<<<<<<<<<<<< @@ -51930,7 +51891,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_f_links_high = ((int *)malloc((__pyx_v_f_sent_len * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1448 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1442 * f_links_low = malloc(f_sent_len*sizeof(int)) * f_links_high = malloc(f_sent_len*sizeof(int)) * f_gap_low = malloc((num_chunks+1)*sizeof(int)) # <<<<<<<<<<<<<< @@ -51939,7 +51900,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_f_gap_low = ((int *)malloc(((__pyx_v_num_chunks + 1) * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1449 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1443 * f_links_high = malloc(f_sent_len*sizeof(int)) * f_gap_low = malloc((num_chunks+1)*sizeof(int)) * f_gap_high = malloc((num_chunks+1)*sizeof(int)) # <<<<<<<<<<<<<< @@ -51948,7 +51909,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_f_gap_high = ((int *)malloc(((__pyx_v_num_chunks + 1) * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1450 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1444 * f_gap_low = malloc((num_chunks+1)*sizeof(int)) * f_gap_high = malloc((num_chunks+1)*sizeof(int)) * e_gap_low = malloc((num_chunks+1)*sizeof(int)) # <<<<<<<<<<<<<< @@ -51957,7 +51918,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_e_gap_low = ((int *)malloc(((__pyx_v_num_chunks + 1) * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1451 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1445 * f_gap_high = malloc((num_chunks+1)*sizeof(int)) * e_gap_low = malloc((num_chunks+1)*sizeof(int)) * e_gap_high = malloc((num_chunks+1)*sizeof(int)) # <<<<<<<<<<<<<< @@ -51966,7 +51927,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_e_gap_high = ((int *)malloc(((__pyx_v_num_chunks + 1) * (sizeof(int))))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1452 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1446 * e_gap_low = malloc((num_chunks+1)*sizeof(int)) * e_gap_high = malloc((num_chunks+1)*sizeof(int)) * memset(f_gap_low, 0, (num_chunks+1)*sizeof(int)) # <<<<<<<<<<<<<< @@ -51975,7 +51936,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ memset(__pyx_v_f_gap_low, 0, ((__pyx_v_num_chunks + 1) * (sizeof(int)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1453 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1447 * e_gap_high = malloc((num_chunks+1)*sizeof(int)) * memset(f_gap_low, 0, (num_chunks+1)*sizeof(int)) * memset(f_gap_high, 0, (num_chunks+1)*sizeof(int)) # <<<<<<<<<<<<<< @@ -51984,7 +51945,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ memset(__pyx_v_f_gap_high, 0, ((__pyx_v_num_chunks + 1) * (sizeof(int)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1454 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1448 * memset(f_gap_low, 0, (num_chunks+1)*sizeof(int)) * memset(f_gap_high, 0, (num_chunks+1)*sizeof(int)) * memset(e_gap_low, 0, (num_chunks+1)*sizeof(int)) # <<<<<<<<<<<<<< @@ -51993,7 +51954,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ memset(__pyx_v_e_gap_low, 0, ((__pyx_v_num_chunks + 1) * (sizeof(int)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1455 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1449 * memset(f_gap_high, 0, (num_chunks+1)*sizeof(int)) * memset(e_gap_low, 0, (num_chunks+1)*sizeof(int)) * memset(e_gap_high, 0, (num_chunks+1)*sizeof(int)) # <<<<<<<<<<<<<< @@ -52002,7 +51963,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ memset(__pyx_v_e_gap_high, 0, ((__pyx_v_num_chunks + 1) * (sizeof(int)))); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1457 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1451 * memset(e_gap_high, 0, (num_chunks+1)*sizeof(int)) * * reason_for_failure = "" # <<<<<<<<<<<<<< @@ -52012,7 +51973,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_INCREF(((PyObject *)__pyx_kp_s_45)); __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_45); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1459 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1453 * reason_for_failure = "" * * for i from 0 <= i < e_sent_len: # <<<<<<<<<<<<<< @@ -52022,7 +51983,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_3 = __pyx_v_e_sent_len; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_3; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1460 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1454 * * for i from 0 <= i < e_sent_len: * e_links_low[i] = -1 # <<<<<<<<<<<<<< @@ -52031,7 +51992,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ (__pyx_v_e_links_low[__pyx_v_i]) = -1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1461 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1455 * for i from 0 <= i < e_sent_len: * e_links_low[i] = -1 * e_links_high[i] = -1 # <<<<<<<<<<<<<< @@ -52041,7 +52002,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj (__pyx_v_e_links_high[__pyx_v_i]) = -1; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1462 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1456 * e_links_low[i] = -1 * e_links_high[i] = -1 * for i from 0 <= i < f_sent_len: # <<<<<<<<<<<<<< @@ -52051,7 +52012,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_3 = __pyx_v_f_sent_len; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_3; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1463 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1457 * e_links_high[i] = -1 * for i from 0 <= i < f_sent_len: * f_links_low[i] = -1 # <<<<<<<<<<<<<< @@ -52060,7 +52021,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ (__pyx_v_f_links_low[__pyx_v_i]) = -1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1464 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1458 * for i from 0 <= i < f_sent_len: * f_links_low[i] = -1 * f_links_high[i] = -1 # <<<<<<<<<<<<<< @@ -52070,7 +52031,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj (__pyx_v_f_links_high[__pyx_v_i]) = -1; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1470 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1464 * # links that we care about (but then how to look up * # when we want to check something on the e side?) * i = 0 # <<<<<<<<<<<<<< @@ -52079,7 +52040,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_i = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1471 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1465 * # when we want to check something on the e side?) * i = 0 * while i < num_links*2: # <<<<<<<<<<<<<< @@ -52090,7 +52051,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_7 = (__pyx_v_i < (__pyx_v_num_links * 2)); if (!__pyx_t_7) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1472 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1466 * i = 0 * while i < num_links*2: * f_i = sent_links[i] # <<<<<<<<<<<<<< @@ -52099,7 +52060,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_f_i = (__pyx_v_sent_links[__pyx_v_i]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1473 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1467 * while i < num_links*2: * f_i = sent_links[i] * e_i = sent_links[i+1] # <<<<<<<<<<<<<< @@ -52108,7 +52069,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_e_i = (__pyx_v_sent_links[(__pyx_v_i + 1)]); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1474 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1468 * f_i = sent_links[i] * e_i = sent_links[i+1] * if f_links_low[f_i] == -1 or f_links_low[f_i] > e_i: # <<<<<<<<<<<<<< @@ -52124,7 +52085,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1475 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1469 * e_i = sent_links[i+1] * if f_links_low[f_i] == -1 or f_links_low[f_i] > e_i: * f_links_low[f_i] = e_i # <<<<<<<<<<<<<< @@ -52136,7 +52097,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L14:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1476 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1470 * if f_links_low[f_i] == -1 or f_links_low[f_i] > e_i: * f_links_low[f_i] = e_i * if f_links_high[f_i] == -1 or f_links_high[f_i] < e_i + 1: # <<<<<<<<<<<<<< @@ -52152,7 +52113,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1477 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1471 * f_links_low[f_i] = e_i * if f_links_high[f_i] == -1 or f_links_high[f_i] < e_i + 1: * f_links_high[f_i] = e_i + 1 # <<<<<<<<<<<<<< @@ -52164,7 +52125,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L15:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1478 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1472 * if f_links_high[f_i] == -1 or f_links_high[f_i] < e_i + 1: * f_links_high[f_i] = e_i + 1 * if e_links_low[e_i] == -1 or e_links_low[e_i] > f_i: # <<<<<<<<<<<<<< @@ -52180,7 +52141,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1479 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1473 * f_links_high[f_i] = e_i + 1 * if e_links_low[e_i] == -1 or e_links_low[e_i] > f_i: * e_links_low[e_i] = f_i # <<<<<<<<<<<<<< @@ -52192,7 +52153,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L16:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1480 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1474 * if e_links_low[e_i] == -1 or e_links_low[e_i] > f_i: * e_links_low[e_i] = f_i * if e_links_high[e_i] == -1 or e_links_high[e_i] < f_i + 1: # <<<<<<<<<<<<<< @@ -52208,7 +52169,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1481 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1475 * e_links_low[e_i] = f_i * if e_links_high[e_i] == -1 or e_links_high[e_i] < f_i + 1: * e_links_high[e_i] = f_i + 1 # <<<<<<<<<<<<<< @@ -52220,7 +52181,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L17:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1482 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1476 * if e_links_high[e_i] == -1 or e_links_high[e_i] < f_i + 1: * e_links_high[e_i] = f_i + 1 * i = i + 2 # <<<<<<<<<<<<<< @@ -52230,19 +52191,19 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_v_i = (__pyx_v_i + 2); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1484 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1478 * i = i + 2 * * als = [] # <<<<<<<<<<<<<< * for x in range(matching.start,matching.end): * al = (matching.arr[x]-f_sent_start,f_links_low[matching.arr[x]-f_sent_start]) */ - __pyx_t_2 = PyList_New(0); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1484; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyList_New(0); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1478; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_v_als = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1485 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1479 * * als = [] * for x in range(matching.start,matching.end): # <<<<<<<<<<<<<< @@ -52253,18 +52214,18 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj for (__pyx_t_4 = __pyx_v_matching->start; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { __pyx_v_x = __pyx_t_4; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1486 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1480 * als = [] * for x in range(matching.start,matching.end): * al = (matching.arr[x]-f_sent_start,f_links_low[matching.arr[x]-f_sent_start]) # <<<<<<<<<<<<<< * als.append(al) * # check all source-side alignment constraints */ - __pyx_t_2 = PyInt_FromLong(((__pyx_v_matching->arr[__pyx_v_x]) - __pyx_v_f_sent_start)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1486; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(((__pyx_v_matching->arr[__pyx_v_x]) - __pyx_v_f_sent_start)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1480; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyInt_FromLong((__pyx_v_f_links_low[((__pyx_v_matching->arr[__pyx_v_x]) - __pyx_v_f_sent_start)])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1486; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromLong((__pyx_v_f_links_low[((__pyx_v_matching->arr[__pyx_v_x]) - __pyx_v_f_sent_start)])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1480; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = PyTuple_New(2); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1486; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyTuple_New(2); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1480; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); @@ -52276,17 +52237,17 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_v_al = __pyx_t_10; __pyx_t_10 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1487 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1481 * for x in range(matching.start,matching.end): * al = (matching.arr[x]-f_sent_start,f_links_low[matching.arr[x]-f_sent_start]) * als.append(al) # <<<<<<<<<<<<<< * # check all source-side alignment constraints * met_constraints = 1 */ - __pyx_t_11 = PyList_Append(__pyx_v_als, ((PyObject *)__pyx_v_al)); if (unlikely(__pyx_t_11 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = PyList_Append(__pyx_v_als, ((PyObject *)__pyx_v_al)); if (unlikely(__pyx_t_11 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1481; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1489 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1483 * als.append(al) * # check all source-side alignment constraints * met_constraints = 1 # <<<<<<<<<<<<<< @@ -52295,7 +52256,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_met_constraints = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1490 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1484 * # check all source-side alignment constraints * met_constraints = 1 * if self.require_aligned_terminal: # <<<<<<<<<<<<<< @@ -52304,7 +52265,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ if (__pyx_v_self->require_aligned_terminal) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1491 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1485 * met_constraints = 1 * if self.require_aligned_terminal: * num_aligned_chunks = 0 # <<<<<<<<<<<<<< @@ -52313,7 +52274,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_num_aligned_chunks = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1492 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1486 * if self.require_aligned_terminal: * num_aligned_chunks = 0 * for i from 0 <= i < num_chunks: # <<<<<<<<<<<<<< @@ -52323,7 +52284,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_3 = __pyx_v_num_chunks; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_3; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1493 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1487 * num_aligned_chunks = 0 * for i from 0 <= i < num_chunks: * for j from 0 <= j < chunklen[i]: # <<<<<<<<<<<<<< @@ -52333,7 +52294,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_4 = (__pyx_v_chunklen[__pyx_v_i]); for (__pyx_v_j = 0; __pyx_v_j < __pyx_t_4; __pyx_v_j++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1494 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1488 * for i from 0 <= i < num_chunks: * for j from 0 <= j < chunklen[i]: * if f_links_low[matching.arr[matching.start+i]+j-f_sent_start] > -1: # <<<<<<<<<<<<<< @@ -52343,7 +52304,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_9 = ((__pyx_v_f_links_low[(((__pyx_v_matching->arr[(__pyx_v_matching->start + __pyx_v_i)]) + __pyx_v_j) - __pyx_v_f_sent_start)]) > -1); if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1495 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1489 * for j from 0 <= j < chunklen[i]: * if f_links_low[matching.arr[matching.start+i]+j-f_sent_start] > -1: * num_aligned_chunks = num_aligned_chunks + 1 # <<<<<<<<<<<<<< @@ -52352,7 +52313,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_num_aligned_chunks = (__pyx_v_num_aligned_chunks + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1496 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1490 * if f_links_low[matching.arr[matching.start+i]+j-f_sent_start] > -1: * num_aligned_chunks = num_aligned_chunks + 1 * break # <<<<<<<<<<<<<< @@ -52367,7 +52328,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_L24_break:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1497 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1491 * num_aligned_chunks = num_aligned_chunks + 1 * break * if num_aligned_chunks == 0: # <<<<<<<<<<<<<< @@ -52377,7 +52338,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_9 = (__pyx_v_num_aligned_chunks == 0); if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1498 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1492 * break * if num_aligned_chunks == 0: * reason_for_failure = "No aligned terminals" # <<<<<<<<<<<<<< @@ -52388,7 +52349,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_DECREF(__pyx_v_reason_for_failure); __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_126); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1499 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1493 * if num_aligned_chunks == 0: * reason_for_failure = "No aligned terminals" * met_constraints = 0 # <<<<<<<<<<<<<< @@ -52400,7 +52361,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L26:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1500 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1494 * reason_for_failure = "No aligned terminals" * met_constraints = 0 * if self.require_aligned_chunks and num_aligned_chunks < num_chunks: # <<<<<<<<<<<<<< @@ -52415,7 +52376,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1501 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1495 * met_constraints = 0 * if self.require_aligned_chunks and num_aligned_chunks < num_chunks: * reason_for_failure = "Unaligned chunk" # <<<<<<<<<<<<<< @@ -52426,7 +52387,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_DECREF(__pyx_v_reason_for_failure); __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_127); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1502 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1496 * if self.require_aligned_chunks and num_aligned_chunks < num_chunks: * reason_for_failure = "Unaligned chunk" * met_constraints = 0 # <<<<<<<<<<<<<< @@ -52441,7 +52402,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L20:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1504 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1498 * met_constraints = 0 * * if met_constraints and self.tight_phrases: # <<<<<<<<<<<<<< @@ -52450,12 +52411,13 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ if (__pyx_v_met_constraints) { __pyx_t_7 = __pyx_v_self->tight_phrases; + __pyx_t_9 = __pyx_t_7; } else { - __pyx_t_7 = __pyx_v_met_constraints; + __pyx_t_9 = __pyx_v_met_constraints; } - if (__pyx_t_7) { + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1506 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1500 * if met_constraints and self.tight_phrases: * # outside edge constraints are checked later * for i from 0 <= i < num_chunks-1: # <<<<<<<<<<<<<< @@ -52465,17 +52427,17 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_12 = (__pyx_v_num_chunks - 1); for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_12; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1507 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1501 * # outside edge constraints are checked later * for i from 0 <= i < num_chunks-1: * if f_links_low[matching.arr[matching.start+i]+chunklen[i]-f_sent_start] == -1: # <<<<<<<<<<<<<< * reason_for_failure = "Gaps are not tight phrases" * met_constraints = 0 */ - __pyx_t_7 = ((__pyx_v_f_links_low[(((__pyx_v_matching->arr[(__pyx_v_matching->start + __pyx_v_i)]) + (__pyx_v_chunklen[__pyx_v_i])) - __pyx_v_f_sent_start)]) == -1); - if (__pyx_t_7) { + __pyx_t_9 = ((__pyx_v_f_links_low[(((__pyx_v_matching->arr[(__pyx_v_matching->start + __pyx_v_i)]) + (__pyx_v_chunklen[__pyx_v_i])) - __pyx_v_f_sent_start)]) == -1); + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1508 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1502 * for i from 0 <= i < num_chunks-1: * if f_links_low[matching.arr[matching.start+i]+chunklen[i]-f_sent_start] == -1: * reason_for_failure = "Gaps are not tight phrases" # <<<<<<<<<<<<<< @@ -52486,7 +52448,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_DECREF(__pyx_v_reason_for_failure); __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_128); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1509 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1503 * if f_links_low[matching.arr[matching.start+i]+chunklen[i]-f_sent_start] == -1: * reason_for_failure = "Gaps are not tight phrases" * met_constraints = 0 # <<<<<<<<<<<<<< @@ -52495,7 +52457,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_met_constraints = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1510 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1504 * reason_for_failure = "Gaps are not tight phrases" * met_constraints = 0 * break # <<<<<<<<<<<<<< @@ -52507,17 +52469,17 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L31:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1511 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1505 * met_constraints = 0 * break * if f_links_low[matching.arr[matching.start+i+1]-1-f_sent_start] == -1: # <<<<<<<<<<<<<< * reason_for_failure = "Gaps are not tight phrases" * met_constraints = 0 */ - __pyx_t_7 = ((__pyx_v_f_links_low[(((__pyx_v_matching->arr[((__pyx_v_matching->start + __pyx_v_i) + 1)]) - 1) - __pyx_v_f_sent_start)]) == -1); - if (__pyx_t_7) { + __pyx_t_9 = ((__pyx_v_f_links_low[(((__pyx_v_matching->arr[((__pyx_v_matching->start + __pyx_v_i) + 1)]) - 1) - __pyx_v_f_sent_start)]) == -1); + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1512 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1506 * break * if f_links_low[matching.arr[matching.start+i+1]-1-f_sent_start] == -1: * reason_for_failure = "Gaps are not tight phrases" # <<<<<<<<<<<<<< @@ -52528,7 +52490,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_DECREF(__pyx_v_reason_for_failure); __pyx_v_reason_for_failure = ((PyObject *)__pyx_kp_s_128); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1513 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1507 * if f_links_low[matching.arr[matching.start+i+1]-1-f_sent_start] == -1: * reason_for_failure = "Gaps are not tight phrases" * met_constraints = 0 # <<<<<<<<<<<<<< @@ -52537,7 +52499,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_met_constraints = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1514 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1508 * reason_for_failure = "Gaps are not tight phrases" * met_constraints = 0 * break # <<<<<<<<<<<<<< @@ -52554,7 +52516,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L28:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1516 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1510 * break * * f_low = matching.arr[matching.start] - f_sent_start # <<<<<<<<<<<<<< @@ -52563,7 +52525,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_f_low = ((__pyx_v_matching->arr[__pyx_v_matching->start]) - __pyx_v_f_sent_start); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1517 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1511 * * f_low = matching.arr[matching.start] - f_sent_start * f_high = matching.arr[matching.start + matching.size - 1] + chunklen[num_chunks-1] - f_sent_start # <<<<<<<<<<<<<< @@ -52572,7 +52534,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_f_high = (((__pyx_v_matching->arr[((__pyx_v_matching->start + __pyx_v_matching->size) - 1)]) + (__pyx_v_chunklen[(__pyx_v_num_chunks - 1)])) - __pyx_v_f_sent_start); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1518 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1512 * f_low = matching.arr[matching.start] - f_sent_start * f_high = matching.arr[matching.start + matching.size - 1] + chunklen[num_chunks-1] - f_sent_start * if met_constraints: # <<<<<<<<<<<<<< @@ -52581,17 +52543,17 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ if (__pyx_v_met_constraints) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1520 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1514 * if met_constraints: * * if self.find_fixpoint(f_low, f_high, f_links_low, f_links_high, e_links_low, e_links_high, # <<<<<<<<<<<<<< * -1, -1, &e_low, &e_high, &f_back_low, &f_back_high, f_sent_len, e_sent_len, * self.train_max_initial_size, self.train_max_initial_size, */ - __pyx_t_10 = PyInt_FromLong(__pyx_v_f_high); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1520; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyInt_FromLong(__pyx_v_f_high); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1514; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1524 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1518 * self.train_max_initial_size, self.train_max_initial_size, * self.train_min_gap_size, 0, * self.max_nonterminals - num_chunks + 1, 1, 1, 0, 0): # <<<<<<<<<<<<<< @@ -52602,7 +52564,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1525 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1519 * self.train_min_gap_size, 0, * self.max_nonterminals - num_chunks + 1, 1, 1, 0, 0): * gap_error = 0 # <<<<<<<<<<<<<< @@ -52611,7 +52573,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_gap_error = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1526 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1520 * self.max_nonterminals - num_chunks + 1, 1, 1, 0, 0): * gap_error = 0 * num_gaps = 0 # <<<<<<<<<<<<<< @@ -52620,17 +52582,17 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_num_gaps = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1528 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1522 * num_gaps = 0 * * if f_back_low < f_low: # <<<<<<<<<<<<<< * f_gap_low[0] = f_back_low * f_gap_high[0] = f_low */ - __pyx_t_7 = (__pyx_v_f_back_low < __pyx_v_f_low); - if (__pyx_t_7) { + __pyx_t_9 = (__pyx_v_f_back_low < __pyx_v_f_low); + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1529 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1523 * * if f_back_low < f_low: * f_gap_low[0] = f_back_low # <<<<<<<<<<<<<< @@ -52639,7 +52601,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ (__pyx_v_f_gap_low[0]) = __pyx_v_f_back_low; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1530 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1524 * if f_back_low < f_low: * f_gap_low[0] = f_back_low * f_gap_high[0] = f_low # <<<<<<<<<<<<<< @@ -52648,7 +52610,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ (__pyx_v_f_gap_high[0]) = __pyx_v_f_low; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1531 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1525 * f_gap_low[0] = f_back_low * f_gap_high[0] = f_low * num_gaps = 1 # <<<<<<<<<<<<<< @@ -52657,7 +52619,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_num_gaps = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1532 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1526 * f_gap_high[0] = f_low * num_gaps = 1 * gap_start = 0 # <<<<<<<<<<<<<< @@ -52666,7 +52628,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_gap_start = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1533 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1527 * num_gaps = 1 * gap_start = 0 * phrase_len = phrase_len+1 # <<<<<<<<<<<<<< @@ -52675,17 +52637,17 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_phrase_len = (__pyx_v_phrase_len + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1534 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1528 * gap_start = 0 * phrase_len = phrase_len+1 * if phrase_len > self.max_length: # <<<<<<<<<<<<<< * gap_error = 1 * if self.tight_phrases: */ - __pyx_t_7 = (__pyx_v_phrase_len > __pyx_v_self->max_length); - if (__pyx_t_7) { + __pyx_t_9 = (__pyx_v_phrase_len > __pyx_v_self->max_length); + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1535 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1529 * phrase_len = phrase_len+1 * if phrase_len > self.max_length: * gap_error = 1 # <<<<<<<<<<<<<< @@ -52697,7 +52659,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L36:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1536 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1530 * if phrase_len > self.max_length: * gap_error = 1 * if self.tight_phrases: # <<<<<<<<<<<<<< @@ -52706,23 +52668,23 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ if (__pyx_v_self->tight_phrases) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1537 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1531 * gap_error = 1 * if self.tight_phrases: * if f_links_low[f_back_low] == -1 or f_links_low[f_low-1] == -1: # <<<<<<<<<<<<<< * gap_error = 1 * reason_for_failure = "Inside edges of preceding subphrase are not tight" */ - __pyx_t_7 = ((__pyx_v_f_links_low[__pyx_v_f_back_low]) == -1); - if (!__pyx_t_7) { - __pyx_t_9 = ((__pyx_v_f_links_low[(__pyx_v_f_low - 1)]) == -1); - __pyx_t_8 = __pyx_t_9; - } else { + __pyx_t_9 = ((__pyx_v_f_links_low[__pyx_v_f_back_low]) == -1); + if (!__pyx_t_9) { + __pyx_t_7 = ((__pyx_v_f_links_low[(__pyx_v_f_low - 1)]) == -1); __pyx_t_8 = __pyx_t_7; + } else { + __pyx_t_8 = __pyx_t_9; } if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1538 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1532 * if self.tight_phrases: * if f_links_low[f_back_low] == -1 or f_links_low[f_low-1] == -1: * gap_error = 1 # <<<<<<<<<<<<<< @@ -52731,7 +52693,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_gap_error = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1539 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1533 * if f_links_low[f_back_low] == -1 or f_links_low[f_low-1] == -1: * gap_error = 1 * reason_for_failure = "Inside edges of preceding subphrase are not tight" # <<<<<<<<<<<<<< @@ -52751,7 +52713,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1541 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1535 * reason_for_failure = "Inside edges of preceding subphrase are not tight" * else: * gap_start = 1 # <<<<<<<<<<<<<< @@ -52760,7 +52722,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_gap_start = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1542 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1536 * else: * gap_start = 1 * if self.tight_phrases and f_links_low[f_low] == -1: # <<<<<<<<<<<<<< @@ -52769,13 +52731,13 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ if (__pyx_v_self->tight_phrases) { __pyx_t_8 = ((__pyx_v_f_links_low[__pyx_v_f_low]) == -1); - __pyx_t_7 = __pyx_t_8; + __pyx_t_9 = __pyx_t_8; } else { - __pyx_t_7 = __pyx_v_self->tight_phrases; + __pyx_t_9 = __pyx_v_self->tight_phrases; } - if (__pyx_t_7) { + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1545 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1539 * # this is not a hard error. we can't extract this phrase * # but we still might be able to extract a superphrase * met_constraints = 0 # <<<<<<<<<<<<<< @@ -52789,7 +52751,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L35:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1547 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1541 * met_constraints = 0 * * for i from 0 <= i < matching.size - 1: # <<<<<<<<<<<<<< @@ -52799,7 +52761,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_12 = (__pyx_v_matching->size - 1); for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_12; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1548 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1542 * * for i from 0 <= i < matching.size - 1: * f_gap_low[1+i] = matching.arr[matching.start+i] + chunklen[i] - f_sent_start # <<<<<<<<<<<<<< @@ -52808,7 +52770,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ (__pyx_v_f_gap_low[(1 + __pyx_v_i)]) = (((__pyx_v_matching->arr[(__pyx_v_matching->start + __pyx_v_i)]) + (__pyx_v_chunklen[__pyx_v_i])) - __pyx_v_f_sent_start); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1549 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1543 * for i from 0 <= i < matching.size - 1: * f_gap_low[1+i] = matching.arr[matching.start+i] + chunklen[i] - f_sent_start * f_gap_high[1+i] = matching.arr[matching.start+i+1] - f_sent_start # <<<<<<<<<<<<<< @@ -52817,7 +52779,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ (__pyx_v_f_gap_high[(1 + __pyx_v_i)]) = ((__pyx_v_matching->arr[((__pyx_v_matching->start + __pyx_v_i) + 1)]) - __pyx_v_f_sent_start); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1550 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1544 * f_gap_low[1+i] = matching.arr[matching.start+i] + chunklen[i] - f_sent_start * f_gap_high[1+i] = matching.arr[matching.start+i+1] - f_sent_start * num_gaps = num_gaps + 1 # <<<<<<<<<<<<<< @@ -52827,17 +52789,17 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_v_num_gaps = (__pyx_v_num_gaps + 1); } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1552 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1546 * num_gaps = num_gaps + 1 * * if f_high < f_back_high: # <<<<<<<<<<<<<< * f_gap_low[gap_start+num_gaps] = f_high * f_gap_high[gap_start+num_gaps] = f_back_high */ - __pyx_t_7 = (__pyx_v_f_high < __pyx_v_f_back_high); - if (__pyx_t_7) { + __pyx_t_9 = (__pyx_v_f_high < __pyx_v_f_back_high); + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1553 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1547 * * if f_high < f_back_high: * f_gap_low[gap_start+num_gaps] = f_high # <<<<<<<<<<<<<< @@ -52846,7 +52808,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ (__pyx_v_f_gap_low[(__pyx_v_gap_start + __pyx_v_num_gaps)]) = __pyx_v_f_high; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1554 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1548 * if f_high < f_back_high: * f_gap_low[gap_start+num_gaps] = f_high * f_gap_high[gap_start+num_gaps] = f_back_high # <<<<<<<<<<<<<< @@ -52855,7 +52817,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ (__pyx_v_f_gap_high[(__pyx_v_gap_start + __pyx_v_num_gaps)]) = __pyx_v_f_back_high; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1555 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1549 * f_gap_low[gap_start+num_gaps] = f_high * f_gap_high[gap_start+num_gaps] = f_back_high * num_gaps = num_gaps + 1 # <<<<<<<<<<<<<< @@ -52864,7 +52826,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_num_gaps = (__pyx_v_num_gaps + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1556 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1550 * f_gap_high[gap_start+num_gaps] = f_back_high * num_gaps = num_gaps + 1 * phrase_len = phrase_len+1 # <<<<<<<<<<<<<< @@ -52873,17 +52835,17 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_phrase_len = (__pyx_v_phrase_len + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1557 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1551 * num_gaps = num_gaps + 1 * phrase_len = phrase_len+1 * if phrase_len > self.max_length: # <<<<<<<<<<<<<< * gap_error = 1 * if self.tight_phrases: */ - __pyx_t_7 = (__pyx_v_phrase_len > __pyx_v_self->max_length); - if (__pyx_t_7) { + __pyx_t_9 = (__pyx_v_phrase_len > __pyx_v_self->max_length); + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1558 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1552 * phrase_len = phrase_len+1 * if phrase_len > self.max_length: * gap_error = 1 # <<<<<<<<<<<<<< @@ -52895,7 +52857,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L43:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1559 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1553 * if phrase_len > self.max_length: * gap_error = 1 * if self.tight_phrases: # <<<<<<<<<<<<<< @@ -52904,23 +52866,23 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ if (__pyx_v_self->tight_phrases) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1560 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1554 * gap_error = 1 * if self.tight_phrases: * if f_links_low[f_back_high-1] == -1 or f_links_low[f_high] == -1: # <<<<<<<<<<<<<< * gap_error = 1 * reason_for_failure = "Inside edges of following subphrase are not tight" */ - __pyx_t_7 = ((__pyx_v_f_links_low[(__pyx_v_f_back_high - 1)]) == -1); - if (!__pyx_t_7) { + __pyx_t_9 = ((__pyx_v_f_links_low[(__pyx_v_f_back_high - 1)]) == -1); + if (!__pyx_t_9) { __pyx_t_8 = ((__pyx_v_f_links_low[__pyx_v_f_high]) == -1); - __pyx_t_9 = __pyx_t_8; + __pyx_t_7 = __pyx_t_8; } else { - __pyx_t_9 = __pyx_t_7; + __pyx_t_7 = __pyx_t_9; } - if (__pyx_t_9) { + if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1561 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1555 * if self.tight_phrases: * if f_links_low[f_back_high-1] == -1 or f_links_low[f_high] == -1: * gap_error = 1 # <<<<<<<<<<<<<< @@ -52929,7 +52891,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_gap_error = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1562 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1556 * if f_links_low[f_back_high-1] == -1 or f_links_low[f_high] == -1: * gap_error = 1 * reason_for_failure = "Inside edges of following subphrase are not tight" # <<<<<<<<<<<<<< @@ -52949,7 +52911,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1564 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1558 * reason_for_failure = "Inside edges of following subphrase are not tight" * else: * if self.tight_phrases and f_links_low[f_high-1] == -1: # <<<<<<<<<<<<<< @@ -52957,14 +52919,14 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj * */ if (__pyx_v_self->tight_phrases) { - __pyx_t_9 = ((__pyx_v_f_links_low[(__pyx_v_f_high - 1)]) == -1); - __pyx_t_7 = __pyx_t_9; + __pyx_t_7 = ((__pyx_v_f_links_low[(__pyx_v_f_high - 1)]) == -1); + __pyx_t_9 = __pyx_t_7; } else { - __pyx_t_7 = __pyx_v_self->tight_phrases; + __pyx_t_9 = __pyx_v_self->tight_phrases; } - if (__pyx_t_7) { + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1565 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1559 * else: * if self.tight_phrases and f_links_low[f_high-1] == -1: * met_constraints = 0 # <<<<<<<<<<<<<< @@ -52978,17 +52940,17 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L42:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1567 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1561 * met_constraints = 0 * * if gap_error == 0: # <<<<<<<<<<<<<< * e_word_count = e_high - e_low * for i from 0 <= i < num_gaps: # check integrity of subphrases */ - __pyx_t_7 = (__pyx_v_gap_error == 0); - if (__pyx_t_7) { + __pyx_t_9 = (__pyx_v_gap_error == 0); + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1568 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1562 * * if gap_error == 0: * e_word_count = e_high - e_low # <<<<<<<<<<<<<< @@ -52997,7 +52959,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_e_word_count = (__pyx_v_e_high - __pyx_v_e_low); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1569 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1563 * if gap_error == 0: * e_word_count = e_high - e_low * for i from 0 <= i < num_gaps: # check integrity of subphrases # <<<<<<<<<<<<<< @@ -53007,28 +52969,28 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_3 = __pyx_v_num_gaps; for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_3; __pyx_v_i++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1570 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1564 * e_word_count = e_high - e_low * for i from 0 <= i < num_gaps: # check integrity of subphrases * if self.find_fixpoint(f_gap_low[gap_start+i], f_gap_high[gap_start+i], # <<<<<<<<<<<<<< * f_links_low, f_links_high, e_links_low, e_links_high, * -1, -1, e_gap_low+gap_start+i, e_gap_high+gap_start+i, */ - __pyx_t_10 = PyInt_FromLong((__pyx_v_f_gap_high[(__pyx_v_gap_start + __pyx_v_i)])); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1570; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyInt_FromLong((__pyx_v_f_gap_high[(__pyx_v_gap_start + __pyx_v_i)])); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1564; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1575 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1569 * f_gap_low+gap_start+i, f_gap_high+gap_start+i, * f_sent_len, e_sent_len, * self.train_max_initial_size, self.train_max_initial_size, # <<<<<<<<<<<<<< * 0, 0, 0, 0, 0, 0, 0) == 0: * gap_error = 1 */ - __pyx_t_7 = (((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_fixpoint(__pyx_v_self, (__pyx_v_f_gap_low[(__pyx_v_gap_start + __pyx_v_i)]), __pyx_t_10, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_links_low, __pyx_v_e_links_high, -1, -1, ((__pyx_v_e_gap_low + __pyx_v_gap_start) + __pyx_v_i), ((__pyx_v_e_gap_high + __pyx_v_gap_start) + __pyx_v_i), ((__pyx_v_f_gap_low + __pyx_v_gap_start) + __pyx_v_i), ((__pyx_v_f_gap_high + __pyx_v_gap_start) + __pyx_v_i), __pyx_v_f_sent_len, __pyx_v_e_sent_len, __pyx_v_self->train_max_initial_size, __pyx_v_self->train_max_initial_size, 0, 0, 0, 0, 0, 0, 0) == 0); + __pyx_t_9 = (((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_fixpoint(__pyx_v_self, (__pyx_v_f_gap_low[(__pyx_v_gap_start + __pyx_v_i)]), __pyx_t_10, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_links_low, __pyx_v_e_links_high, -1, -1, ((__pyx_v_e_gap_low + __pyx_v_gap_start) + __pyx_v_i), ((__pyx_v_e_gap_high + __pyx_v_gap_start) + __pyx_v_i), ((__pyx_v_f_gap_low + __pyx_v_gap_start) + __pyx_v_i), ((__pyx_v_f_gap_high + __pyx_v_gap_start) + __pyx_v_i), __pyx_v_f_sent_len, __pyx_v_e_sent_len, __pyx_v_self->train_max_initial_size, __pyx_v_self->train_max_initial_size, 0, 0, 0, 0, 0, 0, 0) == 0); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - if (__pyx_t_7) { + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1577 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1571 * self.train_max_initial_size, self.train_max_initial_size, * 0, 0, 0, 0, 0, 0, 0) == 0: * gap_error = 1 # <<<<<<<<<<<<<< @@ -53037,18 +52999,18 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_gap_error = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1578 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1572 * 0, 0, 0, 0, 0, 0, 0) == 0: * gap_error = 1 * reason_for_failure = "Subphrase [%d, %d] failed integrity check" % (f_gap_low[gap_start+i], f_gap_high[gap_start+i]) # <<<<<<<<<<<<<< * break * */ - __pyx_t_10 = PyInt_FromLong((__pyx_v_f_gap_low[(__pyx_v_gap_start + __pyx_v_i)])); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1578; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyInt_FromLong((__pyx_v_f_gap_low[(__pyx_v_gap_start + __pyx_v_i)])); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1572; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_1 = PyInt_FromLong((__pyx_v_f_gap_high[(__pyx_v_gap_start + __pyx_v_i)])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1578; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromLong((__pyx_v_f_gap_high[(__pyx_v_gap_start + __pyx_v_i)])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1572; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1578; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1572; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_10); __Pyx_GIVEREF(__pyx_t_10); @@ -53056,14 +53018,14 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_GIVEREF(__pyx_t_1); __pyx_t_10 = 0; __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_131), ((PyObject *)__pyx_t_2)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1578; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_131), ((PyObject *)__pyx_t_2)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1572; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_1)); __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_v_reason_for_failure); __pyx_v_reason_for_failure = ((PyObject *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1579 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1573 * gap_error = 1 * reason_for_failure = "Subphrase [%d, %d] failed integrity check" % (f_gap_low[gap_start+i], f_gap_high[gap_start+i]) * break # <<<<<<<<<<<<<< @@ -53080,17 +53042,17 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L47:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1581 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1575 * break * * if gap_error == 0: # <<<<<<<<<<<<<< * i = 1 * self.findexes.reset() */ - __pyx_t_7 = (__pyx_v_gap_error == 0); - if (__pyx_t_7) { + __pyx_t_9 = (__pyx_v_gap_error == 0); + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1582 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1576 * * if gap_error == 0: * i = 1 # <<<<<<<<<<<<<< @@ -53099,31 +53061,31 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_i = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1583 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1577 * if gap_error == 0: * i = 1 * self.findexes.reset() # <<<<<<<<<<<<<< * if f_back_low < f_low: * fphr_arr._append(sym_setindex(self.category, i)) */ - __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes), __pyx_n_s__reset); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1583; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes), __pyx_n_s__reset); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1577; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1583; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1577; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1584 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1578 * i = 1 * self.findexes.reset() * if f_back_low < f_low: # <<<<<<<<<<<<<< * fphr_arr._append(sym_setindex(self.category, i)) * i = i+1 */ - __pyx_t_7 = (__pyx_v_f_back_low < __pyx_v_f_low); - if (__pyx_t_7) { + __pyx_t_9 = (__pyx_v_f_back_low < __pyx_v_f_low); + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1585 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1579 * self.findexes.reset() * if f_back_low < f_low: * fphr_arr._append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< @@ -53132,7 +53094,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_fphr_arr->__pyx_vtab)->_append(__pyx_v_fphr_arr, __pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1586 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1580 * if f_back_low < f_low: * fphr_arr._append(sym_setindex(self.category, i)) * i = i+1 # <<<<<<<<<<<<<< @@ -53141,16 +53103,16 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_i = (__pyx_v_i + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1587 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1581 * fphr_arr._append(sym_setindex(self.category, i)) * i = i+1 * self.findexes.append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< * self.findexes.extend(self.findexes1) * for j from 0 <= j < phrase.n: */ - __pyx_t_2 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1587; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1581; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes), __pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1587; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes), __pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1581; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -53158,27 +53120,27 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L52:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1588 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1582 * i = i+1 * self.findexes.append(sym_setindex(self.category, i)) * self.findexes.extend(self.findexes1) # <<<<<<<<<<<<<< * for j from 0 <= j < phrase.n: * if sym_isvar(phrase.syms[j]): */ - __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes), __pyx_n_s__extend); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1588; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes), __pyx_n_s__extend); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1588; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(((PyObject *)__pyx_v_self->findexes1)); PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_v_self->findexes1)); __Pyx_GIVEREF(((PyObject *)__pyx_v_self->findexes1)); - __pyx_t_10 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1588; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1589 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1583 * self.findexes.append(sym_setindex(self.category, i)) * self.findexes.extend(self.findexes1) * for j from 0 <= j < phrase.n: # <<<<<<<<<<<<<< @@ -53188,7 +53150,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_3 = __pyx_v_phrase->n; for (__pyx_v_j = 0; __pyx_v_j < __pyx_t_3; __pyx_v_j++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1590 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1584 * self.findexes.extend(self.findexes1) * for j from 0 <= j < phrase.n: * if sym_isvar(phrase.syms[j]): # <<<<<<<<<<<<<< @@ -53198,7 +53160,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_4 = __pyx_f_3_sa_sym_isvar((__pyx_v_phrase->syms[__pyx_v_j])); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1591 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1585 * for j from 0 <= j < phrase.n: * if sym_isvar(phrase.syms[j]): * fphr_arr._append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< @@ -53207,7 +53169,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_fphr_arr->__pyx_vtab)->_append(__pyx_v_fphr_arr, __pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1592 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1586 * if sym_isvar(phrase.syms[j]): * fphr_arr._append(sym_setindex(self.category, i)) * i = i + 1 # <<<<<<<<<<<<<< @@ -53219,7 +53181,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1594 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1588 * i = i + 1 * else: * fphr_arr._append(phrase.syms[j]) # <<<<<<<<<<<<<< @@ -53231,17 +53193,17 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_L55:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1595 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1589 * else: * fphr_arr._append(phrase.syms[j]) * if f_back_high > f_high: # <<<<<<<<<<<<<< * fphr_arr._append(sym_setindex(self.category, i)) * self.findexes.append(sym_setindex(self.category, i)) */ - __pyx_t_7 = (__pyx_v_f_back_high > __pyx_v_f_high); - if (__pyx_t_7) { + __pyx_t_9 = (__pyx_v_f_back_high > __pyx_v_f_high); + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1596 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1590 * fphr_arr._append(phrase.syms[j]) * if f_back_high > f_high: * fphr_arr._append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< @@ -53250,16 +53212,16 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_fphr_arr->__pyx_vtab)->_append(__pyx_v_fphr_arr, __pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1597 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1591 * if f_back_high > f_high: * fphr_arr._append(sym_setindex(self.category, i)) * self.findexes.append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< * * fphr = Phrase(fphr_arr) */ - __pyx_t_10 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1597; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1591; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_2 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes), __pyx_t_10); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1597; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes), __pyx_t_10); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1591; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; @@ -53267,25 +53229,25 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L56:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1599 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1593 * self.findexes.append(sym_setindex(self.category, i)) * * fphr = Phrase(fphr_arr) # <<<<<<<<<<<<<< * if met_constraints: * phrase_list = self.extract_phrases(e_low, e_high, e_gap_low + gap_start, e_gap_high + gap_start, e_links_low, num_gaps, */ - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1599; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1593; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(((PyObject *)__pyx_v_fphr_arr)); PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_v_fphr_arr)); __Pyx_GIVEREF(((PyObject *)__pyx_v_fphr_arr)); - __pyx_t_10 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1599; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1593; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0; __pyx_v_fphr = ((struct __pyx_obj_3_sa_Phrase *)__pyx_t_10); __pyx_t_10 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1600 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1594 * * fphr = Phrase(fphr_arr) * if met_constraints: # <<<<<<<<<<<<<< @@ -53294,47 +53256,47 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ if (__pyx_v_met_constraints) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1603 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1597 * phrase_list = self.extract_phrases(e_low, e_high, e_gap_low + gap_start, e_gap_high + gap_start, e_links_low, num_gaps, * f_back_low, f_back_high, f_gap_low + gap_start, f_gap_high + gap_start, f_links_low, * matching.sent_id, e_sent_len, e_sent_start) # <<<<<<<<<<<<<< * if len(phrase_list) > 0: * pair_count = 1.0 / len(phrase_list) */ - __pyx_t_10 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->extract_phrases(__pyx_v_self, __pyx_v_e_low, __pyx_v_e_high, (__pyx_v_e_gap_low + __pyx_v_gap_start), (__pyx_v_e_gap_high + __pyx_v_gap_start), __pyx_v_e_links_low, __pyx_v_num_gaps, __pyx_v_f_back_low, __pyx_v_f_back_high, (__pyx_v_f_gap_low + __pyx_v_gap_start), (__pyx_v_f_gap_high + __pyx_v_gap_start), __pyx_v_f_links_low, __pyx_v_matching->sent_id, __pyx_v_e_sent_len, __pyx_v_e_sent_start); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1601; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->extract_phrases(__pyx_v_self, __pyx_v_e_low, __pyx_v_e_high, (__pyx_v_e_gap_low + __pyx_v_gap_start), (__pyx_v_e_gap_high + __pyx_v_gap_start), __pyx_v_e_links_low, __pyx_v_num_gaps, __pyx_v_f_back_low, __pyx_v_f_back_high, (__pyx_v_f_gap_low + __pyx_v_gap_start), (__pyx_v_f_gap_high + __pyx_v_gap_start), __pyx_v_f_links_low, __pyx_v_matching->sent_id, __pyx_v_e_sent_len, __pyx_v_e_sent_start); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1595; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __pyx_v_phrase_list = __pyx_t_10; __pyx_t_10 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1604 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1598 * f_back_low, f_back_high, f_gap_low + gap_start, f_gap_high + gap_start, f_links_low, * matching.sent_id, e_sent_len, e_sent_start) * if len(phrase_list) > 0: # <<<<<<<<<<<<<< * pair_count = 1.0 / len(phrase_list) * else: */ - __pyx_t_13 = PyObject_Length(__pyx_v_phrase_list); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1604; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_7 = (__pyx_t_13 > 0); - if (__pyx_t_7) { + __pyx_t_13 = PyObject_Length(__pyx_v_phrase_list); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1598; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = (__pyx_t_13 > 0); + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1605 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1599 * matching.sent_id, e_sent_len, e_sent_start) * if len(phrase_list) > 0: * pair_count = 1.0 / len(phrase_list) # <<<<<<<<<<<<<< * else: * pair_count = 0 */ - __pyx_t_13 = PyObject_Length(__pyx_v_phrase_list); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1605; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyObject_Length(__pyx_v_phrase_list); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1599; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (unlikely(__pyx_t_13 == 0)) { PyErr_Format(PyExc_ZeroDivisionError, "float division"); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1605; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1599; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_v_pair_count = (1.0 / __pyx_t_13); goto __pyx_L58; } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1607 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1601 * pair_count = 1.0 / len(phrase_list) * else: * pair_count = 0 # <<<<<<<<<<<<<< @@ -53343,22 +53305,22 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_pair_count = 0.0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1608 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1602 * else: * pair_count = 0 * reason_for_failure = "Didn't extract anything from [%d, %d] -> [%d, %d]" % (f_back_low, f_back_high, e_low, e_high) # <<<<<<<<<<<<<< * for (phrase2,eindexes) in phrase_list: * als1 = self.create_alignments(sent_links,num_links,self.findexes,eindexes) */ - __pyx_t_10 = PyInt_FromLong(__pyx_v_f_back_low); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1608; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyInt_FromLong(__pyx_v_f_back_low); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1602; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_2 = PyInt_FromLong(__pyx_v_f_back_high); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1608; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyInt_FromLong(__pyx_v_f_back_high); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1602; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyInt_FromLong(__pyx_v_e_low); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1608; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromLong(__pyx_v_e_low); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1602; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = PyInt_FromLong(__pyx_v_e_high); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1608; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyInt_FromLong(__pyx_v_e_high); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1602; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = PyTuple_New(4); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1608; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyTuple_New(4); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1602; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_t_10); __Pyx_GIVEREF(__pyx_t_10); @@ -53372,7 +53334,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_2 = 0; __pyx_t_1 = 0; __pyx_t_14 = 0; - __pyx_t_14 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_132), ((PyObject *)__pyx_t_15)); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1608; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_132), ((PyObject *)__pyx_t_15)); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1602; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(((PyObject *)__pyx_t_14)); __Pyx_DECREF(((PyObject *)__pyx_t_15)); __pyx_t_15 = 0; __Pyx_DECREF(__pyx_v_reason_for_failure); @@ -53381,7 +53343,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L58:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1609 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1603 * pair_count = 0 * reason_for_failure = "Didn't extract anything from [%d, %d] -> [%d, %d]" % (f_back_low, f_back_high, e_low, e_high) * for (phrase2,eindexes) in phrase_list: # <<<<<<<<<<<<<< @@ -53392,7 +53354,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_14 = __pyx_v_phrase_list; __Pyx_INCREF(__pyx_t_14); __pyx_t_13 = 0; __pyx_t_16 = NULL; } else { - __pyx_t_13 = -1; __pyx_t_14 = PyObject_GetIter(__pyx_v_phrase_list); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = -1; __pyx_t_14 = PyObject_GetIter(__pyx_v_phrase_list); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __pyx_t_16 = Py_TYPE(__pyx_t_14)->tp_iternext; } @@ -53400,23 +53362,23 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj if (!__pyx_t_16 && PyList_CheckExact(__pyx_t_14)) { if (__pyx_t_13 >= PyList_GET_SIZE(__pyx_t_14)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_15 = PyList_GET_ITEM(__pyx_t_14, __pyx_t_13); __Pyx_INCREF(__pyx_t_15); __pyx_t_13++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyList_GET_ITEM(__pyx_t_14, __pyx_t_13); __Pyx_INCREF(__pyx_t_15); __pyx_t_13++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_15 = PySequence_ITEM(__pyx_t_14, __pyx_t_13); __pyx_t_13++; if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PySequence_ITEM(__pyx_t_14, __pyx_t_13); __pyx_t_13++; if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else if (!__pyx_t_16 && PyTuple_CheckExact(__pyx_t_14)) { if (__pyx_t_13 >= PyTuple_GET_SIZE(__pyx_t_14)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_15 = PyTuple_GET_ITEM(__pyx_t_14, __pyx_t_13); __Pyx_INCREF(__pyx_t_15); __pyx_t_13++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyTuple_GET_ITEM(__pyx_t_14, __pyx_t_13); __Pyx_INCREF(__pyx_t_15); __pyx_t_13++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_15 = PySequence_ITEM(__pyx_t_14, __pyx_t_13); __pyx_t_13++; if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PySequence_ITEM(__pyx_t_14, __pyx_t_13); __pyx_t_13++; if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { __pyx_t_15 = __pyx_t_16(__pyx_t_14); if (unlikely(!__pyx_t_15)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -53432,7 +53394,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj if (unlikely(size != 2)) { if (size > 2) __Pyx_RaiseTooManyValuesError(2); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyTuple_CheckExact(sequence))) { @@ -53445,14 +53407,14 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_INCREF(__pyx_t_1); __Pyx_INCREF(__pyx_t_2); #else - __pyx_t_1 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; } else { Py_ssize_t index = -1; - __pyx_t_10 = PyObject_GetIter(__pyx_t_15); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_GetIter(__pyx_t_15); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; __pyx_t_17 = Py_TYPE(__pyx_t_10)->tp_iternext; @@ -53460,7 +53422,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_GOTREF(__pyx_t_1); index = 1; __pyx_t_2 = __pyx_t_17(__pyx_t_10); if (unlikely(!__pyx_t_2)) goto __pyx_L61_unpacking_failed; __Pyx_GOTREF(__pyx_t_2); - if (__Pyx_IternextUnpackEndCheck(__pyx_t_17(__pyx_t_10), 2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_IternextUnpackEndCheck(__pyx_t_17(__pyx_t_10), 2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_17 = NULL; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; goto __pyx_L62_unpacking_done; @@ -53468,7 +53430,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __pyx_t_17 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_L62_unpacking_done:; } __Pyx_XDECREF(__pyx_v_phrase2); @@ -53478,7 +53440,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_v_eindexes = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1610 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1604 * reason_for_failure = "Didn't extract anything from [%d, %d] -> [%d, %d]" % (f_back_low, f_back_high, e_low, e_high) * for (phrase2,eindexes) in phrase_list: * als1 = self.create_alignments(sent_links,num_links,self.findexes,eindexes) # <<<<<<<<<<<<<< @@ -53487,31 +53449,31 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_t_15 = ((PyObject *)__pyx_v_self->findexes); __Pyx_INCREF(__pyx_t_15); - __pyx_t_2 = ((PyObject *)((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->create_alignments(__pyx_v_self, __pyx_v_sent_links, __pyx_v_num_links, __pyx_t_15, __pyx_v_eindexes)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1610; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = ((PyObject *)((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->create_alignments(__pyx_v_self, __pyx_v_sent_links, __pyx_v_num_links, __pyx_t_15, __pyx_v_eindexes)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1604; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; __Pyx_XDECREF(((PyObject *)__pyx_v_als1)); __pyx_v_als1 = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1611 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1605 * for (phrase2,eindexes) in phrase_list: * als1 = self.create_alignments(sent_links,num_links,self.findexes,eindexes) * extracts.append((fphr, phrase2, pair_count, tuple(als1))) # <<<<<<<<<<<<<< * * if (num_gaps < self.max_nonterminals and */ - __pyx_t_2 = PyFloat_FromDouble(__pyx_v_pair_count); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1611; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyFloat_FromDouble(__pyx_v_pair_count); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1605; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_15 = PyTuple_New(1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1611; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyTuple_New(1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1605; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_INCREF(((PyObject *)__pyx_v_als1)); PyTuple_SET_ITEM(__pyx_t_15, 0, ((PyObject *)__pyx_v_als1)); __Pyx_GIVEREF(((PyObject *)__pyx_v_als1)); - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)(&PyTuple_Type))), ((PyObject *)__pyx_t_15), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1611; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)(&PyTuple_Type))), ((PyObject *)__pyx_t_15), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1605; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(((PyObject *)__pyx_t_15)); __pyx_t_15 = 0; - __pyx_t_15 = PyTuple_New(4); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1611; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyTuple_New(4); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1605; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_INCREF(((PyObject *)__pyx_v_fphr)); PyTuple_SET_ITEM(__pyx_t_15, 0, ((PyObject *)__pyx_v_fphr)); @@ -53525,7 +53487,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_GIVEREF(__pyx_t_1); __pyx_t_2 = 0; __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Append(__pyx_v_extracts, ((PyObject *)__pyx_t_15)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1611; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_Append(__pyx_v_extracts, ((PyObject *)__pyx_t_15)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1605; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(((PyObject *)__pyx_t_15)); __pyx_t_15 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -53535,27 +53497,27 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L57:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1613 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1607 * extracts.append((fphr, phrase2, pair_count, tuple(als1))) * * if (num_gaps < self.max_nonterminals and # <<<<<<<<<<<<<< * phrase_len < self.max_length and * f_back_high - f_back_low + self.train_min_gap_size <= self.train_max_initial_size): */ - __pyx_t_7 = (__pyx_v_num_gaps < __pyx_v_self->max_nonterminals); - if (__pyx_t_7) { + __pyx_t_9 = (__pyx_v_num_gaps < __pyx_v_self->max_nonterminals); + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1614 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1608 * * if (num_gaps < self.max_nonterminals and * phrase_len < self.max_length and # <<<<<<<<<<<<<< * f_back_high - f_back_low + self.train_min_gap_size <= self.train_max_initial_size): * if (f_back_low == f_low and */ - __pyx_t_9 = (__pyx_v_phrase_len < __pyx_v_self->max_length); - if (__pyx_t_9) { + __pyx_t_7 = (__pyx_v_phrase_len < __pyx_v_self->max_length); + if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1615 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1609 * if (num_gaps < self.max_nonterminals and * phrase_len < self.max_length and * f_back_high - f_back_low + self.train_min_gap_size <= self.train_max_initial_size): # <<<<<<<<<<<<<< @@ -53565,35 +53527,35 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_8 = (((__pyx_v_f_back_high - __pyx_v_f_back_low) + __pyx_v_self->train_min_gap_size) <= __pyx_v_self->train_max_initial_size); __pyx_t_18 = __pyx_t_8; } else { - __pyx_t_18 = __pyx_t_9; + __pyx_t_18 = __pyx_t_7; } - __pyx_t_9 = __pyx_t_18; + __pyx_t_7 = __pyx_t_18; } else { - __pyx_t_9 = __pyx_t_7; + __pyx_t_7 = __pyx_t_9; } - if (__pyx_t_9) { + if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1616 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1610 * phrase_len < self.max_length and * f_back_high - f_back_low + self.train_min_gap_size <= self.train_max_initial_size): * if (f_back_low == f_low and # <<<<<<<<<<<<<< * f_low >= self.train_min_gap_size and * ((not self.tight_phrases) or (f_links_low[f_low-1] != -1 and f_links_low[f_back_high-1] != -1))): */ - __pyx_t_9 = (__pyx_v_f_back_low == __pyx_v_f_low); - if (__pyx_t_9) { + __pyx_t_7 = (__pyx_v_f_back_low == __pyx_v_f_low); + if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1617 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1611 * f_back_high - f_back_low + self.train_min_gap_size <= self.train_max_initial_size): * if (f_back_low == f_low and * f_low >= self.train_min_gap_size and # <<<<<<<<<<<<<< * ((not self.tight_phrases) or (f_links_low[f_low-1] != -1 and f_links_low[f_back_high-1] != -1))): * f_x_low = f_low-self.train_min_gap_size */ - __pyx_t_7 = (__pyx_v_f_low >= __pyx_v_self->train_min_gap_size); - if (__pyx_t_7) { + __pyx_t_9 = (__pyx_v_f_low >= __pyx_v_self->train_min_gap_size); + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1618 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1612 * if (f_back_low == f_low and * f_low >= self.train_min_gap_size and * ((not self.tight_phrases) or (f_links_low[f_low-1] != -1 and f_links_low[f_back_high-1] != -1))): # <<<<<<<<<<<<<< @@ -53615,15 +53577,15 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_t_18 = __pyx_t_8; } else { - __pyx_t_18 = __pyx_t_7; + __pyx_t_18 = __pyx_t_9; } - __pyx_t_7 = __pyx_t_18; + __pyx_t_9 = __pyx_t_18; } else { - __pyx_t_7 = __pyx_t_9; + __pyx_t_9 = __pyx_t_7; } - if (__pyx_t_7) { + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1619 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1613 * f_low >= self.train_min_gap_size and * ((not self.tight_phrases) or (f_links_low[f_low-1] != -1 and f_links_low[f_back_high-1] != -1))): * f_x_low = f_low-self.train_min_gap_size # <<<<<<<<<<<<<< @@ -53632,7 +53594,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_f_x_low = (__pyx_v_f_low - __pyx_v_self->train_min_gap_size); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1620 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1614 * ((not self.tight_phrases) or (f_links_low[f_low-1] != -1 and f_links_low[f_back_high-1] != -1))): * f_x_low = f_low-self.train_min_gap_size * met_constraints = 1 # <<<<<<<<<<<<<< @@ -53641,7 +53603,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_met_constraints = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1621 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1615 * f_x_low = f_low-self.train_min_gap_size * met_constraints = 1 * if self.tight_phrases: # <<<<<<<<<<<<<< @@ -53650,7 +53612,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ if (__pyx_v_self->tight_phrases) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1622 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1616 * met_constraints = 1 * if self.tight_phrases: * while f_x_low >= 0 and f_links_low[f_x_low] == -1: # <<<<<<<<<<<<<< @@ -53658,16 +53620,16 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj * if f_x_low < 0 or f_back_high - f_x_low > self.train_max_initial_size: */ while (1) { - __pyx_t_7 = (__pyx_v_f_x_low >= 0); - if (__pyx_t_7) { - __pyx_t_9 = ((__pyx_v_f_links_low[__pyx_v_f_x_low]) == -1); - __pyx_t_18 = __pyx_t_9; - } else { + __pyx_t_9 = (__pyx_v_f_x_low >= 0); + if (__pyx_t_9) { + __pyx_t_7 = ((__pyx_v_f_links_low[__pyx_v_f_x_low]) == -1); __pyx_t_18 = __pyx_t_7; + } else { + __pyx_t_18 = __pyx_t_9; } if (!__pyx_t_18) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1623 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1617 * if self.tight_phrases: * while f_x_low >= 0 and f_links_low[f_x_low] == -1: * f_x_low = f_x_low - 1 # <<<<<<<<<<<<<< @@ -53680,7 +53642,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L65:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1624 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1618 * while f_x_low >= 0 and f_links_low[f_x_low] == -1: * f_x_low = f_x_low - 1 * if f_x_low < 0 or f_back_high - f_x_low > self.train_max_initial_size: # <<<<<<<<<<<<<< @@ -53689,14 +53651,14 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_t_18 = (__pyx_v_f_x_low < 0); if (!__pyx_t_18) { - __pyx_t_7 = ((__pyx_v_f_back_high - __pyx_v_f_x_low) > __pyx_v_self->train_max_initial_size); - __pyx_t_9 = __pyx_t_7; + __pyx_t_9 = ((__pyx_v_f_back_high - __pyx_v_f_x_low) > __pyx_v_self->train_max_initial_size); + __pyx_t_7 = __pyx_t_9; } else { - __pyx_t_9 = __pyx_t_18; + __pyx_t_7 = __pyx_t_18; } - if (__pyx_t_9) { + if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1625 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1619 * f_x_low = f_x_low - 1 * if f_x_low < 0 or f_back_high - f_x_low > self.train_max_initial_size: * met_constraints = 0 # <<<<<<<<<<<<<< @@ -53708,7 +53670,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L68:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1627 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1621 * met_constraints = 0 * * if (met_constraints and # <<<<<<<<<<<<<< @@ -53717,17 +53679,17 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ if (__pyx_v_met_constraints) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1628 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1622 * * if (met_constraints and * self.find_fixpoint(f_x_low, f_back_high, # <<<<<<<<<<<<<< * f_links_low, f_links_high, e_links_low, e_links_high, * e_low, e_high, &e_x_low, &e_x_high, &f_x_low, &f_x_high, */ - __pyx_t_14 = PyInt_FromLong(__pyx_v_f_back_high); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1628; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyInt_FromLong(__pyx_v_f_back_high); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1632 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1626 * e_low, e_high, &e_x_low, &e_x_high, &f_x_low, &f_x_high, * f_sent_len, e_sent_len, * self.train_max_initial_size, self.train_max_initial_size, # <<<<<<<<<<<<<< @@ -53737,56 +53699,56 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj if (((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_fixpoint(__pyx_v_self, __pyx_v_f_x_low, __pyx_t_14, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_links_low, __pyx_v_e_links_high, __pyx_v_e_low, __pyx_v_e_high, (&__pyx_v_e_x_low), (&__pyx_v_e_x_high), (&__pyx_v_f_x_low), (&__pyx_v_f_x_high), __pyx_v_f_sent_len, __pyx_v_e_sent_len, __pyx_v_self->train_max_initial_size, __pyx_v_self->train_max_initial_size, 1, 1, 1, 1, 0, 1, 0)) { __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1634 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1628 * self.train_max_initial_size, self.train_max_initial_size, * 1, 1, 1, 1, 0, 1, 0) and * ((not self.tight_phrases) or f_links_low[f_x_low] != -1) and # <<<<<<<<<<<<<< * self.find_fixpoint(f_x_low, f_low, # check integrity of new subphrase * f_links_low, f_links_high, e_links_low, e_links_high, */ - __pyx_t_9 = (!__pyx_v_self->tight_phrases); - if (!__pyx_t_9) { + __pyx_t_7 = (!__pyx_v_self->tight_phrases); + if (!__pyx_t_7) { __pyx_t_18 = ((__pyx_v_f_links_low[__pyx_v_f_x_low]) != -1); - __pyx_t_7 = __pyx_t_18; + __pyx_t_9 = __pyx_t_18; } else { - __pyx_t_7 = __pyx_t_9; + __pyx_t_9 = __pyx_t_7; } - if (__pyx_t_7) { + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1635 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1629 * 1, 1, 1, 1, 0, 1, 0) and * ((not self.tight_phrases) or f_links_low[f_x_low] != -1) and * self.find_fixpoint(f_x_low, f_low, # check integrity of new subphrase # <<<<<<<<<<<<<< * f_links_low, f_links_high, e_links_low, e_links_high, * -1, -1, e_gap_low, e_gap_high, f_gap_low, f_gap_high, */ - __pyx_t_1 = PyInt_FromLong(__pyx_v_f_low); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1635; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromLong(__pyx_v_f_low); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1639 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1633 * -1, -1, e_gap_low, e_gap_high, f_gap_low, f_gap_high, * f_sent_len, e_sent_len, * self.train_max_initial_size, self.train_max_initial_size, # <<<<<<<<<<<<<< * 0, 0, 0, 0, 0, 0, 0)): * fphr_arr._clear() */ - __pyx_t_9 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_fixpoint(__pyx_v_self, __pyx_v_f_x_low, __pyx_t_1, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_links_low, __pyx_v_e_links_high, -1, -1, __pyx_v_e_gap_low, __pyx_v_e_gap_high, __pyx_v_f_gap_low, __pyx_v_f_gap_high, __pyx_v_f_sent_len, __pyx_v_e_sent_len, __pyx_v_self->train_max_initial_size, __pyx_v_self->train_max_initial_size, 0, 0, 0, 0, 0, 0, 0); + __pyx_t_7 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_fixpoint(__pyx_v_self, __pyx_v_f_x_low, __pyx_t_1, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_links_low, __pyx_v_e_links_high, -1, -1, __pyx_v_e_gap_low, __pyx_v_e_gap_high, __pyx_v_f_gap_low, __pyx_v_f_gap_high, __pyx_v_f_sent_len, __pyx_v_e_sent_len, __pyx_v_self->train_max_initial_size, __pyx_v_self->train_max_initial_size, 0, 0, 0, 0, 0, 0, 0); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } else { - __pyx_t_9 = __pyx_t_7; + __pyx_t_7 = __pyx_t_9; } - __pyx_t_7 = __pyx_t_9; + __pyx_t_9 = __pyx_t_7; } else { - __pyx_t_7 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_fixpoint(__pyx_v_self, __pyx_v_f_x_low, __pyx_t_14, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_links_low, __pyx_v_e_links_high, __pyx_v_e_low, __pyx_v_e_high, (&__pyx_v_e_x_low), (&__pyx_v_e_x_high), (&__pyx_v_f_x_low), (&__pyx_v_f_x_high), __pyx_v_f_sent_len, __pyx_v_e_sent_len, __pyx_v_self->train_max_initial_size, __pyx_v_self->train_max_initial_size, 1, 1, 1, 1, 0, 1, 0); + __pyx_t_9 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_fixpoint(__pyx_v_self, __pyx_v_f_x_low, __pyx_t_14, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_links_low, __pyx_v_e_links_high, __pyx_v_e_low, __pyx_v_e_high, (&__pyx_v_e_x_low), (&__pyx_v_e_x_high), (&__pyx_v_f_x_low), (&__pyx_v_f_x_high), __pyx_v_f_sent_len, __pyx_v_e_sent_len, __pyx_v_self->train_max_initial_size, __pyx_v_self->train_max_initial_size, 1, 1, 1, 1, 0, 1, 0); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; } - __pyx_t_9 = __pyx_t_7; + __pyx_t_7 = __pyx_t_9; } else { - __pyx_t_9 = __pyx_v_met_constraints; + __pyx_t_7 = __pyx_v_met_constraints; } - if (__pyx_t_9) { + if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1641 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1635 * self.train_max_initial_size, self.train_max_initial_size, * 0, 0, 0, 0, 0, 0, 0)): * fphr_arr._clear() # <<<<<<<<<<<<<< @@ -53795,7 +53757,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_fphr_arr->__pyx_vtab)->_clear(__pyx_v_fphr_arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1642 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1636 * 0, 0, 0, 0, 0, 0, 0)): * fphr_arr._clear() * i = 1 # <<<<<<<<<<<<<< @@ -53804,35 +53766,35 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_i = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1643 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1637 * fphr_arr._clear() * i = 1 * self.findexes.reset() # <<<<<<<<<<<<<< * self.findexes.append(sym_setindex(self.category, i)) * fphr_arr._append(sym_setindex(self.category, i)) */ - __pyx_t_14 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes), __pyx_n_s__reset); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1643; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes), __pyx_n_s__reset); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1637; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); - __pyx_t_1 = PyObject_Call(__pyx_t_14, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1643; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(__pyx_t_14, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1637; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1644 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1638 * i = 1 * self.findexes.reset() * self.findexes.append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< * fphr_arr._append(sym_setindex(self.category, i)) * i = i+1 */ - __pyx_t_1 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1644; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1638; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes), __pyx_t_1); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1644; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes), __pyx_t_1); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1638; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1645 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1639 * self.findexes.reset() * self.findexes.append(sym_setindex(self.category, i)) * fphr_arr._append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< @@ -53841,7 +53803,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_fphr_arr->__pyx_vtab)->_append(__pyx_v_fphr_arr, __pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1646 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1640 * self.findexes.append(sym_setindex(self.category, i)) * fphr_arr._append(sym_setindex(self.category, i)) * i = i+1 # <<<<<<<<<<<<<< @@ -53850,27 +53812,27 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_i = (__pyx_v_i + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1647 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1641 * fphr_arr._append(sym_setindex(self.category, i)) * i = i+1 * self.findexes.extend(self.findexes1) # <<<<<<<<<<<<<< * for j from 0 <= j < phrase.n: * if sym_isvar(phrase.syms[j]): */ - __pyx_t_14 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes), __pyx_n_s__extend); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1647; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes), __pyx_n_s__extend); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1641; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1647; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1641; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(((PyObject *)__pyx_v_self->findexes1)); PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_v_self->findexes1)); __Pyx_GIVEREF(((PyObject *)__pyx_v_self->findexes1)); - __pyx_t_15 = PyObject_Call(__pyx_t_14, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1647; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyObject_Call(__pyx_t_14, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1641; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1648 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1642 * i = i+1 * self.findexes.extend(self.findexes1) * for j from 0 <= j < phrase.n: # <<<<<<<<<<<<<< @@ -53880,7 +53842,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_3 = __pyx_v_phrase->n; for (__pyx_v_j = 0; __pyx_v_j < __pyx_t_3; __pyx_v_j++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1649 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1643 * self.findexes.extend(self.findexes1) * for j from 0 <= j < phrase.n: * if sym_isvar(phrase.syms[j]): # <<<<<<<<<<<<<< @@ -53890,7 +53852,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_4 = __pyx_f_3_sa_sym_isvar((__pyx_v_phrase->syms[__pyx_v_j])); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1650 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1644 * for j from 0 <= j < phrase.n: * if sym_isvar(phrase.syms[j]): * fphr_arr._append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< @@ -53899,7 +53861,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_fphr_arr->__pyx_vtab)->_append(__pyx_v_fphr_arr, __pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1651 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1645 * if sym_isvar(phrase.syms[j]): * fphr_arr._append(sym_setindex(self.category, i)) * i = i + 1 # <<<<<<<<<<<<<< @@ -53911,7 +53873,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1653 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1647 * i = i + 1 * else: * fphr_arr._append(phrase.syms[j]) # <<<<<<<<<<<<<< @@ -53923,17 +53885,17 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_L72:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1654 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1648 * else: * fphr_arr._append(phrase.syms[j]) * if f_back_high > f_high: # <<<<<<<<<<<<<< * fphr_arr._append(sym_setindex(self.category, i)) * self.findexes.append(sym_setindex(self.category, i)) */ - __pyx_t_9 = (__pyx_v_f_back_high > __pyx_v_f_high); - if (__pyx_t_9) { + __pyx_t_7 = (__pyx_v_f_back_high > __pyx_v_f_high); + if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1655 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1649 * fphr_arr._append(phrase.syms[j]) * if f_back_high > f_high: * fphr_arr._append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< @@ -53942,16 +53904,16 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_fphr_arr->__pyx_vtab)->_append(__pyx_v_fphr_arr, __pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1656 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1650 * if f_back_high > f_high: * fphr_arr._append(sym_setindex(self.category, i)) * self.findexes.append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< * fphr = Phrase(fphr_arr) * phrase_list = self.extract_phrases(e_x_low, e_x_high, e_gap_low, e_gap_high, e_links_low, num_gaps+1, */ - __pyx_t_15 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1656; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1650; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_1 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes), __pyx_t_15); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1656; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes), __pyx_t_15); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1650; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -53959,67 +53921,67 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L73:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1657 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1651 * fphr_arr._append(sym_setindex(self.category, i)) * self.findexes.append(sym_setindex(self.category, i)) * fphr = Phrase(fphr_arr) # <<<<<<<<<<<<<< * phrase_list = self.extract_phrases(e_x_low, e_x_high, e_gap_low, e_gap_high, e_links_low, num_gaps+1, * f_x_low, f_x_high, f_gap_low, f_gap_high, f_links_low, matching.sent_id, */ - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1657; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1651; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(((PyObject *)__pyx_v_fphr_arr)); PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_v_fphr_arr)); __Pyx_GIVEREF(((PyObject *)__pyx_v_fphr_arr)); - __pyx_t_15 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1657; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1651; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; __Pyx_DECREF(((PyObject *)__pyx_v_fphr)); __pyx_v_fphr = ((struct __pyx_obj_3_sa_Phrase *)__pyx_t_15); __pyx_t_15 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1660 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1654 * phrase_list = self.extract_phrases(e_x_low, e_x_high, e_gap_low, e_gap_high, e_links_low, num_gaps+1, * f_x_low, f_x_high, f_gap_low, f_gap_high, f_links_low, matching.sent_id, * e_sent_len, e_sent_start) # <<<<<<<<<<<<<< * if len(phrase_list) > 0: * pair_count = 1.0 / len(phrase_list) */ - __pyx_t_15 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->extract_phrases(__pyx_v_self, __pyx_v_e_x_low, __pyx_v_e_x_high, __pyx_v_e_gap_low, __pyx_v_e_gap_high, __pyx_v_e_links_low, (__pyx_v_num_gaps + 1), __pyx_v_f_x_low, __pyx_v_f_x_high, __pyx_v_f_gap_low, __pyx_v_f_gap_high, __pyx_v_f_links_low, __pyx_v_matching->sent_id, __pyx_v_e_sent_len, __pyx_v_e_sent_start); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1658; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->extract_phrases(__pyx_v_self, __pyx_v_e_x_low, __pyx_v_e_x_high, __pyx_v_e_gap_low, __pyx_v_e_gap_high, __pyx_v_e_links_low, (__pyx_v_num_gaps + 1), __pyx_v_f_x_low, __pyx_v_f_x_high, __pyx_v_f_gap_low, __pyx_v_f_gap_high, __pyx_v_f_links_low, __pyx_v_matching->sent_id, __pyx_v_e_sent_len, __pyx_v_e_sent_start); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1652; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_XDECREF(__pyx_v_phrase_list); __pyx_v_phrase_list = __pyx_t_15; __pyx_t_15 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1661 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1655 * f_x_low, f_x_high, f_gap_low, f_gap_high, f_links_low, matching.sent_id, * e_sent_len, e_sent_start) * if len(phrase_list) > 0: # <<<<<<<<<<<<<< * pair_count = 1.0 / len(phrase_list) * else: */ - __pyx_t_13 = PyObject_Length(__pyx_v_phrase_list); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1661; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_9 = (__pyx_t_13 > 0); - if (__pyx_t_9) { + __pyx_t_13 = PyObject_Length(__pyx_v_phrase_list); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1655; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = (__pyx_t_13 > 0); + if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1662 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1656 * e_sent_len, e_sent_start) * if len(phrase_list) > 0: * pair_count = 1.0 / len(phrase_list) # <<<<<<<<<<<<<< * else: * pair_count = 0 */ - __pyx_t_13 = PyObject_Length(__pyx_v_phrase_list); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1662; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyObject_Length(__pyx_v_phrase_list); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1656; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (unlikely(__pyx_t_13 == 0)) { PyErr_Format(PyExc_ZeroDivisionError, "float division"); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1662; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1656; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_v_pair_count = (1.0 / __pyx_t_13); goto __pyx_L74; } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1664 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1658 * pair_count = 1.0 / len(phrase_list) * else: * pair_count = 0 # <<<<<<<<<<<<<< @@ -54030,7 +53992,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L74:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1665 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1659 * else: * pair_count = 0 * for phrase2,eindexes in phrase_list: # <<<<<<<<<<<<<< @@ -54041,7 +54003,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_15 = __pyx_v_phrase_list; __Pyx_INCREF(__pyx_t_15); __pyx_t_13 = 0; __pyx_t_16 = NULL; } else { - __pyx_t_13 = -1; __pyx_t_15 = PyObject_GetIter(__pyx_v_phrase_list); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1665; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = -1; __pyx_t_15 = PyObject_GetIter(__pyx_v_phrase_list); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1659; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __pyx_t_16 = Py_TYPE(__pyx_t_15)->tp_iternext; } @@ -54049,23 +54011,23 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj if (!__pyx_t_16 && PyList_CheckExact(__pyx_t_15)) { if (__pyx_t_13 >= PyList_GET_SIZE(__pyx_t_15)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_15, __pyx_t_13); __Pyx_INCREF(__pyx_t_1); __pyx_t_13++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1665; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_15, __pyx_t_13); __Pyx_INCREF(__pyx_t_1); __pyx_t_13++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1659; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_15, __pyx_t_13); __pyx_t_13++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1665; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PySequence_ITEM(__pyx_t_15, __pyx_t_13); __pyx_t_13++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1659; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else if (!__pyx_t_16 && PyTuple_CheckExact(__pyx_t_15)) { if (__pyx_t_13 >= PyTuple_GET_SIZE(__pyx_t_15)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_15, __pyx_t_13); __Pyx_INCREF(__pyx_t_1); __pyx_t_13++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1665; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_15, __pyx_t_13); __Pyx_INCREF(__pyx_t_1); __pyx_t_13++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1659; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_15, __pyx_t_13); __pyx_t_13++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1665; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PySequence_ITEM(__pyx_t_15, __pyx_t_13); __pyx_t_13++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1659; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { __pyx_t_1 = __pyx_t_16(__pyx_t_15); if (unlikely(!__pyx_t_1)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1665; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1659; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -54081,7 +54043,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj if (unlikely(size != 2)) { if (size > 2) __Pyx_RaiseTooManyValuesError(2); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1665; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1659; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyTuple_CheckExact(sequence))) { @@ -54094,14 +54056,14 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_INCREF(__pyx_t_14); __Pyx_INCREF(__pyx_t_2); #else - __pyx_t_14 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1665; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1665; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1659; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1659; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } else { Py_ssize_t index = -1; - __pyx_t_10 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1665; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1659; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_17 = Py_TYPE(__pyx_t_10)->tp_iternext; @@ -54109,7 +54071,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_GOTREF(__pyx_t_14); index = 1; __pyx_t_2 = __pyx_t_17(__pyx_t_10); if (unlikely(!__pyx_t_2)) goto __pyx_L77_unpacking_failed; __Pyx_GOTREF(__pyx_t_2); - if (__Pyx_IternextUnpackEndCheck(__pyx_t_17(__pyx_t_10), 2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1665; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_IternextUnpackEndCheck(__pyx_t_17(__pyx_t_10), 2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1659; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_17 = NULL; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; goto __pyx_L78_unpacking_done; @@ -54117,7 +54079,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __pyx_t_17 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1665; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1659; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_L78_unpacking_done:; } __Pyx_XDECREF(__pyx_v_phrase2); @@ -54127,7 +54089,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_v_eindexes = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1666 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1660 * pair_count = 0 * for phrase2,eindexes in phrase_list: * als2 = self.create_alignments(sent_links,num_links,self.findexes,eindexes) # <<<<<<<<<<<<<< @@ -54136,31 +54098,31 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_t_1 = ((PyObject *)__pyx_v_self->findexes); __Pyx_INCREF(__pyx_t_1); - __pyx_t_2 = ((PyObject *)((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->create_alignments(__pyx_v_self, __pyx_v_sent_links, __pyx_v_num_links, __pyx_t_1, __pyx_v_eindexes)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1666; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = ((PyObject *)((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->create_alignments(__pyx_v_self, __pyx_v_sent_links, __pyx_v_num_links, __pyx_t_1, __pyx_v_eindexes)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1660; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_XDECREF(((PyObject *)__pyx_v_als2)); __pyx_v_als2 = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1667 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1661 * for phrase2,eindexes in phrase_list: * als2 = self.create_alignments(sent_links,num_links,self.findexes,eindexes) * extracts.append((fphr, phrase2, pair_count, tuple(als2))) # <<<<<<<<<<<<<< * * if (f_back_high == f_high and */ - __pyx_t_2 = PyFloat_FromDouble(__pyx_v_pair_count); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1667; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyFloat_FromDouble(__pyx_v_pair_count); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1661; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1667; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1661; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(((PyObject *)__pyx_v_als2)); PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_v_als2)); __Pyx_GIVEREF(((PyObject *)__pyx_v_als2)); - __pyx_t_14 = PyObject_Call(((PyObject *)((PyObject*)(&PyTuple_Type))), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1667; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyObject_Call(((PyObject *)((PyObject*)(&PyTuple_Type))), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1661; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - __pyx_t_1 = PyTuple_New(4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1667; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1661; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(((PyObject *)__pyx_v_fphr)); PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_v_fphr)); @@ -54174,7 +54136,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_GIVEREF(__pyx_t_14); __pyx_t_2 = 0; __pyx_t_14 = 0; - __pyx_t_14 = __Pyx_PyObject_Append(__pyx_v_extracts, ((PyObject *)__pyx_t_1)); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1667; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyObject_Append(__pyx_v_extracts, ((PyObject *)__pyx_t_1)); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1661; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; @@ -54187,27 +54149,27 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L64:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1669 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1663 * extracts.append((fphr, phrase2, pair_count, tuple(als2))) * * if (f_back_high == f_high and # <<<<<<<<<<<<<< * f_sent_len - f_high >= self.train_min_gap_size and * ((not self.tight_phrases) or (f_links_low[f_high] != -1 and f_links_low[f_back_low] != -1))): */ - __pyx_t_9 = (__pyx_v_f_back_high == __pyx_v_f_high); - if (__pyx_t_9) { + __pyx_t_7 = (__pyx_v_f_back_high == __pyx_v_f_high); + if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1670 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1664 * * if (f_back_high == f_high and * f_sent_len - f_high >= self.train_min_gap_size and # <<<<<<<<<<<<<< * ((not self.tight_phrases) or (f_links_low[f_high] != -1 and f_links_low[f_back_low] != -1))): * f_x_high = f_high+self.train_min_gap_size */ - __pyx_t_7 = ((__pyx_v_f_sent_len - __pyx_v_f_high) >= __pyx_v_self->train_min_gap_size); - if (__pyx_t_7) { + __pyx_t_9 = ((__pyx_v_f_sent_len - __pyx_v_f_high) >= __pyx_v_self->train_min_gap_size); + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1671 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1665 * if (f_back_high == f_high and * f_sent_len - f_high >= self.train_min_gap_size and * ((not self.tight_phrases) or (f_links_low[f_high] != -1 and f_links_low[f_back_low] != -1))): # <<<<<<<<<<<<<< @@ -54229,15 +54191,15 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_t_18 = __pyx_t_8; } else { - __pyx_t_18 = __pyx_t_7; + __pyx_t_18 = __pyx_t_9; } - __pyx_t_7 = __pyx_t_18; + __pyx_t_9 = __pyx_t_18; } else { - __pyx_t_7 = __pyx_t_9; + __pyx_t_9 = __pyx_t_7; } - if (__pyx_t_7) { + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1672 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1666 * f_sent_len - f_high >= self.train_min_gap_size and * ((not self.tight_phrases) or (f_links_low[f_high] != -1 and f_links_low[f_back_low] != -1))): * f_x_high = f_high+self.train_min_gap_size # <<<<<<<<<<<<<< @@ -54246,7 +54208,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_f_x_high = (__pyx_v_f_high + __pyx_v_self->train_min_gap_size); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1673 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1667 * ((not self.tight_phrases) or (f_links_low[f_high] != -1 and f_links_low[f_back_low] != -1))): * f_x_high = f_high+self.train_min_gap_size * met_constraints = 1 # <<<<<<<<<<<<<< @@ -54255,7 +54217,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_met_constraints = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1674 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1668 * f_x_high = f_high+self.train_min_gap_size * met_constraints = 1 * if self.tight_phrases: # <<<<<<<<<<<<<< @@ -54264,7 +54226,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ if (__pyx_v_self->tight_phrases) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1675 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1669 * met_constraints = 1 * if self.tight_phrases: * while f_x_high <= f_sent_len and f_links_low[f_x_high-1] == -1: # <<<<<<<<<<<<<< @@ -54272,16 +54234,16 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj * if f_x_high > f_sent_len or f_x_high - f_back_low > self.train_max_initial_size: */ while (1) { - __pyx_t_7 = (__pyx_v_f_x_high <= __pyx_v_f_sent_len); - if (__pyx_t_7) { - __pyx_t_9 = ((__pyx_v_f_links_low[(__pyx_v_f_x_high - 1)]) == -1); - __pyx_t_18 = __pyx_t_9; - } else { + __pyx_t_9 = (__pyx_v_f_x_high <= __pyx_v_f_sent_len); + if (__pyx_t_9) { + __pyx_t_7 = ((__pyx_v_f_links_low[(__pyx_v_f_x_high - 1)]) == -1); __pyx_t_18 = __pyx_t_7; + } else { + __pyx_t_18 = __pyx_t_9; } if (!__pyx_t_18) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1676 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1670 * if self.tight_phrases: * while f_x_high <= f_sent_len and f_links_low[f_x_high-1] == -1: * f_x_high = f_x_high + 1 # <<<<<<<<<<<<<< @@ -54294,7 +54256,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L80:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1677 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1671 * while f_x_high <= f_sent_len and f_links_low[f_x_high-1] == -1: * f_x_high = f_x_high + 1 * if f_x_high > f_sent_len or f_x_high - f_back_low > self.train_max_initial_size: # <<<<<<<<<<<<<< @@ -54303,14 +54265,14 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_t_18 = (__pyx_v_f_x_high > __pyx_v_f_sent_len); if (!__pyx_t_18) { - __pyx_t_7 = ((__pyx_v_f_x_high - __pyx_v_f_back_low) > __pyx_v_self->train_max_initial_size); - __pyx_t_9 = __pyx_t_7; + __pyx_t_9 = ((__pyx_v_f_x_high - __pyx_v_f_back_low) > __pyx_v_self->train_max_initial_size); + __pyx_t_7 = __pyx_t_9; } else { - __pyx_t_9 = __pyx_t_18; + __pyx_t_7 = __pyx_t_18; } - if (__pyx_t_9) { + if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1678 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1672 * f_x_high = f_x_high + 1 * if f_x_high > f_sent_len or f_x_high - f_back_low > self.train_max_initial_size: * met_constraints = 0 # <<<<<<<<<<<<<< @@ -54322,7 +54284,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L83:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1680 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1674 * met_constraints = 0 * * if (met_constraints and # <<<<<<<<<<<<<< @@ -54331,17 +54293,17 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ if (__pyx_v_met_constraints) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1681 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1675 * * if (met_constraints and * self.find_fixpoint(f_back_low, f_x_high, # <<<<<<<<<<<<<< * f_links_low, f_links_high, e_links_low, e_links_high, * e_low, e_high, &e_x_low, &e_x_high, &f_x_low, &f_x_high, */ - __pyx_t_15 = PyInt_FromLong(__pyx_v_f_x_high); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1681; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyInt_FromLong(__pyx_v_f_x_high); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1675; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1685 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1679 * e_low, e_high, &e_x_low, &e_x_high, &f_x_low, &f_x_high, * f_sent_len, e_sent_len, * self.train_max_initial_size, self.train_max_initial_size, # <<<<<<<<<<<<<< @@ -54351,56 +54313,56 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj if (((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_fixpoint(__pyx_v_self, __pyx_v_f_back_low, __pyx_t_15, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_links_low, __pyx_v_e_links_high, __pyx_v_e_low, __pyx_v_e_high, (&__pyx_v_e_x_low), (&__pyx_v_e_x_high), (&__pyx_v_f_x_low), (&__pyx_v_f_x_high), __pyx_v_f_sent_len, __pyx_v_e_sent_len, __pyx_v_self->train_max_initial_size, __pyx_v_self->train_max_initial_size, 1, 1, 1, 0, 1, 1, 0)) { __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1687 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1681 * self.train_max_initial_size, self.train_max_initial_size, * 1, 1, 1, 0, 1, 1, 0) and * ((not self.tight_phrases) or f_links_low[f_x_high-1] != -1) and # <<<<<<<<<<<<<< * self.find_fixpoint(f_high, f_x_high, * f_links_low, f_links_high, e_links_low, e_links_high, */ - __pyx_t_9 = (!__pyx_v_self->tight_phrases); - if (!__pyx_t_9) { + __pyx_t_7 = (!__pyx_v_self->tight_phrases); + if (!__pyx_t_7) { __pyx_t_18 = ((__pyx_v_f_links_low[(__pyx_v_f_x_high - 1)]) != -1); - __pyx_t_7 = __pyx_t_18; + __pyx_t_9 = __pyx_t_18; } else { - __pyx_t_7 = __pyx_t_9; + __pyx_t_9 = __pyx_t_7; } - if (__pyx_t_7) { + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1688 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1682 * 1, 1, 1, 0, 1, 1, 0) and * ((not self.tight_phrases) or f_links_low[f_x_high-1] != -1) and * self.find_fixpoint(f_high, f_x_high, # <<<<<<<<<<<<<< * f_links_low, f_links_high, e_links_low, e_links_high, * -1, -1, e_gap_low+gap_start+num_gaps, e_gap_high+gap_start+num_gaps, */ - __pyx_t_14 = PyInt_FromLong(__pyx_v_f_x_high); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1688; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyInt_FromLong(__pyx_v_f_x_high); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1682; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1693 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1687 * f_gap_low+gap_start+num_gaps, f_gap_high+gap_start+num_gaps, * f_sent_len, e_sent_len, * self.train_max_initial_size, self.train_max_initial_size, # <<<<<<<<<<<<<< * 0, 0, 0, 0, 0, 0, 0)): * fphr_arr._clear() */ - __pyx_t_9 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_fixpoint(__pyx_v_self, __pyx_v_f_high, __pyx_t_14, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_links_low, __pyx_v_e_links_high, -1, -1, ((__pyx_v_e_gap_low + __pyx_v_gap_start) + __pyx_v_num_gaps), ((__pyx_v_e_gap_high + __pyx_v_gap_start) + __pyx_v_num_gaps), ((__pyx_v_f_gap_low + __pyx_v_gap_start) + __pyx_v_num_gaps), ((__pyx_v_f_gap_high + __pyx_v_gap_start) + __pyx_v_num_gaps), __pyx_v_f_sent_len, __pyx_v_e_sent_len, __pyx_v_self->train_max_initial_size, __pyx_v_self->train_max_initial_size, 0, 0, 0, 0, 0, 0, 0); + __pyx_t_7 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_fixpoint(__pyx_v_self, __pyx_v_f_high, __pyx_t_14, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_links_low, __pyx_v_e_links_high, -1, -1, ((__pyx_v_e_gap_low + __pyx_v_gap_start) + __pyx_v_num_gaps), ((__pyx_v_e_gap_high + __pyx_v_gap_start) + __pyx_v_num_gaps), ((__pyx_v_f_gap_low + __pyx_v_gap_start) + __pyx_v_num_gaps), ((__pyx_v_f_gap_high + __pyx_v_gap_start) + __pyx_v_num_gaps), __pyx_v_f_sent_len, __pyx_v_e_sent_len, __pyx_v_self->train_max_initial_size, __pyx_v_self->train_max_initial_size, 0, 0, 0, 0, 0, 0, 0); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; } else { - __pyx_t_9 = __pyx_t_7; + __pyx_t_7 = __pyx_t_9; } - __pyx_t_7 = __pyx_t_9; + __pyx_t_9 = __pyx_t_7; } else { - __pyx_t_7 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_fixpoint(__pyx_v_self, __pyx_v_f_back_low, __pyx_t_15, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_links_low, __pyx_v_e_links_high, __pyx_v_e_low, __pyx_v_e_high, (&__pyx_v_e_x_low), (&__pyx_v_e_x_high), (&__pyx_v_f_x_low), (&__pyx_v_f_x_high), __pyx_v_f_sent_len, __pyx_v_e_sent_len, __pyx_v_self->train_max_initial_size, __pyx_v_self->train_max_initial_size, 1, 1, 1, 0, 1, 1, 0); + __pyx_t_9 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_fixpoint(__pyx_v_self, __pyx_v_f_back_low, __pyx_t_15, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_links_low, __pyx_v_e_links_high, __pyx_v_e_low, __pyx_v_e_high, (&__pyx_v_e_x_low), (&__pyx_v_e_x_high), (&__pyx_v_f_x_low), (&__pyx_v_f_x_high), __pyx_v_f_sent_len, __pyx_v_e_sent_len, __pyx_v_self->train_max_initial_size, __pyx_v_self->train_max_initial_size, 1, 1, 1, 0, 1, 1, 0); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; } - __pyx_t_9 = __pyx_t_7; + __pyx_t_7 = __pyx_t_9; } else { - __pyx_t_9 = __pyx_v_met_constraints; + __pyx_t_7 = __pyx_v_met_constraints; } - if (__pyx_t_9) { + if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1695 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1689 * self.train_max_initial_size, self.train_max_initial_size, * 0, 0, 0, 0, 0, 0, 0)): * fphr_arr._clear() # <<<<<<<<<<<<<< @@ -54409,7 +54371,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_fphr_arr->__pyx_vtab)->_clear(__pyx_v_fphr_arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1696 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1690 * 0, 0, 0, 0, 0, 0, 0)): * fphr_arr._clear() * i = 1 # <<<<<<<<<<<<<< @@ -54418,31 +54380,31 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_i = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1697 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1691 * fphr_arr._clear() * i = 1 * self.findexes.reset() # <<<<<<<<<<<<<< * if f_back_low < f_low: * fphr_arr._append(sym_setindex(self.category, i)) */ - __pyx_t_15 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes), __pyx_n_s__reset); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1697; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes), __pyx_n_s__reset); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1691; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_14 = PyObject_Call(__pyx_t_15, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1697; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyObject_Call(__pyx_t_15, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1691; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1698 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1692 * i = 1 * self.findexes.reset() * if f_back_low < f_low: # <<<<<<<<<<<<<< * fphr_arr._append(sym_setindex(self.category, i)) * i = i+1 */ - __pyx_t_9 = (__pyx_v_f_back_low < __pyx_v_f_low); - if (__pyx_t_9) { + __pyx_t_7 = (__pyx_v_f_back_low < __pyx_v_f_low); + if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1699 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1693 * self.findexes.reset() * if f_back_low < f_low: * fphr_arr._append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< @@ -54451,7 +54413,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_fphr_arr->__pyx_vtab)->_append(__pyx_v_fphr_arr, __pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1700 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1694 * if f_back_low < f_low: * fphr_arr._append(sym_setindex(self.category, i)) * i = i+1 # <<<<<<<<<<<<<< @@ -54460,16 +54422,16 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_i = (__pyx_v_i + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1701 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1695 * fphr_arr._append(sym_setindex(self.category, i)) * i = i+1 * self.findexes.append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< * self.findexes.extend(self.findexes1) * for j from 0 <= j < phrase.n: */ - __pyx_t_14 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1701; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1695; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes), __pyx_t_14); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1701; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes), __pyx_t_14); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1695; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; @@ -54477,27 +54439,27 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L85:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1702 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1696 * i = i+1 * self.findexes.append(sym_setindex(self.category, i)) * self.findexes.extend(self.findexes1) # <<<<<<<<<<<<<< * for j from 0 <= j < phrase.n: * if sym_isvar(phrase.syms[j]): */ - __pyx_t_15 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes), __pyx_n_s__extend); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1702; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes), __pyx_n_s__extend); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1696; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_14 = PyTuple_New(1); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1702; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyTuple_New(1); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1696; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_INCREF(((PyObject *)__pyx_v_self->findexes1)); PyTuple_SET_ITEM(__pyx_t_14, 0, ((PyObject *)__pyx_v_self->findexes1)); __Pyx_GIVEREF(((PyObject *)__pyx_v_self->findexes1)); - __pyx_t_1 = PyObject_Call(__pyx_t_15, ((PyObject *)__pyx_t_14), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1702; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(__pyx_t_15, ((PyObject *)__pyx_t_14), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1696; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_14)); __pyx_t_14 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1703 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1697 * self.findexes.append(sym_setindex(self.category, i)) * self.findexes.extend(self.findexes1) * for j from 0 <= j < phrase.n: # <<<<<<<<<<<<<< @@ -54507,7 +54469,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_3 = __pyx_v_phrase->n; for (__pyx_v_j = 0; __pyx_v_j < __pyx_t_3; __pyx_v_j++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1704 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1698 * self.findexes.extend(self.findexes1) * for j from 0 <= j < phrase.n: * if sym_isvar(phrase.syms[j]): # <<<<<<<<<<<<<< @@ -54517,7 +54479,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_4 = __pyx_f_3_sa_sym_isvar((__pyx_v_phrase->syms[__pyx_v_j])); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1705 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1699 * for j from 0 <= j < phrase.n: * if sym_isvar(phrase.syms[j]): * fphr_arr._append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< @@ -54526,7 +54488,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_fphr_arr->__pyx_vtab)->_append(__pyx_v_fphr_arr, __pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1706 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1700 * if sym_isvar(phrase.syms[j]): * fphr_arr._append(sym_setindex(self.category, i)) * i = i + 1 # <<<<<<<<<<<<<< @@ -54538,7 +54500,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1708 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1702 * i = i + 1 * else: * fphr_arr._append(phrase.syms[j]) # <<<<<<<<<<<<<< @@ -54550,7 +54512,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_L88:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1709 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1703 * else: * fphr_arr._append(phrase.syms[j]) * fphr_arr._append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< @@ -54559,81 +54521,81 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_fphr_arr->__pyx_vtab)->_append(__pyx_v_fphr_arr, __pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1710 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1704 * fphr_arr._append(phrase.syms[j]) * fphr_arr._append(sym_setindex(self.category, i)) * self.findexes.append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< * fphr = Phrase(fphr_arr) * phrase_list = self.extract_phrases(e_x_low, e_x_high, e_gap_low+gap_start, e_gap_high+gap_start, e_links_low, num_gaps+1, */ - __pyx_t_1 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1710; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1704; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes), __pyx_t_1); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1710; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes), __pyx_t_1); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1704; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1711 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1705 * fphr_arr._append(sym_setindex(self.category, i)) * self.findexes.append(sym_setindex(self.category, i)) * fphr = Phrase(fphr_arr) # <<<<<<<<<<<<<< * phrase_list = self.extract_phrases(e_x_low, e_x_high, e_gap_low+gap_start, e_gap_high+gap_start, e_links_low, num_gaps+1, * f_x_low, f_x_high, f_gap_low+gap_start, f_gap_high+gap_start, f_links_low, */ - __pyx_t_14 = PyTuple_New(1); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1711; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyTuple_New(1); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1705; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_INCREF(((PyObject *)__pyx_v_fphr_arr)); PyTuple_SET_ITEM(__pyx_t_14, 0, ((PyObject *)__pyx_v_fphr_arr)); __Pyx_GIVEREF(((PyObject *)__pyx_v_fphr_arr)); - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_14), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1711; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_14), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1705; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(((PyObject *)__pyx_t_14)); __pyx_t_14 = 0; __Pyx_DECREF(((PyObject *)__pyx_v_fphr)); __pyx_v_fphr = ((struct __pyx_obj_3_sa_Phrase *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1714 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1708 * phrase_list = self.extract_phrases(e_x_low, e_x_high, e_gap_low+gap_start, e_gap_high+gap_start, e_links_low, num_gaps+1, * f_x_low, f_x_high, f_gap_low+gap_start, f_gap_high+gap_start, f_links_low, * matching.sent_id, e_sent_len, e_sent_start) # <<<<<<<<<<<<<< * if len(phrase_list) > 0: * pair_count = 1.0 / len(phrase_list) */ - __pyx_t_1 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->extract_phrases(__pyx_v_self, __pyx_v_e_x_low, __pyx_v_e_x_high, (__pyx_v_e_gap_low + __pyx_v_gap_start), (__pyx_v_e_gap_high + __pyx_v_gap_start), __pyx_v_e_links_low, (__pyx_v_num_gaps + 1), __pyx_v_f_x_low, __pyx_v_f_x_high, (__pyx_v_f_gap_low + __pyx_v_gap_start), (__pyx_v_f_gap_high + __pyx_v_gap_start), __pyx_v_f_links_low, __pyx_v_matching->sent_id, __pyx_v_e_sent_len, __pyx_v_e_sent_start); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1712; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->extract_phrases(__pyx_v_self, __pyx_v_e_x_low, __pyx_v_e_x_high, (__pyx_v_e_gap_low + __pyx_v_gap_start), (__pyx_v_e_gap_high + __pyx_v_gap_start), __pyx_v_e_links_low, (__pyx_v_num_gaps + 1), __pyx_v_f_x_low, __pyx_v_f_x_high, (__pyx_v_f_gap_low + __pyx_v_gap_start), (__pyx_v_f_gap_high + __pyx_v_gap_start), __pyx_v_f_links_low, __pyx_v_matching->sent_id, __pyx_v_e_sent_len, __pyx_v_e_sent_start); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1706; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_XDECREF(__pyx_v_phrase_list); __pyx_v_phrase_list = __pyx_t_1; __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1715 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1709 * f_x_low, f_x_high, f_gap_low+gap_start, f_gap_high+gap_start, f_links_low, * matching.sent_id, e_sent_len, e_sent_start) * if len(phrase_list) > 0: # <<<<<<<<<<<<<< * pair_count = 1.0 / len(phrase_list) * else: */ - __pyx_t_13 = PyObject_Length(__pyx_v_phrase_list); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1715; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_9 = (__pyx_t_13 > 0); - if (__pyx_t_9) { + __pyx_t_13 = PyObject_Length(__pyx_v_phrase_list); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1709; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = (__pyx_t_13 > 0); + if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1716 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1710 * matching.sent_id, e_sent_len, e_sent_start) * if len(phrase_list) > 0: * pair_count = 1.0 / len(phrase_list) # <<<<<<<<<<<<<< * else: * pair_count = 0 */ - __pyx_t_13 = PyObject_Length(__pyx_v_phrase_list); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1716; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyObject_Length(__pyx_v_phrase_list); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1710; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (unlikely(__pyx_t_13 == 0)) { PyErr_Format(PyExc_ZeroDivisionError, "float division"); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1716; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1710; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_v_pair_count = (1.0 / __pyx_t_13); goto __pyx_L89; } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1718 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1712 * pair_count = 1.0 / len(phrase_list) * else: * pair_count = 0 # <<<<<<<<<<<<<< @@ -54644,7 +54606,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L89:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1719 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1713 * else: * pair_count = 0 * for phrase2, eindexes in phrase_list: # <<<<<<<<<<<<<< @@ -54655,7 +54617,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_1 = __pyx_v_phrase_list; __Pyx_INCREF(__pyx_t_1); __pyx_t_13 = 0; __pyx_t_16 = NULL; } else { - __pyx_t_13 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_phrase_list); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1719; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_v_phrase_list); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1713; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_16 = Py_TYPE(__pyx_t_1)->tp_iternext; } @@ -54663,23 +54625,23 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj if (!__pyx_t_16 && PyList_CheckExact(__pyx_t_1)) { if (__pyx_t_13 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_14 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_13); __Pyx_INCREF(__pyx_t_14); __pyx_t_13++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1719; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_13); __Pyx_INCREF(__pyx_t_14); __pyx_t_13++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1713; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_14 = PySequence_ITEM(__pyx_t_1, __pyx_t_13); __pyx_t_13++; if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1719; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PySequence_ITEM(__pyx_t_1, __pyx_t_13); __pyx_t_13++; if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1713; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else if (!__pyx_t_16 && PyTuple_CheckExact(__pyx_t_1)) { if (__pyx_t_13 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_14 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_13); __Pyx_INCREF(__pyx_t_14); __pyx_t_13++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1719; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_13); __Pyx_INCREF(__pyx_t_14); __pyx_t_13++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1713; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_14 = PySequence_ITEM(__pyx_t_1, __pyx_t_13); __pyx_t_13++; if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1719; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PySequence_ITEM(__pyx_t_1, __pyx_t_13); __pyx_t_13++; if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1713; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { __pyx_t_14 = __pyx_t_16(__pyx_t_1); if (unlikely(!__pyx_t_14)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1719; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1713; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -54695,7 +54657,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj if (unlikely(size != 2)) { if (size > 2) __Pyx_RaiseTooManyValuesError(2); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1719; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1713; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyTuple_CheckExact(sequence))) { @@ -54708,14 +54670,14 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_INCREF(__pyx_t_15); __Pyx_INCREF(__pyx_t_2); #else - __pyx_t_15 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1719; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1719; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1713; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1713; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; } else { Py_ssize_t index = -1; - __pyx_t_10 = PyObject_GetIter(__pyx_t_14); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1719; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_GetIter(__pyx_t_14); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1713; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_17 = Py_TYPE(__pyx_t_10)->tp_iternext; @@ -54723,7 +54685,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_GOTREF(__pyx_t_15); index = 1; __pyx_t_2 = __pyx_t_17(__pyx_t_10); if (unlikely(!__pyx_t_2)) goto __pyx_L92_unpacking_failed; __Pyx_GOTREF(__pyx_t_2); - if (__Pyx_IternextUnpackEndCheck(__pyx_t_17(__pyx_t_10), 2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1719; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_IternextUnpackEndCheck(__pyx_t_17(__pyx_t_10), 2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1713; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_17 = NULL; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; goto __pyx_L93_unpacking_done; @@ -54731,7 +54693,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __pyx_t_17 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1719; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1713; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_L93_unpacking_done:; } __Pyx_XDECREF(__pyx_v_phrase2); @@ -54741,7 +54703,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_v_eindexes = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1720 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1714 * pair_count = 0 * for phrase2, eindexes in phrase_list: * als3 = self.create_alignments(sent_links,num_links,self.findexes,eindexes) # <<<<<<<<<<<<<< @@ -54750,31 +54712,31 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_t_14 = ((PyObject *)__pyx_v_self->findexes); __Pyx_INCREF(__pyx_t_14); - __pyx_t_2 = ((PyObject *)((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->create_alignments(__pyx_v_self, __pyx_v_sent_links, __pyx_v_num_links, __pyx_t_14, __pyx_v_eindexes)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1720; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = ((PyObject *)((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->create_alignments(__pyx_v_self, __pyx_v_sent_links, __pyx_v_num_links, __pyx_t_14, __pyx_v_eindexes)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1714; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __Pyx_XDECREF(((PyObject *)__pyx_v_als3)); __pyx_v_als3 = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1721 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1715 * for phrase2, eindexes in phrase_list: * als3 = self.create_alignments(sent_links,num_links,self.findexes,eindexes) * extracts.append((fphr, phrase2, pair_count, tuple(als3))) # <<<<<<<<<<<<<< * if (num_gaps < self.max_nonterminals - 1 and * phrase_len+1 < self.max_length and */ - __pyx_t_2 = PyFloat_FromDouble(__pyx_v_pair_count); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1721; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyFloat_FromDouble(__pyx_v_pair_count); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1715; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_14 = PyTuple_New(1); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1721; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyTuple_New(1); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1715; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_INCREF(((PyObject *)__pyx_v_als3)); PyTuple_SET_ITEM(__pyx_t_14, 0, ((PyObject *)__pyx_v_als3)); __Pyx_GIVEREF(((PyObject *)__pyx_v_als3)); - __pyx_t_15 = PyObject_Call(((PyObject *)((PyObject*)(&PyTuple_Type))), ((PyObject *)__pyx_t_14), NULL); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1721; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyObject_Call(((PyObject *)((PyObject*)(&PyTuple_Type))), ((PyObject *)__pyx_t_14), NULL); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1715; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(((PyObject *)__pyx_t_14)); __pyx_t_14 = 0; - __pyx_t_14 = PyTuple_New(4); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1721; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyTuple_New(4); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1715; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_INCREF(((PyObject *)__pyx_v_fphr)); PyTuple_SET_ITEM(__pyx_t_14, 0, ((PyObject *)__pyx_v_fphr)); @@ -54788,7 +54750,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_GIVEREF(__pyx_t_15); __pyx_t_2 = 0; __pyx_t_15 = 0; - __pyx_t_15 = __Pyx_PyObject_Append(__pyx_v_extracts, ((PyObject *)__pyx_t_14)); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1721; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = __Pyx_PyObject_Append(__pyx_v_extracts, ((PyObject *)__pyx_t_14)); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1715; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(((PyObject *)__pyx_t_14)); __pyx_t_14 = 0; __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; @@ -54801,27 +54763,27 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L79:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1722 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1716 * als3 = self.create_alignments(sent_links,num_links,self.findexes,eindexes) * extracts.append((fphr, phrase2, pair_count, tuple(als3))) * if (num_gaps < self.max_nonterminals - 1 and # <<<<<<<<<<<<<< * phrase_len+1 < self.max_length and * f_back_high == f_high and */ - __pyx_t_9 = (__pyx_v_num_gaps < (__pyx_v_self->max_nonterminals - 1)); - if (__pyx_t_9) { + __pyx_t_7 = (__pyx_v_num_gaps < (__pyx_v_self->max_nonterminals - 1)); + if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1723 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1717 * extracts.append((fphr, phrase2, pair_count, tuple(als3))) * if (num_gaps < self.max_nonterminals - 1 and * phrase_len+1 < self.max_length and # <<<<<<<<<<<<<< * f_back_high == f_high and * f_back_low == f_low and */ - __pyx_t_7 = ((__pyx_v_phrase_len + 1) < __pyx_v_self->max_length); - if (__pyx_t_7) { + __pyx_t_9 = ((__pyx_v_phrase_len + 1) < __pyx_v_self->max_length); + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1724 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1718 * if (num_gaps < self.max_nonterminals - 1 and * phrase_len+1 < self.max_length and * f_back_high == f_high and # <<<<<<<<<<<<<< @@ -54831,7 +54793,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_18 = (__pyx_v_f_back_high == __pyx_v_f_high); if (__pyx_t_18) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1725 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1719 * phrase_len+1 < self.max_length and * f_back_high == f_high and * f_back_low == f_low and # <<<<<<<<<<<<<< @@ -54841,7 +54803,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_8 = (__pyx_v_f_back_low == __pyx_v_f_low); if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1726 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1720 * f_back_high == f_high and * f_back_low == f_low and * f_back_high - f_back_low + (2*self.train_min_gap_size) <= self.train_max_initial_size and # <<<<<<<<<<<<<< @@ -54851,7 +54813,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_19 = (((__pyx_v_f_back_high - __pyx_v_f_back_low) + (2 * __pyx_v_self->train_min_gap_size)) <= __pyx_v_self->train_max_initial_size); if (__pyx_t_19) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1727 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1721 * f_back_low == f_low and * f_back_high - f_back_low + (2*self.train_min_gap_size) <= self.train_max_initial_size and * f_low >= self.train_min_gap_size and # <<<<<<<<<<<<<< @@ -54861,7 +54823,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_20 = (__pyx_v_f_low >= __pyx_v_self->train_min_gap_size); if (__pyx_t_20) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1728 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1722 * f_back_high - f_back_low + (2*self.train_min_gap_size) <= self.train_max_initial_size and * f_low >= self.train_min_gap_size and * f_high <= f_sent_len - self.train_min_gap_size and # <<<<<<<<<<<<<< @@ -54871,7 +54833,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_21 = (__pyx_v_f_high <= (__pyx_v_f_sent_len - __pyx_v_self->train_min_gap_size)); if (__pyx_t_21) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1729 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1723 * f_low >= self.train_min_gap_size and * f_high <= f_sent_len - self.train_min_gap_size and * ((not self.tight_phrases) or (f_links_low[f_low-1] != -1 and f_links_low[f_high] != -1))): # <<<<<<<<<<<<<< @@ -54913,15 +54875,15 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_t_18 = __pyx_t_8; } else { - __pyx_t_18 = __pyx_t_7; + __pyx_t_18 = __pyx_t_9; } - __pyx_t_7 = __pyx_t_18; + __pyx_t_9 = __pyx_t_18; } else { - __pyx_t_7 = __pyx_t_9; + __pyx_t_9 = __pyx_t_7; } - if (__pyx_t_7) { + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1731 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1725 * ((not self.tight_phrases) or (f_links_low[f_low-1] != -1 and f_links_low[f_high] != -1))): * * met_constraints = 1 # <<<<<<<<<<<<<< @@ -54930,7 +54892,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_met_constraints = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1732 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1726 * * met_constraints = 1 * f_x_low = f_low-self.train_min_gap_size # <<<<<<<<<<<<<< @@ -54939,7 +54901,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_f_x_low = (__pyx_v_f_low - __pyx_v_self->train_min_gap_size); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1733 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1727 * met_constraints = 1 * f_x_low = f_low-self.train_min_gap_size * if self.tight_phrases: # <<<<<<<<<<<<<< @@ -54948,7 +54910,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ if (__pyx_v_self->tight_phrases) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1734 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1728 * f_x_low = f_low-self.train_min_gap_size * if self.tight_phrases: * while f_x_low >= 0 and f_links_low[f_x_low] == -1: # <<<<<<<<<<<<<< @@ -54956,16 +54918,16 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj * if f_x_low < 0: */ while (1) { - __pyx_t_7 = (__pyx_v_f_x_low >= 0); - if (__pyx_t_7) { - __pyx_t_9 = ((__pyx_v_f_links_low[__pyx_v_f_x_low]) == -1); - __pyx_t_18 = __pyx_t_9; - } else { + __pyx_t_9 = (__pyx_v_f_x_low >= 0); + if (__pyx_t_9) { + __pyx_t_7 = ((__pyx_v_f_links_low[__pyx_v_f_x_low]) == -1); __pyx_t_18 = __pyx_t_7; + } else { + __pyx_t_18 = __pyx_t_9; } if (!__pyx_t_18) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1735 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1729 * if self.tight_phrases: * while f_x_low >= 0 and f_links_low[f_x_low] == -1: * f_x_low = f_x_low - 1 # <<<<<<<<<<<<<< @@ -54978,7 +54940,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L95:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1736 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1730 * while f_x_low >= 0 and f_links_low[f_x_low] == -1: * f_x_low = f_x_low - 1 * if f_x_low < 0: # <<<<<<<<<<<<<< @@ -54988,7 +54950,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_18 = (__pyx_v_f_x_low < 0); if (__pyx_t_18) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1737 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1731 * f_x_low = f_x_low - 1 * if f_x_low < 0: * met_constraints = 0 # <<<<<<<<<<<<<< @@ -55000,7 +54962,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L98:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1739 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1733 * met_constraints = 0 * * f_x_high = f_high+self.train_min_gap_size # <<<<<<<<<<<<<< @@ -55009,7 +54971,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_f_x_high = (__pyx_v_f_high + __pyx_v_self->train_min_gap_size); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1740 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1734 * * f_x_high = f_high+self.train_min_gap_size * if self.tight_phrases: # <<<<<<<<<<<<<< @@ -55018,7 +54980,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ if (__pyx_v_self->tight_phrases) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1741 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1735 * f_x_high = f_high+self.train_min_gap_size * if self.tight_phrases: * while f_x_high <= f_sent_len and f_links_low[f_x_high-1] == -1: # <<<<<<<<<<<<<< @@ -55028,14 +54990,14 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj while (1) { __pyx_t_18 = (__pyx_v_f_x_high <= __pyx_v_f_sent_len); if (__pyx_t_18) { - __pyx_t_7 = ((__pyx_v_f_links_low[(__pyx_v_f_x_high - 1)]) == -1); - __pyx_t_9 = __pyx_t_7; + __pyx_t_9 = ((__pyx_v_f_links_low[(__pyx_v_f_x_high - 1)]) == -1); + __pyx_t_7 = __pyx_t_9; } else { - __pyx_t_9 = __pyx_t_18; + __pyx_t_7 = __pyx_t_18; } - if (!__pyx_t_9) break; + if (!__pyx_t_7) break; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1742 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1736 * if self.tight_phrases: * while f_x_high <= f_sent_len and f_links_low[f_x_high-1] == -1: * f_x_high = f_x_high + 1 # <<<<<<<<<<<<<< @@ -55048,23 +55010,23 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L99:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1743 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1737 * while f_x_high <= f_sent_len and f_links_low[f_x_high-1] == -1: * f_x_high = f_x_high + 1 * if f_x_high > f_sent_len or f_x_high - f_x_low > self.train_max_initial_size: # <<<<<<<<<<<<<< * met_constraints = 0 * */ - __pyx_t_9 = (__pyx_v_f_x_high > __pyx_v_f_sent_len); - if (!__pyx_t_9) { + __pyx_t_7 = (__pyx_v_f_x_high > __pyx_v_f_sent_len); + if (!__pyx_t_7) { __pyx_t_18 = ((__pyx_v_f_x_high - __pyx_v_f_x_low) > __pyx_v_self->train_max_initial_size); - __pyx_t_7 = __pyx_t_18; + __pyx_t_9 = __pyx_t_18; } else { - __pyx_t_7 = __pyx_t_9; + __pyx_t_9 = __pyx_t_7; } - if (__pyx_t_7) { + if (__pyx_t_9) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1744 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1738 * f_x_high = f_x_high + 1 * if f_x_high > f_sent_len or f_x_high - f_x_low > self.train_max_initial_size: * met_constraints = 0 # <<<<<<<<<<<<<< @@ -55076,7 +55038,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L102:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1746 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1740 * met_constraints = 0 * * if (met_constraints and # <<<<<<<<<<<<<< @@ -55085,17 +55047,17 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ if (__pyx_v_met_constraints) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1747 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1741 * * if (met_constraints and * self.find_fixpoint(f_x_low, f_x_high, # <<<<<<<<<<<<<< * f_links_low, f_links_high, e_links_low, e_links_high, * e_low, e_high, &e_x_low, &e_x_high, &f_x_low, &f_x_high, */ - __pyx_t_1 = PyInt_FromLong(__pyx_v_f_x_high); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1747; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyInt_FromLong(__pyx_v_f_x_high); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1741; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1751 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1745 * e_low, e_high, &e_x_low, &e_x_high, &f_x_low, &f_x_high, * f_sent_len, e_sent_len, * self.train_max_initial_size, self.train_max_initial_size, # <<<<<<<<<<<<<< @@ -55105,39 +55067,39 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj if (((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_fixpoint(__pyx_v_self, __pyx_v_f_x_low, __pyx_t_1, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_links_low, __pyx_v_e_links_high, __pyx_v_e_low, __pyx_v_e_high, (&__pyx_v_e_x_low), (&__pyx_v_e_x_high), (&__pyx_v_f_x_low), (&__pyx_v_f_x_high), __pyx_v_f_sent_len, __pyx_v_e_sent_len, __pyx_v_self->train_max_initial_size, __pyx_v_self->train_max_initial_size, 1, 1, 2, 1, 1, 1, 1)) { __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1753 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1747 * self.train_max_initial_size, self.train_max_initial_size, * 1, 1, 2, 1, 1, 1, 1) and * ((not self.tight_phrases) or (f_links_low[f_x_low] != -1 and f_links_low[f_x_high-1] != -1)) and # <<<<<<<<<<<<<< * self.find_fixpoint(f_x_low, f_low, * f_links_low, f_links_high, e_links_low, e_links_high, */ - __pyx_t_7 = (!__pyx_v_self->tight_phrases); - if (!__pyx_t_7) { - __pyx_t_9 = ((__pyx_v_f_links_low[__pyx_v_f_x_low]) != -1); - if (__pyx_t_9) { + __pyx_t_9 = (!__pyx_v_self->tight_phrases); + if (!__pyx_t_9) { + __pyx_t_7 = ((__pyx_v_f_links_low[__pyx_v_f_x_low]) != -1); + if (__pyx_t_7) { __pyx_t_18 = ((__pyx_v_f_links_low[(__pyx_v_f_x_high - 1)]) != -1); __pyx_t_8 = __pyx_t_18; } else { - __pyx_t_8 = __pyx_t_9; + __pyx_t_8 = __pyx_t_7; } - __pyx_t_9 = __pyx_t_8; + __pyx_t_7 = __pyx_t_8; } else { - __pyx_t_9 = __pyx_t_7; + __pyx_t_7 = __pyx_t_9; } - if (__pyx_t_9) { + if (__pyx_t_7) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1754 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1748 * 1, 1, 2, 1, 1, 1, 1) and * ((not self.tight_phrases) or (f_links_low[f_x_low] != -1 and f_links_low[f_x_high-1] != -1)) and * self.find_fixpoint(f_x_low, f_low, # <<<<<<<<<<<<<< * f_links_low, f_links_high, e_links_low, e_links_high, * -1, -1, e_gap_low, e_gap_high, f_gap_low, f_gap_high, */ - __pyx_t_15 = PyInt_FromLong(__pyx_v_f_low); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1754; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyInt_FromLong(__pyx_v_f_low); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1748; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1758 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1752 * -1, -1, e_gap_low, e_gap_high, f_gap_low, f_gap_high, * f_sent_len, e_sent_len, * self.train_max_initial_size, self.train_max_initial_size, # <<<<<<<<<<<<<< @@ -55148,17 +55110,17 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; if (__pyx_t_3) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1760 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1754 * self.train_max_initial_size, self.train_max_initial_size, * 0, 0, 0, 0, 0, 0, 0) and * self.find_fixpoint(f_high, f_x_high, # <<<<<<<<<<<<<< * f_links_low, f_links_high, e_links_low, e_links_high, * -1, -1, e_gap_low+1+num_gaps, e_gap_high+1+num_gaps, */ - __pyx_t_15 = PyInt_FromLong(__pyx_v_f_x_high); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1760; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyInt_FromLong(__pyx_v_f_x_high); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1754; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1765 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1759 * f_gap_low+1+num_gaps, f_gap_high+1+num_gaps, * f_sent_len, e_sent_len, * self.train_max_initial_size, self.train_max_initial_size, # <<<<<<<<<<<<<< @@ -55167,26 +55129,26 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_t_4 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_fixpoint(__pyx_v_self, __pyx_v_f_high, __pyx_t_15, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_links_low, __pyx_v_e_links_high, -1, -1, ((__pyx_v_e_gap_low + 1) + __pyx_v_num_gaps), ((__pyx_v_e_gap_high + 1) + __pyx_v_num_gaps), ((__pyx_v_f_gap_low + 1) + __pyx_v_num_gaps), ((__pyx_v_f_gap_high + 1) + __pyx_v_num_gaps), __pyx_v_f_sent_len, __pyx_v_e_sent_len, __pyx_v_self->train_max_initial_size, __pyx_v_self->train_max_initial_size, 0, 0, 0, 0, 0, 0, 0); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - __pyx_t_7 = __pyx_t_4; + __pyx_t_9 = __pyx_t_4; } else { - __pyx_t_7 = __pyx_t_3; + __pyx_t_9 = __pyx_t_3; } - __pyx_t_8 = __pyx_t_7; - } else { __pyx_t_8 = __pyx_t_9; + } else { + __pyx_t_8 = __pyx_t_7; } - __pyx_t_9 = __pyx_t_8; + __pyx_t_7 = __pyx_t_8; } else { - __pyx_t_9 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_fixpoint(__pyx_v_self, __pyx_v_f_x_low, __pyx_t_1, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_links_low, __pyx_v_e_links_high, __pyx_v_e_low, __pyx_v_e_high, (&__pyx_v_e_x_low), (&__pyx_v_e_x_high), (&__pyx_v_f_x_low), (&__pyx_v_f_x_high), __pyx_v_f_sent_len, __pyx_v_e_sent_len, __pyx_v_self->train_max_initial_size, __pyx_v_self->train_max_initial_size, 1, 1, 2, 1, 1, 1, 1); + __pyx_t_7 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->find_fixpoint(__pyx_v_self, __pyx_v_f_x_low, __pyx_t_1, __pyx_v_f_links_low, __pyx_v_f_links_high, __pyx_v_e_links_low, __pyx_v_e_links_high, __pyx_v_e_low, __pyx_v_e_high, (&__pyx_v_e_x_low), (&__pyx_v_e_x_high), (&__pyx_v_f_x_low), (&__pyx_v_f_x_high), __pyx_v_f_sent_len, __pyx_v_e_sent_len, __pyx_v_self->train_max_initial_size, __pyx_v_self->train_max_initial_size, 1, 1, 2, 1, 1, 1, 1); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } - __pyx_t_8 = __pyx_t_9; + __pyx_t_8 = __pyx_t_7; } else { __pyx_t_8 = __pyx_v_met_constraints; } if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1767 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1761 * self.train_max_initial_size, self.train_max_initial_size, * 0, 0, 0, 0, 0, 0, 0)): * fphr_arr._clear() # <<<<<<<<<<<<<< @@ -55195,7 +55157,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_fphr_arr->__pyx_vtab)->_clear(__pyx_v_fphr_arr); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1768 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1762 * 0, 0, 0, 0, 0, 0, 0)): * fphr_arr._clear() * i = 1 # <<<<<<<<<<<<<< @@ -55204,35 +55166,35 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_i = 1; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1769 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1763 * fphr_arr._clear() * i = 1 * self.findexes.reset() # <<<<<<<<<<<<<< * self.findexes.append(sym_setindex(self.category, i)) * fphr_arr._append(sym_setindex(self.category, i)) */ - __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes), __pyx_n_s__reset); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1769; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes), __pyx_n_s__reset); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1763; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_15 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1769; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), NULL); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1763; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1770 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1764 * i = 1 * self.findexes.reset() * self.findexes.append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< * fphr_arr._append(sym_setindex(self.category, i)) * i = i+1 */ - __pyx_t_15 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1770; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1764; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_1 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes), __pyx_t_15); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1770; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes), __pyx_t_15); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1764; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1771 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1765 * self.findexes.reset() * self.findexes.append(sym_setindex(self.category, i)) * fphr_arr._append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< @@ -55241,7 +55203,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_fphr_arr->__pyx_vtab)->_append(__pyx_v_fphr_arr, __pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1772 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1766 * self.findexes.append(sym_setindex(self.category, i)) * fphr_arr._append(sym_setindex(self.category, i)) * i = i+1 # <<<<<<<<<<<<<< @@ -55250,27 +55212,27 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_v_i = (__pyx_v_i + 1); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1773 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1767 * fphr_arr._append(sym_setindex(self.category, i)) * i = i+1 * self.findexes.extend(self.findexes1) # <<<<<<<<<<<<<< * for j from 0 <= j < phrase.n: * if sym_isvar(phrase.syms[j]): */ - __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes), __pyx_n_s__extend); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1773; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_self->findexes), __pyx_n_s__extend); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1767; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_15 = PyTuple_New(1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1773; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyTuple_New(1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1767; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_INCREF(((PyObject *)__pyx_v_self->findexes1)); PyTuple_SET_ITEM(__pyx_t_15, 0, ((PyObject *)__pyx_v_self->findexes1)); __Pyx_GIVEREF(((PyObject *)__pyx_v_self->findexes1)); - __pyx_t_14 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_15), NULL); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1773; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_15), NULL); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1767; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(((PyObject *)__pyx_t_15)); __pyx_t_15 = 0; __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1774 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1768 * i = i+1 * self.findexes.extend(self.findexes1) * for j from 0 <= j < phrase.n: # <<<<<<<<<<<<<< @@ -55280,7 +55242,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_3 = __pyx_v_phrase->n; for (__pyx_v_j = 0; __pyx_v_j < __pyx_t_3; __pyx_v_j++) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1775 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1769 * self.findexes.extend(self.findexes1) * for j from 0 <= j < phrase.n: * if sym_isvar(phrase.syms[j]): # <<<<<<<<<<<<<< @@ -55290,7 +55252,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_4 = __pyx_f_3_sa_sym_isvar((__pyx_v_phrase->syms[__pyx_v_j])); if (__pyx_t_4) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1776 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1770 * for j from 0 <= j < phrase.n: * if sym_isvar(phrase.syms[j]): * fphr_arr._append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< @@ -55299,7 +55261,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_fphr_arr->__pyx_vtab)->_append(__pyx_v_fphr_arr, __pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1777 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1771 * if sym_isvar(phrase.syms[j]): * fphr_arr._append(sym_setindex(self.category, i)) * i = i + 1 # <<<<<<<<<<<<<< @@ -55311,7 +55273,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1779 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1773 * i = i + 1 * else: * fphr_arr._append(phrase.syms[j]) # <<<<<<<<<<<<<< @@ -55323,7 +55285,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_L106:; } - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1780 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1774 * else: * fphr_arr._append(phrase.syms[j]) * fphr_arr._append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< @@ -55332,81 +55294,81 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ ((struct __pyx_vtabstruct_3_sa_IntList *)__pyx_v_fphr_arr->__pyx_vtab)->_append(__pyx_v_fphr_arr, __pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1781 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1775 * fphr_arr._append(phrase.syms[j]) * fphr_arr._append(sym_setindex(self.category, i)) * self.findexes.append(sym_setindex(self.category, i)) # <<<<<<<<<<<<<< * fphr = Phrase(fphr_arr) * phrase_list = self.extract_phrases(e_x_low, e_x_high, e_gap_low, e_gap_high, e_links_low, num_gaps+2, */ - __pyx_t_14 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1781; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyInt_FromLong(__pyx_f_3_sa_sym_setindex(__pyx_v_self->category, __pyx_v_i)); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1775; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes), __pyx_t_14); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1781; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = __Pyx_PyObject_Append(((PyObject *)__pyx_v_self->findexes), __pyx_t_14); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1775; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1782 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1776 * fphr_arr._append(sym_setindex(self.category, i)) * self.findexes.append(sym_setindex(self.category, i)) * fphr = Phrase(fphr_arr) # <<<<<<<<<<<<<< * phrase_list = self.extract_phrases(e_x_low, e_x_high, e_gap_low, e_gap_high, e_links_low, num_gaps+2, * f_x_low, f_x_high, f_gap_low, f_gap_high, f_links_low, */ - __pyx_t_15 = PyTuple_New(1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1782; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyTuple_New(1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1776; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_INCREF(((PyObject *)__pyx_v_fphr_arr)); PyTuple_SET_ITEM(__pyx_t_15, 0, ((PyObject *)__pyx_v_fphr_arr)); __Pyx_GIVEREF(((PyObject *)__pyx_v_fphr_arr)); - __pyx_t_14 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_15), NULL); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1782; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_3_sa_Phrase)), ((PyObject *)__pyx_t_15), NULL); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1776; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(((PyObject *)__pyx_t_15)); __pyx_t_15 = 0; __Pyx_DECREF(((PyObject *)__pyx_v_fphr)); __pyx_v_fphr = ((struct __pyx_obj_3_sa_Phrase *)__pyx_t_14); __pyx_t_14 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1785 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1779 * phrase_list = self.extract_phrases(e_x_low, e_x_high, e_gap_low, e_gap_high, e_links_low, num_gaps+2, * f_x_low, f_x_high, f_gap_low, f_gap_high, f_links_low, * matching.sent_id, e_sent_len, e_sent_start) # <<<<<<<<<<<<<< * if len(phrase_list) > 0: * pair_count = 1.0 / len(phrase_list) */ - __pyx_t_14 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->extract_phrases(__pyx_v_self, __pyx_v_e_x_low, __pyx_v_e_x_high, __pyx_v_e_gap_low, __pyx_v_e_gap_high, __pyx_v_e_links_low, (__pyx_v_num_gaps + 2), __pyx_v_f_x_low, __pyx_v_f_x_high, __pyx_v_f_gap_low, __pyx_v_f_gap_high, __pyx_v_f_links_low, __pyx_v_matching->sent_id, __pyx_v_e_sent_len, __pyx_v_e_sent_start); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1783; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = ((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->extract_phrases(__pyx_v_self, __pyx_v_e_x_low, __pyx_v_e_x_high, __pyx_v_e_gap_low, __pyx_v_e_gap_high, __pyx_v_e_links_low, (__pyx_v_num_gaps + 2), __pyx_v_f_x_low, __pyx_v_f_x_high, __pyx_v_f_gap_low, __pyx_v_f_gap_high, __pyx_v_f_links_low, __pyx_v_matching->sent_id, __pyx_v_e_sent_len, __pyx_v_e_sent_start); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1777; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_XDECREF(__pyx_v_phrase_list); __pyx_v_phrase_list = __pyx_t_14; __pyx_t_14 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1786 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1780 * f_x_low, f_x_high, f_gap_low, f_gap_high, f_links_low, * matching.sent_id, e_sent_len, e_sent_start) * if len(phrase_list) > 0: # <<<<<<<<<<<<<< * pair_count = 1.0 / len(phrase_list) * else: */ - __pyx_t_13 = PyObject_Length(__pyx_v_phrase_list); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1786; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyObject_Length(__pyx_v_phrase_list); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1780; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_8 = (__pyx_t_13 > 0); if (__pyx_t_8) { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1787 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1781 * matching.sent_id, e_sent_len, e_sent_start) * if len(phrase_list) > 0: * pair_count = 1.0 / len(phrase_list) # <<<<<<<<<<<<<< * else: * pair_count = 0 */ - __pyx_t_13 = PyObject_Length(__pyx_v_phrase_list); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1787; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = PyObject_Length(__pyx_v_phrase_list); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1781; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (unlikely(__pyx_t_13 == 0)) { PyErr_Format(PyExc_ZeroDivisionError, "float division"); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1787; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1781; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __pyx_v_pair_count = (1.0 / __pyx_t_13); goto __pyx_L107; } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1789 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1783 * pair_count = 1.0 / len(phrase_list) * else: * pair_count = 0 # <<<<<<<<<<<<<< @@ -55417,7 +55379,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L107:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1790 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1784 * else: * pair_count = 0 * for phrase2, eindexes in phrase_list: # <<<<<<<<<<<<<< @@ -55428,7 +55390,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_t_14 = __pyx_v_phrase_list; __Pyx_INCREF(__pyx_t_14); __pyx_t_13 = 0; __pyx_t_16 = NULL; } else { - __pyx_t_13 = -1; __pyx_t_14 = PyObject_GetIter(__pyx_v_phrase_list); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1790; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = -1; __pyx_t_14 = PyObject_GetIter(__pyx_v_phrase_list); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1784; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __pyx_t_16 = Py_TYPE(__pyx_t_14)->tp_iternext; } @@ -55436,23 +55398,23 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj if (!__pyx_t_16 && PyList_CheckExact(__pyx_t_14)) { if (__pyx_t_13 >= PyList_GET_SIZE(__pyx_t_14)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_15 = PyList_GET_ITEM(__pyx_t_14, __pyx_t_13); __Pyx_INCREF(__pyx_t_15); __pyx_t_13++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1790; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyList_GET_ITEM(__pyx_t_14, __pyx_t_13); __Pyx_INCREF(__pyx_t_15); __pyx_t_13++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1784; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_15 = PySequence_ITEM(__pyx_t_14, __pyx_t_13); __pyx_t_13++; if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1790; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PySequence_ITEM(__pyx_t_14, __pyx_t_13); __pyx_t_13++; if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1784; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else if (!__pyx_t_16 && PyTuple_CheckExact(__pyx_t_14)) { if (__pyx_t_13 >= PyTuple_GET_SIZE(__pyx_t_14)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_15 = PyTuple_GET_ITEM(__pyx_t_14, __pyx_t_13); __Pyx_INCREF(__pyx_t_15); __pyx_t_13++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1790; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyTuple_GET_ITEM(__pyx_t_14, __pyx_t_13); __Pyx_INCREF(__pyx_t_15); __pyx_t_13++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1784; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_15 = PySequence_ITEM(__pyx_t_14, __pyx_t_13); __pyx_t_13++; if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1790; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PySequence_ITEM(__pyx_t_14, __pyx_t_13); __pyx_t_13++; if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1784; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { __pyx_t_15 = __pyx_t_16(__pyx_t_14); if (unlikely(!__pyx_t_15)) { if (PyErr_Occurred()) { if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1790; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1784; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -55468,7 +55430,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj if (unlikely(size != 2)) { if (size > 2) __Pyx_RaiseTooManyValuesError(2); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1790; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1784; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyTuple_CheckExact(sequence))) { @@ -55481,14 +55443,14 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_INCREF(__pyx_t_1); __Pyx_INCREF(__pyx_t_2); #else - __pyx_t_1 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1790; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_2 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1790; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1784; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1784; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; } else { Py_ssize_t index = -1; - __pyx_t_10 = PyObject_GetIter(__pyx_t_15); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1790; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_GetIter(__pyx_t_15); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1784; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; __pyx_t_17 = Py_TYPE(__pyx_t_10)->tp_iternext; @@ -55496,7 +55458,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_GOTREF(__pyx_t_1); index = 1; __pyx_t_2 = __pyx_t_17(__pyx_t_10); if (unlikely(!__pyx_t_2)) goto __pyx_L110_unpacking_failed; __Pyx_GOTREF(__pyx_t_2); - if (__Pyx_IternextUnpackEndCheck(__pyx_t_17(__pyx_t_10), 2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1790; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_IternextUnpackEndCheck(__pyx_t_17(__pyx_t_10), 2) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1784; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_17 = NULL; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; goto __pyx_L111_unpacking_done; @@ -55504,7 +55466,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __pyx_t_17 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); - {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1790; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1784; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_L111_unpacking_done:; } __Pyx_XDECREF(__pyx_v_phrase2); @@ -55514,7 +55476,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __pyx_v_eindexes = __pyx_t_2; __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1791 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1785 * pair_count = 0 * for phrase2, eindexes in phrase_list: * als4 = self.create_alignments(sent_links,num_links,self.findexes,eindexes) # <<<<<<<<<<<<<< @@ -55523,31 +55485,31 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ __pyx_t_15 = ((PyObject *)__pyx_v_self->findexes); __Pyx_INCREF(__pyx_t_15); - __pyx_t_2 = ((PyObject *)((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->create_alignments(__pyx_v_self, __pyx_v_sent_links, __pyx_v_num_links, __pyx_t_15, __pyx_v_eindexes)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1791; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = ((PyObject *)((struct __pyx_vtabstruct_3_sa_HieroCachingRuleFactory *)__pyx_v_self->__pyx_vtab)->create_alignments(__pyx_v_self, __pyx_v_sent_links, __pyx_v_num_links, __pyx_t_15, __pyx_v_eindexes)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1785; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; __Pyx_XDECREF(((PyObject *)__pyx_v_als4)); __pyx_v_als4 = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1792 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1786 * for phrase2, eindexes in phrase_list: * als4 = self.create_alignments(sent_links,num_links,self.findexes,eindexes) * extracts.append((fphr, phrase2, pair_count, tuple(als4))) # <<<<<<<<<<<<<< * else: * reason_for_failure = "Unable to extract basic phrase" */ - __pyx_t_2 = PyFloat_FromDouble(__pyx_v_pair_count); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1792; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyFloat_FromDouble(__pyx_v_pair_count); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1786; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_15 = PyTuple_New(1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1792; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyTuple_New(1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1786; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_INCREF(((PyObject *)__pyx_v_als4)); PyTuple_SET_ITEM(__pyx_t_15, 0, ((PyObject *)__pyx_v_als4)); __Pyx_GIVEREF(((PyObject *)__pyx_v_als4)); - __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)(&PyTuple_Type))), ((PyObject *)__pyx_t_15), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1792; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyObject_Call(((PyObject *)((PyObject*)(&PyTuple_Type))), ((PyObject *)__pyx_t_15), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1786; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(((PyObject *)__pyx_t_15)); __pyx_t_15 = 0; - __pyx_t_15 = PyTuple_New(4); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1792; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_15 = PyTuple_New(4); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1786; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_INCREF(((PyObject *)__pyx_v_fphr)); PyTuple_SET_ITEM(__pyx_t_15, 0, ((PyObject *)__pyx_v_fphr)); @@ -55561,7 +55523,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj __Pyx_GIVEREF(__pyx_t_1); __pyx_t_2 = 0; __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Append(__pyx_v_extracts, ((PyObject *)__pyx_t_15)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1792; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_Append(__pyx_v_extracts, ((PyObject *)__pyx_t_15)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1786; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(((PyObject *)__pyx_t_15)); __pyx_t_15 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -55583,7 +55545,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } /*else*/ { - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1794 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1788 * extracts.append((fphr, phrase2, pair_count, tuple(als4))) * else: * reason_for_failure = "Unable to extract basic phrase" # <<<<<<<<<<<<<< @@ -55599,7 +55561,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj } __pyx_L33:; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1796 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1790 * reason_for_failure = "Unable to extract basic phrase" * * free(sent_links) # <<<<<<<<<<<<<< @@ -55608,7 +55570,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ free(__pyx_v_sent_links); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1797 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1791 * * free(sent_links) * free(f_links_low) # <<<<<<<<<<<<<< @@ -55617,7 +55579,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ free(__pyx_v_f_links_low); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1798 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1792 * free(sent_links) * free(f_links_low) * free(f_links_high) # <<<<<<<<<<<<<< @@ -55626,7 +55588,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ free(__pyx_v_f_links_high); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1799 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1793 * free(f_links_low) * free(f_links_high) * free(e_links_low) # <<<<<<<<<<<<<< @@ -55635,7 +55597,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ free(__pyx_v_e_links_low); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1800 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1794 * free(f_links_high) * free(e_links_low) * free(e_links_high) # <<<<<<<<<<<<<< @@ -55644,7 +55606,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ free(__pyx_v_e_links_high); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1801 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1795 * free(e_links_low) * free(e_links_high) * free(f_gap_low) # <<<<<<<<<<<<<< @@ -55653,7 +55615,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ free(__pyx_v_f_gap_low); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1802 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1796 * free(e_links_high) * free(f_gap_low) * free(f_gap_high) # <<<<<<<<<<<<<< @@ -55662,7 +55624,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ free(__pyx_v_f_gap_high); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1803 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1797 * free(f_gap_low) * free(f_gap_high) * free(e_gap_low) # <<<<<<<<<<<<<< @@ -55671,7 +55633,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ free(__pyx_v_e_gap_low); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1804 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1798 * free(f_gap_high) * free(e_gap_low) * free(e_gap_high) # <<<<<<<<<<<<<< @@ -55680,7 +55642,7 @@ static PyObject *__pyx_f_3_sa_23HieroCachingRuleFactory_extract(struct __pyx_obj */ free(__pyx_v_e_gap_high); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1806 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1800 * free(e_gap_high) * * return extracts # <<<<<<<<<<<<<< @@ -55734,7 +55696,7 @@ static int __pyx_pw_3_sa_13FeatureVector_1__cinit__(PyObject *__pyx_v_self, PyOb return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":7 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":7 * * cdef class FeatureVector: * def __cinit__(self): # <<<<<<<<<<<<<< @@ -55753,7 +55715,7 @@ static int __pyx_pf_3_sa_13FeatureVector___cinit__(struct __pyx_obj_3_sa_Feature int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":8 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":8 * cdef class FeatureVector: * def __cinit__(self): * self.names = IntList(INITIAL_CAPACITY, INCREMENT) # <<<<<<<<<<<<<< @@ -55781,7 +55743,7 @@ static int __pyx_pf_3_sa_13FeatureVector___cinit__(struct __pyx_obj_3_sa_Feature __pyx_v_self->names = ((struct __pyx_obj_3_sa_IntList *)__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":9 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":9 * def __cinit__(self): * self.names = IntList(INITIAL_CAPACITY, INCREMENT) * self.values = FloatList(INITIAL_CAPACITY, INCREMENT) # <<<<<<<<<<<<<< @@ -55878,7 +55840,7 @@ static PyObject *__pyx_pw_3_sa_13FeatureVector_3set(PyObject *__pyx_v_self, PyOb return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":11 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":11 * self.values = FloatList(INITIAL_CAPACITY, INCREMENT) * * def set(self, unsigned name, float value): # <<<<<<<<<<<<<< @@ -55896,7 +55858,7 @@ static PyObject *__pyx_pf_3_sa_13FeatureVector_2set(struct __pyx_obj_3_sa_Featur int __pyx_clineno = 0; __Pyx_RefNannySetupContext("set", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":12 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":12 * * def set(self, unsigned name, float value): * self.names.append(name) # <<<<<<<<<<<<<< @@ -55910,7 +55872,7 @@ static PyObject *__pyx_pf_3_sa_13FeatureVector_2set(struct __pyx_obj_3_sa_Featur __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":13 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":13 * def set(self, unsigned name, float value): * self.names.append(name) * self.values.append(value) # <<<<<<<<<<<<<< @@ -55949,7 +55911,7 @@ static PyObject *__pyx_pw_3_sa_13FeatureVector_5__iter__(PyObject *__pyx_v_self) return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":15 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":15 * self.values.append(value) * * def __iter__(self): # <<<<<<<<<<<<<< @@ -56015,7 +55977,7 @@ static PyObject *__pyx_gb_3_sa_13FeatureVector_6generator5(__pyx_GeneratorObject __pyx_L3_first_run:; if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":17 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":17 * def __iter__(self): * cdef unsigned i * for i in range(self.names.len): # <<<<<<<<<<<<<< @@ -56026,7 +55988,7 @@ static PyObject *__pyx_gb_3_sa_13FeatureVector_6generator5(__pyx_GeneratorObject for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_cur_scope->__pyx_v_i = __pyx_t_2; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":18 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":18 * cdef unsigned i * for i in range(self.names.len): * yield (FD.word(self.names[i]), self.values[i]) # <<<<<<<<<<<<<< @@ -56090,7 +56052,7 @@ static PyObject *__pyx_pw_3_sa_13FeatureVector_8__str__(PyObject *__pyx_v_self) } static PyObject *__pyx_gb_3_sa_13FeatureVector_7__str___2generator12(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":21 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":21 * * def __str__(self): * return ' '.join('%s=%s' % feat for feat in self) # <<<<<<<<<<<<<< @@ -56229,7 +56191,7 @@ static PyObject *__pyx_gb_3_sa_13FeatureVector_7__str___2generator12(__pyx_Gener return NULL; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":20 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":20 * yield (FD.word(self.names[i]), self.values[i]) * * def __str__(self): # <<<<<<<<<<<<<< @@ -56258,7 +56220,7 @@ static PyObject *__pyx_pf_3_sa_13FeatureVector_7__str__(struct __pyx_obj_3_sa_Fe __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":21 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":21 * * def __str__(self): * return ' '.join('%s=%s' % feat for feat in self) # <<<<<<<<<<<<<< @@ -56314,7 +56276,7 @@ static int __pyx_pw_3_sa_6Scorer_1__init__(PyObject *__pyx_v_self, PyObject *__p return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":25 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":25 * cdef class Scorer: * cdef models * def __init__(self, *models): # <<<<<<<<<<<<<< @@ -56337,7 +56299,7 @@ static int __pyx_pf_3_sa_6Scorer___init__(struct __pyx_obj_3_sa_Scorer *__pyx_v_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__init__", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":26 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":26 * cdef models * def __init__(self, *models): * names = [FD.index(model.__name__) for model in models] # <<<<<<<<<<<<<< @@ -56371,7 +56333,7 @@ static int __pyx_pf_3_sa_6Scorer___init__(struct __pyx_obj_3_sa_Scorer *__pyx_v_ __pyx_v_names = __pyx_t_1; __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":27 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":27 * def __init__(self, *models): * names = [FD.index(model.__name__) for model in models] * self.models = zip(names, models) # <<<<<<<<<<<<<< @@ -56410,7 +56372,7 @@ static int __pyx_pf_3_sa_6Scorer___init__(struct __pyx_obj_3_sa_Scorer *__pyx_v_ return __pyx_r; } -/* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":29 +/* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":29 * self.models = zip(names, models) * * cdef FeatureVector score(self, ctx): # <<<<<<<<<<<<<< @@ -56437,7 +56399,7 @@ static struct __pyx_obj_3_sa_FeatureVector *__pyx_f_3_sa_6Scorer_score(struct __ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("score", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":30 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":30 * * cdef FeatureVector score(self, ctx): * cdef FeatureVector scores = FeatureVector() # <<<<<<<<<<<<<< @@ -56449,7 +56411,7 @@ static struct __pyx_obj_3_sa_FeatureVector *__pyx_f_3_sa_6Scorer_score(struct __ __pyx_v_scores = ((struct __pyx_obj_3_sa_FeatureVector *)__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":31 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":31 * cdef FeatureVector score(self, ctx): * cdef FeatureVector scores = FeatureVector() * for name, model in self.models: # <<<<<<<<<<<<<< @@ -56546,7 +56508,7 @@ static struct __pyx_obj_3_sa_FeatureVector *__pyx_f_3_sa_6Scorer_score(struct __ __pyx_v_model = __pyx_t_6; __pyx_t_6 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":32 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":32 * cdef FeatureVector scores = FeatureVector() * for name, model in self.models: * scores.set(name, model(ctx)) # <<<<<<<<<<<<<< @@ -56578,7 +56540,7 @@ static struct __pyx_obj_3_sa_FeatureVector *__pyx_f_3_sa_6Scorer_score(struct __ } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/features.pxi":33 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/features.pxi":33 * for name, model in self.models: * scores.set(name, model(ctx)) * return scores # <<<<<<<<<<<<<< @@ -66601,8 +66563,8 @@ static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_zip = __Pyx_GetName(__pyx_b, __pyx_n_s__zip); if (!__pyx_builtin_zip) {__pyx_filename = __pyx_f[5]; __pyx_lineno = 363; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_StopIteration = __Pyx_GetName(__pyx_b, __pyx_n_s__StopIteration); if (!__pyx_builtin_StopIteration) {__pyx_filename = __pyx_f[6]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_cmp = __Pyx_GetName(__pyx_b, __pyx_n_s__cmp); if (!__pyx_builtin_cmp) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 173; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_sorted = __Pyx_GetName(__pyx_b, __pyx_n_s__sorted); if (!__pyx_builtin_sorted) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 937; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_max = __Pyx_GetName(__pyx_b, __pyx_n_s__max); if (!__pyx_builtin_max) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1105; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_sorted = __Pyx_GetName(__pyx_b, __pyx_n_s__sorted); if (!__pyx_builtin_sorted) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 931; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_max = __Pyx_GetName(__pyx_b, __pyx_n_s__max); if (!__pyx_builtin_max) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1099; __pyx_clineno = __LINE__; goto __pyx_L1_error;} return 0; __pyx_L1_error:; return -1; @@ -66612,7 +66574,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":20 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":20 * self.word2id = {"END_OF_FILE":0, "END_OF_LINE":1} * self.id2word = ["END_OF_FILE", "END_OF_LINE"] * self.data = IntList(1000,1000) # <<<<<<<<<<<<<< @@ -66629,7 +66591,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(__pyx_int_1000); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_10)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":21 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":21 * self.id2word = ["END_OF_FILE", "END_OF_LINE"] * self.data = IntList(1000,1000) * self.sent_id = IntList(1000,1000) # <<<<<<<<<<<<<< @@ -66646,7 +66608,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(__pyx_int_1000); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_11)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":22 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":22 * self.data = IntList(1000,1000) * self.sent_id = IntList(1000,1000) * self.sent_index = IntList(1000,1000) # <<<<<<<<<<<<<< @@ -66663,7 +66625,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(__pyx_int_1000); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_12)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":66 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":66 * f.write("%s " % self.get_word(w_id)) * if w_id == 1: * f.write("\n") # <<<<<<<<<<<<<< @@ -66677,7 +66639,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_15)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":61 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":61 * * def write_text(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -66697,7 +66659,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(Py_None); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_16)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":69 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":69 * * def read_text(self, char* filename): * with gzip_or_text(filename) as fp: # <<<<<<<<<<<<<< @@ -66717,7 +66679,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(Py_None); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_17)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":74 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":74 * def read_bitext(self, char* filename, int side): * with gzip_or_text(filename) as fp: * data = (line.split(' ||| ')[side] for line in fp) # <<<<<<<<<<<<<< @@ -66731,7 +66693,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_18)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_19)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":73 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":73 * * def read_bitext(self, char* filename, int side): * with gzip_or_text(filename) as fp: # <<<<<<<<<<<<<< @@ -66751,7 +66713,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(Py_None); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_20)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":144 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":144 * for i in self.data: * f.write("%d " %i) * f.write("\n") # <<<<<<<<<<<<<< @@ -66765,7 +66727,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_22)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":147 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":147 * for i in self.sent_index: * f.write("%d " %i) * f.write("\n") # <<<<<<<<<<<<<< @@ -66779,7 +66741,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_23)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":150 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":150 * for i in self.sent_id: * f.write("%d " %i) * f.write("\n") # <<<<<<<<<<<<<< @@ -66793,7 +66755,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_24)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":153 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":153 * for word in self.id2word: * f.write("%s %d " % (word, self.word2id[word])) * f.write("\n") # <<<<<<<<<<<<<< @@ -66807,7 +66769,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_26)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/data_array.pxi":156 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/data_array.pxi":156 * * def write_enhanced(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -66826,7 +66788,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(Py_None); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_28)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":46 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":46 * * def __cinit__(self, from_binary=None, from_text=None): * self.links = IntList(1000,1000) # <<<<<<<<<<<<<< @@ -66843,7 +66805,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(__pyx_int_1000); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_29)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":47 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":47 * def __cinit__(self, from_binary=None, from_text=None): * self.links = IntList(1000,1000) * self.sent_index = IntList(1000,1000) # <<<<<<<<<<<<<< @@ -66860,7 +66822,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(__pyx_int_1000); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_30)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":59 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":59 * pairs = line.split() * for pair in pairs: * (i, j) = map(int, pair.split('-')) # <<<<<<<<<<<<<< @@ -66874,7 +66836,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_31)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_32)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":54 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":54 * * def read_text(self, char* filename): * with gzip_or_text(filename) as f: # <<<<<<<<<<<<<< @@ -66894,7 +66856,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(Py_None); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_33)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":75 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":75 * for i, link in enumerate(self.links): * while i >= self.sent_index[sent_num]: * f.write("\n") # <<<<<<<<<<<<<< @@ -66908,7 +66870,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_34)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":78 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":78 * sent_num = sent_num + 1 * f.write("%d-%d " % self.unlink(link)) * f.write("\n") # <<<<<<<<<<<<<< @@ -66922,7 +66884,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_36)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":71 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":71 * * def write_text(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -66942,7 +66904,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(Py_None); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_37)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":92 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":92 * for link in self.links: * f.write("%d " % link) * f.write("\n") # <<<<<<<<<<<<<< @@ -66956,7 +66918,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_38)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":95 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":95 * for i in self.sent_index: * f.write("%d " % i) * f.write("\n") # <<<<<<<<<<<<<< @@ -66970,7 +66932,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_39)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/alignment.pxi":88 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/alignment.pxi":88 * * def write_enhanced(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -66990,7 +66952,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(Py_None); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_40)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":297 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":297 * * # Re-read file, placing words into buckets * f.seek(0) # <<<<<<<<<<<<<< @@ -67004,7 +66966,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(__pyx_int_0); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_43)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":273 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":273 * * fcount = IntList() * with gzip_or_text(filename) as f: # <<<<<<<<<<<<<< @@ -67024,7 +66986,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(Py_None); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_44)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":339 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":339 * * if i > j: * raise Exception("Sort error in CLex") # <<<<<<<<<<<<<< @@ -67038,7 +67000,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_46)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_47)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":362 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":362 * for i in self.f_index: * f.write("%d " % i) * f.write("\n") # <<<<<<<<<<<<<< @@ -67052,7 +67014,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_49)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":365 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":365 * for i, s1, s2 in zip(self.e_index, self.col1, self.col2): * f.write("%d %f %f " % (i, s1, s2)) * f.write("\n") # <<<<<<<<<<<<<< @@ -67066,7 +67028,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_51)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":368 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":368 * for i, w in enumerate(self.id2fword): * f.write("%d %s " % (i, w)) * f.write("\n") # <<<<<<<<<<<<<< @@ -67080,7 +67042,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_53)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":371 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":371 * for i, w in enumerate(self.id2eword): * f.write("%d %s " % (i, w)) * f.write("\n") # <<<<<<<<<<<<<< @@ -67094,7 +67056,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_54)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":359 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":359 * * def write_enhanced(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -67114,7 +67076,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(Py_None); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_55)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":404 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":404 * cdef i, N, e_id, f_id * * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -67134,7 +67096,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(Py_None); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_57)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":13 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":13 * cdef IntList rank * * logger.info("Constructing LCP array") # <<<<<<<<<<<<<< @@ -67148,7 +67110,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_60)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_61)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/lcp.pxi":34 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/lcp.pxi":34 * if h > 0: * h = h-1 * logger.info("LCP array completed") # <<<<<<<<<<<<<< @@ -67162,7 +67124,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_62)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_63)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":297 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":297 * pattern_rank = {} * * logger.info("Precomputing frequent intersections") # <<<<<<<<<<<<<< @@ -67176,7 +67138,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_72)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_73)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":314 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":314 * queue = IntList(increment=1000) * * logger.info(" Computing inverted indexes...") # <<<<<<<<<<<<<< @@ -67190,7 +67152,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_74)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_75)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":329 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":329 * trie_node_data_append(node, i) * * logger.info(" Computing collocations...") # <<<<<<<<<<<<<< @@ -67204,7 +67166,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_76)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_77)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":393 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":393 * for pattern2 in J_set: * if len(pattern1) + len(pattern2) + 1 < self.max_length: * combined_pattern = pattern1 + (-1,) + pattern2 # <<<<<<<<<<<<<< @@ -67218,7 +67180,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(__pyx_int_neg_1); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_79)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":400 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":400 * x = x+1 * if len(pattern1) + len(pattern2) + 1 <= self.max_length: * combined_pattern = pattern1 + (-1,) + pattern2 # <<<<<<<<<<<<<< @@ -67232,7 +67194,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(__pyx_int_neg_1); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_80)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":407 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":407 * x = x+2 * if len(pattern1) + len(pattern2) + 1<= self.max_length: * combined_pattern = pattern1 + (-1,) + pattern2 # <<<<<<<<<<<<<< @@ -67246,7 +67208,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(__pyx_int_neg_1); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_81)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/precomputation.pxi":409 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/precomputation.pxi":409 * combined_pattern = pattern1 + (-1,) + pattern2 * IJ_set.add(combined_pattern) * combined_pattern = pattern2 + (-1,) + pattern1 # <<<<<<<<<<<<<< @@ -67260,7 +67222,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(__pyx_int_neg_1); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_82)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":94 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":94 * * '''Step 3: read off suffix array from inverse suffix array''' * logger.info(" Finalizing sort...") # <<<<<<<<<<<<<< @@ -67274,7 +67236,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_92)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_93)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":193 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":193 * for a_i in self.sa: * f.write("%d " % a_i) * f.write("\n") # <<<<<<<<<<<<<< @@ -67288,7 +67250,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_96)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":196 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":196 * for w_i in self.ha: * f.write("%d " % w_i) * f.write("\n") # <<<<<<<<<<<<<< @@ -67302,7 +67264,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_14)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_97)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/suffix_array.pxi":189 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/suffix_array.pxi":189 * * def write_enhanced(self, char* filename): * with open(filename, "w") as f: # <<<<<<<<<<<<<< @@ -67322,7 +67284,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(Py_None); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_98)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":109 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":109 * logger.info("Sampling strategy: uniform, max sample size = %d", sample_size) * else: * logger.info("Sampling strategy: no sampling") # <<<<<<<<<<<<<< @@ -67336,7 +67298,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_101)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_102)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":318 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":318 * self.rules.root = ExtendedTrieNode(phrase_location=PhraseLocation()) * if alignment is None: * raise Exception("Must specify an alignment object") # <<<<<<<<<<<<<< @@ -67350,14 +67312,14 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_106)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_107)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/rulefactory.pxi":1024 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/rulefactory.pxi":1018 * else: * #ERROR: We never get here * raise Exception("Keyword trie error") # <<<<<<<<<<<<<< * # checking whether lookup_required * if lookup_required: */ - __pyx_k_tuple_122 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_122)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1024; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_k_tuple_122 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_122)) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 1018; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_k_tuple_122); __Pyx_INCREF(((PyObject *)__pyx_kp_s_121)); PyTuple_SET_ITEM(__pyx_k_tuple_122, 0, ((PyObject *)__pyx_kp_s_121)); @@ -67396,7 +67358,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_138)); __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_139)); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":107 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":107 * return ALPHABET.fromstring(string, terminal) * * def make_lattice(words): # <<<<<<<<<<<<<< @@ -67423,7 +67385,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_140)); __pyx_k_codeobj_141 = (PyObject*)__Pyx_PyCode_New(1, 0, 5, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_140, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_142, __pyx_n_s__make_lattice, 107, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_141)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 107; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":111 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":111 * return tuple(((word, None, 1), ) for word in word_ids) * * def decode_lattice(lattice): # <<<<<<<<<<<<<< @@ -67444,7 +67406,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_143)); __pyx_k_codeobj_144 = (PyObject*)__Pyx_PyCode_New(1, 0, 3, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_143, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_142, __pyx_n_s__decode_lattice, 111, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_144)) {__pyx_filename = __pyx_f[10]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":115 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":115 * for arc in node for node in lattice) * * def decode_sentence(lattice): # <<<<<<<<<<<<<< @@ -67758,7 +67720,7 @@ PyMODINIT_FUNC PyInit__sa(void) __pyx_ptype_3_sa___pyx_scope_struct_13_genexpr = &__pyx_type_3_sa___pyx_scope_struct_13_genexpr; if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct_14_alignments) < 0) {__pyx_filename = __pyx_f[7]; __pyx_lineno = 190; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_3_sa___pyx_scope_struct_14_alignments = &__pyx_type_3_sa___pyx_scope_struct_14_alignments; - if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct_15_input) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 939; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct_15_input) < 0) {__pyx_filename = __pyx_f[8]; __pyx_lineno = 933; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_3_sa___pyx_scope_struct_15_input = &__pyx_type_3_sa___pyx_scope_struct_15_input; if (PyType_Ready(&__pyx_type_3_sa___pyx_scope_struct_16___iter__) < 0) {__pyx_filename = __pyx_f[13]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_ptype_3_sa___pyx_scope_struct_16___iter__ = &__pyx_type_3_sa___pyx_scope_struct_16___iter__; @@ -67834,7 +67796,7 @@ PyMODINIT_FUNC PyInit__sa(void) if (PyObject_SetAttr(__pyx_m, __pyx_n_s__logger, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/bilex.pxi":54 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/bilex.pxi":54 * cdef id2eword, id2fword, eword2id, fword2id * * def __cinit__(self, from_text=None, from_data=False, from_binary=None, # <<<<<<<<<<<<<< @@ -67847,7 +67809,7 @@ PyMODINIT_FUNC PyInit__sa(void) __Pyx_GIVEREF(__pyx_t_1); __pyx_t_1 = 0; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":17 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":17 * from libc.string cimport memset * * cdef int MIN_BOTTOM_SIZE = 32 # <<<<<<<<<<<<<< @@ -67856,7 +67818,7 @@ PyMODINIT_FUNC PyInit__sa(void) */ __pyx_v_3_sa_MIN_BOTTOM_SIZE = 32; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":18 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":18 * * cdef int MIN_BOTTOM_SIZE = 32 * cdef int MIN_BOTTOM_BITS = 5 # <<<<<<<<<<<<<< @@ -67865,7 +67827,7 @@ PyMODINIT_FUNC PyInit__sa(void) */ __pyx_v_3_sa_MIN_BOTTOM_BITS = 5; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/veb.pxi":28 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/veb.pxi":28 * LOWER_MASK[i] = mask * * _init_lower_mask() # <<<<<<<<<<<<<< @@ -67874,7 +67836,7 @@ PyMODINIT_FUNC PyInit__sa(void) */ __pyx_f_3_sa__init_lower_mask(); - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":4 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":4 * from libc.stdlib cimport malloc, realloc, strtol * * cdef int INDEX_SHIFT = 3 # <<<<<<<<<<<<<< @@ -67883,7 +67845,7 @@ PyMODINIT_FUNC PyInit__sa(void) */ __pyx_v_3_sa_INDEX_SHIFT = 3; - /* "/home/hltcoe/alopez/dev/cdec/python/src/sa/sym.pxi":5 + /* "/Users/vchahun/Sandbox/cdec/python/src/sa/sym.pxi":5 * * cdef int INDEX_SHIFT = 3 * cdef int INDEX_MASK = (1< 0 and e_high - e_x_low < self.train_max_initial_size and e_links_low[e_x_low-1] == -1: e_x_low = e_x_low - 1 while e_x_high < e_sent_len and e_x_high - e_low < self.train_max_initial_size and e_links_low[e_x_high] == -1: @@ -1331,7 +1325,7 @@ cdef class HieroCachingRuleFactory: j = e_gap_order[i] e_x_gap_low = e_gap_low[j] e_x_gap_high = e_gap_high[j] - if self.tight_phrases == 0: + if not self.tight_phrases: while e_x_gap_low > e_x_low and e_links_low[e_x_gap_low-1] == -1: e_x_gap_low = e_x_gap_low - 1 while e_x_gap_high < e_x_high and e_links_low[e_x_gap_high] == -1: -- cgit v1.2.3 From 7b61618f1c9d7704bb6791b9098871ec1fbdce89 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 14 Dec 2012 13:35:11 -0500 Subject: add compression libraries --- configure.ac | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/configure.ac b/configure.ac index 366112a3..f1b9d132 100644 --- a/configure.ac +++ b/configure.ac @@ -78,6 +78,13 @@ LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS $BOOST_SERIALIZATION_LIBS $BOOST_SYSTEM_ AC_CHECK_HEADER(google/dense_hash_map, [AC_DEFINE([HAVE_SPARSEHASH], [1], [flag for google::dense_hash_map])]) +AC_CHECK_HEADER(zlib.h, + [AC_DEFINE([HAVE_ZLIB], [1], [zlib])]) +AC_CHECK_HEADER(bzlib.h, + [AC_DEFINE([HAVE_BZLIB], [1], [bzlib])]) +AC_CHECK_HEADER(lzma.h, + [AC_DEFINE([HAVE_XZLIB], [1], [xzlib])]) + AC_PROG_INSTALL CPPFLAGS="-DPIC -fPIC $CPPFLAGS -DHAVE_CONFIG_H" -- cgit v1.2.3 From de53e2e98acd0e2d07efb39bef430bd598908aa8 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 14 Dec 2012 12:39:04 -0800 Subject: Updated incremental, updated kenlm. Incremental assumes --- decoder/incremental.cc | 44 +++++++------- klm/lm/left.hh | 66 +++++++++++---------- klm/lm/max_order.hh | 7 +-- klm/lm/model.cc | 3 +- klm/lm/search_hashed.cc | 8 +-- klm/lm/search_hashed.hh | 2 +- klm/lm/vocab.cc | 7 ++- klm/lm/vocab.hh | 5 +- klm/search/Makefile.am | 4 +- klm/search/applied.hh | 86 +++++++++++++++++++++++++++ klm/search/config.hh | 25 ++++++-- klm/search/context.hh | 28 ++------- klm/search/dedupe.hh | 131 +++++++++++++++++++++++++++++++++++++++++ klm/search/edge_generator.cc | 3 +- klm/search/edge_generator.hh | 1 - klm/search/final.hh | 36 ----------- klm/search/header.hh | 9 ++- klm/search/nbest.cc | 106 +++++++++++++++++++++++++++++++++ klm/search/nbest.hh | 81 +++++++++++++++++++++++++ klm/search/note.hh | 12 ---- klm/search/rule.cc | 52 ++++++++-------- klm/search/rule.hh | 11 +++- klm/search/types.hh | 17 ++++++ klm/search/vertex.cc | 27 ++++++--- klm/search/vertex.hh | 37 +++++++----- klm/search/vertex_generator.cc | 44 +++----------- klm/search/vertex_generator.hh | 72 ++++++++++++++++++---- klm/search/weights.cc | 71 ---------------------- klm/search/weights.hh | 52 ---------------- klm/search/weights_test.cc | 38 ------------ 30 files changed, 680 insertions(+), 405 deletions(-) create mode 100644 klm/search/applied.hh create mode 100644 klm/search/dedupe.hh delete mode 100644 klm/search/final.hh create mode 100644 klm/search/nbest.cc create mode 100644 klm/search/nbest.hh delete mode 100644 klm/search/note.hh delete mode 100644 klm/search/weights.cc delete mode 100644 klm/search/weights.hh delete mode 100644 klm/search/weights_test.cc diff --git a/decoder/incremental.cc b/decoder/incremental.cc index 46615b0b..85647a44 100644 --- a/decoder/incremental.cc +++ b/decoder/incremental.cc @@ -6,6 +6,7 @@ #include "lm/enumerate_vocab.hh" #include "lm/model.hh" +#include "search/applied.hh" #include "search/config.hh" #include "search/context.hh" #include "search/edge.hh" @@ -48,16 +49,16 @@ template class Incremental : public IncrementalBase { Incremental(const char *model_file, const std::vector &weights) : IncrementalBase(weights), m_(model_file, GetConfig()), - weights_( - weights[FD::Convert("KLanguageModel")], - weights[FD::Convert("KLanguageModel_OOV")], - weights[FD::Convert("WordPenalty")]) { - std::cerr << "Weights KLanguageModel " << weights_.LM() << " KLanguageModel_OOV " << weights_.OOV() << " WordPenalty " << weights_.WordPenalty() << std::endl; + lm_(weights[FD::Convert("KLanguageModel")]), + oov_(weights[FD::Convert("KLanguageModel_OOV")]), + word_penalty_(weights[FD::Convert("WordPenalty")]) { + std::cerr << "Weights KLanguageModel " << lm_ << " KLanguageModel_OOV " << oov_ << " WordPenalty " << word_penalty_ << std::endl; } + void Search(unsigned int pop_limit, const Hypergraph &hg) const; private: - void ConvertEdge(const search::Context &context, bool final, search::Vertex *vertices, const Hypergraph::Edge &in, search::EdgeGenerator &gen) const; + void ConvertEdge(const search::Context &context, search::Vertex *vertices, const Hypergraph::Edge &in, search::EdgeGenerator &gen) const; lm::ngram::Config GetConfig() { lm::ngram::Config ret; @@ -69,46 +70,47 @@ template class Incremental : public IncrementalBase { const Model m_; - const search::Weights weights_; + const float lm_, oov_, word_penalty_; }; -void PrintFinal(const Hypergraph &hg, const search::Final final) { +void PrintApplied(const Hypergraph &hg, const search::Applied final) { const std::vector &words = static_cast(final.GetNote().vp)->rule_->e(); - const search::Final *child(final.Children()); + const search::Applied *child(final.Children()); for (std::vector::const_iterator i = words.begin(); i != words.end(); ++i) { if (*i > 0) { std::cout << TD::Convert(*i) << ' '; } else { - PrintFinal(hg, *child++); + PrintApplied(hg, *child++); } } } template void Incremental::Search(unsigned int pop_limit, const Hypergraph &hg) const { boost::scoped_array out_vertices(new search::Vertex[hg.nodes_.size()]); - search::Config config(weights_, pop_limit); + search::Config config(lm_, pop_limit, search::NBestConfig(1)); search::Context context(config, m_); + search::SingleBest best; for (unsigned int i = 0; i < hg.nodes_.size() - 1; ++i) { search::EdgeGenerator gen; const Hypergraph::EdgesVector &down_edges = hg.nodes_[i].in_edges_; for (unsigned int j = 0; j < down_edges.size(); ++j) { unsigned int edge_index = down_edges[j]; - ConvertEdge(context, i == hg.nodes_.size() - 2, out_vertices.get(), hg.edges_[edge_index], gen); + ConvertEdge(context, out_vertices.get(), hg.edges_[edge_index], gen); } - search::VertexGenerator vertex_gen(context, out_vertices[i]); + search::VertexGenerator vertex_gen(context, out_vertices[i], best); gen.Search(context, vertex_gen); } - const search::Final top = out_vertices[hg.nodes_.size() - 2].BestChild(); + const search::Applied top = out_vertices[hg.nodes_.size() - 2].BestChild(); if (!top.Valid()) { std::cout << "NO PATH FOUND" << std::endl; } else { - PrintFinal(hg, top); + PrintApplied(hg, top); std::cout << "||| " << top.GetScore() << std::endl; } } -template void Incremental::ConvertEdge(const search::Context &context, bool final, search::Vertex *vertices, const Hypergraph::Edge &in, search::EdgeGenerator &gen) const { +template void Incremental::ConvertEdge(const search::Context &context, search::Vertex *vertices, const Hypergraph::Edge &in, search::EdgeGenerator &gen) const { const std::vector &e = in.rule_->e(); std::vector words; words.reserve(e.size()); @@ -127,10 +129,6 @@ template void Incremental::ConvertEdge(const search::Contex } } - if (final) { - words.push_back(m_.GetVocabulary().EndSentence()); - } - search::PartialEdge out(gen.AllocateEdge(nts.size())); memcpy(out.NT(), &nts[0], sizeof(search::PartialVertex) * nts.size()); @@ -140,8 +138,10 @@ template void Incremental::ConvertEdge(const search::Contex out.SetNote(note); score += in.rule_->GetFeatureValues().dot(cdec_weights_); - score -= static_cast(terminals) * context.GetWeights().WordPenalty() / M_LN10; - score += search::ScoreRule(context, words, final, out.Between()); + score -= static_cast(terminals) * word_penalty_ / M_LN10; + search::ScoreRuleRet res(search::ScoreRule(context.LanguageModel(), words, out.Between())); + score += res.prob * lm_ + static_cast(res.oov) * oov_; + out.SetScore(score); gen.AddEdge(out); diff --git a/klm/lm/left.hh b/klm/lm/left.hh index 8c27232e..85c1ea37 100644 --- a/klm/lm/left.hh +++ b/klm/lm/left.hh @@ -51,36 +51,36 @@ namespace ngram { template class RuleScore { public: - explicit RuleScore(const M &model, ChartState &out) : model_(model), out_(out), left_done_(false), prob_(0.0) { + explicit RuleScore(const M &model, ChartState &out) : model_(model), out_(&out), left_done_(false), prob_(0.0) { out.left.length = 0; out.right.length = 0; } void BeginSentence() { - out_.right = model_.BeginSentenceState(); - // out_.left is empty. + out_->right = model_.BeginSentenceState(); + // out_->left is empty. left_done_ = true; } void Terminal(WordIndex word) { - State copy(out_.right); - FullScoreReturn ret(model_.FullScore(copy, word, out_.right)); + State copy(out_->right); + FullScoreReturn ret(model_.FullScore(copy, word, out_->right)); if (left_done_) { prob_ += ret.prob; return; } if (ret.independent_left) { prob_ += ret.prob; left_done_ = true; return; } - out_.left.pointers[out_.left.length++] = ret.extend_left; + out_->left.pointers[out_->left.length++] = ret.extend_left; prob_ += ret.rest; - if (out_.right.length != copy.length + 1) + if (out_->right.length != copy.length + 1) left_done_ = true; } // Faster version of NonTerminal for the case where the rule begins with a non-terminal. void BeginNonTerminal(const ChartState &in, float prob = 0.0) { prob_ = prob; - out_ = in; + *out_ = in; left_done_ = in.left.full; } @@ -89,23 +89,23 @@ template class RuleScore { if (!in.left.length) { if (in.left.full) { - for (const float *i = out_.right.backoff; i < out_.right.backoff + out_.right.length; ++i) prob_ += *i; + for (const float *i = out_->right.backoff; i < out_->right.backoff + out_->right.length; ++i) prob_ += *i; left_done_ = true; - out_.right = in.right; + out_->right = in.right; } return; } - if (!out_.right.length) { - out_.right = in.right; + if (!out_->right.length) { + out_->right = in.right; if (left_done_) { prob_ += model_.UnRest(in.left.pointers, in.left.pointers + in.left.length, 1); return; } - if (out_.left.length) { + if (out_->left.length) { left_done_ = true; } else { - out_.left = in.left; + out_->left = in.left; left_done_ = in.left.full; } return; @@ -113,10 +113,10 @@ template class RuleScore { float backoffs[KENLM_MAX_ORDER - 1], backoffs2[KENLM_MAX_ORDER - 1]; float *back = backoffs, *back2 = backoffs2; - unsigned char next_use = out_.right.length; + unsigned char next_use = out_->right.length; // First word - if (ExtendLeft(in, next_use, 1, out_.right.backoff, back)) return; + if (ExtendLeft(in, next_use, 1, out_->right.backoff, back)) return; // Words after the first, so extending a bigram to begin with for (unsigned char extend_length = 2; extend_length <= in.left.length; ++extend_length) { @@ -127,54 +127,58 @@ template class RuleScore { if (in.left.full) { for (const float *i = back; i != back + next_use; ++i) prob_ += *i; left_done_ = true; - out_.right = in.right; + out_->right = in.right; return; } // Right state was minimized, so it's already independent of the new words to the left. if (in.right.length < in.left.length) { - out_.right = in.right; + out_->right = in.right; return; } // Shift exisiting words down. - for (WordIndex *i = out_.right.words + next_use - 1; i >= out_.right.words; --i) { + for (WordIndex *i = out_->right.words + next_use - 1; i >= out_->right.words; --i) { *(i + in.right.length) = *i; } // Add words from in.right. - std::copy(in.right.words, in.right.words + in.right.length, out_.right.words); + std::copy(in.right.words, in.right.words + in.right.length, out_->right.words); // Assemble backoff composed on the existing state's backoff followed by the new state's backoff. - std::copy(in.right.backoff, in.right.backoff + in.right.length, out_.right.backoff); - std::copy(back, back + next_use, out_.right.backoff + in.right.length); - out_.right.length = in.right.length + next_use; + std::copy(in.right.backoff, in.right.backoff + in.right.length, out_->right.backoff); + std::copy(back, back + next_use, out_->right.backoff + in.right.length); + out_->right.length = in.right.length + next_use; } float Finish() { // A N-1-gram might extend left and right but we should still set full to true because it's an N-1-gram. - out_.left.full = left_done_ || (out_.left.length == model_.Order() - 1); + out_->left.full = left_done_ || (out_->left.length == model_.Order() - 1); return prob_; } void Reset() { prob_ = 0.0; left_done_ = false; - out_.left.length = 0; - out_.right.length = 0; + out_->left.length = 0; + out_->right.length = 0; + } + void Reset(ChartState &replacement) { + out_ = &replacement; + Reset(); } private: bool ExtendLeft(const ChartState &in, unsigned char &next_use, unsigned char extend_length, const float *back_in, float *back_out) { ProcessRet(model_.ExtendLeft( - out_.right.words, out_.right.words + next_use, // Words to extend into + out_->right.words, out_->right.words + next_use, // Words to extend into back_in, // Backoffs to use in.left.pointers[extend_length - 1], extend_length, // Words to be extended back_out, // Backoffs for the next score next_use)); // Length of n-gram to use in next scoring. - if (next_use != out_.right.length) { + if (next_use != out_->right.length) { left_done_ = true; if (!next_use) { // Early exit. - out_.right = in.right; + out_->right = in.right; prob_ += model_.UnRest(in.left.pointers + extend_length, in.left.pointers + in.left.length, extend_length + 1); return true; } @@ -193,13 +197,13 @@ template class RuleScore { left_done_ = true; return; } - out_.left.pointers[out_.left.length++] = ret.extend_left; + out_->left.pointers[out_->left.length++] = ret.extend_left; prob_ += ret.rest; } const M &model_; - ChartState &out_; + ChartState *out_; bool left_done_; diff --git a/klm/lm/max_order.hh b/klm/lm/max_order.hh index 989f8324..ea0dea46 100644 --- a/klm/lm/max_order.hh +++ b/klm/lm/max_order.hh @@ -4,9 +4,8 @@ * (kMaxOrder - 1) * sizeof(float) bytes instead of * sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead */ -#ifndef KENLM_MAX_ORDER -#define KENLM_MAX_ORDER 6 -#endif #ifndef KENLM_ORDER_MESSAGE -#define KENLM_ORDER_MESSAGE "If your build system supports changing KENLM_MAX_ORDER, change it there and recompile. In the KenLM tarball or Moses, use e.g. `bjam --kenlm-max-order=6 -a'. Otherwise, edit lm/max_order.hh." +#define KENLM_ORDER_MESSAGE "If your build system supports changing KENLM_MAX_ORDER, change it there and recompile. In the KenLM tarball or Moses, use e.g. `bjam --max-kenlm-order=6 -a'. Otherwise, edit lm/max_order.hh." #endif + +#define KENLM_MAX_ORDER 5 diff --git a/klm/lm/model.cc b/klm/lm/model.cc index 2fd20481..fc61efee 100644 --- a/klm/lm/model.cc +++ b/klm/lm/model.cc @@ -87,7 +87,7 @@ template void GenericModel FullScoreReturn GenericModel void HashedSearch::InitializeFromARPA(const char * template <> void HashedSearch::DispatchBuild(util::FilePiece &f, const std::vector &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn) { NoRestBuild build; - ApplyBuild(f, counts, config, vocab, warn, build); + ApplyBuild(f, counts, vocab, warn, build); } template <> void HashedSearch::DispatchBuild(util::FilePiece &f, const std::vector &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn) { @@ -239,19 +239,19 @@ template <> void HashedSearch::DispatchBuild(util::FilePiece &f, cons case Config::REST_MAX: { MaxRestBuild build; - ApplyBuild(f, counts, config, vocab, warn, build); + ApplyBuild(f, counts, vocab, warn, build); } break; case Config::REST_LOWER: { LowerRestBuild build(config, counts.size(), vocab); - ApplyBuild(f, counts, config, vocab, warn, build); + ApplyBuild(f, counts, vocab, warn, build); } break; } } -template template void HashedSearch::ApplyBuild(util::FilePiece &f, const std::vector &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn, const Build &build) { +template template void HashedSearch::ApplyBuild(util::FilePiece &f, const std::vector &counts, const ProbingVocabulary &vocab, PositiveProbWarn &warn, const Build &build) { for (WordIndex i = 0; i < counts[0]; ++i) { build.SetRest(&i, (unsigned int)1, unigram_.Raw()[i]); } diff --git a/klm/lm/search_hashed.hh b/klm/lm/search_hashed.hh index a52f107b..00595796 100644 --- a/klm/lm/search_hashed.hh +++ b/klm/lm/search_hashed.hh @@ -147,7 +147,7 @@ template class HashedSearch { // Interpret config's rest cost build policy and pass the right template argument to ApplyBuild. void DispatchBuild(util::FilePiece &f, const std::vector &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn); - template void ApplyBuild(util::FilePiece &f, const std::vector &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn, const Build &build); + template void ApplyBuild(util::FilePiece &f, const std::vector &counts, const ProbingVocabulary &vocab, PositiveProbWarn &warn, const Build &build); class Unigram { public: diff --git a/klm/lm/vocab.cc b/klm/lm/vocab.cc index 11c27518..fd7f96dc 100644 --- a/klm/lm/vocab.cc +++ b/klm/lm/vocab.cc @@ -116,7 +116,9 @@ WordIndex SortedVocabulary::Insert(const StringPiece &str) { } *end_ = hashed; if (enumerate_) { - strings_to_enumerate_[end_ - begin_].assign(str.data(), str.size()); + void *copied = string_backing_.Allocate(str.size()); + memcpy(copied, str.data(), str.size()); + strings_to_enumerate_[end_ - begin_] = StringPiece(static_cast(copied), str.size()); } ++end_; // This is 1 + the offset where it was inserted to make room for unk. @@ -126,7 +128,7 @@ WordIndex SortedVocabulary::Insert(const StringPiece &str) { void SortedVocabulary::FinishedLoading(ProbBackoff *reorder_vocab) { if (enumerate_) { if (!strings_to_enumerate_.empty()) { - util::PairedIterator values(reorder_vocab + 1, &*strings_to_enumerate_.begin()); + util::PairedIterator values(reorder_vocab + 1, &*strings_to_enumerate_.begin()); util::JointSort(begin_, end_, values); } for (WordIndex i = 0; i < static_cast(end_ - begin_); ++i) { @@ -134,6 +136,7 @@ void SortedVocabulary::FinishedLoading(ProbBackoff *reorder_vocab) { enumerate_->Add(i + 1, strings_to_enumerate_[i]); } strings_to_enumerate_.clear(); + string_backing_.FreeAll(); } else { util::JointSort(begin_, end_, reorder_vocab + 1); } diff --git a/klm/lm/vocab.hh b/klm/lm/vocab.hh index de54eb06..3902f117 100644 --- a/klm/lm/vocab.hh +++ b/klm/lm/vocab.hh @@ -4,6 +4,7 @@ #include "lm/enumerate_vocab.hh" #include "lm/lm_exception.hh" #include "lm/virtual_interface.hh" +#include "util/pool.hh" #include "util/probing_hash_table.hh" #include "util/sorted_uniform.hh" #include "util/string_piece.hh" @@ -96,7 +97,9 @@ class SortedVocabulary : public base::Vocabulary { EnumerateVocab *enumerate_; // Actual strings. Used only when loading from ARPA and enumerate_ != NULL - std::vector strings_to_enumerate_; + util::Pool string_backing_; + + std::vector strings_to_enumerate_; }; #pragma pack(push) diff --git a/klm/search/Makefile.am b/klm/search/Makefile.am index ccc5b7f6..5aea33c2 100644 --- a/klm/search/Makefile.am +++ b/klm/search/Makefile.am @@ -2,10 +2,10 @@ noinst_LIBRARIES = libksearch.a libksearch_a_SOURCES = \ edge_generator.cc \ + nbest.cc \ rule.cc \ vertex.cc \ - vertex_generator.cc \ - weights.cc + vertex_generator.cc AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. diff --git a/klm/search/applied.hh b/klm/search/applied.hh new file mode 100644 index 00000000..bd659e5c --- /dev/null +++ b/klm/search/applied.hh @@ -0,0 +1,86 @@ +#ifndef SEARCH_APPLIED__ +#define SEARCH_APPLIED__ + +#include "search/edge.hh" +#include "search/header.hh" +#include "util/pool.hh" + +#include + +namespace search { + +// A full hypothesis: a score, arity of the rule, a pointer to the decoder's rule (Note), and pointers to non-terminals that were substituted. +template class GenericApplied : public Header { + public: + GenericApplied() {} + + GenericApplied(void *location, PartialEdge partial) + : Header(location) { + memcpy(Base(), partial.Base(), kHeaderSize); + Below *child_out = Children(); + const PartialVertex *part = partial.NT(); + const PartialVertex *const part_end_loop = part + partial.GetArity(); + for (; part != part_end_loop; ++part, ++child_out) + *child_out = Below(part->End()); + } + + GenericApplied(void *location, Score score, Arity arity, Note note) : Header(location, arity) { + SetScore(score); + SetNote(note); + } + + explicit GenericApplied(History from) : Header(from) {} + + + // These are arrays of length GetArity(). + Below *Children() { + return reinterpret_cast(After()); + } + const Below *Children() const { + return reinterpret_cast(After()); + } + + static std::size_t Size(Arity arity) { + return kHeaderSize + arity * sizeof(const Below); + } +}; + +// Applied rule that references itself. +class Applied : public GenericApplied { + private: + typedef GenericApplied P; + + public: + Applied() {} + Applied(void *location, PartialEdge partial) : P(location, partial) {} + Applied(History from) : P(from) {} +}; + +// How to build single-best hypotheses. +class SingleBest { + public: + typedef PartialEdge Combine; + + void Add(PartialEdge &existing, PartialEdge add) const { + if (!existing.Valid() || existing.GetScore() < add.GetScore()) + existing = add; + } + + NBestComplete Complete(PartialEdge partial) { + if (!partial.Valid()) + return NBestComplete(NULL, lm::ngram::ChartState(), -INFINITY); + void *place_final = pool_.Allocate(Applied::Size(partial.GetArity())); + Applied(place_final, partial); + return NBestComplete( + place_final, + partial.CompletedState(), + partial.GetScore()); + } + + private: + util::Pool pool_; +}; + +} // namespace search + +#endif // SEARCH_APPLIED__ diff --git a/klm/search/config.hh b/klm/search/config.hh index ef8e2354..ba18c09e 100644 --- a/klm/search/config.hh +++ b/klm/search/config.hh @@ -1,23 +1,36 @@ #ifndef SEARCH_CONFIG__ #define SEARCH_CONFIG__ -#include "search/weights.hh" -#include "util/string_piece.hh" +#include "search/types.hh" namespace search { +struct NBestConfig { + explicit NBestConfig(unsigned int in_size) { + keep = in_size; + size = in_size; + } + + unsigned int keep, size; +}; + class Config { public: - Config(const Weights &weights, unsigned int pop_limit) : - weights_(weights), pop_limit_(pop_limit) {} + Config(Score lm_weight, unsigned int pop_limit, const NBestConfig &nbest) : + lm_weight_(lm_weight), pop_limit_(pop_limit), nbest_(nbest) {} - const Weights &GetWeights() const { return weights_; } + Score LMWeight() const { return lm_weight_; } unsigned int PopLimit() const { return pop_limit_; } + const NBestConfig &GetNBest() const { return nbest_; } + private: - Weights weights_; + Score lm_weight_; + unsigned int pop_limit_; + + NBestConfig nbest_; }; } // namespace search diff --git a/klm/search/context.hh b/klm/search/context.hh index 62163144..08f21bbf 100644 --- a/klm/search/context.hh +++ b/klm/search/context.hh @@ -1,30 +1,16 @@ #ifndef SEARCH_CONTEXT__ #define SEARCH_CONTEXT__ -#include "lm/model.hh" #include "search/config.hh" -#include "search/final.hh" -#include "search/types.hh" #include "search/vertex.hh" -#include "util/exception.hh" -#include "util/pool.hh" #include -#include - -#include namespace search { -class Weights; - class ContextBase { public: - explicit ContextBase(const Config &config) : pop_limit_(config.PopLimit()), weights_(config.GetWeights()) {} - - util::Pool &FinalPool() { - return final_pool_; - } + explicit ContextBase(const Config &config) : config_(config) {} VertexNode *NewVertexNode() { VertexNode *ret = vertex_node_pool_.construct(); @@ -36,18 +22,16 @@ class ContextBase { vertex_node_pool_.destroy(node); } - unsigned int PopLimit() const { return pop_limit_; } + unsigned int PopLimit() const { return config_.PopLimit(); } - const Weights &GetWeights() const { return weights_; } + Score LMWeight() const { return config_.LMWeight(); } - private: - util::Pool final_pool_; + const Config &GetConfig() const { return config_; } + private: boost::object_pool vertex_node_pool_; - unsigned int pop_limit_; - - const Weights &weights_; + Config config_; }; template class Context : public ContextBase { diff --git a/klm/search/dedupe.hh b/klm/search/dedupe.hh new file mode 100644 index 00000000..7eaa3b95 --- /dev/null +++ b/klm/search/dedupe.hh @@ -0,0 +1,131 @@ +#ifndef SEARCH_DEDUPE__ +#define SEARCH_DEDUPE__ + +#include "lm/state.hh" +#include "search/edge_generator.hh" + +#include +#include + +namespace search { + +class Dedupe { + public: + Dedupe() {} + + PartialEdge AllocateEdge(Arity arity) { + return behind_.AllocateEdge(arity); + } + + void AddEdge(PartialEdge edge) { + edge.MutableFlags() = 0; + + uint64_t hash = 0; + const PartialVertex *v = edge.NT(); + const PartialVertex *v_end = v + edge.GetArity(); + for (; v != v_end; ++v) { + const void *ptr = v->Identify(); + hash = util::MurmurHashNative(&ptr, sizeof(const void*), hash); + } + + const lm::ngram::ChartState *c = edge.Between(); + const lm::ngram::ChartState *const c_end = c + edge.GetArity() + 1; + for (; c != c_end; ++c) hash = hash_value(*c, hash); + + std::pair ret(table_.insert(std::make_pair(hash, edge))); + if (!ret.second) FoundDupe(ret.first->second, edge); + } + + bool Empty() const { return behind_.Empty(); } + + template void Search(Context &context, Output &output) { + for (Table::const_iterator i(table_.begin()); i != table_.end(); ++i) { + behind_.AddEdge(i->second); + } + Unpack unpack(output, *this); + behind_.Search(context, unpack); + } + + private: + void FoundDupe(PartialEdge &table, PartialEdge adding) { + if (table.GetFlags() & kPackedFlag) { + Packed &packed = *static_cast(table.GetNote().mut); + if (table.GetScore() >= adding.GetScore()) { + packed.others.push_back(adding); + return; + } + Note original(packed.original); + packed.original = adding.GetNote(); + adding.SetNote(table.GetNote()); + table.SetNote(original); + packed.others.push_back(table); + packed.starting = adding.GetScore(); + table = adding; + table.MutableFlags() |= kPackedFlag; + return; + } + PartialEdge loser; + if (adding.GetScore() > table.GetScore()) { + loser = table; + table = adding; + } else { + loser = adding; + } + // table is winner, loser is loser... + packed_.construct(table, loser); + } + + struct Packed { + Packed(PartialEdge winner, PartialEdge loser) + : original(winner.GetNote()), starting(winner.GetScore()), others(1, loser) { + winner.MutableNote().vp = this; + winner.MutableFlags() |= kPackedFlag; + loser.MutableFlags() &= ~kPackedFlag; + } + Note original; + Score starting; + std::vector others; + }; + + template class Unpack { + public: + explicit Unpack(Output &output, Dedupe &owner) : output_(output), owner_(owner) {} + + void NewHypothesis(PartialEdge edge) { + if (edge.GetFlags() & kPackedFlag) { + Packed &packed = *reinterpret_cast(edge.GetNote().mut); + edge.SetNote(packed.original); + edge.MutableFlags() = 0; + std::size_t copy_size = sizeof(PartialVertex) * edge.GetArity() + sizeof(lm::ngram::ChartState); + for (std::vector::iterator i = packed.others.begin(); i != packed.others.end(); ++i) { + PartialEdge copy(owner_.AllocateEdge(edge.GetArity())); + copy.SetScore(edge.GetScore() - packed.starting + i->GetScore()); + copy.MutableFlags() = 0; + copy.SetNote(i->GetNote()); + memcpy(copy.NT(), edge.NT(), copy_size); + output_.NewHypothesis(copy); + } + } + output_.NewHypothesis(edge); + } + + void FinishedSearch() { + output_.FinishedSearch(); + } + + private: + Output &output_; + Dedupe &owner_; + }; + + EdgeGenerator behind_; + + typedef boost::unordered_map Table; + Table table_; + + boost::object_pool packed_; + + static const uint16_t kPackedFlag = 1; +}; +} // namespace search +#endif // SEARCH_DEDUPE__ diff --git a/klm/search/edge_generator.cc b/klm/search/edge_generator.cc index 260159b1..eacf5de5 100644 --- a/klm/search/edge_generator.cc +++ b/klm/search/edge_generator.cc @@ -1,6 +1,7 @@ #include "search/edge_generator.hh" #include "lm/left.hh" +#include "lm/model.hh" #include "lm/partial.hh" #include "search/context.hh" #include "search/vertex.hh" @@ -38,7 +39,7 @@ template void FastScore(const Context &context, Arity victi *cover = *(cover + 1); } } - update.SetScore(update.GetScore() + adjustment * context.GetWeights().LM()); + update.SetScore(update.GetScore() + adjustment * context.LMWeight()); } } // namespace diff --git a/klm/search/edge_generator.hh b/klm/search/edge_generator.hh index 582c78b7..203942c6 100644 --- a/klm/search/edge_generator.hh +++ b/klm/search/edge_generator.hh @@ -2,7 +2,6 @@ #define SEARCH_EDGE_GENERATOR__ #include "search/edge.hh" -#include "search/note.hh" #include "search/types.hh" #include diff --git a/klm/search/final.hh b/klm/search/final.hh deleted file mode 100644 index 50e62cf2..00000000 --- a/klm/search/final.hh +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef SEARCH_FINAL__ -#define SEARCH_FINAL__ - -#include "search/header.hh" -#include "util/pool.hh" - -namespace search { - -// A full hypothesis with pointers to children. -class Final : public Header { - public: - Final() {} - - Final(util::Pool &pool, Score score, Arity arity, Note note) - : Header(pool.Allocate(Size(arity)), arity) { - SetScore(score); - SetNote(note); - } - - // These are arrays of length GetArity(). - Final *Children() { - return reinterpret_cast(After()); - } - const Final *Children() const { - return reinterpret_cast(After()); - } - - private: - static std::size_t Size(Arity arity) { - return kHeaderSize + arity * sizeof(const Final); - } -}; - -} // namespace search - -#endif // SEARCH_FINAL__ diff --git a/klm/search/header.hh b/klm/search/header.hh index 25550dbe..69f0eed0 100644 --- a/klm/search/header.hh +++ b/klm/search/header.hh @@ -3,7 +3,6 @@ // Header consisting of Score, Arity, and Note -#include "search/note.hh" #include "search/types.hh" #include @@ -24,6 +23,9 @@ class Header { bool operator<(const Header &other) const { return GetScore() < other.GetScore(); } + bool operator>(const Header &other) const { + return GetScore() > other.GetScore(); + } Arity GetArity() const { return *reinterpret_cast(base_ + sizeof(Score)); @@ -36,9 +38,14 @@ class Header { *reinterpret_cast(base_ + sizeof(Score) + sizeof(Arity)) = to; } + uint8_t *Base() { return base_; } + const uint8_t *Base() const { return base_; } + protected: Header() : base_(NULL) {} + explicit Header(void *base) : base_(static_cast(base)) {} + Header(void *base, Arity arity) : base_(static_cast(base)) { *reinterpret_cast(base_ + sizeof(Score)) = arity; } diff --git a/klm/search/nbest.cc b/klm/search/nbest.cc new file mode 100644 index 00000000..ec3322c9 --- /dev/null +++ b/klm/search/nbest.cc @@ -0,0 +1,106 @@ +#include "search/nbest.hh" + +#include "util/pool.hh" + +#include +#include +#include + +#include +#include + +namespace search { + +NBestList::NBestList(std::vector &partials, util::Pool &entry_pool, std::size_t keep) { + assert(!partials.empty()); + std::vector::iterator end; + if (partials.size() > keep) { + end = partials.begin() + keep; + std::nth_element(partials.begin(), end, partials.end(), std::greater()); + } else { + end = partials.end(); + } + for (std::vector::const_iterator i(partials.begin()); i != end; ++i) { + queue_.push(QueueEntry(entry_pool.Allocate(QueueEntry::Size(i->GetArity())), *i)); + } +} + +Score NBestList::TopAfterConstructor() const { + assert(revealed_.empty()); + return queue_.top().GetScore(); +} + +const std::vector &NBestList::Extract(util::Pool &pool, std::size_t n) { + while (revealed_.size() < n && !queue_.empty()) { + MoveTop(pool); + } + return revealed_; +} + +Score NBestList::Visit(util::Pool &pool, std::size_t index) { + if (index + 1 < revealed_.size()) + return revealed_[index + 1].GetScore() - revealed_[index].GetScore(); + if (queue_.empty()) + return -INFINITY; + if (index + 1 == revealed_.size()) + return queue_.top().GetScore() - revealed_[index].GetScore(); + assert(index == revealed_.size()); + + MoveTop(pool); + + if (queue_.empty()) return -INFINITY; + return queue_.top().GetScore() - revealed_[index].GetScore(); +} + +Applied NBestList::Get(util::Pool &pool, std::size_t index) { + assert(index <= revealed_.size()); + if (index == revealed_.size()) MoveTop(pool); + return revealed_[index]; +} + +void NBestList::MoveTop(util::Pool &pool) { + assert(!queue_.empty()); + QueueEntry entry(queue_.top()); + queue_.pop(); + RevealedRef *const children_begin = entry.Children(); + RevealedRef *const children_end = children_begin + entry.GetArity(); + Score basis = entry.GetScore(); + for (RevealedRef *child = children_begin; child != children_end; ++child) { + Score change = child->in_->Visit(pool, child->index_); + if (change != -INFINITY) { + assert(change < 0.001); + QueueEntry new_entry(pool.Allocate(QueueEntry::Size(entry.GetArity())), basis + change, entry.GetArity(), entry.GetNote()); + std::copy(children_begin, child, new_entry.Children()); + RevealedRef *update = new_entry.Children() + (child - children_begin); + update->in_ = child->in_; + update->index_ = child->index_ + 1; + std::copy(child + 1, children_end, update + 1); + queue_.push(new_entry); + } + // Gesmundo, A. and Henderson, J. Faster Cube Pruning, IWSLT 2010. + if (child->index_) break; + } + + // Convert QueueEntry to Applied. This leaves some unused memory. + void *overwrite = entry.Children(); + for (unsigned int i = 0; i < entry.GetArity(); ++i) { + RevealedRef from(*(static_cast(overwrite) + i)); + *(static_cast(overwrite) + i) = from.in_->Get(pool, from.index_); + } + revealed_.push_back(Applied(entry.Base())); +} + +NBestComplete NBest::Complete(std::vector &partials) { + assert(!partials.empty()); + NBestList *list = list_pool_.construct(partials, entry_pool_, config_.keep); + return NBestComplete( + list, + partials.front().CompletedState(), // All partials have the same state + list->TopAfterConstructor()); +} + +const std::vector &NBest::Extract(History history) { + return static_cast(history)->Extract(entry_pool_, config_.size); +} + +} // namespace search diff --git a/klm/search/nbest.hh b/klm/search/nbest.hh new file mode 100644 index 00000000..cb7651bc --- /dev/null +++ b/klm/search/nbest.hh @@ -0,0 +1,81 @@ +#ifndef SEARCH_NBEST__ +#define SEARCH_NBEST__ + +#include "search/applied.hh" +#include "search/config.hh" +#include "search/edge.hh" + +#include + +#include +#include +#include + +#include + +namespace search { + +class NBestList; + +class NBestList { + private: + class RevealedRef { + public: + explicit RevealedRef(History history) + : in_(static_cast(history)), index_(0) {} + + private: + friend class NBestList; + + NBestList *in_; + std::size_t index_; + }; + + typedef GenericApplied QueueEntry; + + public: + NBestList(std::vector &existing, util::Pool &entry_pool, std::size_t keep); + + Score TopAfterConstructor() const; + + const std::vector &Extract(util::Pool &pool, std::size_t n); + + private: + Score Visit(util::Pool &pool, std::size_t index); + + Applied Get(util::Pool &pool, std::size_t index); + + void MoveTop(util::Pool &pool); + + typedef std::vector Revealed; + Revealed revealed_; + + typedef std::priority_queue Queue; + Queue queue_; +}; + +class NBest { + public: + typedef std::vector Combine; + + explicit NBest(const NBestConfig &config) : config_(config) {} + + void Add(std::vector &existing, PartialEdge addition) const { + existing.push_back(addition); + } + + NBestComplete Complete(std::vector &partials); + + const std::vector &Extract(History root); + + private: + const NBestConfig config_; + + boost::object_pool list_pool_; + + util::Pool entry_pool_; +}; + +} // namespace search + +#endif // SEARCH_NBEST__ diff --git a/klm/search/note.hh b/klm/search/note.hh deleted file mode 100644 index 50bed06e..00000000 --- a/klm/search/note.hh +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef SEARCH_NOTE__ -#define SEARCH_NOTE__ - -namespace search { - -union Note { - const void *vp; -}; - -} // namespace search - -#endif // SEARCH_NOTE__ diff --git a/klm/search/rule.cc b/klm/search/rule.cc index 5b00207e..0244a09f 100644 --- a/klm/search/rule.cc +++ b/klm/search/rule.cc @@ -1,7 +1,7 @@ #include "search/rule.hh" +#include "lm/model.hh" #include "search/context.hh" -#include "search/final.hh" #include @@ -9,35 +9,35 @@ namespace search { -template float ScoreRule(const Context &context, const std::vector &words, bool prepend_bos, lm::ngram::ChartState *writing) { - unsigned int oov_count = 0; - float prob = 0.0; - const Model &model = context.LanguageModel(); - const lm::WordIndex oov = model.GetVocabulary().NotFound(); - for (std::vector::const_iterator word = words.begin(); ; ++word) { - lm::ngram::RuleScore scorer(model, *(writing++)); - // TODO: optimize - if (prepend_bos && (word == words.begin())) { - scorer.BeginSentence(); - } - for (; ; ++word) { - if (word == words.end()) { - prob += scorer.Finish(); - return static_cast(oov_count) * context.GetWeights().OOV() + prob * context.GetWeights().LM(); - } - if (*word == kNonTerminal) break; - if (*word == oov) ++oov_count; +template ScoreRuleRet ScoreRule(const Model &model, const std::vector &words, lm::ngram::ChartState *writing) { + ScoreRuleRet ret; + ret.prob = 0.0; + ret.oov = 0; + const lm::WordIndex oov = model.GetVocabulary().NotFound(), bos = model.GetVocabulary().BeginSentence(); + lm::ngram::RuleScore scorer(model, *(writing++)); + std::vector::const_iterator word = words.begin(); + if (word != words.end() && *word == bos) { + scorer.BeginSentence(); + ++word; + } + for (; word != words.end(); ++word) { + if (*word == kNonTerminal) { + ret.prob += scorer.Finish(); + scorer.Reset(*(writing++)); + } else { + if (*word == oov) ++ret.oov; scorer.Terminal(*word); } - prob += scorer.Finish(); } + ret.prob += scorer.Finish(); + return ret; } -template float ScoreRule(const Context &model, const std::vector &words, bool prepend_bos, lm::ngram::ChartState *writing); -template float ScoreRule(const Context &model, const std::vector &words, bool prepend_bos, lm::ngram::ChartState *writing); -template float ScoreRule(const Context &model, const std::vector &words, bool prepend_bos, lm::ngram::ChartState *writing); -template float ScoreRule(const Context &model, const std::vector &words, bool prepend_bos, lm::ngram::ChartState *writing); -template float ScoreRule(const Context &model, const std::vector &words, bool prepend_bos, lm::ngram::ChartState *writing); -template float ScoreRule(const Context &model, const std::vector &words, bool prepend_bos, lm::ngram::ChartState *writing); +template ScoreRuleRet ScoreRule(const lm::ngram::RestProbingModel &model, const std::vector &words, lm::ngram::ChartState *writing); +template ScoreRuleRet ScoreRule(const lm::ngram::ProbingModel &model, const std::vector &words, lm::ngram::ChartState *writing); +template ScoreRuleRet ScoreRule(const lm::ngram::TrieModel &model, const std::vector &words, lm::ngram::ChartState *writing); +template ScoreRuleRet ScoreRule(const lm::ngram::QuantTrieModel &model, const std::vector &words, lm::ngram::ChartState *writing); +template ScoreRuleRet ScoreRule(const lm::ngram::ArrayTrieModel &model, const std::vector &words, lm::ngram::ChartState *writing); +template ScoreRuleRet ScoreRule(const lm::ngram::QuantArrayTrieModel &model, const std::vector &words, lm::ngram::ChartState *writing); } // namespace search diff --git a/klm/search/rule.hh b/klm/search/rule.hh index 0ce2794d..43ca6162 100644 --- a/klm/search/rule.hh +++ b/klm/search/rule.hh @@ -9,11 +9,16 @@ namespace search { -template class Context; - const lm::WordIndex kNonTerminal = lm::kMaxWordIndex; -template float ScoreRule(const Context &context, const std::vector &words, bool prepend_bos, lm::ngram::ChartState *state_out); +struct ScoreRuleRet { + Score prob; + unsigned int oov; +}; + +// Pass and normally. +// Indicate non-terminals with kNonTerminal. +template ScoreRuleRet ScoreRule(const Model &model, const std::vector &words, lm::ngram::ChartState *state_out); } // namespace search diff --git a/klm/search/types.hh b/klm/search/types.hh index 06eb5bfa..f9c849b3 100644 --- a/klm/search/types.hh +++ b/klm/search/types.hh @@ -3,12 +3,29 @@ #include +namespace lm { namespace ngram { class ChartState; } } + namespace search { typedef float Score; typedef uint32_t Arity; +union Note { + const void *vp; +}; + +typedef void *History; + +struct NBestComplete { + NBestComplete(History in_history, const lm::ngram::ChartState &in_state, Score in_score) + : history(in_history), state(&in_state), score(in_score) {} + + History history; + const lm::ngram::ChartState *state; + Score score; +}; + } // namespace search #endif // SEARCH_TYPES__ diff --git a/klm/search/vertex.cc b/klm/search/vertex.cc index 11f4631f..45842982 100644 --- a/klm/search/vertex.cc +++ b/klm/search/vertex.cc @@ -19,21 +19,34 @@ struct GreaterByBound : public std::binary_functionSortAndSet(context, parent_ptr); + if (extend_.size() == 1) { + parent_ptr = extend_[0]; + extend_[0]->RecursiveSortAndSet(context, parent_ptr); context.DeleteVertexNode(this); return; } for (std::vector::iterator i = extend_.begin(); i != extend_.end(); ++i) { - (*i)->SortAndSet(context, &*i); + (*i)->RecursiveSortAndSet(context, *i); + } + std::sort(extend_.begin(), extend_.end(), GreaterByBound()); + bound_ = extend_.front()->Bound(); +} + +void VertexNode::SortAndSet(ContextBase &context) { + // This is the root. The root might be empty. + if (extend_.empty()) { + bound_ = -INFINITY; + return; + } + // The root cannot be replaced. There's always one transition. + for (std::vector::iterator i = extend_.begin(); i != extend_.end(); ++i) { + (*i)->RecursiveSortAndSet(context, *i); } std::sort(extend_.begin(), extend_.end(), GreaterByBound()); bound_ = extend_.front()->Bound(); diff --git a/klm/search/vertex.hh b/klm/search/vertex.hh index 52bc1dfe..10b3339b 100644 --- a/klm/search/vertex.hh +++ b/klm/search/vertex.hh @@ -2,7 +2,6 @@ #define SEARCH_VERTEX__ #include "lm/left.hh" -#include "search/final.hh" #include "search/types.hh" #include @@ -10,6 +9,7 @@ #include #include +#include #include namespace search { @@ -18,7 +18,7 @@ class ContextBase; class VertexNode { public: - VertexNode() {} + VertexNode() : end_() {} void InitRoot() { extend_.clear(); @@ -26,7 +26,7 @@ class VertexNode { state_.left.length = 0; state_.right.length = 0; right_full_ = false; - end_ = Final(); + end_ = History(); } lm::ngram::ChartState &MutableState() { return state_; } @@ -36,20 +36,21 @@ class VertexNode { extend_.push_back(next); } - void SetEnd(Final end) { - assert(!end_.Valid()); + void SetEnd(History end, Score score) { + assert(!end_); end_ = end; + bound_ = score; } - void SortAndSet(ContextBase &context, VertexNode **parent_pointer); + void SortAndSet(ContextBase &context); // Should only happen to a root node when the entire vertex is empty. bool Empty() const { - return !end_.Valid() && extend_.empty(); + return !end_ && extend_.empty(); } bool Complete() const { - return end_.Valid(); + return end_; } const lm::ngram::ChartState &State() const { return state_; } @@ -64,7 +65,7 @@ class VertexNode { } // Will be invalid unless this is a leaf. - const Final End() const { return end_; } + const History End() const { return end_; } const VertexNode &operator[](size_t index) const { return *extend_[index]; @@ -75,13 +76,15 @@ class VertexNode { } private: + void RecursiveSortAndSet(ContextBase &context, VertexNode *&parent); + std::vector extend_; lm::ngram::ChartState state_; bool right_full_; Score bound_; - Final end_; + History end_; }; class PartialVertex { @@ -97,7 +100,7 @@ class PartialVertex { const lm::ngram::ChartState &State() const { return back_->State(); } bool RightFull() const { return back_->RightFull(); } - Score Bound() const { return Complete() ? back_->End().GetScore() : (*back_)[index_].Bound(); } + Score Bound() const { return Complete() ? back_->Bound() : (*back_)[index_].Bound(); } unsigned char Length() const { return back_->Length(); } @@ -121,7 +124,7 @@ class PartialVertex { return ret; } - const Final End() const { + const History End() const { return back_->End(); } @@ -130,16 +133,18 @@ class PartialVertex { unsigned int index_; }; +template class VertexGenerator; + class Vertex { public: Vertex() {} PartialVertex RootPartial() const { return PartialVertex(root_); } - const Final BestChild() const { + const History BestChild() const { PartialVertex top(RootPartial()); if (top.Empty()) { - return Final(); + return History(); } else { PartialVertex continuation; while (!top.Complete()) { @@ -150,8 +155,8 @@ class Vertex { } private: - friend class VertexGenerator; - + template friend class VertexGenerator; + template friend class RootVertexGenerator; VertexNode root_; }; diff --git a/klm/search/vertex_generator.cc b/klm/search/vertex_generator.cc index 0945fe55..73139ffc 100644 --- a/klm/search/vertex_generator.cc +++ b/klm/search/vertex_generator.cc @@ -4,26 +4,18 @@ #include "search/context.hh" #include "search/edge.hh" +#include +#include + #include namespace search { -VertexGenerator::VertexGenerator(ContextBase &context, Vertex &gen) : context_(context), gen_(gen) { - gen.root_.InitRoot(); -} - +#if BOOST_VERSION > 104200 namespace { const uint64_t kCompleteAdd = static_cast(-1); -// Parallel structure to VertexNode. -struct Trie { - Trie() : under(NULL) {} - - VertexNode *under; - boost::unordered_map extend; -}; - Trie &FindOrInsert(ContextBase &context, Trie &node, uint64_t added, const lm::ngram::ChartState &state, unsigned char left, bool left_full, unsigned char right, bool right_full) { Trie &next = node.extend[added]; if (!next.under) { @@ -39,19 +31,10 @@ Trie &FindOrInsert(ContextBase &context, Trie &node, uint64_t added, const lm::n return next; } -void CompleteTransition(ContextBase &context, Trie &starter, PartialEdge partial) { - Final final(context.FinalPool(), partial.GetScore(), partial.GetArity(), partial.GetNote()); - Final *child_out = final.Children(); - const PartialVertex *part = partial.NT(); - const PartialVertex *const part_end_loop = part + partial.GetArity(); - for (; part != part_end_loop; ++part, ++child_out) - *child_out = part->End(); - - starter.under->SetEnd(final); -} +} // namespace -void AddHypothesis(ContextBase &context, Trie &root, PartialEdge partial) { - const lm::ngram::ChartState &state = partial.CompletedState(); +void AddHypothesis(ContextBase &context, Trie &root, const NBestComplete &end) { + const lm::ngram::ChartState &state = *end.state; unsigned char left = 0, right = 0; Trie *node = &root; @@ -77,18 +60,9 @@ void AddHypothesis(ContextBase &context, Trie &root, PartialEdge partial) { } node = &FindOrInsert(context, *node, kCompleteAdd - state.left.full, state, state.left.length, true, state.right.length, true); - CompleteTransition(context, *node, partial); + node->under->SetEnd(end.history, end.score); } -} // namespace - -void VertexGenerator::FinishedSearch() { - Trie root; - root.under = &gen_.root_; - for (Existing::const_iterator i(existing_.begin()); i != existing_.end(); ++i) { - AddHypothesis(context_, root, i->second); - } - root.under->SortAndSet(context_, NULL); -} +#endif // BOOST_VERSION } // namespace search diff --git a/klm/search/vertex_generator.hh b/klm/search/vertex_generator.hh index 60e86112..da563c2d 100644 --- a/klm/search/vertex_generator.hh +++ b/klm/search/vertex_generator.hh @@ -2,9 +2,11 @@ #define SEARCH_VERTEX_GENERATOR__ #include "search/edge.hh" +#include "search/types.hh" #include "search/vertex.hh" #include +#include namespace lm { namespace ngram { @@ -15,21 +17,44 @@ class ChartState; namespace search { class ContextBase; -class Final; -class VertexGenerator { +#if BOOST_VERSION > 104200 +// Parallel structure to VertexNode. +struct Trie { + Trie() : under(NULL) {} + + VertexNode *under; + boost::unordered_map extend; +}; + +void AddHypothesis(ContextBase &context, Trie &root, const NBestComplete &end); + +#endif // BOOST_VERSION + +// Output makes the single-best or n-best list. +template class VertexGenerator { public: - VertexGenerator(ContextBase &context, Vertex &gen); + VertexGenerator(ContextBase &context, Vertex &gen, Output &nbest) : context_(context), gen_(gen), nbest_(nbest) { + gen.root_.InitRoot(); + } void NewHypothesis(PartialEdge partial) { - const lm::ngram::ChartState &state = partial.CompletedState(); - std::pair ret(existing_.insert(std::make_pair(hash_value(state), partial))); - if (!ret.second && ret.first->second < partial) { - ret.first->second = partial; - } + nbest_.Add(existing_[hash_value(partial.CompletedState())], partial); } - void FinishedSearch(); + void FinishedSearch() { +#if BOOST_VERSION > 104200 + Trie root; + root.under = &gen_.root_; + for (typename Existing::iterator i(existing_.begin()); i != existing_.end(); ++i) { + AddHypothesis(context_, root, nbest_.Complete(i->second)); + } + existing_.clear(); + root.under->SortAndSet(context_); +#else + UTIL_THROW(util::Exception, "Upgrade Boost to >= 1.42.0 to use incremental search."); +#endif + } const Vertex &Generating() const { return gen_; } @@ -38,8 +63,35 @@ class VertexGenerator { Vertex &gen_; - typedef boost::unordered_map Existing; + typedef boost::unordered_map Existing; Existing existing_; + + Output &nbest_; +}; + +// Special case for root vertex: everything should come together into the root +// node. In theory, this should happen naturally due to state collapsing with +// and . If that's the case, VertexGenerator is fine, though it will +// make one connection. +template class RootVertexGenerator { + public: + RootVertexGenerator(Vertex &gen, Output &out) : gen_(gen), out_(out) {} + + void NewHypothesis(PartialEdge partial) { + out_.Add(combine_, partial); + } + + void FinishedSearch() { + gen_.root_.InitRoot(); + NBestComplete completed(out_.Complete(combine_)); + gen_.root_.SetEnd(completed.history, completed.score); + } + + private: + Vertex &gen_; + + typename Output::Combine combine_; + Output &out_; }; } // namespace search diff --git a/klm/search/weights.cc b/klm/search/weights.cc deleted file mode 100644 index d65471ad..00000000 --- a/klm/search/weights.cc +++ /dev/null @@ -1,71 +0,0 @@ -#include "search/weights.hh" -#include "util/tokenize_piece.hh" - -#include - -namespace search { - -namespace { -struct Insert { - void operator()(boost::unordered_map &map, StringPiece name, search::Score score) const { - std::string copy(name.data(), name.size()); - map[copy] = score; - } -}; - -struct DotProduct { - search::Score total; - DotProduct() : total(0.0) {} - - void operator()(const boost::unordered_map &map, StringPiece name, search::Score score) { - boost::unordered_map::const_iterator i(FindStringPiece(map, name)); - if (i != map.end()) - total += score * i->second; - } -}; - -template void Parse(StringPiece text, Map &map, Op &op) { - for (util::TokenIter spaces(text, ' '); spaces; ++spaces) { - util::TokenIter equals(*spaces, '='); - UTIL_THROW_IF(!equals, WeightParseException, "Bad weight token " << *spaces); - StringPiece name(*equals); - UTIL_THROW_IF(!++equals, WeightParseException, "Bad weight token " << *spaces); - char *end; - // Assumes proper termination. - double value = std::strtod(equals->data(), &end); - UTIL_THROW_IF(end != equals->data() + equals->size(), WeightParseException, "Failed to parse weight" << *equals); - UTIL_THROW_IF(++equals, WeightParseException, "Too many equals in " << *spaces); - op(map, name, value); - } -} - -} // namespace - -Weights::Weights(StringPiece text) { - Insert op; - Parse(text, map_, op); - lm_ = Steal("LanguageModel"); - oov_ = Steal("OOV"); - word_penalty_ = Steal("WordPenalty"); -} - -Weights::Weights(Score lm, Score oov, Score word_penalty) : lm_(lm), oov_(oov), word_penalty_(word_penalty) {} - -search::Score Weights::DotNoLM(StringPiece text) const { - DotProduct dot; - Parse(text, map_, dot); - return dot.total; -} - -float Weights::Steal(const std::string &str) { - Map::iterator i(map_.find(str)); - if (i == map_.end()) { - return 0.0; - } else { - float ret = i->second; - map_.erase(i); - return ret; - } -} - -} // namespace search diff --git a/klm/search/weights.hh b/klm/search/weights.hh deleted file mode 100644 index df1c419f..00000000 --- a/klm/search/weights.hh +++ /dev/null @@ -1,52 +0,0 @@ -// For now, the individual features are not kept. -#ifndef SEARCH_WEIGHTS__ -#define SEARCH_WEIGHTS__ - -#include "search/types.hh" -#include "util/exception.hh" -#include "util/string_piece.hh" - -#include - -#include - -namespace search { - -class WeightParseException : public util::Exception { - public: - WeightParseException() {} - ~WeightParseException() throw() {} -}; - -class Weights { - public: - // Parses weights, sets lm_weight_, removes it from map_. - explicit Weights(StringPiece text); - - // Just the three scores we care about adding. - Weights(Score lm, Score oov, Score word_penalty); - - Score DotNoLM(StringPiece text) const; - - Score LM() const { return lm_; } - - Score OOV() const { return oov_; } - - Score WordPenalty() const { return word_penalty_; } - - // Mostly for testing. - const boost::unordered_map &GetMap() const { return map_; } - - private: - float Steal(const std::string &str); - - typedef boost::unordered_map Map; - - Map map_; - - Score lm_, oov_, word_penalty_; -}; - -} // namespace search - -#endif // SEARCH_WEIGHTS__ diff --git a/klm/search/weights_test.cc b/klm/search/weights_test.cc deleted file mode 100644 index 4811ff06..00000000 --- a/klm/search/weights_test.cc +++ /dev/null @@ -1,38 +0,0 @@ -#include "search/weights.hh" - -#define BOOST_TEST_MODULE WeightTest -#include -#include - -namespace search { -namespace { - -#define CHECK_WEIGHT(value, string) \ - i = parsed.find(string); \ - BOOST_REQUIRE(i != parsed.end()); \ - BOOST_CHECK_CLOSE((value), i->second, 0.001); - -BOOST_AUTO_TEST_CASE(parse) { - // These are not real feature weights. - Weights w("rarity=0 phrase-SGT=0 phrase-TGS=9.45117 lhsGrhs=0 lexical-SGT=2.33833 lexical-TGS=-28.3317 abstract?=0 LanguageModel=3 lexical?=1 glue?=5"); - const boost::unordered_map &parsed = w.GetMap(); - boost::unordered_map::const_iterator i; - CHECK_WEIGHT(0.0, "rarity"); - CHECK_WEIGHT(0.0, "phrase-SGT"); - CHECK_WEIGHT(9.45117, "phrase-TGS"); - CHECK_WEIGHT(2.33833, "lexical-SGT"); - BOOST_CHECK(parsed.end() == parsed.find("lm")); - BOOST_CHECK_CLOSE(3.0, w.LM(), 0.001); - CHECK_WEIGHT(-28.3317, "lexical-TGS"); - CHECK_WEIGHT(5.0, "glue?"); -} - -BOOST_AUTO_TEST_CASE(dot) { - Weights w("rarity=0 phrase-SGT=0 phrase-TGS=9.45117 lhsGrhs=0 lexical-SGT=2.33833 lexical-TGS=-28.3317 abstract?=0 LanguageModel=3 lexical?=1 glue?=5"); - BOOST_CHECK_CLOSE(9.45117 * 3.0, w.DotNoLM("phrase-TGS=3.0"), 0.001); - BOOST_CHECK_CLOSE(9.45117 * 3.0, w.DotNoLM("phrase-TGS=3.0 LanguageModel=10"), 0.001); - BOOST_CHECK_CLOSE(9.45117 * 3.0 + 28.3317 * 17.4, w.DotNoLM("rarity=5 phrase-TGS=3.0 LanguageModel=10 lexical-TGS=-17.4"), 0.001); -} - -} // namespace -} // namespace search -- cgit v1.2.3 From 59737f22fccb9c2ab8744a719f4dbb95eedf7943 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 14 Dec 2012 12:48:26 -0800 Subject: Updated kenlm --- configure.ac | 2 +- klm/lm/binary_format.cc | 21 +- klm/lm/config.cc | 1 + klm/lm/config.hh | 59 +++--- klm/lm/max_order.hh | 2 - klm/lm/model.cc | 30 +-- klm/lm/search_trie.cc | 47 ++--- klm/util/Makefile.am | 1 + klm/util/exception.hh | 8 +- klm/util/file.cc | 38 ++-- klm/util/file.hh | 8 +- klm/util/file_piece.cc | 66 ++----- klm/util/file_piece.hh | 41 ++-- klm/util/file_piece_test.cc | 4 +- klm/util/have.hh | 12 +- klm/util/joint_sort.hh | 4 +- klm/util/read_compressed.cc | 403 +++++++++++++++++++++++++++++++++++++++ klm/util/read_compressed.hh | 74 +++++++ klm/util/read_compressed_test.cc | 94 +++++++++ klm/util/scoped.hh | 65 ++++--- klm/util/string_piece.hh | 19 +- klm/util/tokenize_piece.hh | 14 +- 22 files changed, 781 insertions(+), 232 deletions(-) create mode 100644 klm/util/read_compressed.cc create mode 100644 klm/util/read_compressed.hh create mode 100644 klm/util/read_compressed_test.cc diff --git a/configure.ac b/configure.ac index f1b9d132..f4650ca4 100644 --- a/configure.ac +++ b/configure.ac @@ -87,7 +87,7 @@ AC_CHECK_HEADER(lzma.h, AC_PROG_INSTALL -CPPFLAGS="-DPIC -fPIC $CPPFLAGS -DHAVE_CONFIG_H" +CPPFLAGS="-DPIC -fPIC $CPPFLAGS -DHAVE_CONFIG_H -DKENLM_MAX_ORDER=6" # core cdec stuff AC_CONFIG_FILES([Makefile]) diff --git a/klm/lm/binary_format.cc b/klm/lm/binary_format.cc index efa67056..39c4a9b6 100644 --- a/klm/lm/binary_format.cc +++ b/klm/lm/binary_format.cc @@ -16,11 +16,11 @@ namespace ngram { namespace { const char kMagicBeforeVersion[] = "mmap lm http://kheafield.com/code format version"; const char kMagicBytes[] = "mmap lm http://kheafield.com/code format version 5\n\0"; -// This must be shorter than kMagicBytes and indicates an incomplete binary file (i.e. build failed). +// This must be shorter than kMagicBytes and indicates an incomplete binary file (i.e. build failed). const char kMagicIncomplete[] = "mmap lm http://kheafield.com/code incomplete\n"; const long int kMagicVersion = 5; -// Old binary files built on 32-bit machines have this header. +// Old binary files built on 32-bit machines have this header. // TODO: eliminate with next binary release. struct OldSanity { char magic[sizeof(kMagicBytes)]; @@ -39,7 +39,7 @@ struct OldSanity { }; -// Test values aligned to 8 bytes. +// Test values aligned to 8 bytes. struct Sanity { char magic[ALIGN8(sizeof(kMagicBytes))]; float zero_f, one_f, minus_half_f; @@ -101,7 +101,7 @@ uint8_t *SetupJustVocab(const Config &config, uint8_t order, std::size_t memory_ uint8_t *GrowForSearch(const Config &config, std::size_t vocab_pad, std::size_t memory_size, Backing &backing) { std::size_t adjusted_vocab = backing.vocab.size() + vocab_pad; if (config.write_mmap) { - // Grow the file to accomodate the search, using zeros. + // Grow the file to accomodate the search, using zeros. try { util::ResizeOrThrow(backing.file.get(), adjusted_vocab + memory_size); } catch (util::ErrnoException &e) { @@ -114,7 +114,7 @@ uint8_t *GrowForSearch(const Config &config, std::size_t vocab_pad, std::size_t return reinterpret_cast(backing.search.get()); } // mmap it now. - // We're skipping over the header and vocab for the search space mmap. mmap likes page aligned offsets, so some arithmetic to round the offset down. + // We're skipping over the header and vocab for the search space mmap. mmap likes page aligned offsets, so some arithmetic to round the offset down. std::size_t page_size = util::SizePage(); std::size_t alignment_cruft = adjusted_vocab % page_size; backing.search.reset(util::MapOrThrow(alignment_cruft + memory_size, true, util::kFileFlags, false, backing.file.get(), adjusted_vocab - alignment_cruft), alignment_cruft + memory_size, util::scoped_memory::MMAP_ALLOCATED); @@ -122,7 +122,7 @@ uint8_t *GrowForSearch(const Config &config, std::size_t vocab_pad, std::size_t } else { util::MapAnonymous(memory_size, backing.search); return reinterpret_cast(backing.search.get()); - } + } } void FinishFile(const Config &config, ModelType model_type, unsigned int search_version, const std::vector &counts, std::size_t vocab_pad, Backing &backing) { @@ -140,7 +140,7 @@ void FinishFile(const Config &config, ModelType model_type, unsigned int search_ util::FSyncOrThrow(backing.file.get()); break; } - // header and vocab share the same mmap. The header is written here because we know the counts. + // header and vocab share the same mmap. The header is written here because we know the counts. Parameters params = Parameters(); params.counts = counts; params.fixed.order = counts.size(); @@ -160,7 +160,7 @@ namespace detail { bool IsBinaryFormat(int fd) { const uint64_t size = util::SizeFile(fd); if (size == util::kBadSize || (size <= static_cast(sizeof(Sanity)))) return false; - // Try reading the header. + // Try reading the header. util::scoped_memory memory; try { util::MapRead(util::LAZY, fd, 0, sizeof(Sanity), memory); @@ -214,7 +214,7 @@ void SeekPastHeader(int fd, const Parameters ¶ms) { uint8_t *SetupBinary(const Config &config, const Parameters ¶ms, uint64_t memory_size, Backing &backing) { const uint64_t file_size = util::SizeFile(backing.file.get()); - // The header is smaller than a page, so we have to map the whole header as well. + // The header is smaller than a page, so we have to map the whole header as well. std::size_t total_map = util::CheckOverflow(TotalHeaderSize(params.counts.size()) + memory_size); if (file_size != util::kBadSize && static_cast(file_size) < total_map) UTIL_THROW(FormatLoadException, "Binary file has size " << file_size << " but the headers say it should be at least " << total_map); @@ -233,7 +233,8 @@ void ComplainAboutARPA(const Config &config, ModelType model_type) { if (config.write_mmap || !config.messages) return; if (config.arpa_complain == Config::ALL) { *config.messages << "Loading the LM will be faster if you build a binary file." << std::endl; - } else if (config.arpa_complain == Config::EXPENSIVE && model_type == TRIE_SORTED) { + } else if (config.arpa_complain == Config::EXPENSIVE && + (model_type == TRIE || model_type == QUANT_TRIE || model_type == ARRAY_TRIE || model_type == QUANT_ARRAY_TRIE)) { *config.messages << "Building " << kModelNames[model_type] << " from ARPA is expensive. Save time by building a binary format." << std::endl; } } diff --git a/klm/lm/config.cc b/klm/lm/config.cc index f9d988ca..9520c41c 100644 --- a/klm/lm/config.cc +++ b/klm/lm/config.cc @@ -6,6 +6,7 @@ namespace lm { namespace ngram { Config::Config() : + show_progress(true), messages(&std::cerr), enumerate_vocab(NULL), unknown_missing(COMPLAIN), diff --git a/klm/lm/config.hh b/klm/lm/config.hh index 739cee9c..0de7b7c6 100644 --- a/klm/lm/config.hh +++ b/klm/lm/config.hh @@ -11,46 +11,52 @@ /* Configuration for ngram model. Separate header to reduce pollution. */ namespace lm { - + class EnumerateVocab; namespace ngram { struct Config { - // EFFECTIVE FOR BOTH ARPA AND BINARY READS + // EFFECTIVE FOR BOTH ARPA AND BINARY READS + + // (default true) print progress bar to messages + bool show_progress; // Where to log messages including the progress bar. Set to NULL for // silence. std::ostream *messages; + std::ostream *ProgressMessages() const { + return show_progress ? messages : 0; + } + // This will be called with every string in the vocabulary. See // enumerate_vocab.hh for more detail. Config does not take ownership; you - // are still responsible for deleting it (or stack allocating). + // are still responsible for deleting it (or stack allocating). EnumerateVocab *enumerate_vocab; - // ONLY EFFECTIVE WHEN READING ARPA - // What to do when isn't in the provided model. + // What to do when isn't in the provided model. WarningAction unknown_missing; - // What to do when or is missing from the model. - // If THROW_UP, the exception will be of type util::SpecialWordMissingException. + // What to do when or is missing from the model. + // If THROW_UP, the exception will be of type util::SpecialWordMissingException. WarningAction sentence_marker_missing; // What to do with a positive log probability. For COMPLAIN and SILENT, map - // to 0. + // to 0. WarningAction positive_log_probability; - // The probability to substitute for if it's missing from the model. + // The probability to substitute for if it's missing from the model. // No effect if the model has or unknown_missing == THROW_UP. float unknown_missing_logprob; // Size multiplier for probing hash table. Must be > 1. Space is linear in // this. Time is probing_multiplier / (probing_multiplier - 1). No effect - // for sorted variant. + // for sorted variant. // If you find yourself setting this to a low number, consider using the - // TrieModel which has lower memory consumption. + // TrieModel which has lower memory consumption. float probing_multiplier; // Amount of memory to use for building. The actual memory usage will be @@ -58,10 +64,10 @@ struct Config { // models. std::size_t building_memory; - // Template for temporary directory appropriate for passing to mkdtemp. + // Template for temporary directory appropriate for passing to mkdtemp. // The characters XXXXXX are appended before passing to mkdtemp. Only // applies to trie. If NULL, defaults to write_mmap. If that's NULL, - // defaults to input file name. + // defaults to input file name. const char *temporary_directory_prefix; // Level of complaining to do when loading from ARPA instead of binary format. @@ -69,49 +75,46 @@ struct Config { ARPALoadComplain arpa_complain; // While loading an ARPA file, also write out this binary format file. Set - // to NULL to disable. + // to NULL to disable. const char *write_mmap; enum WriteMethod { - WRITE_MMAP, // Map the file directly. - WRITE_AFTER // Write after we're done. + WRITE_MMAP, // Map the file directly. + WRITE_AFTER // Write after we're done. }; WriteMethod write_method; - // Include the vocab in the binary file? Only effective if write_mmap != NULL. + // Include the vocab in the binary file? Only effective if write_mmap != NULL. bool include_vocab; - // Left rest options. Only used when the model includes rest costs. + // Left rest options. Only used when the model includes rest costs. enum RestFunction { REST_MAX, // Maximum of any score to the left - REST_LOWER, // Use lower-order files given below. + REST_LOWER, // Use lower-order files given below. }; RestFunction rest_function; - // Only used for REST_LOWER. + // Only used for REST_LOWER. std::vector rest_lower_files; - // Quantization options. Only effective for QuantTrieModel. One value is // reserved for each of prob and backoff, so 2^bits - 1 buckets will be used - // to quantize (and one of the remaining backoffs will be 0). + // to quantize (and one of the remaining backoffs will be 0). uint8_t prob_bits, backoff_bits; // Bhiksha compression (simple form). Only works with trie. uint8_t pointer_bhiksha_bits; - - + // ONLY EFFECTIVE WHEN READING BINARY - + // How to get the giant array into memory: lazy mmap, populate, read etc. - // See util/mmap.hh for details of MapMethod. + // See util/mmap.hh for details of MapMethod. util::LoadMethod load_method; - - // Set defaults. + // Set defaults. Config(); }; diff --git a/klm/lm/max_order.hh b/klm/lm/max_order.hh index ea0dea46..3eb97ccd 100644 --- a/klm/lm/max_order.hh +++ b/klm/lm/max_order.hh @@ -7,5 +7,3 @@ #ifndef KENLM_ORDER_MESSAGE #define KENLM_ORDER_MESSAGE "If your build system supports changing KENLM_MAX_ORDER, change it there and recompile. In the KenLM tarball or Moses, use e.g. `bjam --max-kenlm-order=6 -a'. Otherwise, edit lm/max_order.hh." #endif - -#define KENLM_MAX_ORDER 5 diff --git a/klm/lm/model.cc b/klm/lm/model.cc index fc61efee..a40fd2fb 100644 --- a/klm/lm/model.cc +++ b/klm/lm/model.cc @@ -37,7 +37,7 @@ template void GenericModel GenericModel::GenericModel(const char *file, const Config &config) { LoadLM(file, config, *this); - // g++ prints warnings unless these are fully initialized. + // g++ prints warnings unless these are fully initialized. State begin_sentence = State(); begin_sentence.length = 1; begin_sentence.words[0] = vocab_.BeginSentence(); @@ -69,8 +69,8 @@ template void GenericModel void GenericModel::InitializeFromARPA(const char *file, const Config &config) { - // Backing file is the ARPA. Steal it so we can make the backing file the mmap output if any. - util::FilePiece f(backing_.file.release(), file, config.messages); + // Backing file is the ARPA. Steal it so we can make the backing file the mmap output if any. + util::FilePiece f(backing_.file.release(), file, config.ProgressMessages()); try { std::vector counts; // File counts do not include pruned trigrams that extend to quadgrams etc. These will be fixed by search_. @@ -80,7 +80,7 @@ template void GenericModel 1.0"); std::size_t vocab_size = util::CheckOverflow(VocabularyT::Size(counts[0], config)); - // Setup the binary file for writing the vocab lookup table. The search_ is responsible for growing the binary file to its needs. + // Setup the binary file for writing the vocab lookup table. The search_ is responsible for growing the binary file to its needs. vocab_.SetupMemory(SetupJustVocab(config, counts.size(), vocab_size, backing_), vocab_size, counts[0], config); if (config.write_mmap) { @@ -95,7 +95,7 @@ template void GenericModel FullScoreReturn GenericModel void GenericModel::GetState(const WordIndex *context_rbegin, const WordIndex *context_rend, State &out_state) const { - // Generate a state from context. + // Generate a state from context. context_rend = std::min(context_rend, context_rbegin + P::Order() - 1); if (context_rend == context_rbegin) { out_state.length = 0; @@ -191,7 +191,7 @@ template FullScoreReturn GenericModel FullScoreReturn GenericModel FullScoreReturn GenericModel(out_state.length) - 1; @@ -217,10 +217,10 @@ void CopyRemainingHistory(const WordIndex *from, State &out_state) { } } // namespace -/* Ugly optimized function. Produce a score excluding backoff. - * The search goes in increasing order of ngram length. +/* Ugly optimized function. Produce a score excluding backoff. + * The search goes in increasing order of ngram length. * Context goes backward, so context_begin is the word immediately preceeding - * new_word. + * new_word. */ template FullScoreReturn GenericModel::ScoreExceptBackoff( const WordIndex *const context_rbegin, @@ -229,7 +229,7 @@ template FullScoreReturn GenericModel FullScoreReturn GenericModel(current_); const unsigned char order = (entry_size_ - sizeof(ProbPointer)) / sizeof(WordIndex); for (reader.Rewind(); reader && (current_ != allocated_); ) { @@ -109,7 +109,7 @@ class BackoffMessages { ++reader; break; case 1: - // Message but nobody to receive it. Write it down at the beginning of the buffer so we can inform this blank that it extends. + // Message but nobody to receive it. Write it down at the beginning of the buffer so we can inform this blank that it extends. for (const WordIndex *w = reinterpret_cast(current_); w != reinterpret_cast(current_) + order; ++w, ++extend_out) *extend_out = *w; current_ += entry_size_; break; @@ -126,7 +126,7 @@ class BackoffMessages { break; } } - // Now this is a list of blanks that extend right. + // Now this is a list of blanks that extend right. entry_size_ = sizeof(WordIndex) * order; Resize(sizeof(WordIndex) * (extend_out - (const WordIndex*)backing_.get())); current_ = (uint8_t*)backing_.get(); @@ -153,7 +153,7 @@ class BackoffMessages { private: void FinishedAdding() { Resize(current_ - (uint8_t*)backing_.get()); - // Sort requests in same order as files. + // Sort requests in same order as files. std::sort( util::SizedIterator(util::SizedProxy(backing_.get(), entry_size_)), util::SizedIterator(util::SizedProxy(current_, entry_size_)), @@ -220,7 +220,7 @@ class SRISucks { } private: - // This used to be one array. Then I needed to separate it by order for quantization to work. + // This used to be one array. Then I needed to separate it by order for quantization to work. std::vector values_[KENLM_MAX_ORDER - 1]; BackoffMessages messages_[KENLM_MAX_ORDER - 1]; @@ -253,7 +253,7 @@ class FindBlanks { ++counts_.back(); } - // Unigrams wrote one past. + // Unigrams wrote one past. void Cleanup() { --counts_[0]; } @@ -270,15 +270,15 @@ class FindBlanks { SRISucks &sri_; }; -// Phase to actually write n-grams to the trie. +// Phase to actually write n-grams to the trie. template class WriteEntries { public: - WriteEntries(RecordReader *contexts, const Quant &quant, UnigramValue *unigrams, BitPackedMiddle *middle, BitPackedLongest &longest, unsigned char order, SRISucks &sri) : + WriteEntries(RecordReader *contexts, const Quant &quant, UnigramValue *unigrams, BitPackedMiddle *middle, BitPackedLongest &longest, unsigned char order, SRISucks &sri) : contexts_(contexts), quant_(quant), unigrams_(unigrams), middle_(middle), - longest_(longest), + longest_(longest), bigram_pack_((order == 2) ? static_cast(longest_) : static_cast(*middle_)), order_(order), sri_(sri) {} @@ -328,7 +328,7 @@ struct Gram { const WordIndex *begin, *end; - // For queue, this is the direction we want. + // For queue, this is the direction we want. bool operator<(const Gram &other) const { return std::lexicographical_compare(other.begin, other.end, begin, end); } @@ -353,7 +353,7 @@ template class BlankManager { been_length_ = length; return; } - // There are blanks to insert starting with order blank. + // There are blanks to insert starting with order blank. unsigned char blank = cur - to + 1; UTIL_THROW_IF(blank == 1, FormatLoadException, "Missing a unigram that appears as context."); const float *lower_basis; @@ -363,7 +363,7 @@ template class BlankManager { assert(*lower_basis != kBadProb); doing_.MiddleBlank(blank, to, based_on, *lower_basis); *pre = *cur; - // Mark that the probability is a blank so it shouldn't be used as the basis for a later n-gram. + // Mark that the probability is a blank so it shouldn't be used as the basis for a later n-gram. basis_[blank - 1] = kBadProb; } *pre = *cur; @@ -377,7 +377,7 @@ template class BlankManager { unsigned char been_length_; float basis_[KENLM_MAX_ORDER]; - + Doing &doing_; }; @@ -451,7 +451,7 @@ template void TrainProbQuantizer(uint8_t order, uint64_t count, Re } void PopulateUnigramWeights(FILE *file, WordIndex unigram_count, RecordReader &contexts, UnigramValue *unigrams) { - // Fill unigram probabilities. + // Fill unigram probabilities. try { rewind(file); for (WordIndex i = 0; i < unigram_count; ++i) { @@ -486,7 +486,7 @@ template void BuildTrie(SortedFiles &files, std::ve util::scoped_memory unigrams; MapRead(util::POPULATE_OR_READ, unigram_fd.get(), 0, counts[0] * sizeof(ProbBackoff), unigrams); FindBlanks finder(counts.size(), reinterpret_cast(unigrams.get()), sri); - RecursiveInsert(counts.size(), counts[0], inputs, config.messages, "Identifying n-grams omitted by SRI", finder); + RecursiveInsert(counts.size(), counts[0], inputs, config.ProgressMessages(), "Identifying n-grams omitted by SRI", finder); fixed_counts = finder.Counts(); } unigram_file.reset(util::FDOpenOrThrow(unigram_fd)); @@ -504,7 +504,8 @@ template void BuildTrie(SortedFiles &files, std::ve inputs[i-2].Rewind(); } if (Quant::kTrain) { - util::ErsatzProgress progress(std::accumulate(counts.begin() + 1, counts.end(), 0), config.messages, "Quantizing"); + util::ErsatzProgress progress(std::accumulate(counts.begin() + 1, counts.end(), 0), + config.ProgressMessages(), "Quantizing"); for (unsigned char i = 2; i < counts.size(); ++i) { TrainQuantizer(i, counts[i-1], sri.Values(i), inputs[i-2], progress, quant); } @@ -519,13 +520,13 @@ template void BuildTrie(SortedFiles &files, std::ve for (unsigned char i = 2; i <= counts.size(); ++i) { inputs[i-2].Rewind(); } - // Fill entries except unigram probabilities. + // Fill entries except unigram probabilities. { WriteEntries writer(contexts, quant, unigrams, out.middle_begin_, out.longest_, counts.size(), sri); - RecursiveInsert(counts.size(), counts[0], inputs, config.messages, "Writing trie", writer); + RecursiveInsert(counts.size(), counts[0], inputs, config.ProgressMessages(), "Writing trie", writer); } - // Do not disable this error message or else too little state will be returned. Both WriteEntries::Middle and returning state based on found n-grams will need to be fixed to handle this situation. + // Do not disable this error message or else too little state will be returned. Both WriteEntries::Middle and returning state based on found n-grams will need to be fixed to handle this situation. for (unsigned char order = 2; order <= counts.size(); ++order) { const RecordReader &context = contexts[order - 2]; if (context) { @@ -541,13 +542,13 @@ template void BuildTrie(SortedFiles &files, std::ve } /* Set ending offsets so the last entry will be sized properly */ - // Last entry for unigrams was already set. + // Last entry for unigrams was already set. if (out.middle_begin_ != out.middle_end_) { for (typename TrieSearch::Middle *i = out.middle_begin_; i != out.middle_end_ - 1; ++i) { i->FinishedLoading((i+1)->InsertIndex(), config); } (out.middle_end_ - 1)->FinishedLoading(out.longest_.InsertIndex(), config); - } + } } template uint8_t *TrieSearch::SetupMemory(uint8_t *start, const std::vector &counts, const Config &config) { @@ -595,7 +596,7 @@ template void TrieSearch::Initializ } else { temporary_prefix = file; } - // At least 1MB sorting memory. + // At least 1MB sorting memory. SortedFiles sorted(config, f, counts, std::max(config.building_memory, 1048576), temporary_prefix, vocab); BuildTrie(sorted, counts, config, *this, quant_, vocab, backing); diff --git a/klm/util/Makefile.am b/klm/util/Makefile.am index 5306850f..a676bdb3 100644 --- a/klm/util/Makefile.am +++ b/klm/util/Makefile.am @@ -27,6 +27,7 @@ libklm_util_a_SOURCES = \ mmap.cc \ murmur_hash.cc \ pool.cc \ + read_compressed.cc \ string_piece.cc \ usage.cc diff --git a/klm/util/exception.hh b/klm/util/exception.hh index 053a850b..0165a7a3 100644 --- a/klm/util/exception.hh +++ b/klm/util/exception.hh @@ -87,8 +87,14 @@ template typename Except::template ExceptionTag= 3 +#define UTIL_UNLIKELY(x) __builtin_expect (!!(x), 0) +#else +#define UTIL_UNLIKELY(x) (x) +#endif + #define UTIL_THROW_IF(Condition, Exception, Modify) do { \ - if (Condition) { \ + if (UTIL_UNLIKELY(Condition)) { \ Exception UTIL_e; \ UTIL_SET_LOCATION(UTIL_e, #Exception, #Condition); \ UTIL_e << Modify; \ diff --git a/klm/util/file.cc b/klm/util/file.cc index 6bf879ac..b9a77cf9 100644 --- a/klm/util/file.cc +++ b/klm/util/file.cc @@ -15,6 +15,8 @@ #if defined(_WIN32) || defined(_WIN64) #include #include +#include +#include #else #include #endif @@ -48,7 +50,7 @@ int OpenReadOrThrow(const char *name) { int CreateOrThrow(const char *name) { int ret; #if defined(_WIN32) || defined(_WIN64) - UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name); + UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR | _O_BINARY, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name); #else UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name); #endif @@ -74,16 +76,22 @@ void ResizeOrThrow(int fd, uint64_t to) { #endif } -#ifdef WIN32 -typedef int ssize_t; +std::size_t PartialRead(int fd, void *to, std::size_t amount) { +#if defined(_WIN32) || defined(_WIN64) + amount = min(static_cast(INT_MAX), amount); + int ret = _read(fd, to, amount); +#else + ssize_t ret = read(fd, to, amount); #endif + UTIL_THROW_IF(ret < 0, ErrnoException, "Reading " << amount << " from fd " << fd << " failed."); + return static_cast(ret); +} void ReadOrThrow(int fd, void *to_void, std::size_t amount) { uint8_t *to = static_cast(to_void); while (amount) { - ssize_t ret = read(fd, to, amount); - UTIL_THROW_IF(ret == -1, ErrnoException, "Reading " << amount << " from fd " << fd << " failed."); - UTIL_THROW_IF(ret == 0, EndOfFileException, "Hit EOF in fd " << fd << " but there should be " << amount << " more bytes to read."); + std::size_t ret = PartialRead(fd, to, amount); + UTIL_THROW_IF(ret == 0, EndOfFileException, " in fd " << fd << " but there should be " << amount << " more bytes to read."); amount -= ret; to += ret; } @@ -93,8 +101,7 @@ std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) { uint8_t *to = static_cast(to_void); std::size_t remaining = amount; while (remaining) { - ssize_t ret = read(fd, to, remaining); - UTIL_THROW_IF(ret == -1, ErrnoException, "Reading " << remaining << " from fd " << fd << " failed."); + std::size_t ret = PartialRead(fd, to, remaining); if (!ret) return amount - remaining; remaining -= ret; to += ret; @@ -105,7 +112,11 @@ std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) { void WriteOrThrow(int fd, const void *data_void, std::size_t size) { const uint8_t *data = static_cast(data_void); while (size) { +#if defined(_WIN32) || defined(_WIN64) + int ret = write(fd, data, min(static_cast(INT_MAX), size)); +#else ssize_t ret = write(fd, data, size); +#endif if (ret < 1) UTIL_THROW(util::ErrnoException, "Write failed"); data += ret; size -= ret; @@ -114,7 +125,7 @@ void WriteOrThrow(int fd, const void *data_void, std::size_t size) { void WriteOrThrow(FILE *to, const void *data, std::size_t size) { assert(size); - if (1 != std::fwrite(data, size, 1, to)) UTIL_THROW(util::ErrnoException, "Short write; requested size " << size); + UTIL_THROW_IF(1 != std::fwrite(data, size, 1, to), util::ErrnoException, "Short write; requested size " << size); } void FSyncOrThrow(int fd) { @@ -149,14 +160,15 @@ void SeekEnd(int fd) { std::FILE *FDOpenOrThrow(scoped_fd &file) { std::FILE *ret = fdopen(file.get(), "r+b"); - if (!ret) UTIL_THROW(util::ErrnoException, "Could not fdopen"); + if (!ret) UTIL_THROW(util::ErrnoException, "Could not fdopen descriptor " << file.get()); file.release(); return ret; } -std::FILE *FOpenOrThrow(const char *path, const char *mode) { - std::FILE *ret; - UTIL_THROW_IF(!(ret = fopen(path, mode)), util::ErrnoException, "Could not fopen " << path << " for " << mode); +std::FILE *FDOpenReadOrThrow(scoped_fd &file) { + std::FILE *ret = fdopen(file.get(), "rb"); + if (!ret) UTIL_THROW(util::ErrnoException, "Could not fdopen descriptor " << file.get()); + file.release(); return ret; } diff --git a/klm/util/file.hh b/klm/util/file.hh index 185cb1f3..c24580d6 100644 --- a/klm/util/file.hh +++ b/klm/util/file.hh @@ -32,8 +32,6 @@ class scoped_fd { return ret; } - operator bool() { return fd_ != -1; } - private: int fd_; @@ -76,8 +74,9 @@ uint64_t SizeFile(int fd); void ResizeOrThrow(int fd, uint64_t to); +std::size_t PartialRead(int fd, void *to, std::size_t size); void ReadOrThrow(int fd, void *to, std::size_t size); -std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount); +std::size_t ReadOrEOF(int fd, void *to_void, std::size_t size); void WriteOrThrow(int fd, const void *data_void, std::size_t size); void WriteOrThrow(FILE *to, const void *data, std::size_t size); @@ -90,8 +89,7 @@ void AdvanceOrThrow(int fd, int64_t off); void SeekEnd(int fd); std::FILE *FDOpenOrThrow(scoped_fd &file); - -std::FILE *FOpenOrThrow(const char *path, const char *mode); +std::FILE *FDOpenReadOrThrow(scoped_fd &file); class TempMaker { public: diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index 280f438c..5a208eff 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -26,13 +25,6 @@ ParseNumberException::ParseNumberException(StringPiece value) throw() { *this << "Could not parse \"" << value << "\" into a number"; } -#ifdef HAVE_ZLIB -GZException::GZException(gzFile file) { - int num; - *this << gzerror(file, &num) << " from zlib"; -} -#endif // HAVE_ZLIB - // Sigh this is the only way I could come up with to do a _const_ bool. It has ' ', '\f', '\n', '\r', '\t', and '\v' (same as isspace on C locale). const bool kSpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; @@ -48,19 +40,7 @@ FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std: Initialize(name, show_progress, min_buffer); } -FilePiece::~FilePiece() { -#ifdef HAVE_ZLIB - if (gz_file_) { - // zlib took ownership - file_.release(); - int ret; - if (Z_OK != (ret = gzclose(gz_file_))) { - std::cerr << "could not close file " << file_name_ << " using zlib" << std::endl; - abort(); - } - } -#endif -} +FilePiece::~FilePiece() {} StringPiece FilePiece::ReadLine(char delim) { std::size_t skip = 0; @@ -95,9 +75,6 @@ unsigned long int FilePiece::ReadULong() { } void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer) { -#ifdef HAVE_ZLIB - gz_file_ = NULL; -#endif file_name_ = name; default_map_size_ = page_ * std::max((min_buffer / page_ + 1), 2); @@ -117,10 +94,7 @@ void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::s } Shift(); // gzip detect. - if ((position_end_ - position_) > 2 && *position_ == 0x1f && static_cast(*(position_ + 1)) == 0x8b) { -#ifndef HAVE_ZLIB - UTIL_THROW(GZException, "Looks like a gzip file but support was not compiled in."); -#endif + if ((position_end_ - position_) >= ReadCompressed::kMagicSize && ReadCompressed::DetectCompressedMagic(position_)) { if (!fallback_to_read_) { at_end_ = false; TransitionToRead(); @@ -197,7 +171,7 @@ void FilePiece::Shift() { if (fallback_to_read_) ReadShift(); for (last_space_ = position_end_ - 1; last_space_ >= position_; --last_space_) { - if (isspace(*last_space_)) break; + if (kSpaces[static_cast(*last_space_)]) break; } } @@ -248,17 +222,14 @@ void FilePiece::TransitionToRead() { position_ = data_.begin(); position_end_ = position_; -#ifdef HAVE_ZLIB - assert(!gz_file_); - gz_file_ = gzdopen(file_.get(), "r"); - UTIL_THROW_IF(!gz_file_, GZException, "zlib failed to open " << file_name_); -#endif + try { + fell_back_.Reset(file_.release()); + } catch (util::Exception &e) { + e << " in file " << file_name_; + throw; + } } -#ifdef WIN32 -typedef int ssize_t; -#endif - void FilePiece::ReadShift() { assert(fallback_to_read_); // Bytes [data_.begin(), position_) have been consumed. @@ -283,7 +254,7 @@ void FilePiece::ReadShift() { position_ = data_.begin(); position_end_ = position_ + valid_length; } else { - size_t moving = position_end_ - position_; + std::size_t moving = position_end_ - position_; memmove(data_.get(), position_, moving); position_ = data_.begin(); position_end_ = position_ + moving; @@ -291,20 +262,9 @@ void FilePiece::ReadShift() { } } - ssize_t read_return; -#ifdef HAVE_ZLIB - read_return = gzread(gz_file_, static_cast(data_.get()) + already_read, default_map_size_ - already_read); - if (read_return == -1) throw GZException(gz_file_); - if (total_size_ != kBadSize) { - // Just get the position, don't actually seek. Apparently this is how you do it. . . - off_t ret = lseek(file_.get(), 0, SEEK_CUR); - if (ret != -1) progress_.Set(ret); - } -#else - read_return = read(file_.get(), static_cast(data_.get()) + already_read, default_map_size_ - already_read); - UTIL_THROW_IF(read_return == -1, ErrnoException, "read failed"); - progress_.Set(mapped_offset_); -#endif + std::size_t read_return = fell_back_.Read(static_cast(data_.get()) + already_read, default_map_size_ - already_read); + progress_.Set(fell_back_.RawAmount()); + if (read_return == 0) { at_end_ = true; } diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh index af93d8aa..39bd1581 100644 --- a/klm/util/file_piece.hh +++ b/klm/util/file_piece.hh @@ -4,8 +4,8 @@ #include "util/ersatz_progress.hh" #include "util/exception.hh" #include "util/file.hh" -#include "util/have.hh" #include "util/mmap.hh" +#include "util/read_compressed.hh" #include "util/string_piece.hh" #include @@ -13,10 +13,6 @@ #include -#ifdef HAVE_ZLIB -#include -#endif - namespace util { class ParseNumberException : public Exception { @@ -25,28 +21,19 @@ class ParseNumberException : public Exception { ~ParseNumberException() throw() {} }; -class GZException : public Exception { - public: -#ifdef HAVE_ZLIB - explicit GZException(gzFile file); -#endif - GZException() throw() {} - ~GZException() throw() {} -}; - extern const bool kSpaces[256]; -// Memory backing the returned StringPiece may vanish on the next call. +// Memory backing the returned StringPiece may vanish on the next call. class FilePiece { public: - // 32 MB default. - explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 33554432); - // Takes ownership of fd. name is used for messages. - explicit FilePiece(int fd, const char *name, std::ostream *show_progress = NULL, std::size_t min_buffer = 33554432); + // 1 MB default. + explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576); + // Takes ownership of fd. name is used for messages. + explicit FilePiece(int fd, const char *name, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576); ~FilePiece(); - - char get() { + + char get() { if (position_ == position_end_) { Shift(); if (at_end_) throw EndOfFileException(); @@ -54,14 +41,14 @@ class FilePiece { return *(position_++); } - // Leaves the delimiter, if any, to be returned by get(). Delimiters defined by isspace(). + // Leaves the delimiter, if any, to be returned by get(). Delimiters defined by isspace(). StringPiece ReadDelimited(const bool *delim = kSpaces) { SkipSpaces(delim); return Consume(FindDelimiterOrEOF(delim)); } // Unlike ReadDelimited, this includes leading spaces and consumes the delimiter. - // It is similar to getline in that way. + // It is similar to getline in that way. StringPiece ReadLine(char delim = '\n'); float ReadFloat(); @@ -69,7 +56,7 @@ class FilePiece { long int ReadLong(); unsigned long int ReadULong(); - // Skip spaces defined by isspace. + // Skip spaces defined by isspace. void SkipSpaces(const bool *delim = kSpaces) { for (; ; ++position_) { if (position_ == position_end_) Shift(); @@ -82,7 +69,7 @@ class FilePiece { } const std::string &FileName() const { return file_name_; } - + private: void Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer); @@ -122,9 +109,7 @@ class FilePiece { std::string file_name_; -#ifdef HAVE_ZLIB - gzFile gz_file_; -#endif // HAVE_ZLIB + ReadCompressed fell_back_; }; } // namespace util diff --git a/klm/util/file_piece_test.cc b/klm/util/file_piece_test.cc index f912e18a..e79ece7a 100644 --- a/klm/util/file_piece_test.cc +++ b/klm/util/file_piece_test.cc @@ -38,7 +38,7 @@ BOOST_AUTO_TEST_CASE(MMapReadLine) { BOOST_CHECK_THROW(test.get(), EndOfFileException); } -#ifndef __APPLE__ +#if !defined(_WIN32) && !defined(_WIN64) && !defined(__APPLE__) /* Apple isn't happy with the popen, fileno, dup. And I don't want to * reimplement popen. This is an issue with the test. */ @@ -65,7 +65,7 @@ BOOST_AUTO_TEST_CASE(StreamReadLine) { BOOST_CHECK_THROW(test.get(), EndOfFileException); BOOST_REQUIRE(!pclose(catter)); } -#endif // __APPLE__ +#endif #ifdef HAVE_ZLIB diff --git a/klm/util/have.hh b/klm/util/have.hh index b8181e99..1523c0c5 100644 --- a/klm/util/have.hh +++ b/klm/util/have.hh @@ -2,22 +2,12 @@ #ifndef UTIL_HAVE__ #define UTIL_HAVE__ -#ifndef HAVE_ZLIB -#if !defined(_WIN32) && !defined(_WIN64) -#define HAVE_ZLIB -#endif -#endif - #ifndef HAVE_ICU //#define HAVE_ICU #endif #ifndef HAVE_BOOST -#define HAVE_BOOST -#endif - -#ifndef HAVE_THREADS -//#define HAVE_THREADS +//#define HAVE_BOOST #endif #endif // UTIL_HAVE__ diff --git a/klm/util/joint_sort.hh b/klm/util/joint_sort.hh index cf3d8432..1b43ddcf 100644 --- a/klm/util/joint_sort.hh +++ b/klm/util/joint_sort.hh @@ -60,7 +60,7 @@ template class JointProxy { JointProxy(const KeyIter &key_iter, const ValueIter &value_iter) : inner_(key_iter, value_iter) {} JointProxy(const JointProxy &other) : inner_(other.inner_) {} - operator const value_type() const { + operator value_type() const { value_type ret; ret.key = *inner_.key_; ret.value = *inner_.value_; @@ -121,7 +121,7 @@ template class LessWrapper : public std::binary_functi template class PairedIterator : public ProxyIterator > { public: - PairedIterator(const KeyIter &key, const ValueIter &value) : + PairedIterator(const KeyIter &key, const ValueIter &value) : ProxyIterator >(detail::JointProxy(key, value)) {} }; diff --git a/klm/util/read_compressed.cc b/klm/util/read_compressed.cc new file mode 100644 index 00000000..4ec94c4e --- /dev/null +++ b/klm/util/read_compressed.cc @@ -0,0 +1,403 @@ +#include "util/read_compressed.hh" + +#include "util/file.hh" +#include "util/have.hh" +#include "util/scoped.hh" + +#include +#include + +#include +#include +#include +#include + +#ifdef HAVE_ZLIB +#include +#endif + +#ifdef HAVE_BZLIB +#include +#endif + +#ifdef HAVE_XZLIB +#include +#endif + +namespace util { + +CompressedException::CompressedException() throw() {} +CompressedException::~CompressedException() throw() {} + +GZException::GZException() throw() {} +GZException::~GZException() throw() {} + +BZException::BZException() throw() {} +BZException::~BZException() throw() {} + +XZException::XZException() throw() {} +XZException::~XZException() throw() {} + +class ReadBase { + public: + virtual ~ReadBase() {} + + virtual std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) = 0; + + protected: + static void ReplaceThis(ReadBase *with, ReadCompressed &thunk) { + thunk.internal_.reset(with); + } + + static uint64_t &ReadCount(ReadCompressed &thunk) { + return thunk.raw_amount_; + } +}; + +namespace { + +// Completed file that other classes can thunk to. +class Complete : public ReadBase { + public: + std::size_t Read(void *, std::size_t, ReadCompressed &) { + return 0; + } +}; + +class Uncompressed : public ReadBase { + public: + explicit Uncompressed(int fd) : fd_(fd) {} + + std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) { + std::size_t got = PartialRead(fd_.get(), to, amount); + ReadCount(thunk) += got; + return got; + } + + private: + scoped_fd fd_; +}; + +class UncompressedWithHeader : public ReadBase { + public: + UncompressedWithHeader(int fd, void *already_data, std::size_t already_size) : fd_(fd) { + assert(already_size); + buf_.reset(malloc(already_size)); + if (!buf_.get()) throw std::bad_alloc(); + memcpy(buf_.get(), already_data, already_size); + remain_ = static_cast(buf_.get()); + end_ = remain_ + already_size; + } + + std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) { + assert(buf_.get()); + std::size_t sending = std::min(amount, end_ - remain_); + memcpy(to, remain_, sending); + remain_ += sending; + if (remain_ == end_) { + ReplaceThis(new Uncompressed(fd_.release()), thunk); + } + return sending; + } + + private: + scoped_malloc buf_; + uint8_t *remain_; + uint8_t *end_; + + scoped_fd fd_; +}; + +#ifdef HAVE_ZLIB +class GZip : public ReadBase { + private: + static const std::size_t kInputBuffer = 16384; + public: + GZip(int fd, void *already_data, std::size_t already_size) + : file_(fd), in_buffer_(malloc(kInputBuffer)) { + if (!in_buffer_.get()) throw std::bad_alloc(); + assert(already_size < kInputBuffer); + if (already_size) { + memcpy(in_buffer_.get(), already_data, already_size); + stream_.next_in = static_cast(in_buffer_.get()); + stream_.avail_in = already_size; + stream_.avail_in += ReadOrEOF(file_.get(), static_cast(in_buffer_.get()) + already_size, kInputBuffer - already_size); + } else { + stream_.avail_in = 0; + } + stream_.zalloc = Z_NULL; + stream_.zfree = Z_NULL; + stream_.opaque = Z_NULL; + stream_.msg = NULL; + // 32 for zlib and gzip decoding with automatic header detection. + // 15 for maximum window size. + UTIL_THROW_IF(Z_OK != inflateInit2(&stream_, 32 + 15), GZException, "Failed to initialize zlib."); + } + + ~GZip() { + if (Z_OK != inflateEnd(&stream_)) { + std::cerr << "zlib could not close properly." << std::endl; + abort(); + } + } + + std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) { + if (amount == 0) return 0; + stream_.next_out = static_cast(to); + stream_.avail_out = std::min(std::numeric_limits::max(), amount); + do { + if (!stream_.avail_in) ReadInput(thunk); + int result = inflate(&stream_, 0); + switch (result) { + case Z_OK: + break; + case Z_STREAM_END: + { + std::size_t ret = static_cast(stream_.next_out) - static_cast(to); + ReplaceThis(new Complete(), thunk); + return ret; + } + case Z_ERRNO: + UTIL_THROW(ErrnoException, "zlib error"); + default: + UTIL_THROW(GZException, "zlib encountered " << (stream_.msg ? stream_.msg : "an error ") << " code " << result); + } + } while (stream_.next_out == to); + return static_cast(stream_.next_out) - static_cast(to); + } + + private: + void ReadInput(ReadCompressed &thunk) { + assert(!stream_.avail_in); + stream_.next_in = static_cast(in_buffer_.get()); + stream_.avail_in = ReadOrEOF(file_.get(), in_buffer_.get(), kInputBuffer); + ReadCount(thunk) += stream_.avail_in; + } + + scoped_fd file_; + scoped_malloc in_buffer_; + z_stream stream_; +}; +#endif // HAVE_ZLIB + +#ifdef HAVE_BZLIB +class BZip : public ReadBase { + public: + explicit BZip(int fd, void *already_data, std::size_t already_size) { + scoped_fd hold(fd); + closer_.reset(FDOpenReadOrThrow(hold)); + int bzerror = BZ_OK; + file_ = BZ2_bzReadOpen(&bzerror, closer_.get(), 0, 0, already_data, already_size); + switch (bzerror) { + case BZ_OK: + return; + case BZ_CONFIG_ERROR: + UTIL_THROW(BZException, "Looks like bzip2 was miscompiled."); + case BZ_PARAM_ERROR: + UTIL_THROW(BZException, "Parameter error"); + case BZ_IO_ERROR: + UTIL_THROW(BZException, "IO error reading file"); + case BZ_MEM_ERROR: + throw std::bad_alloc(); + } + } + + ~BZip() { + int bzerror = BZ_OK; + BZ2_bzReadClose(&bzerror, file_); + if (bzerror != BZ_OK) { + std::cerr << "bz2 readclose error" << std::endl; + abort(); + } + } + + std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) { + int bzerror = BZ_OK; + int ret = BZ2_bzRead(&bzerror, file_, to, std::min(static_cast(INT_MAX), amount)); + long pos; + switch (bzerror) { + case BZ_STREAM_END: + pos = ftell(closer_.get()); + if (pos != -1) ReadCount(thunk) = pos; + ReplaceThis(new Complete(), thunk); + return ret; + case BZ_OK: + pos = ftell(closer_.get()); + if (pos != -1) ReadCount(thunk) = pos; + return ret; + default: + UTIL_THROW(BZException, "bzip2 error " << BZ2_bzerror(file_, &bzerror) << " code " << bzerror); + } + } + + private: + scoped_FILE closer_; + BZFILE *file_; +}; +#endif // HAVE_BZLIB + +#ifdef HAVE_XZLIB +class XZip : public ReadBase { + private: + static const std::size_t kInputBuffer = 16384; + public: + XZip(int fd, void *already_data, std::size_t already_size) + : file_(fd), in_buffer_(malloc(kInputBuffer)), stream_(), action_(LZMA_RUN) { + if (!in_buffer_.get()) throw std::bad_alloc(); + assert(already_size < kInputBuffer); + if (already_size) { + memcpy(in_buffer_.get(), already_data, already_size); + stream_.next_in = static_cast(in_buffer_.get()); + stream_.avail_in = already_size; + stream_.avail_in += ReadOrEOF(file_.get(), static_cast(in_buffer_.get()) + already_size, kInputBuffer - already_size); + } else { + stream_.avail_in = 0; + } + stream_.allocator = NULL; + lzma_ret ret = lzma_stream_decoder(&stream_, UINT64_MAX, LZMA_CONCATENATED); + switch (ret) { + case LZMA_OK: + break; + case LZMA_MEM_ERROR: + UTIL_THROW(ErrnoException, "xz open error"); + default: + UTIL_THROW(XZException, "xz error code " << ret); + } + } + + ~XZip() { + lzma_end(&stream_); + } + + std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) { + if (amount == 0) return 0; + stream_.next_out = static_cast(to); + stream_.avail_out = amount; + do { + if (!stream_.avail_in) ReadInput(thunk); + lzma_ret status = lzma_code(&stream_, action_); + switch (status) { + case LZMA_OK: + break; + case LZMA_STREAM_END: + UTIL_THROW_IF(action_ != LZMA_FINISH, XZException, "Input not finished yet."); + { + std::size_t ret = static_cast(stream_.next_out) - static_cast(to); + ReplaceThis(new Complete(), thunk); + return ret; + } + case LZMA_MEM_ERROR: + throw std::bad_alloc(); + case LZMA_FORMAT_ERROR: + UTIL_THROW(XZException, "xzlib says file format not recognized"); + case LZMA_OPTIONS_ERROR: + UTIL_THROW(XZException, "xzlib says unsupported compression options"); + case LZMA_DATA_ERROR: + UTIL_THROW(XZException, "xzlib says this file is corrupt"); + case LZMA_BUF_ERROR: + UTIL_THROW(XZException, "xzlib says unexpected end of input"); + default: + UTIL_THROW(XZException, "unrecognized xzlib error " << status); + } + } while (stream_.next_out == to); + return static_cast(stream_.next_out) - static_cast(to); + } + + private: + void ReadInput(ReadCompressed &thunk) { + assert(!stream_.avail_in); + stream_.next_in = static_cast(in_buffer_.get()); + stream_.avail_in = ReadOrEOF(file_.get(), in_buffer_.get(), kInputBuffer); + if (!stream_.avail_in) action_ = LZMA_FINISH; + ReadCount(thunk) += stream_.avail_in; + } + + scoped_fd file_; + scoped_malloc in_buffer_; + lzma_stream stream_; + + lzma_action action_; +}; +#endif // HAVE_XZLIB + +enum MagicResult { + UNKNOWN, GZIP, BZIP, XZIP +}; + +MagicResult DetectMagic(const void *from_void) { + const uint8_t *header = static_cast(from_void); + if (header[0] == 0x1f && header[1] == 0x8b) { + return GZIP; + } + if (header[0] == 'B' && header[1] == 'Z') { + return BZIP; + } + const uint8_t xzmagic[6] = { 0xFD, '7', 'z', 'X', 'Z', 0x00 }; + if (!memcmp(header, xzmagic, 6)) { + return XZIP; + } + return UNKNOWN; +} + +ReadBase *ReadFactory(int fd, uint64_t &raw_amount) { + scoped_fd hold(fd); + unsigned char header[ReadCompressed::kMagicSize]; + raw_amount = ReadOrEOF(fd, header, ReadCompressed::kMagicSize); + if (!raw_amount) + return new Uncompressed(hold.release()); + if (raw_amount != ReadCompressed::kMagicSize) + return new UncompressedWithHeader(hold.release(), header, raw_amount); + switch (DetectMagic(header)) { + case GZIP: +#ifdef HAVE_ZLIB + return new GZip(hold.release(), header, ReadCompressed::kMagicSize); +#else + UTIL_THROW(CompressedException, "This looks like a gzip file but gzip support was not compiled in."); +#endif + case BZIP: +#ifdef HAVE_BZLIB + return new BZip(hold.release(), header, ReadCompressed::kMagicSize); +#else + UTIL_THROW(CompressedException, "This looks like a bzip file (it begins with BZ), but bzip support was not compiled in."); +#endif + case XZIP: +#ifdef HAVE_XZLIB + return new XZip(hold.release(), header, ReadCompressed::kMagicSize); +#else + UTIL_THROW(CompressedException, "This looks like an xz file, but xz support was not compiled in."); +#endif + case UNKNOWN: + break; + } + try { + AdvanceOrThrow(fd, -ReadCompressed::kMagicSize); + } catch (const util::ErrnoException &e) { + return new UncompressedWithHeader(hold.release(), header, ReadCompressed::kMagicSize); + } + return new Uncompressed(hold.release()); +} + +} // namespace + +bool ReadCompressed::DetectCompressedMagic(const void *from_void) { + return DetectMagic(from_void) != UNKNOWN; +} + +ReadCompressed::ReadCompressed(int fd) { + Reset(fd); +} + +ReadCompressed::ReadCompressed() {} + +ReadCompressed::~ReadCompressed() {} + +void ReadCompressed::Reset(int fd) { + internal_.reset(); + internal_.reset(ReadFactory(fd, raw_amount_)); +} + +std::size_t ReadCompressed::Read(void *to, std::size_t amount) { + return internal_->Read(to, amount, *this); +} + +} // namespace util diff --git a/klm/util/read_compressed.hh b/klm/util/read_compressed.hh new file mode 100644 index 00000000..83ca9fb2 --- /dev/null +++ b/klm/util/read_compressed.hh @@ -0,0 +1,74 @@ +#ifndef UTIL_READ_COMPRESSED__ +#define UTIL_READ_COMPRESSED__ + +#include "util/exception.hh" +#include "util/scoped.hh" + +#include + +#include + +namespace util { + +class CompressedException : public Exception { + public: + CompressedException() throw(); + virtual ~CompressedException() throw(); +}; + +class GZException : public CompressedException { + public: + GZException() throw(); + ~GZException() throw(); +}; + +class BZException : public CompressedException { + public: + BZException() throw(); + ~BZException() throw(); +}; + +class XZException : public CompressedException { + public: + XZException() throw(); + ~XZException() throw(); +}; + +class ReadBase; + +class ReadCompressed { + public: + static const std::size_t kMagicSize = 6; + // Must have at least kMagicSize bytes. + static bool DetectCompressedMagic(const void *from); + + // Takes ownership of fd. + explicit ReadCompressed(int fd); + + // Must call Reset later. + ReadCompressed(); + + ~ReadCompressed(); + + // Takes ownership of fd. + void Reset(int fd); + + std::size_t Read(void *to, std::size_t amount); + + uint64_t RawAmount() const { return raw_amount_; } + + private: + friend class ReadBase; + + scoped_ptr internal_; + + uint64_t raw_amount_; + + // No copying. + ReadCompressed(const ReadCompressed &); + void operator=(const ReadCompressed &); +}; + +} // namespace util + +#endif // UTIL_READ_COMPRESSED__ diff --git a/klm/util/read_compressed_test.cc b/klm/util/read_compressed_test.cc new file mode 100644 index 00000000..6fd97e5e --- /dev/null +++ b/klm/util/read_compressed_test.cc @@ -0,0 +1,94 @@ +#include "util/read_compressed.hh" + +#include "util/file.hh" +#include "util/have.hh" + +#define BOOST_TEST_MODULE ReadCompressedTest +#include +#include + +#include +#include + +#include + +namespace util { +namespace { + +void ReadLoop(ReadCompressed &reader, void *to_void, std::size_t amount) { + uint8_t *to = static_cast(to_void); + while (amount) { + std::size_t ret = reader.Read(to, amount); + BOOST_REQUIRE(ret); + to += ret; + amount -= ret; + } +} + +void TestRandom(const char *compressor) { + const uint32_t kSize4 = 100000 / 4; + char name[] = "tempXXXXXX"; + + // Write test file. + { + scoped_fd original(mkstemp(name)); + BOOST_REQUIRE(original.get() > 0); + for (uint32_t i = 0; i < kSize4; ++i) { + WriteOrThrow(original.get(), &i, sizeof(uint32_t)); + } + } + + char gzname[] = "tempXXXXXX"; + scoped_fd gzipped(mkstemp(gzname)); + + std::string command(compressor); +#ifdef __CYGWIN__ + command += ".exe"; +#endif + command += " <\""; + command += name; + command += "\" >\""; + command += gzname; + command += "\""; + BOOST_REQUIRE_EQUAL(0, system(command.c_str())); + + BOOST_CHECK_EQUAL(0, unlink(name)); + BOOST_CHECK_EQUAL(0, unlink(gzname)); + + ReadCompressed reader(gzipped.release()); + for (uint32_t i = 0; i < kSize4; ++i) { + uint32_t got; + ReadLoop(reader, &got, sizeof(uint32_t)); + BOOST_CHECK_EQUAL(i, got); + } + + char ignored; + BOOST_CHECK_EQUAL((std::size_t)0, reader.Read(&ignored, 1)); + // Test double EOF call. + BOOST_CHECK_EQUAL((std::size_t)0, reader.Read(&ignored, 1)); +} + +BOOST_AUTO_TEST_CASE(Uncompressed) { + TestRandom("cat"); +} + +#ifdef HAVE_ZLIB +BOOST_AUTO_TEST_CASE(ReadGZ) { + TestRandom("gzip"); +} +#endif // HAVE_ZLIB + +#ifdef HAVE_BZLIB +BOOST_AUTO_TEST_CASE(ReadBZ) { + TestRandom("bzip2"); +} +#endif // HAVE_BZLIB + +#ifdef HAVE_XZLIB +BOOST_AUTO_TEST_CASE(ReadXZ) { + TestRandom("xz"); +} +#endif + +} // namespace +} // namespace util diff --git a/klm/util/scoped.hh b/klm/util/scoped.hh index 93e2e817..d62c6df1 100644 --- a/klm/util/scoped.hh +++ b/klm/util/scoped.hh @@ -1,40 +1,13 @@ #ifndef UTIL_SCOPED__ #define UTIL_SCOPED__ +/* Other scoped objects in the style of scoped_ptr. */ #include "util/exception.hh" - -/* Other scoped objects in the style of scoped_ptr. */ #include #include namespace util { -template class scoped_thing { - public: - explicit scoped_thing(T *c = static_cast(0)) : c_(c) {} - - ~scoped_thing() { if (c_) Free(c_); } - - void reset(T *c) { - if (c_) Free(c_); - c_ = c; - } - - T &operator*() { return *c_; } - const T&operator*() const { return *c_; } - T &operator->() { return *c_; } - const T&operator->() const { return *c_; } - - T *get() { return c_; } - const T *get() const { return c_; } - - private: - T *c_; - - scoped_thing(const scoped_thing &); - scoped_thing &operator=(const scoped_thing &); -}; - class scoped_malloc { public: scoped_malloc() : p_(NULL) {} @@ -77,9 +50,6 @@ template class scoped_array { T &operator*() { return *c_; } const T&operator*() const { return *c_; } - T &operator->() { return *c_; } - const T&operator->() const { return *c_; } - T &operator[](std::size_t idx) { return c_[idx]; } const T &operator[](std::size_t idx) const { return c_[idx]; } @@ -90,6 +60,39 @@ template class scoped_array { private: T *c_; + + scoped_array(const scoped_array &); + void operator=(const scoped_array &); +}; + +template class scoped_ptr { + public: + explicit scoped_ptr(T *content = NULL) : c_(content) {} + + ~scoped_ptr() { delete c_; } + + T *get() { return c_; } + const T* get() const { return c_; } + + T &operator*() { return *c_; } + const T&operator*() const { return *c_; } + + T *operator->() { return c_; } + const T*operator->() const { return c_; } + + T &operator[](std::size_t idx) { return c_[idx]; } + const T &operator[](std::size_t idx) const { return c_[idx]; } + + void reset(T *to = NULL) { + scoped_ptr other(c_); + c_ = to; + } + + private: + T *c_; + + scoped_ptr(const scoped_ptr &); + void operator=(const scoped_ptr &); }; } // namespace util diff --git a/klm/util/string_piece.hh b/klm/util/string_piece.hh index be6a643d..51481646 100644 --- a/klm/util/string_piece.hh +++ b/klm/util/string_piece.hh @@ -1,6 +1,6 @@ /* If you use ICU in your program, then compile with -DHAVE_ICU -licui18n. If * you don't use ICU, then this will use the Google implementation from Chrome. - * This has been modified from the original version to let you choose. + * This has been modified from the original version to let you choose. */ // Copyright 2008, Google Inc. @@ -62,9 +62,9 @@ #include #include -// Old versions of ICU don't define operator== and operator!=. +// Old versions of ICU don't define operator== and operator!=. #if (U_ICU_VERSION_MAJOR_NUM < 4) || ((U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM < 4)) -#warning You are using an old version of ICU. Consider upgrading to ICU >= 4.6. +#warning You are using an old version of ICU. Consider upgrading to ICU >= 4.6. inline bool operator==(const StringPiece& x, const StringPiece& y) { if (x.size() != y.size()) return false; @@ -274,15 +274,28 @@ struct StringPieceCompatibleEquals : public std::binary_function typename T::const_iterator FindStringPiece(const T &t, const StringPiece &key) { +#if BOOST_VERSION < 104200 + std::string temp(key.data(), key.size()); + return t.find(temp); +#else return t.find(key, StringPieceCompatibleHash(), StringPieceCompatibleEquals()); +#endif } + template typename T::iterator FindStringPiece(T &t, const StringPiece &key) { +#if BOOST_VERSION < 104200 + std::string temp(key.data(), key.size()); + return t.find(temp); +#else return t.find(key, StringPieceCompatibleHash(), StringPieceCompatibleEquals()); +#endif } #endif #ifdef HAVE_ICU U_NAMESPACE_END +using U_NAMESPACE_QUALIFIER StringPiece; #endif + #endif // BASE_STRING_PIECE_H__ diff --git a/klm/util/tokenize_piece.hh b/klm/util/tokenize_piece.hh index 4a7f5460..a588c3fc 100644 --- a/klm/util/tokenize_piece.hh +++ b/klm/util/tokenize_piece.hh @@ -20,6 +20,7 @@ class OutOfTokens : public Exception { class SingleCharacter { public: + SingleCharacter() {} explicit SingleCharacter(char delim) : delim_(delim) {} StringPiece Find(const StringPiece &in) const { @@ -32,6 +33,8 @@ class SingleCharacter { class MultiCharacter { public: + MultiCharacter() {} + explicit MultiCharacter(const StringPiece &delimiter) : delimiter_(delimiter) {} StringPiece Find(const StringPiece &in) const { @@ -44,6 +47,7 @@ class MultiCharacter { class AnyCharacter { public: + AnyCharacter() {} explicit AnyCharacter(const StringPiece &chars) : chars_(chars) {} StringPiece Find(const StringPiece &in) const { @@ -56,6 +60,8 @@ class AnyCharacter { class AnyCharacterLast { public: + AnyCharacterLast() {} + explicit AnyCharacterLast(const StringPiece &chars) : chars_(chars) {} StringPiece Find(const StringPiece &in) const { @@ -81,8 +87,8 @@ template class TokenIter : public boost::it return current_.data() != 0; } - static TokenIter end() { - return TokenIter(); + static TokenIter end() { + return TokenIter(); } private: @@ -100,8 +106,8 @@ template class TokenIter : public boost::it } while (SkipEmpty && current_.data() && current_.empty()); // Compiler should optimize this away if SkipEmpty is false. } - bool equal(const TokenIter &other) const { - return after_.data() == other.after_.data(); + bool equal(const TokenIter &other) const { + return current_.data() == other.current_.data(); } const StringPiece &dereference() const { -- cgit v1.2.3 From e189e981a02ef03ccb2dba733cf1363dccbb9778 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 14 Dec 2012 22:20:31 +0000 Subject: Get macros from config.h, break build for Chris to fix by linking libs --- klm/util/have.hh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/klm/util/have.hh b/klm/util/have.hh index 1523c0c5..85b838e4 100644 --- a/klm/util/have.hh +++ b/klm/util/have.hh @@ -10,4 +10,8 @@ //#define HAVE_BOOST #endif +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + #endif // UTIL_HAVE__ -- cgit v1.2.3 From 4b9e9d87b0ff91a98bfffb11d95f6b30f8e4c1b3 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 14 Dec 2012 22:40:17 +0000 Subject: Patch up build for now, still no compressed support --- klm/util/have.hh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/klm/util/have.hh b/klm/util/have.hh index 85b838e4..b86ba11e 100644 --- a/klm/util/have.hh +++ b/klm/util/have.hh @@ -11,7 +11,8 @@ #endif #ifdef HAVE_CONFIG_H -#include "config.h" +// Chris; uncomment this line. +//#include "config.h" #endif #endif // UTIL_HAVE__ -- cgit v1.2.3 From 29a47a94bfc09450802484e5cd3f835d39c9f66c Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 15 Dec 2012 02:53:56 -0500 Subject: enable kenlm compression --- configure.ac | 26 ++++++++++++++++++-------- decoder/Makefile.am | 11 +++++------ example_extff/Makefile.am | 2 +- klm/util/have.hh | 3 +-- mteval/Makefile.am | 6 +++--- python/setup.py.in | 2 +- training/dpmert/Makefile.am | 10 +++++----- training/dtrain/Makefile.am | 2 +- training/minrisk/Makefile.am | 2 +- training/mira/Makefile.am | 2 +- training/pro/Makefile.am | 4 ++-- training/rampion/Makefile.am | 2 +- training/utils/Makefile.am | 4 ++-- utils/Makefile.am | 18 +++++++++--------- word-aligner/Makefile.am | 2 +- 15 files changed, 52 insertions(+), 44 deletions(-) diff --git a/configure.ac b/configure.ac index f4650ca4..eabb8645 100644 --- a/configure.ac +++ b/configure.ac @@ -18,6 +18,23 @@ BOOST_TEST AM_PATH_PYTHON AC_CHECK_HEADER(dlfcn.h,AC_DEFINE(HAVE_DLFCN_H)) AC_CHECK_LIB(dl, dlopen) +AC_CHECK_HEADERS(zlib.h, + AC_CHECK_LIB(z, gzread,[ + AC_DEFINE(HAVE_ZLIB,[],[Do we have zlib]) + ZLIBS="$ZLIBS -lz" + ])) + +AC_CHECK_HEADERS(bzlib.h, + AC_CHECK_LIB(bz2, BZ2_bzReadOpen,[ + AC_DEFINE(HAVE_BZLIB,[],[Do we have bzlib]) + ZLIBS="$ZLIBS -lbz2" + ])) + +AC_CHECK_HEADERS(lzma.h, + AC_CHECK_LIB(lzma, lzma_code,[ + AC_DEFINE(HAVE_XZLIB,[],[Do we have lzma]) + ZLIBS="$ZLIBS -llzma" + ])) AC_ARG_ENABLE(mpi, [ --enable-mpi Build MPI binaries, assumes mpi.h is present ], @@ -72,19 +89,12 @@ fi CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" LDFLAGS="$LDFLAGS $BOOST_PROGRAM_OPTIONS_LDFLAGS $BOOST_SERIALIZATION_LDFLAGS $BOOST_SYSTEM_LDFLAGS" # $BOOST_THREAD_LDFLAGS" -LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS $BOOST_SERIALIZATION_LIBS $BOOST_SYSTEM_LIBS" +LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS $BOOST_SERIALIZATION_LIBS $BOOST_SYSTEM_LIBS $ZLIBS" # $BOOST_THREAD_LIBS" AC_CHECK_HEADER(google/dense_hash_map, [AC_DEFINE([HAVE_SPARSEHASH], [1], [flag for google::dense_hash_map])]) -AC_CHECK_HEADER(zlib.h, - [AC_DEFINE([HAVE_ZLIB], [1], [zlib])]) -AC_CHECK_HEADER(bzlib.h, - [AC_DEFINE([HAVE_BZLIB], [1], [bzlib])]) -AC_CHECK_HEADER(lzma.h, - [AC_DEFINE([HAVE_XZLIB], [1], [xzlib])]) - AC_PROG_INSTALL CPPFLAGS="-DPIC -fPIC $CPPFLAGS -DHAVE_CONFIG_H -DKENLM_MAX_ORDER=6" diff --git a/decoder/Makefile.am b/decoder/Makefile.am index 6914fa0f..88a6116c 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -8,16 +8,16 @@ noinst_PROGRAMS = \ TESTS = trule_test parser_test grammar_test hg_test parser_test_SOURCES = parser_test.cc -parser_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz +parser_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a grammar_test_SOURCES = grammar_test.cc -grammar_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz +grammar_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a hg_test_SOURCES = hg_test.cc -hg_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz +hg_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a trule_test_SOURCES = trule_test.cc -trule_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz +trule_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a cdec_SOURCES = cdec.cc -cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz +cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm @@ -82,4 +82,3 @@ libcdec_a_SOURCES = \ JSON_parser.c \ json_parse.cc \ grammar.cc - diff --git a/example_extff/Makefile.am b/example_extff/Makefile.am index ac2694ca..7b7c34b5 100644 --- a/example_extff/Makefile.am +++ b/example_extff/Makefile.am @@ -1,4 +1,4 @@ -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm -I../decoder +AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I.. -I../mteval -I../utils -I../klm -I../decoder lib_LTLIBRARIES = libff_example.la libff_example_la_SOURCES = ff_example.cc diff --git a/klm/util/have.hh b/klm/util/have.hh index b86ba11e..85b838e4 100644 --- a/klm/util/have.hh +++ b/klm/util/have.hh @@ -11,8 +11,7 @@ #endif #ifdef HAVE_CONFIG_H -// Chris; uncomment this line. -//#include "config.h" +#include "config.h" #endif #endif // UTIL_HAVE__ diff --git a/mteval/Makefile.am b/mteval/Makefile.am index 5e9bba91..4444285f 100644 --- a/mteval/Makefile.am +++ b/mteval/Makefile.am @@ -23,12 +23,12 @@ libmteval_a_SOURCES = \ ter.cc fast_score_SOURCES = fast_score.cc -fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz +fast_score_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a mbr_kbest_SOURCES = mbr_kbest.cc -mbr_kbest_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a -lz +mbr_kbest_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a scorer_test_SOURCES = scorer_test.cc -scorer_test_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +scorer_test_LDADD = libmteval.a $(top_srcdir)/utils/libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils diff --git a/python/setup.py.in b/python/setup.py.in index dac72903..fa8a9f5e 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -17,7 +17,7 @@ ext_modules = [ sources=['src/_cdec.cpp'], include_dirs=INC, library_dirs=LIB, - libraries=LIBS + ['z', 'cdec', 'utils', 'mteval', 'training_utils', 'klm', 'klm_util', 'ksearch'], + libraries=['cdec', 'utils', 'mteval', 'training_utils', 'klm', 'klm_util', 'ksearch'] + LIBS, extra_compile_args=CPPFLAGS, extra_link_args=LDFLAGS), Extension(name='cdec.sa._sa', diff --git a/training/dpmert/Makefile.am b/training/dpmert/Makefile.am index ff318bef..3dbdfa69 100644 --- a/training/dpmert/Makefile.am +++ b/training/dpmert/Makefile.am @@ -8,18 +8,18 @@ noinst_PROGRAMS = \ TESTS = lo_test mr_dpmert_generate_mapper_input_SOURCES = mr_dpmert_generate_mapper_input.cc line_optimizer.cc -mr_dpmert_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz +mr_dpmert_generate_mapper_input_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a # nbest2hg_SOURCES = nbest2hg.cc -# nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst -lz +# nbest2hg_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lfst mr_dpmert_map_SOURCES = mert_geometry.cc ces.cc error_surface.cc mr_dpmert_map.cc line_optimizer.cc -mr_dpmert_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz +mr_dpmert_map_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a mr_dpmert_reduce_SOURCES = error_surface.cc ces.cc mr_dpmert_reduce.cc line_optimizer.cc mert_geometry.cc -mr_dpmert_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz +mr_dpmert_reduce_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a lo_test_SOURCES = lo_test.cc ces.cc mert_geometry.cc error_surface.cc line_optimizer.cc -lo_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz +lo_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/dtrain/Makefile.am b/training/dtrain/Makefile.am index 5b48e756..4f51b0c8 100644 --- a/training/dtrain/Makefile.am +++ b/training/dtrain/Makefile.am @@ -1,7 +1,7 @@ bin_PROGRAMS = dtrain dtrain_SOURCES = dtrain.cc score.cc -dtrain_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz +dtrain_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/minrisk/Makefile.am b/training/minrisk/Makefile.am index a15e821e..821730c2 100644 --- a/training/minrisk/Makefile.am +++ b/training/minrisk/Makefile.am @@ -1,6 +1,6 @@ bin_PROGRAMS = minrisk_optimize minrisk_optimize_SOURCES = minrisk_optimize.cc -minrisk_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/training/liblbfgs/liblbfgs.a -lz +minrisk_optimize_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/training/liblbfgs/liblbfgs.a AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training -I$(top_srcdir)/training/utils diff --git a/training/mira/Makefile.am b/training/mira/Makefile.am index ae609ede..c8f404fb 100644 --- a/training/mira/Makefile.am +++ b/training/mira/Makefile.am @@ -1,6 +1,6 @@ bin_PROGRAMS = kbest_mira kbest_mira_SOURCES = kbest_mira.cc -kbest_mira_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz +kbest_mira_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/klm/search/libksearch.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/training/pro/Makefile.am b/training/pro/Makefile.am index 1916b6b2..e0a45a33 100644 --- a/training/pro/Makefile.am +++ b/training/pro/Makefile.am @@ -3,9 +3,9 @@ bin_PROGRAMS = \ mr_pro_reduce mr_pro_map_SOURCES = mr_pro_map.cc -mr_pro_map_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz +mr_pro_map_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a mr_pro_reduce_SOURCES = mr_pro_reduce.cc -mr_pro_reduce_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a -lz +mr_pro_reduce_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils -I$(top_srcdir)/training diff --git a/training/rampion/Makefile.am b/training/rampion/Makefile.am index 1633d0f7..ef0ca147 100644 --- a/training/rampion/Makefile.am +++ b/training/rampion/Makefile.am @@ -1,6 +1,6 @@ bin_PROGRAMS = rampion_cccp rampion_cccp_SOURCES = rampion_cccp.cc -rampion_cccp_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz +rampion_cccp_LDADD = $(top_srcdir)/training/utils/libtraining_utils.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training/utils diff --git a/training/utils/Makefile.am b/training/utils/Makefile.am index 189d9a76..c9405d4e 100644 --- a/training/utils/Makefile.am +++ b/training/utils/Makefile.am @@ -24,10 +24,10 @@ libtraining_utils_a_SOURCES = \ risk.cc optimize_test_SOURCES = optimize_test.cc -optimize_test_LDADD = libtraining_utils.a $(top_srcdir)/utils/libutils.a -lz +optimize_test_LDADD = libtraining_utils.a $(top_srcdir)/utils/libutils.a lbfgs_test_SOURCES = lbfgs_test.cc -lbfgs_test_LDADD = $(top_srcdir)/utils/libutils.a -lz +lbfgs_test_LDADD = $(top_srcdir)/utils/libutils.a AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/decoder -I$(top_srcdir)/utils -I$(top_srcdir)/mteval -I$(top_srcdir)/klm diff --git a/utils/Makefile.am b/utils/Makefile.am index 3ad9d69e..639c30b8 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -33,24 +33,24 @@ if HAVE_CMPH endif reconstruct_weights_SOURCES = reconstruct_weights.cc -reconstruct_weights_LDADD = libutils.a -lz +reconstruct_weights_LDADD = libutils.a atools_SOURCES = atools.cc -atools_LDADD = libutils.a -lz +atools_LDADD = libutils.a phmt_SOURCES = phmt.cc -phmt_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +phmt_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) ts_SOURCES = ts.cc -ts_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +ts_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) m_test_SOURCES = m_test.cc -m_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +m_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) dict_test_SOURCES = dict_test.cc -dict_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +dict_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) weights_test_SOURCES = weights_test.cc -weights_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +weights_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) logval_test_SOURCES = logval_test.cc -logval_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +logval_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) small_vector_test_SOURCES = small_vector_test.cc -small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) -lz +small_vector_test_LDADD = libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) ################################################################ # do NOT NOT NOT add any other -I includes NO NO NO NO NO ###### diff --git a/word-aligner/Makefile.am b/word-aligner/Makefile.am index 280d3ae7..2dcb688e 100644 --- a/word-aligner/Makefile.am +++ b/word-aligner/Makefile.am @@ -1,6 +1,6 @@ bin_PROGRAMS = fast_align fast_align_SOURCES = fast_align.cc ttables.cc -fast_align_LDADD = $(top_srcdir)/utils/libutils.a -lz +fast_align_LDADD = $(top_srcdir)/utils/libutils.a AM_CPPFLAGS = -W -Wall $(GTEST_CPPFLAGS) -I$(top_srcdir)/utils -I$(top_srcdir)/training -- cgit v1.2.3 From ce6937f136a38af93d9a5cd9628acc712da95543 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 15 Dec 2012 02:58:54 -0500 Subject: fix recursion limit bug --- python/pkg/cdec/sa/compile.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/pkg/cdec/sa/compile.py b/python/pkg/cdec/sa/compile.py index c0402761..ce249c0f 100644 --- a/python/pkg/cdec/sa/compile.py +++ b/python/pkg/cdec/sa/compile.py @@ -4,9 +4,10 @@ import os import logging import cdec.configobj import cdec.sa +import sys MAX_PHRASE_LENGTH = 4 -def precompute(f_sa, max_len, max_nt, max_size, min_gap, rank1, rank2): +def precompute(f_sa, max_len, max_nt, max_size, min_gap, rank1, rank2, tight_phrases): lcp = cdec.sa.LCP(f_sa) stats = sorted(lcp.compute_stats(MAX_PHRASE_LENGTH), reverse=True) precomp = cdec.sa.Precomputation(from_stats=stats, @@ -20,6 +21,8 @@ def precompute(f_sa, max_len, max_nt, max_size, min_gap, rank1, rank2): return precomp def main(): + sys.setrecursionlimit(sys.getrecursionlimit() * 100) + logging.basicConfig(level=logging.INFO) logger = logging.getLogger('cdec.sa.compile') parser = argparse.ArgumentParser(description='Compile a corpus into a suffix array.') -- cgit v1.2.3 From 201af2acd394415a05072fbd53d42584875aa4b4 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 16 Dec 2012 21:19:36 -0500 Subject: add grammar convert back in --- training/utils/Makefile.am | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/training/utils/Makefile.am b/training/utils/Makefile.am index c9405d4e..d708a9f5 100644 --- a/training/utils/Makefile.am +++ b/training/utils/Makefile.am @@ -2,7 +2,8 @@ noinst_LIBRARIES = libtraining_utils.a bin_PROGRAMS = \ sentserver \ - sentclient + sentclient \ + grammar_convert noinst_PROGRAMS = \ lbfgs_test \ @@ -26,6 +27,9 @@ libtraining_utils_a_SOURCES = \ optimize_test_SOURCES = optimize_test.cc optimize_test_LDADD = libtraining_utils.a $(top_srcdir)/utils/libutils.a +grammar_convert_SOURCES = grammar_convert.cc +grammar_convert_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a + lbfgs_test_SOURCES = lbfgs_test.cc lbfgs_test_LDADD = $(top_srcdir)/utils/libutils.a -- cgit v1.2.3