diff options
author | Kenneth Heafield <github@kheafield.com> | 2012-10-22 12:07:20 +0100 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2012-10-22 12:07:20 +0100 |
commit | 5f98fe5c4f2a2090eeb9d30c030305a70a8347d1 (patch) | |
tree | 9b6002f850e6dea1e3400c6b19bb31a9cdf3067f /phrasinator/train-phrasinator.pl | |
parent | cf9994131993b40be62e90e213b1e11e6b550143 (diff) | |
parent | 21825a09d97c2e0afd20512f306fb25fed55e529 (diff) |
Merge remote branch 'upstream/master'
Conflicts:
Jamroot
bjam
decoder/Jamfile
decoder/cdec.cc
dpmert/Jamfile
jam-files/sanity.jam
klm/lm/Jamfile
klm/util/Jamfile
mira/Jamfile
Diffstat (limited to 'phrasinator/train-phrasinator.pl')
-rwxr-xr-x | phrasinator/train-phrasinator.pl | 89 |
1 files changed, 0 insertions, 89 deletions
```diff
diff --git a/phrasinator/train-phrasinator.pl b/phrasinator/train-phrasinator.pl
deleted file mode 100755
index c50b8e68..00000000
--- a/phrasinator/train-phrasinator.pl
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-my $script_dir; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
-use Getopt::Long;
-use File::Spec qw (rel2abs);
-
-my $DECODER = "$script_dir/../decoder/cdec";
-my $TRAINER = "$script_dir/gibbs_train_plm_notables";
-
-die "Can't find $TRAINER" unless -f $TRAINER;
-die "Can't execute $TRAINER" unless -x $TRAINER;
-
-if (!GetOptions(
-  "decoder=s" => \$DECODER,
-)) { usage(); }
-
-die "Can't find $DECODER" unless -f $DECODER;
-die "Can't execute $DECODER" unless -x $DECODER;
-if (scalar @ARGV != 2) { usage(); }
-my $INFILE = shift @ARGV;
-my $OUTDIR = shift @ARGV;
-$OUTDIR = File::Spec->rel2abs($OUTDIR);
-print STDERR " Input file: $INFILE\n";
-print STDERR "Output directory: $OUTDIR\n";
-open F, "<$INFILE" or die "Failed to open $INFILE for reading: $!";
-close F;
-die "Please remove existing directory $OUTDIR\n" if (-f $OUTDIR || -d $OUTDIR);
-
-my $CMD = "mkdir $OUTDIR";
-safesystem($CMD) or die "Failed to create directory $OUTDIR\n$!";
-
-my $grammar="$OUTDIR/grammar.gz";
-my $weights="$OUTDIR/weights";
-$CMD = "$TRAINER -w $weights -g $grammar -i $INFILE";
-safesystem($CMD) or die "Failed to train model!\n";
-my $cdecini = "$OUTDIR/cdec.ini";
-open C, ">$cdecini" or die "Failed to open $cdecini for writing: $!";
-
-print C <<EOINI;
-quiet=true
-formalism=scfg
-grammar=$grammar
-add_pass_through_rules=true
-weights=$OUTDIR/weights
-EOINI
-
-close C;
-
-print <<EOT;
-
-Model trained successfully. Text can be decoded into phrasal units with
-the following command:
-
-  $DECODER -c $OUTDIR/cdec.ini < FILE.TXT
-
-EOT
-exit(0);
-
-sub usage {
-  print <<EOT;
-Usage: $0 [options] INPUT.TXT OUTPUT-DIRECTORY
-
-  Infers a phrasal segmentation model from the tokenized text in INPUT.TXT
-  and writes it to OUTPUT-DIRECTORY/ so that it can be applied to other
-  text or have its granularity altered.
-
-EOT
-  exit(1);
-}
-
-sub safesystem {
-  print STDERR "Executing: @_\n";
-  system(@_);
-  if ($? == -1) {
-    print STDERR "ERROR: Failed to execute: @_\n $!\n";
-    exit(1);
-  }
-  elsif ($? & 127) {
-    printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n",
-      ($? & 127), ($? & 128) ? 'with' : 'without';
-    exit(1);
-  }
-  else {
-    my $exitcode = $? >> 8;
-    print STDERR "Exit code: $exitcode\n" if $exitcode;
-    return ! $exitcode;
-  }
-}
-
```