summary | refs | log | tree | commit | diff
path: root/phrasinator/train-phrasinator.pl
diff options
context:
space:
mode:
author: Kenneth Heafield <github@kheafield.com> 2012-10-22 12:07:20 +0100
committer: Kenneth Heafield <github@kheafield.com> 2012-10-22 12:07:20 +0100
commit: 5f98fe5c4f2a2090eeb9d30c030305a70a8347d1 (patch)
tree: 9b6002f850e6dea1e3400c6b19bb31a9cdf3067f /phrasinator/train-phrasinator.pl
parent: cf9994131993b40be62e90e213b1e11e6b550143 (diff)
parent: 21825a09d97c2e0afd20512f306fb25fed55e529 (diff)
Merge remote branch 'upstream/master'
Conflicts: Jamroot bjam decoder/Jamfile decoder/cdec.cc dpmert/Jamfile jam-files/sanity.jam klm/lm/Jamfile klm/util/Jamfile mira/Jamfile
Diffstat (limited to 'phrasinator/train-phrasinator.pl')
-rwxr-xr-x phrasinator/train-phrasinator.pl 89
1 files changed, 0 insertions, 89 deletions
diff --git a/phrasinator/train-phrasinator.pl b/phrasinator/train-phrasinator.pl
deleted file mode 100755
index c50b8e68..00000000
--- a/phrasinator/train-phrasinator.pl
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-my $script_dir; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
-use Getopt::Long;
-use File::Spec qw (rel2abs);
-
-my $DECODER = "$script_dir/../decoder/cdec";
-my $TRAINER = "$script_dir/gibbs_train_plm_notables";
-
-die "Can't find $TRAINER" unless -f $TRAINER;
-die "Can't execute $TRAINER" unless -x $TRAINER;
-
-if (!GetOptions(
- "decoder=s" => \$DECODER,
-)) { usage(); }
-
-die "Can't find $DECODER" unless -f $DECODER;
-die "Can't execute $DECODER" unless -x $DECODER;
-if (scalar @ARGV != 2) { usage(); }
-my $INFILE = shift @ARGV;
-my $OUTDIR = shift @ARGV;
-$OUTDIR = File::Spec->rel2abs($OUTDIR);
-print STDERR " Input file: $INFILE\n";
-print STDERR "Output directory: $OUTDIR\n";
-open F, "<$INFILE" or die "Failed to open $INFILE for reading: $!";
-close F;
-die "Please remove existing directory $OUTDIR\n" if (-f $OUTDIR || -d $OUTDIR);
-
-my $CMD = "mkdir $OUTDIR";
-safesystem($CMD) or die "Failed to create directory $OUTDIR\n$!";
-
-my $grammar="$OUTDIR/grammar.gz";
-my $weights="$OUTDIR/weights";
-$CMD = "$TRAINER -w $weights -g $grammar -i $INFILE";
-safesystem($CMD) or die "Failed to train model!\n";
-my $cdecini = "$OUTDIR/cdec.ini";
-open C, ">$cdecini" or die "Failed to open $cdecini for writing: $!";
-
-print C <<EOINI;
-quiet=true
-formalism=scfg
-grammar=$grammar
-add_pass_through_rules=true
-weights=$OUTDIR/weights
-EOINI
-
-close C;
-
-print <<EOT;
-
-Model trained successfully. Text can be decoded into phrasal units with
-the following command:
-
- $DECODER -c $OUTDIR/cdec.ini < FILE.TXT
-
-EOT
-exit(0);
-
-sub usage {
- print <<EOT;
-Usage: $0 [options] INPUT.TXT OUTPUT-DIRECTORY
-
- Infers a phrasal segmentation model from the tokenized text in INPUT.TXT
- and writes it to OUTPUT-DIRECTORY/ so that it can be applied to other
- text or have its granularity altered.
-
-EOT
- exit(1);
-}
-
-sub safesystem {
- print STDERR "Executing: @_\n";
- system(@_);
- if ($? == -1) {
- print STDERR "ERROR: Failed to execute: @_\n $!\n";
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- exit(1);
- }
- else {
- my $exitcode = $? >> 8;
- print STDERR "Exit code: $exitcode\n" if $exitcode;
- return ! $exitcode;
- }
-}
-