summaryrefslogtreecommitdiff
path: root/training/cluster-ptrain.pl
diff options
context:
space:
mode:
authorChris Dyer <redpony@gmail.com>2009-12-19 14:32:28 -0500
committerChris Dyer <redpony@gmail.com>2009-12-19 14:32:28 -0500
commit27db9d8c05188f64c17d61c394d3dafe8b8e93d8 (patch)
tree688930b6e95b6801ffe7d722f33a4f56712ecd21 /training/cluster-ptrain.pl
parent39b9c1e0aaec81492d81e541daf7703ba8c517ff (diff)
cool new alignment feature
Diffstat (limited to 'training/cluster-ptrain.pl')
-rwxr-xr-xtraining/cluster-ptrain.pl14
1 files changed, 14 insertions, 0 deletions
diff --git a/training/cluster-ptrain.pl b/training/cluster-ptrain.pl
index 33aab25d..8944ae34 100755
--- a/training/cluster-ptrain.pl
+++ b/training/cluster-ptrain.pl
@@ -104,7 +104,21 @@ if ($restart) {
} else {
`cp $initial_weights $dir/weights.1.gz`;
}
+ # Copy the corpus to $dir/training.in, wrapping each trimmed line in <seg id="N">...</seg> (N counts from 0).
+ open my $corpus_in, '<', $training_corpus or die "Can't read $training_corpus: $!";
+ open my $seg_out, '>', "$dir/training.in" or die "Can't write $dir/training.in: $!";
+ my $lc = 0;
+ while (my $line = <$corpus_in>) {
+   $line =~ s/^\s+//;
+   $line =~ s/\s+$//;  # also strips the line terminator, so no separate chomp is needed
+   die "Expected A ||| B in input file" unless $line =~ / \|\|\| /;
+   print $seg_out "<seg id=\"$lc\">$line</seg>\n";
+   $lc++;
+ }
+ close $corpus_in;
+ close $seg_out or die "Can't finish writing $dir/training.in: $!";  # buffered write errors surface at close
}
+$training_corpus = "$dir/training.in";  # repoint at the <seg>-wrapped copy; NOTE(review): on restart it is presumably left from the earlier run -- confirm
my $iter_attempts = 1;
while ($iter < $max_iteration) {