diff options
| author | Chris Dyer <redpony@gmail.com> | 2009-12-19 14:32:28 -0500 | 
|---|---|---|
| committer | Chris Dyer <redpony@gmail.com> | 2009-12-19 14:32:28 -0500 | 
| commit | 27db9d8c05188f64c17d61c394d3dafe8b8e93d8 (patch) | |
| tree | 688930b6e95b6801ffe7d722f33a4f56712ecd21 /training | |
| parent | 39b9c1e0aaec81492d81e541daf7703ba8c517ff (diff) | |
cool new alignment feature
Diffstat (limited to 'training')
| -rwxr-xr-x | training/cluster-ptrain.pl | 14 | 
1 files changed, 14 insertions, 0 deletions
| diff --git a/training/cluster-ptrain.pl b/training/cluster-ptrain.pl index 33aab25d..8944ae34 100755 --- a/training/cluster-ptrain.pl +++ b/training/cluster-ptrain.pl @@ -104,7 +104,21 @@ if ($restart) {    } else {      `cp $initial_weights $dir/weights.1.gz`;    } +  open T, "<$training_corpus" or die "Can't read $training_corpus: $!"; +  open TO, ">$dir/training.in"; +  my $lc = 0; +  while(<T>) { +    chomp; +    s/^\s+//; +    s/\s+$//; +    die "Expected A ||| B in input file" unless / \|\|\| /; +    print TO "<seg id=\"$lc\">$_</seg>\n"; +    $lc++; +  } +  close T; +  close TO;  } +$training_corpus = "$dir/training.in";  my $iter_attempts = 1;  while ($iter < $max_iteration) { | 
