#!/usr/bin/perl -w
use strict;
use Getopt::Long;

# Directory containing this script; appended to @INC at compile time so that
# modules living next to the script can be loaded.
my $SCRIPT_DIR;
BEGIN {
    use Cwd qw/ abs_path /;
    use File::Basename;
    $SCRIPT_DIR = dirname(abs_path($0));
    push @INC, $SCRIPT_DIR;
}

# Feature name => initial weight. Not referenced in the part of the file that
# is visible here; presumably consumed further down -- TODO confirm.
my %init_weights = qw(
    EGivenF       -0.3
    FGivenE       -0.3
    LexE2F        -0.3
    LexF2E        -0.3
    WordPenalty   -1.5
    LanguageModel  1.2
    Glue          -1.0
    GlueTop        0.00001
    PassThrough  -10.0
    X_EGivenF     -0.3
    X_FGivenE     -0.3
);

# ---------------------------------------------------------------------------
# Read config.eval (located next to this script).
#
# Each non-blank, non-comment line holds whitespace-separated fields:
#     name  path  lm  dev  devref  [test  [testref  ...]]
# Only the first two trailing fields (test, testref) are kept.
# ---------------------------------------------------------------------------
my $config = "$SCRIPT_DIR/config.eval";

# Three-argument open keeps the mode separate from the file name. The bareword
# handle name CONF is retained because the rest of the file is not visible
# from here and may refer to it.
open CONF, '<', $config or die "Can't read $config: $!";

my %paths;
my %lms;
my %devs;
my %devrefs;
my %tests;
my %testrefs;

print STDERR "LANGUAGE PAIRS:";
while (my $line = <CONF>) {
    chomp $line;

    # Trim first, so that indented comments and whitespace-only lines are
    # recognised and skipped.
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;
    next if $line eq '';
    next if $line =~ /^#/;

    my ($name, $path, $lm, $dev, $devref, @xtests) = split /\s+/, $line;
    $paths{$name}    = $path;
    $lms{$name}      = $lm;
    $devs{$name}     = $dev;
    $devrefs{$name}  = $devref;
    $tests{$name}    = $xtests[0];
    $testrefs{$name} = $xtests[1];
    print STDERR " $name";
}
close CONF;
print STDERR "\n";

# Set of language-pair names. Not consulted in the visible part of the file;
# validation below uses %paths (i.e. what config.eval declared) instead.
my %langpairs = map { $_ => 1 } qw( btec zhen fbis aren uren nlfr );

# ---------------------------------------------------------------------------
# Command line:  [--data DIR] [--help] <language-pair> <grammar>
# ---------------------------------------------------------------------------
my $help;
my $dataDir = '/export/ws10smt/data';

# --help is registered so that $help can actually become true; without it the
# flag was only reachable as an "unknown option" error.
my $options_ok = GetOptions(
    "data=s" => \$dataDir,
    "help"   => \$help,
);
if (!$options_ok || @ARGV != 2 || $help) {
    print_help();
    exit;
}

my $lp      = $ARGV[0];
my $grammar = $ARGV[1];
print STDERR "  CORPUS REPO: $dataDir\n";
print STDERR "  LANGUAGE PAIR: $lp\n";
die "I don't know about that language pair\n" unless $paths{$lp};

# A configured path starting with '/' is used as-is; anything else is taken
# relative to the corpus repository directory.
my $corpdir = "$dataDir";
if ($paths{$lp} =~ /^\//) {
    $corpdir = $paths{$lp};
}
else {
    $corpdir .= '/' . $paths{$lp};
}
die "I can't find the corpora directory: $corpdir" unless -d $corpdir;

print STDERR "  GRAMMAR: $grammar\n";
my $LANG_MODEL = $corpdir . '/' . $lms{$lp};
print STDERR "  LM: $LANG_MODEL\n";

# NOTE(review): the definition below is cut off in the visible source (the
# text following `print CDECINI` is missing). It is kept verbatim and was not
# reviewed.
sub write_cdec_ini { my ($filename, $grammar_path) = (@_); open CDECINI, ">$filename" or die "Can't write $filename: $!"; print CDECINI <