From 93f3b9ddb40bbe173d6297fa33bddf80ee974fce Mon Sep 17 00:00:00 2001 From: desaicwtf Date: Tue, 20 Jul 2010 15:17:00 +0000 Subject: modified trainer for agreement of languages git-svn-id: https://ws10smt.googlecode.com/svn/trunk@333 ec762483-ff6d-05da-a07a-a48fb63a330f --- .../prjava/src/phrase/Trainer.java | 21 +++++++++++++++++---- .../prjava/train-PR-cluster.sh | 2 +- 2 files changed, 18 insertions(+), 5 deletions(-) (limited to 'gi/posterior-regularisation/prjava') diff --git a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java b/gi/posterior-regularisation/prjava/src/phrase/Trainer.java index afc80724..f9fd8485 100644 --- a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java +++ b/gi/posterior-regularisation/prjava/src/phrase/Trainer.java @@ -18,9 +18,11 @@ public class Trainer { public static void main(String[] args) { + OptionParser parser = new OptionParser(); parser.accepts("help"); parser.accepts("in").withRequiredArg().ofType(File.class); + parser.accepts("in1").withRequiredArg().ofType(File.class); parser.accepts("test").withRequiredArg().ofType(File.class); parser.accepts("out").withRequiredArg().ofType(File.class); parser.accepts("start").withRequiredArg().ofType(File.class); @@ -35,7 +37,8 @@ public class Trainer parser.accepts("variational-bayes"); parser.accepts("alpha-emit").withRequiredArg().ofType(Double.class).defaultsTo(0.1); parser.accepts("alpha-pi").withRequiredArg().ofType(Double.class).defaultsTo(0.01); - parser.accepts("agree"); + parser.accepts("agree-direction"); + parser.accepts("agree-language"); parser.accepts("no-parameter-cache"); parser.accepts("skip-large-phrases").withRequiredArg().ofType(Integer.class).defaultsTo(5); OptionSet options = parser.parse(args); @@ -73,17 +76,21 @@ public class Trainer Corpus corpus = null; File infile = (File) options.valueOf("in"); + Corpus corpus1 = null; + File infile1 = (File) options.valueOf("in1"); try { System.out.println("Reading concordance from " + infile); corpus = Corpus.readFromFile(FileUtil.reader(infile)); corpus.printStats(System.out); + corpus1 = Corpus.readFromFile(FileUtil.reader(infile1)); + corpus1.printStats(System.out); } catch (IOException e) { System.err.println("Failed to open input file: " + infile); e.printStackTrace(); System.exit(1); } - if (!options.has("agree")) + if (!(options.has("agree-direction")||options.has("agree-language"))) System.out.println("Running with " + tags + " tags " + "for " + iterations + " iterations " + ((skip > 0) ? "skipping large phrases for first " + skip + " iterations " : "") + @@ -96,8 +103,11 @@ public class Trainer System.out.println(); PhraseCluster cluster = null; - Agree agree = null; - if (options.has("agree")) + Agree2Sides agree2sides = null; + Agree agree= null; + if (options.has("agree-language")) + agree2sides = new Agree2Sides(tags, corpus,corpus1); + else if (options.has("agree-direction")) agree = new Agree(tags, corpus); else { @@ -124,6 +134,9 @@ public class Trainer double o; if (agree != null) o = agree.EM(); + else if(agree2sides!=null){ + o = agree2sides.EM(); + } else { if (i < skip) diff --git a/gi/posterior-regularisation/prjava/train-PR-cluster.sh b/gi/posterior-regularisation/prjava/train-PR-cluster.sh index 8298aa14..6c2f62cd 100755 --- a/gi/posterior-regularisation/prjava/train-PR-cluster.sh +++ b/gi/posterior-regularisation/prjava/train-PR-cluster.sh @@ -1,4 +1,4 @@ #!/bin/sh d=`dirname $0` -java -ea -Xmx60g -cp $d/prjava.jar:$d/lib/trove-2.0.2.jar:$d/lib/optimization.jar:$d/lib/jopt-simple-3.2.jar:$d/lib/lib/commons-math-2.1.jar phrase.Trainer $* +java -ea -Xmx3g -cp $d/prjava.jar:$d/lib/trove-2.0.2.jar:$d/lib/optimization.jar:$d/lib/jopt-simple-3.2.jar:$d/lib/lib/commons-math-2.1.jar phrase.Trainer $* -- cgit v1.2.3