diff options
author | trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-19 22:28:10 +0000 |
---|---|---|
committer | trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-19 22:28:10 +0000 |
commit | a14894ec4dcf70a841130a93543839f489c4ce0e (patch) | |
tree | 26b7a44d803b512b22383817d1c6dd7d97708994 /gi/posterior-regularisation/prjava/src/phrase/Trainer.java | |
parent | a2c1681af636fb3fb0c4702627b9629c05af2051 (diff) |
Reversed out broken thresholding
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@324 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/posterior-regularisation/prjava/src/phrase/Trainer.java')
-rw-r--r-- | gi/posterior-regularisation/prjava/src/phrase/Trainer.java | 46 |
1 file changed, 23 insertions, 23 deletions
```diff
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java b/gi/posterior-regularisation/prjava/src/phrase/Trainer.java
index 7f0b1970..ec1a5804 100644
--- a/gi/posterior-regularisation/prjava/src/phrase/Trainer.java
+++ b/gi/posterior-regularisation/prjava/src/phrase/Trainer.java
@@ -7,8 +7,11 @@ import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.PrintStream;
+import java.util.List;
 import java.util.Random;
 
+import phrase.Corpus.Edge;
+
 import arr.F;
 
 public class Trainer
@@ -34,10 +37,6 @@ public class Trainer
         parser.accepts("agree");
         parser.accepts("no-parameter-cache");
         parser.accepts("skip-large-phrases").withRequiredArg().ofType(Integer.class).defaultsTo(5);
-        parser.accepts("rare-word").withRequiredArg().ofType(Integer.class).defaultsTo(10);
-        parser.accepts("rare-edge").withRequiredArg().ofType(Integer.class).defaultsTo(1);
-        parser.accepts("rare-phrase").withRequiredArg().ofType(Integer.class).defaultsTo(2);
-        parser.accepts("rare-context").withRequiredArg().ofType(Integer.class).defaultsTo(2);
         OptionSet options = parser.parse(args);
 
         if (options.has("help") || !options.has("in"))
@@ -61,10 +60,6 @@ public class Trainer
         double alphaEmit = (vb) ? (Double) options.valueOf("alpha-emit") : 0;
         double alphaPi = (vb) ? (Double) options.valueOf("alpha-pi") : 0;
         int skip = (Integer) options.valueOf("skip-large-phrases");
-        int wordThreshold = (Integer) options.valueOf("rare-word");
-        int edgeThreshold = (Integer) options.valueOf("rare-edge");
-        int phraseThreshold = (Integer) options.valueOf("rare-phrase");
-        int contextThreshold = (Integer) options.valueOf("rare-context");
 
         if (options.has("seed"))
             F.rng = new Random((Long) options.valueOf("seed"));
@@ -86,14 +81,7 @@ public class Trainer
             e.printStackTrace();
             System.exit(1);
         }
-
-        if (wordThreshold > 1)
-            corpus.applyWordThreshold(wordThreshold);
-        if (phraseThreshold > 1)
-            corpus.applyPhraseThreshold(phraseThreshold);
-        if (contextThreshold > 1)
-            corpus.applyContextThreshold(contextThreshold);
-
+
         if (!options.has("agree"))
             System.out.println("Running with " + tags + " tags " +
                     "for " + iterations + " iterations " +
@@ -127,7 +115,6 @@ public class Trainer
                     e.printStackTrace();
                 }
             }
-            cluster.setEdgeThreshold(edgeThreshold);
         }
 
         double last = 0;
@@ -138,20 +125,24 @@ public class Trainer
                 o = agree.EM();
             else
             {
+                if (i < skip)
+                    System.out.println("Skipping phrases of length > " + (i+1));
+
                 if (scale_phrase <= 0 && scale_context <= 0)
                 {
                     if (!vb)
-                        o = cluster.EM(i < skip);
+                        o = cluster.EM((i < skip) ? i+1 : 0);
                     else
-                        o = cluster.VBEM(alphaEmit, alphaPi, i < skip);
+                        o = cluster.VBEM(alphaEmit, alphaPi);
                 }
                 else
-                    o = cluster.PREM(scale_phrase, scale_context, i < skip);
+                    o = cluster.PREM(scale_phrase, scale_context, (i < skip) ? i+1 : 0);
             }
 
             System.out.println("ITER: "+i+" objective: " + o);
 
-            if (i != 0 && Math.abs((o - last) / o) < threshold)
+            // sometimes takes a few iterations to break the ties
+            if (i > 5 && Math.abs((o - last) / o) < threshold)
             {
                 last = o;
                 break;
@@ -171,10 +162,19 @@ public class Trainer
             File outfile = (File) options.valueOf("out");
             try {
                 PrintStream ps = FileUtil.printstream(outfile);
-                cluster.displayPosterior(ps);
+                List<Edge> test;
+                if (!options.has("test"))
+                    test = corpus.getEdges();
+                else
+                {
+                    infile = (File) options.valueOf("test");
+                    System.out.println("Reading testing concordance from " + infile);
+                    test = corpus.readEdges(FileUtil.reader(infile));
+                }
+                cluster.displayPosterior(ps, test);
                 ps.close();
             } catch (IOException e) {
-                System.err.println("Failed to open output file: " + outfile);
+                System.err.println("Failed to open either testing file or output file");
                 e.printStackTrace();
                 System.exit(1);
             }
```