diff options
author | trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-15 22:48:44 +0000 |
---|---|---|
committer | trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-15 22:48:44 +0000 |
commit | 4037e35c511aec96f780276aa4e3c1493e19eba1 (patch) | |
tree | 0e43592cc58682fe44d7d11abc6a9a835a0547a3 /gi/posterior-regularisation/prjava/src/phrase/Corpus.java | |
parent | c14b17b45c1215b1d4a1495c161531d3e8936a34 (diff) |
Option to run on single word phrases before moving to larger ones.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@272 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/posterior-regularisation/prjava/src/phrase/Corpus.java')
-rw-r--r-- | gi/posterior-regularisation/prjava/src/phrase/Corpus.java | 26 |
1 files changed, 26 insertions, 0 deletions
```diff
diff --git a/gi/posterior-regularisation/prjava/src/phrase/Corpus.java b/gi/posterior-regularisation/prjava/src/phrase/Corpus.java
index d57f3c04..2de2797b 100644
--- a/gi/posterior-regularisation/prjava/src/phrase/Corpus.java
+++ b/gi/posterior-regularisation/prjava/src/phrase/Corpus.java
@@ -15,6 +15,14 @@ public class Corpus
 	private List<Edge> edges = new ArrayList<Edge>();
 	private List<List<Edge>> phraseToContext = new ArrayList<List<Edge>>();
 	private List<List<Edge>> contextToPhrase = new ArrayList<List<Edge>>();
+	public int splitSentinel;
+	public int phraseSentinel;
+
+	public Corpus()
+	{
+		splitSentinel = wordLexicon.insert("<SPLIT>");
+		phraseSentinel = wordLexicon.insert("<PHRASE>");
+	}
 
 	public class Edge
 	{
@@ -157,6 +165,11 @@ public class Corpus
 		return b.toString();
 	}
 
+	public boolean isSentinel(int wordId)
+	{
+		return wordId == splitSentinel || wordId == phraseSentinel;
+	}
+
 	static Corpus readFromFile(Reader in) throws IOException
 	{
 		Corpus c = new Corpus();
@@ -218,6 +231,19 @@ public class Corpus
 
 		return c;
 	}
+
+	TIntArrayList phraseEdges(TIntArrayList phrase)
+	{
+		TIntArrayList r = new TIntArrayList(4);
+		for (int p = 0; p < phrase.size(); ++p)
+		{
+			if (p == 0 || phrase.get(p-1) == splitSentinel)
+				r.add(p);
+			if (p == phrase.size() - 1 || phrase.get(p+1) == splitSentinel)
+				r.add(p);
+		}
+		return r;
+	}
 
 	public void printStats(PrintStream out)
 	{
```