From 925087356b853e2099c1b60d8b757d7aa02121a9 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- .../prjava/src/test/CorpusTest.java | 60 ---------- .../prjava/src/test/HMMModelStats.java | 105 ----------------- .../prjava/src/test/IntDoublePair.java | 23 ---- .../prjava/src/test/X2y2WithConstraints.java | 131 --------------------- 4 files changed, 319 deletions(-) delete mode 100644 gi/posterior-regularisation/prjava/src/test/CorpusTest.java delete mode 100644 gi/posterior-regularisation/prjava/src/test/HMMModelStats.java delete mode 100644 gi/posterior-regularisation/prjava/src/test/IntDoublePair.java delete mode 100644 gi/posterior-regularisation/prjava/src/test/X2y2WithConstraints.java (limited to 'gi/posterior-regularisation/prjava/src/test') diff --git a/gi/posterior-regularisation/prjava/src/test/CorpusTest.java b/gi/posterior-regularisation/prjava/src/test/CorpusTest.java deleted file mode 100644 index b4c3041f..00000000 --- a/gi/posterior-regularisation/prjava/src/test/CorpusTest.java +++ /dev/null @@ -1,60 +0,0 @@ -package test; - -import java.util.Arrays; -import java.util.HashMap; - -import data.Corpus; -import hmm.POS; - -public class CorpusTest { - - public static void main(String[] args) { - Corpus c=new Corpus(POS.trainFilename); - - - int idx=30; - - - HashMapvocab= - (HashMap) io.SerializedObjects.readSerializedObject(Corpus.alphaFilename); - - HashMaptagVocab= - (HashMap) io.SerializedObjects.readSerializedObject(Corpus.tagalphaFilename); - - - String [] dict=new String [vocab.size()+1]; - for(String key:vocab.keySet()){ - dict[vocab.get(key)]=key; - } - dict[dict.length-1]=Corpus.UNK_TOK; - - String [] tagdict=new String [tagVocab.size()+1]; - for(String key:tagVocab.keySet()){ - tagdict[tagVocab.get(key)]=key; - } - tagdict[tagdict.length-1]=Corpus.UNK_TOK; - - String[] sent=c.get(idx); - int []data=c.getInt(idx); - - - String []roundtrip=new String [sent.length]; - for(int i=0;ivocab= - (HashMap) io.SerializedObjects.readSerializedObject(alphaFilename); - - Corpus test=new Corpus(testFilename,vocab); - - String [] dict=new String [vocab.size()+1]; - for(String key:vocab.keySet()){ - dict[vocab.get(key)]=key; - } - dict[dict.length-1]=Corpus.UNK_TOK; - - HMM hmm=new HMM(); - hmm.readModel(modelFilename); - - - - PrintStream ps = null; - try { - ps = io.FileUtil.printstream(new File(statsFilename)); - } catch (IOException e) { - e.printStackTrace(); - System.exit(1); - } - - double [][] emit=hmm.getEmitProb(); - for(int i=0;il=new ArrayList(); - for(int j=0;j=dict.length){ - break; - } - ps.print(dict[l.get(j).idx]+"\t"); - if((1+j)%10==0){ - ps.println(); - } - } - ps.println("\n"); - } - - checkMaxwt(hmm,ps,test.getAllData()); - - int terminalSym=vocab.get(Corpus .END_SYM); - //sample 10 sentences - for(int i=0;i<10;i++){ - int []sent=hmm.sample(terminalSym); - for(int j=0;jval){ - return 1; - } - if(pair.val