From ebd00f59aab18446051f9838d3d08427b242b435 Mon Sep 17 00:00:00 2001 From: desaicwtf Date: Mon, 28 Jun 2010 23:14:21 +0000 Subject: add draft version of POS induction with HMM and L1 Linf constraints git-svn-id: https://ws10smt.googlecode.com/svn/trunk@47 ec762483-ff6d-05da-a07a-a48fb63a330f --- .../prjava/src/test/HMMModelStats.java | 96 ++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 gi/posterior-regularisation/prjava/src/test/HMMModelStats.java (limited to 'gi/posterior-regularisation/prjava/src/test/HMMModelStats.java') diff --git a/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java b/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java new file mode 100644 index 00000000..26d7abec --- /dev/null +++ b/gi/posterior-regularisation/prjava/src/test/HMMModelStats.java @@ -0,0 +1,96 @@ +package test; + +import hmm.HMM; +import hmm.POS; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; + +import data.Corpus; + +public class HMMModelStats { + + public static String modelFilename="../posdata/posModel.out"; + public static String alphaFilename="../posdata/corpus.alphabet"; + public static String statsFilename="../posdata/model.stats"; + + public static final int NUM_WORD=50; + + public static String testFilename="../posdata/en_test.conll"; + + public static double [][]maxwt; + + public static void main(String[] args) { + HashMapvocab= + (HashMap) io.SerializedObjects.readSerializedObject(alphaFilename); + + Corpus test=new Corpus(testFilename,vocab); + + String [] dict=new String [vocab.size()+1]; + for(String key:vocab.keySet()){ + dict[vocab.get(key)]=key; + } + dict[dict.length-1]=Corpus.UNK_TOK; + + HMM hmm=new HMM(); + hmm.readModel(modelFilename); + + + + PrintStream ps=io.FileUtil.openOutFile(statsFilename); + + double [][] emit=hmm.getEmitProb(); + for(int i=0;il=new ArrayList(); + for(int j=0;j=dict.length){ + break; + } + ps.print(dict[l.get(j).idx]+"\t"); + if((1+j)%10==0){ + ps.println(); + } + } + ps.println("\n"); + } + + checkMaxwt(hmm,ps,test.getAllData()); + + int terminalSym=vocab.get(Corpus .END_SYM); + //sample 10 sentences + for(int i=0;i<10;i++){ + int []sent=hmm.sample(terminalSym); + for(int j=0;j