package hmm;

import java.io.PrintStream;
import java.util.HashMap;

import data.Corpus;

public class POS {

    //public String trainFilename="../posdata/en_train.conll";
    public static String trainFilename = "../posdata/small_train.txt";
    // public static String trainFilename="../posdata/en_test.conll";
    // public static String trainFilename="../posdata/trial1.txt";
    public static String testFilename = "../posdata/en_test.conll";
    //public static String testFilename="../posdata/trial1.txt";
    public static String predFilename = "../posdata/en_test.predict.conll";
    public static String modelFilename = "../posdata/posModel.out";

    public static final int ITER = 20;
    public static final int N_STATE = 30;

    public static void main(String[] args) {
        //POS p=new POS();
        //POS p=new POS(true);
        PRPOS();
    }

    // Train an HMM tagger on the training corpus.
    public POS() {
        Corpus c = new Corpus(trainFilename);
        // size of vocabulary +1 for unknown tokens
        HMM hmm = new HMM(N_STATE, c.getVocabSize() + 1, c.getAllData());
        for (int i = 0; i < ITER; i++) {
            // ... training iterations (loop body lost in the source)
        }
    }

    // Tag the test corpus and write predictions to predFilename.
    public static void PRPOS() {
        // The start of this method is lost in the source; it presumably loads the saved
        // model along with the training corpus `c` and test corpus `test` used below.
        Corpus c = new Corpus(trainFilename);
        Corpus test = new Corpus(testFilename);

        HashMap<String, Integer> tagVocab =
                (HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.tagalphaFilename);

        // Invert the tag alphabet into an index -> tag dictionary;
        // the last slot is reserved for the unknown token.
        String[] tagdict = new String[tagVocab.size() + 1];
        for (String key : tagVocab.keySet()) {
            tagdict[tagVocab.get(key)] = key;
        }
        tagdict[tagdict.length - 1] = Corpus.UNK_TOK;

        // The lookup key literal here appears to have been lost in the source.
        System.out.println(c.vocab.get(""));

        PrintStream ps = io.FileUtil.openOutFile(predFilename);
        int[][] data = test.getAllData();
        for (int i = 0; i < data.length; i++) {
            // ... decoding/output loop truncated in the source
        }
    }
}