package phrase; import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; import java.util.HashMap; import java.util.Scanner; public class PhraseCorpus { public static String LEX_FILENAME="../pdata/lex.out"; //public static String DATA_FILENAME="../pdata/canned.con"; public static String DATA_FILENAME="../pdata/btec.con"; public static int NUM_CONTEXT=4; public HashMapwordLex; public HashMapphraseLex; public String wordList[]; public String phraseList[]; //data[phrase][num context][position] public int data[][][]; public static void main(String[] args) { // TODO Auto-generated method stub PhraseCorpus c=new PhraseCorpus(DATA_FILENAME); c.saveLex(LEX_FILENAME); c.loadLex(LEX_FILENAME); c.saveLex(LEX_FILENAME); } public PhraseCorpus(String filename){ BufferedReader r=io.FileUtil.openBufferedReader(filename); phraseLex=new HashMap(); wordLex=new HashMap(); ArrayListdataList=new ArrayList(); String line=null; while((line=readLine(r))!=null){ String toks[]=line.split("\t"); String phrase=toks[0]; addLex(phrase,phraseLex); toks=toks[1].split(" \\|\\|\\| "); ArrayList ctxList=new ArrayList(); for(int i=0;i")){ continue; } addLex(word,wordLex); context[idx]=wordLex.get(word); idx++; } String count=toks[i+1]; context[idx]=Integer.parseInt(count.trim().substring(2)); ctxList.add(context); } dataList.add(ctxList.toArray(new int [0][])); } try{ r.close(); }catch(IOException ioe){ ioe.printStackTrace(); } data=dataList.toArray(new int[0][][]); } private void addLex(String key, HashMaplex){ Integer i=lex.get(key); if(i==null){ lex.put(key, lex.size()); } } //for debugging public void saveLex(String lexFilename){ PrintStream ps=io.FileUtil.openOutFile(lexFilename); ps.println("Phrase Lexicon"); ps.println(phraseLex.size()); printDict(phraseLex,ps); ps.println("Word Lexicon"); ps.println(wordLex.size()); printDict(wordLex,ps); ps.close(); } private static void printDict(HashMaplex,PrintStream ps){ String []dict=buildList(lex); for(int i=0;i buildMap(String[]dict){ HashMap map=new HashMap(); for(int i=0;ilex){ String dict[]=new String [lex.size()]; for(String key:lex.keySet()){ dict[lex.get(key)]=key; } return dict; } public String getContextString(int context[]) { StringBuffer b = new StringBuffer(); for (int i=0;i 0) b.append(" "); b.append(wordList[context[i]]); } return b.toString(); } public static String readLine(BufferedReader r){ try{ return r.readLine(); } catch(IOException ioe){ ioe.printStackTrace(); } return null; } }