From e26434979adc33bd949566ba7bf02dff64e80a3e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- .../prjava/src/phrase/PhraseCorpus.java | 193 --------------------- 1 file changed, 193 deletions(-) delete mode 100644 gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java (limited to 'gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java') diff --git a/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java b/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java deleted file mode 100644 index 0cf31c1c..00000000 --- a/gi/posterior-regularisation/prjava/src/phrase/PhraseCorpus.java +++ /dev/null @@ -1,193 +0,0 @@ -package phrase; - -import io.FileUtil; - -import java.io.BufferedInputStream; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Scanner; - -public class PhraseCorpus -{ - public HashMapwordLex; - public HashMapphraseLex; - - public String wordList[]; - public String phraseList[]; - - //data[phrase][num context][position] - public int data[][][]; - public int numContexts; - - public PhraseCorpus(String filename) throws FileNotFoundException, IOException - { - BufferedReader r = FileUtil.reader(new File(filename)); - - phraseLex=new HashMap(); - wordLex=new HashMap(); - - ArrayListdataList=new ArrayList(); - String line=null; - numContexts = 0; - - while((line=readLine(r))!=null){ - - String toks[]=line.split("\t"); - String phrase=toks[0]; - addLex(phrase,phraseLex); - - toks=toks[1].split(" \\|\\|\\| "); - - ArrayList ctxList=new ArrayList(); - - for(int i=0;i")){ - continue; - } - addLex(word,wordLex); - context[idx]=wordLex.get(word); - idx++; - } - - String count=toks[i+1]; - context[idx]=Integer.parseInt(count.trim().substring(2)); - - ctxList.add(context); - } - - dataList.add(ctxList.toArray(new int [0][])); - - } - try{ - r.close(); - }catch(IOException ioe){ - ioe.printStackTrace(); - } - data=dataList.toArray(new int[0][][]); - } - - private void addLex(String key, HashMaplex){ - Integer i=lex.get(key); - if(i==null){ - lex.put(key, lex.size()); - } - } - - //for debugging - public void saveLex(String lexFilename) throws FileNotFoundException, IOException - { - PrintStream ps = FileUtil.printstream(new File(lexFilename)); - ps.println("Phrase Lexicon"); - ps.println(phraseLex.size()); - printDict(phraseLex,ps); - - ps.println("Word Lexicon"); - ps.println(wordLex.size()); - printDict(wordLex,ps); - ps.close(); - } - - private static void printDict(HashMaplex,PrintStream ps){ - String []dict=buildList(lex); - for(int i=0;i buildMap(String[]dict){ - HashMap map=new HashMap(); - for(int i=0;ilex){ - String dict[]=new String [lex.size()]; - for(String key:lex.keySet()){ - dict[lex.get(key)]=key; - } - return dict; - } - - public String getContextString(int context[], boolean addPhraseMarker) - { - StringBuffer b = new StringBuffer(); - for (int i=0;i 0) - b.append(" "); - - if (i == context.length/2) - b.append(" "); - - b.append(wordList[context[i]]); - } - return b.toString(); - } - - public static String readLine(BufferedReader r){ - try{ - return r.readLine(); - } - catch(IOException ioe){ - ioe.printStackTrace(); - } - return null; - } - - public static void main(String[] args) throws Exception - { - String LEX_FILENAME="../pdata/lex.out"; - String DATA_FILENAME="../pdata/btec.con"; - PhraseCorpus c=new PhraseCorpus(DATA_FILENAME); - c.saveLex(LEX_FILENAME); - c.loadLex(LEX_FILENAME); - c.saveLex(LEX_FILENAME); - } -} -- cgit v1.2.3