summaryrefslogtreecommitdiff
path: root/gi/posterior-regularisation/prjava/src/test/CorpusTest.java
blob: b4c3041fbed537127c4b1d1d173e522aa2a7d5ee (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
package test;

import java.util.Arrays;
import java.util.HashMap;

import data.Corpus;
import hmm.POS;

/**
 * Manual round-trip check for {@link Corpus}.
 *
 * <p>Loads the training corpus and the two serialized alphabets (words and
 * tags), inverts each alphabet into an index-to-string table, and then prints
 * one sentence twice: once as the strings stored in the corpus and once as
 * the strings recovered from its integer encoding. The same is done for the
 * sentence's tag sequence. The two printed lines of each pair are meant to be
 * compared by eye; nothing is asserted programmatically.
 */
public class CorpusTest {

	/** Index of the sentence that is decoded and printed. */
	private static final int SENTENCE_INDEX=30;

	/**
	 * Runs the round-trip check and writes four lines to standard output:
	 * the original words, the decoded words, the original tags and the
	 * decoded tags of sentence {@link #SENTENCE_INDEX}.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		Corpus c=new Corpus(POS.trainFilename);

		// The deserialized object's type parameters cannot be verified at
		// runtime; the cast relies on the file holding a String->Integer map.
		@SuppressWarnings("unchecked")
		HashMap<String, Integer>vocab=
			(HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.alphaFilename);

		// Same reasoning as for the word alphabet above.
		@SuppressWarnings("unchecked")
		HashMap<String, Integer>tagVocab=
			(HashMap<String, Integer>) io.SerializedObjects.readSerializedObject(Corpus.tagalphaFilename);

		String[] dict=invert(vocab);
		String[] tagdict=invert(tagVocab);

		// Word sequence: stored strings vs. strings decoded from the ids.
		String[] sent=c.get(SENTENCE_INDEX);
		int[] data=c.getInt(SENTENCE_INDEX);
		printRoundTrip(sent, decode(data, dict, sent.length));

		// Tag sequence: same comparison using the tag alphabet.
		sent=c.tag.get(SENTENCE_INDEX);
		data=c.tagData.get(SENTENCE_INDEX);
		printRoundTrip(sent, decode(data, tagdict, sent.length));
	}

	/**
	 * Builds the index-to-string table for an alphabet.
	 *
	 * <p>The table has one slot more than the alphabet has entries; that
	 * final slot is filled with {@code Corpus.UNK_TOK} (presumably the id
	 * the corpus assigns to out-of-alphabet symbols -- confirm against
	 * {@code Corpus}). Every value in the map must be a valid index into
	 * the table, otherwise an {@link ArrayIndexOutOfBoundsException} is
	 * thrown.
	 *
	 * @param alphabet mapping from symbol to integer id
	 * @return array in which position {@code id} holds the symbol mapped to
	 *         {@code id}, with {@code Corpus.UNK_TOK} in the last position
	 */
	private static String[] invert(HashMap<String, Integer> alphabet) {
		String[] table=new String[alphabet.size()+1];
		// entrySet avoids a second hash lookup per key.
		for(java.util.Map.Entry<String, Integer> entry:alphabet.entrySet()){
			table[entry.getValue()]=entry.getKey();
		}
		table[table.length-1]=Corpus.UNK_TOK;
		return table;
	}

	/**
	 * Maps the first {@code length} ids back to their strings.
	 *
	 * @param ids    integer-encoded sequence
	 * @param table  index-to-string table as produced by {@link #invert}
	 * @param length number of leading ids to decode
	 * @return array of {@code length} decoded strings
	 */
	private static String[] decode(int[] ids, String[] table, int length) {
		String[] decoded=new String[length];
		for(int i=0;i<length;i++){
			decoded[i]=table[ids[i]];
		}
		return decoded;
	}

	/**
	 * Prints the original sequence followed by the decoded one, each on its
	 * own line, so they can be compared visually.
	 *
	 * @param original  sequence as stored in the corpus
	 * @param roundtrip sequence recovered from the integer encoding
	 */
	private static void printRoundTrip(String[] original, String[] roundtrip) {
		System.out.println(Arrays.toString(original));
		System.out.println(Arrays.toString(roundtrip));
	}

}