summaryrefslogtreecommitdiff
path: root/report/pyp_clustering/acl09-short/code/wsjplots2.m
blob: eed41846809ade2403a7009dbc44aad9eec1d953 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99

load wsj

figure(1)
clf 
subplot(1,2,2)
hold on

for i = 1:9
  a = i/10;
  [logbins predicted dummy] = logbinmean(counts,counts.^a,20,20);
  ph = plot(log10(logbins),log10(predicted),'k');
  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
end

for i = 1:9
  a = i/10;  
  disp(['Loading results for a = ' num2str(a) ]);

  typecountrecord= load([ 'typecountrecordwsjflat' num2str(a) '.1.0.dat']);
  
  typecountrecordmean = mean(typecountrecord(500:1000,:));
  
  save([ 'typecountrecordmeanwsjflat' num2str(a) '.1.0.mat'],'typecountrecordmean');
  
  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20)
  errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
  drawnow
end




[logbins meanval seval] = logbinmean(counts,counts,20,20)
[logbins predicted dummy] = logbinmean(counts,counts,20,20)
ph = plot(log10(logbins),log10(predicted),'r');
hold on
errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');

set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)

set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
set(gca,'xlim',[-0.1 3.5])
set(gca,'ylim',[-0.1 3.5])
set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ...
		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
		    ' ', ' ', ' ', ' '});
set(gca,'yticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ...
		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
		    ' ', ' ', ' ', ' '});

title('Pitman-Yor process adaptor')
ylabel('Mean number of lexical entries')
xlabel('Word frequency (n_w)')
box on

subplot(1,2,1)

for i = 1:5

  b = 10^(i-1)

  disp(['Loading results for b = ' num2str(b) ]);
  typecountrecord= load([ 'typecountrecordwsjflat0.0.' num2str(b) '.0.dat']);

  typecountrecordmean = mean(typecountrecord(500:1000,:));
  save([ 'typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
  
  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20)
  [logbins predicted dummy] = logbinmean(counts,crppred(counts,b),20,20)
%  errorbar(log10(logbins),meanval,seval,'k.');
  hold on
  ph = plot(log10(logbins),log10(predicted),'r');
  %  ph = plot(log10(logbins),predicted,'r');
  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
  errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
end

set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
set(gca,'xlim',[-0.1 3.5])
set(gca,'ylim',[-0.1 1.5])
set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ...
		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
		    ' ', ' ', ' ', ' '});
set(gca,'yticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ...
		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
		    ' ', ' ', ' ', ' '});
title('Chinese restaurant process adaptor')
ylabel('Mean number of lexical entries')
xlabel('Word frequency (n_w)')
box on