summaryrefslogtreecommitdiff
path: root/report/pyp_clustering/acl09-short/code/wsjplots_acl.m
blob: 50582e7f24b8c929276e23f8e06346bc292c4389 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
% One-off conversion of the raw lengths file into a .mat file
% (left disabled; re-enable if wsj_lengths.mat has to be regenerated):
%wsj_lengths = load([ 'wsj_lengths.dat']);
%save([ 'wsj_lengths.mat'],'wsj_lengths');

% Pull the previously saved variables from the `wsj` and `wsj_lengths`
% data files into the workspace.
% NOTE(review): `counts`, used by the plotting code further down, is
% presumably provided by one of these files -- confirm.
load('wsj');
load('wsj_lengths');

% Start from an empty figure 1 and let every subsequent plot call add to
% the same axes instead of replacing them.
figure(1);
clf;

hold('on');

% For each b = 10^2 ... 10^5, overlay four log10-log10 series on the
% current axes. The legend call further down relies on this plot order:
%   1. crppred(counts,b)                          - solid grey line
%   2. antoniakpred(counts,b)                     - dashed grey line
%   3. mean type counts from the 'wsjflat' file   - black stars
%   4. mean type counts from the 'wsjpeak' file   - black circles
% Every series is passed through logbinmean(...,20,20) before plotting.
% `counts` is not defined here; it must already be in the workspace
% (presumably from the `load wsj` at the top of the script -- confirm).
grey     = [0.7 0.7 0.7];
variants = {'flat', 'peak'};   % tag embedded in the output file names
markers  = {'k*',   'ko'};     % marker style for the matching variant

for i = 3:6

  b = 10^(i-1);   % terminated with ';' -- disp() below already reports b

  % plot lines for CRP exact prediction using summation
  [logbins, predicted, dummy] = logbinmean(counts, crppred(counts,b),20,20);
  plot(log10(logbins), log10(predicted), '-', 'color', grey, 'linewidth', 1.5);

  % plot lines for CRP Antoniak prediction
  [logbins, predicted, dummy] = logbinmean(counts, antoniakpred(counts,b),20,20);
  plot(log10(logbins), log10(predicted), '--', 'color', grey, 'linewidth', 1.5);

  % plot lines for CRP Cohn prediction (disabled)
  % NOTE: '.' is a marker, not a valid line style; use ':' for a dotted
  % line if this is ever re-enabled.
  %[logbins predicted dummy] = logbinmean(counts, cohnpred(counts,b),20,20);
  %ph = plot(log10(logbins),log10(predicted),'r');
  %set(ph,'color',[0.2 0.2 1],'linewidth',1.5,'linestyle','.')

  % Empirical results: same processing for both variants, only the file
  % name tag and the marker differ.
  for v = 1:numel(variants)

    disp(['Loading results for b = ' num2str(b) ]);
    matfile = ['outputs/typecountrecordmeanwsj' variants{v} '0.0.' num2str(b) '.0.mat'];

    %%%  uncomment these lines if .mat file is not yet generated. %%%
    %typecountrecord= load([ 'outputs/typecountrecordwsj' variants{v} '0.0.' num2str(b) '.0.dat']);
    %typecountrecordmean = mean(typecountrecord(:,:));
    %save(matfile,'typecountrecordmean');

    % Load into a struct and read the field explicitly: if the file does
    % not contain typecountrecordmean this fails loudly instead of
    % silently re-plotting the value left over from the previous file.
    loaded = load(matfile, 'typecountrecordmean');
    typecountrecordmean = loaded.typecountrecordmean;

    % plot empirical counts (error bars are currently disabled, see below)
    [logbins, meanval, seval] = logbinmean(counts,typecountrecordmean,20,20);
    plot(log10(logbins), log10(meanval), markers{v});
    %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),markers{v});

  end

end

% Axis cosmetics: the data were plotted as log10 values on linear axes,
% so ticks are placed at log10 of 1-2-...-9 steps within each decade and
% only the decade ticks receive a visible label (all others get ' ').
ax = gca;

xTickValues = [1:10 20:10:100 200:100:1000 2000:1000:5000];
yTickValues = [.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000];

% Blank label for every tick, then fill in the decade positions.
xLabels = repmat({' '}, 1, numel(xTickValues));
xLabels([1 10 19 28]) = {'1', '10', '100', '1000'};

yLabels = repmat({' '}, 1, numel(yTickValues));
yLabels([1 10 19 28 37]) = {'0.1', '1', '10', '100', '1000'};

set(ax, 'xtick', log10(xTickValues));
set(ax, 'ytick', log10(yTickValues));
set(ax, 'xlim', [-0.1 3.5]);
set(ax, 'ylim', [-1.1 2.5]);
set(ax, 'FontSize', 14);
set(ax, 'xticklabel', xLabels);
set(ax, 'yticklabel', yLabels);

%title('Chinese restaurant process adaptor')
ylabel('Mean number of lexical entries');
xlabel('Word frequency (n_w)');

% Legend entries are matched to the first four plotted objects, in the
% order they were drawn.
legendEntries = {'Expectation','Antoniak approx.','Empirical, fixed base','Empirical, inferred base'};
legend(legendEntries{:}, 'Location', 'NorthWest');

box('on');