preso

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@435 ec762483-ff6d-05da-a07a-a48fb63a330f
author: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-27 16:13:19 +0000
committer: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-27 16:13:19 +0000
commit: fd519b0e45c857b266814994ba8c1421f508e522 (patch)
tree: 6d50c9b954e3c13e9df627c1ecc25c53544a5f58 /report/pyp_clustering/acl09-short/code/wsjplots_acl_pair.m
parent: 4c5df460c9da5c935438850ef7993463a9113286 (diff)
1 files changed, 117 insertions, 0 deletions
diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_pair.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_pair.m
new file mode 100644
index 00000000..1d07e54c
--- /dev/null
+++ b/report/pyp_clustering/acl09-short/code/wsjplots_acl_pair.m
@@ -0,0 +1,117 @@
+%wsj_lengths = load([ 'wsj_lengths.dat']);   % one-time step: read the raw wsj_lengths.dat ...
+%save([ 'wsj_lengths.mat'],'wsj_lengths');   % ... and cache it as wsj_lengths.mat for the load below
+load wsj            % NOTE(review): presumably defines `counts`, which is used below but never assigned here - confirm
+load wsj_lengths    % loads the cached wsj_lengths.mat; nothing below references wsj_lengths by name
+
+% Left panel: results from the 'wsjflat' output files. For each b, overlay the
+% two predicted curves and the empirical means on log10-log10 axes.
+figure(1)
+clf
+subplot(1,2,1);
+hold on
+for i = 3:6
+  % b takes the values 10^2, 10^3, 10^4, 10^5; one result file is loaded per value.
+  b = 10^(i-1);
+
+  disp(['Loading results for b = ' num2str(b) ]);
+%%%  uncomment these lines if .mat file is not yet generated. %%%
+  %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']);
+  %typecountrecordmean = mean(typecountrecord(:,:));
+  %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
+  load([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat']);
+  % NOTE(review): `counts` is not assigned in this script; presumably it comes from wsj.mat.
+  % plot lines for CRP exact prediction using summation
+  [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20);
+  ph = plot(log10(logbins),log10(predicted),'r');
+  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
+
+  % plot lines for CRP Antoniak prediction
+  [logbins predicted dummy] = logbinmean(counts, antoniakpred(counts,b),20,20);
+  ph = plot(log10(logbins),log10(predicted),'r');
+  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','--')
+
+  %plot lines for incorrect CRP Antoniak prediction (ACL07)
+  %[logbins predicted dummy] = logbinmean(counts, noP0pred(counts,b),20,20);
+  %ph = plot(log10(logbins),log10(predicted),'r');
+  %set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle',':')
+
+  % plot lines for CRP Cohn prediction
+  %[logbins predicted dummy] = logbinmean(counts, cohnpred(counts,b),20,20);
+  %ph = plot(log10(logbins),log10(predicted),'r');
+  %set(ph,'color',[0.2 0.2 1],'linewidth',1.5,'linestyle','.')
+  % plot empirical counts with error bars. errorbar(x,y,neg,pos) expects non-negative
+  % lengths below/above y, so pass the log10 offsets in that order (assumes seval < meanval).
+  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
+  errorbar(log10(logbins),log10(meanval),log10(meanval)-log10(meanval-seval),log10(meanval+seval)-log10(meanval),'k.');
+end
+
+% Left-panel axes: ticks at log10 of 1-2-...-9 steps per decade; only the decades are labelled.
+set(gca, ...
+    'xtick', log10([1:10 20:10:100 200:100:1000 2000:1000:5000]), ...
+    'ytick', log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000]), ...
+    'xlim', [-0.1 3.5], ...
+    'ylim', [-1.1 1.5], ...
+    'FontSize', 14, ...
+    'xticklabel', {'1',   ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+                   '10',  ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+                   '100', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+                   '1000', ' ', ' ', ' ', ' '}, ...
+    'yticklabel', {'0.1', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+                   '1',   ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+                   '10',  ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+                   '100', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+                   '1000', ' ', ' ', ' ', ' '});
+%title('Chinese restaurant process adaptor')
+ylabel('Mean number of lexical entries'); xlabel('Word frequency (n_w)');
+legend('Expectation','Antoniak approx.','Empirical','Location','NorthWest'); box('on');
+
+
+% Right panel: same sweep over b, this time reading the 'wsjpeak' output files.
+subplot(1,2,2);
+hold on
+for i =3:6
+  % b takes the values 10^2, 10^3, 10^4, 10^5; one result file is loaded per value.
+  b = 10^(i-1);
+
+  disp(['Loading results for b = ' num2str(b) ]);
+%%%  uncomment these lines if .mat file is not yet generated. %%%
+  %typecountrecord= load([ 'outputs/typecountrecordwsjpeak0.0.' num2str(b) '.0.dat']);
+  %typecountrecordmean = mean(typecountrecord(:,:));
+  %save([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
+  load([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat']);
+  % NOTE(review): `counts` is not assigned in this script; presumably it comes from wsj.mat.
+  % plot lines for CRP exact prediction using summation
+  [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20);
+  ph = plot(log10(logbins),log10(predicted),'r');
+  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
+
+   %plot lines for incorrect CRP Antoniak prediction (ACL07)
+  [logbins predicted dummy] = logbinmean(counts, noP0pred(counts,b),20,20);
+  ph = plot(log10(logbins),log10(predicted),'r');
+  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','-.')
+  % plot empirical counts with error bars. errorbar(x,y,neg,pos) expects non-negative
+  % lengths below/above y, so pass the log10 offsets in that order (assumes seval < meanval).
+  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
+  errorbar(log10(logbins),log10(meanval),log10(meanval)-log10(meanval-seval),log10(meanval+seval)-log10(meanval),'k.');
+end
+
+% Right-panel axes: y ticks start at 1 here, so there is no '0.1' decade label.
+set(gca, ...
+    'xtick', log10([1:10 20:10:100 200:100:1000 2000:1000:5000]), ...
+    'ytick', log10([1:10 20:10:100 200:100:1000 2000:1000:5000]), ...
+    'xlim', [-0.1 3.5], ...
+    'ylim', [-.1 2.5], ...
+    'FontSize', 14, ...
+    'xticklabel', {'1',   ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+                   '10',  ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+                   '100', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+                   '1000', ' ', ' ', ' ', ' '}, ...
+    'yticklabel', {'1',   ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+                   '10',  ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+                   '100', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+                   '1000', ' ', ' ', ' ', ' '});
+%title('Chinese restaurant process adaptor')
+ylabel('Mean number of lexical entries');
+xlabel('Word frequency (n_w)');
+legend('Expectation','GGJ07 approx.','Empirical','Location','NorthWest'); box('on');
+%axis square
+\ No newline at end of file
author	redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-27 16:13:19 +0000
committer	redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-27 16:13:19 +0000
commit	fd519b0e45c857b266814994ba8c1421f508e522 (patch)
tree	6d50c9b954e3c13e9df627c1ecc25c53544a5f58 /report/pyp_clustering/acl09-short/code/wsjplots_acl_pair.m
parent	4c5df460c9da5c935438850ef7993463a9113286 (diff)