summaryrefslogtreecommitdiff
path: root/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m
diff options
context:
space:
mode:
authorredpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-27 16:13:19 +0000
committerredpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-27 16:13:19 +0000
commitfd519b0e45c857b266814994ba8c1421f508e522 (patch)
tree6d50c9b954e3c13e9df627c1ecc25c53544a5f58 /report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m
parent4c5df460c9da5c935438850ef7993463a9113286 (diff)
preso
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@435 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m')
-rw-r--r--report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m74
1 files changed, 74 insertions, 0 deletions
diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m
new file mode 100644
index 00000000..8d570b7a
--- /dev/null
+++ b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m
@@ -0,0 +1,74 @@
+% wsj_lengths.mat was generated once from the raw text file with:
+%   wsj_lengths = load('wsj_lengths.dat'); save('wsj_lengths.mat','wsj_lengths');
+% NOTE(review): counts, used by the plotting loop, is presumably loaded here - confirm.
+load('wsj');
+load('wsj_lengths');
+
+% Start from a cleared figure 1 and keep every subsequent plot on the same axes.
+figure(1); clf; hold on
+
+% One RGB row per loop pass (row i-2 is used for loop index i). Other palettes tried:
+%   [0 0 0; 0 0 1; 1 0 0; 0 1 0]          pure black, blue, red, green
+%   [0 0 0; .6 .4 .4; .9 .6 .6; 1 .8 .8]  shades of pink
+%   [0 0 0; .3 .3 1; .4 .8 1; .5 1 .8]    blue/green
+colors = [0 0 0; 1 .4 .2; .4 .4 1; 0 .7 .5]; % similar to the pure palette but less garish
+
+% One pass per b = 10^2 .. 10^5: the crppred curve is drawn as a solid line and
+% the two loaded typecountrecordmean series as unconnected markers, in one color.
+for i = 3:6
+ col = colors(i-2,:);
+ b = 10^(i-1); % semicolon added: b is already reported by the disp calls below
+ % plot lines for CRP exact prediction using summation
+ [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20);
+ ph = plot(log10(logbins),log10(predicted));
+ set(ph,'color',col,'linewidth',2);
+
+ disp(['Loading results for b = ' num2str(b) ]);
+ %%% uncomment these lines if .mat file is not yet generated. %%%
+ %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']);
+ %typecountrecordmean = mean(typecountrecord(:,:));
+ %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
+ load([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat']);
+
+ % "flat" run means as unconnected circles ('o' is a marker, not a line style)
+ [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
+ ph = plot(log10(logbins),log10(meanval));
+ %set(ph,'color',col,'linestyle','none','marker','o','markerfacecolor',col,'markersize',8);
+ set(ph,'color',col,'linestyle','none','marker','o','linewidth',2,'markersize',8);
+ %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
+
+ disp(['Loading results for b = ' num2str(b) ]);
+ %%% uncomment these lines if .mat file is not yet generated. %%%
+ %typecountrecord= load([ 'outputs/typecountrecordwsjpeak0.0.' num2str(b) '.0.dat']);
+ %typecountrecordmean = mean(typecountrecord(:,:));
+ %save([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
+ load([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat']);
+
+ % "peak" run means as unconnected triangles (error bars are disabled below)
+ [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
+ ph = plot(log10(logbins),log10(meanval));
+ %set(ph,'color',col,'linestyle','none','marker','^','markerfacecolor',col,'markersize',8);
+ set(ph,'color',col,'linestyle','none','marker','^','linewidth',2,'markersize',8);
+ %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'ko');
+end
+
+% Axis cosmetics. The curves were plotted as log10 values, so tick positions
+% and limits are given in log10 units; only the decade ticks carry a label.
+set(gca, 'xtick', log10([1:10 20:10:100 200:100:1000 2000:1000:5000]), ...
+         'ytick', log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000]), ...
+         'xlim', [-0.1 3.5], 'ylim', [-.1 2.5], 'FontSize', 16)
+% x labels: 32 entries, one per x tick (1, 10, 100, 1000 shown; rest blank).
+set(gca, 'xticklabel', [{'1'}, repmat({' '},1,8), {'10'}, repmat({' '},1,8), ...
+        {'100'}, repmat({' '},1,8), {'1000'}, repmat({' '},1,4)]);
+% y labels: 41 entries, one per y tick (0.1, 1, 10, 100, 1000 shown; rest blank).
+set(gca, 'yticklabel', [{'0.1'}, repmat({' '},1,8), {'1'}, repmat({' '},1,8), ...
+        {'10'}, repmat({' '},1,8), {'100'}, repmat({' '},1,8), ...
+        {'1000'}, repmat({' '},1,4)]);
+
+%title('Chinese restaurant process adaptor')
+xlabel('Word frequency (n_w)')
+ylabel('Mean number of lexical entries (tables)')
+% NOTE(review): three labels for many lines; legend presumably pairs them with the first three plotted - confirm.
+labs = {'Expectation','Empirical, fixed base','Empirical, inferred base'};
+legend(labs,'Location','NorthWest')
+box('on')