summaryrefslogtreecommitdiff
path: root/report/pyp_clustering/acl09-short/code
diff options
context:
space:
mode:
Diffstat (limited to 'report/pyp_clustering/acl09-short/code')
-rw-r--r--report/pyp_clustering/acl09-short/code/antoniakpred.m12
-rw-r--r--report/pyp_clustering/acl09-short/code/approximations.eps897
-rw-r--r--report/pyp_clustering/acl09-short/code/cohnpred.m12
-rw-r--r--report/pyp_clustering/acl09-short/code/cokus.c167
-rw-r--r--report/pyp_clustering/acl09-short/code/crppred.m12
-rw-r--r--report/pyp_clustering/acl09-short/code/crppred_geom.m12
-rw-r--r--report/pyp_clustering/acl09-short/code/logbinmean.m38
-rw-r--r--report/pyp_clustering/acl09-short/code/noP0pred.m11
-rw-r--r--report/pyp_clustering/acl09-short/code/plot0.eps633
-rw-r--r--report/pyp_clustering/acl09-short/code/plot0.pdfbin20351 -> 0 bytes
-rw-r--r--report/pyp_clustering/acl09-short/code/plot1.eps579
-rw-r--r--report/pyp_clustering/acl09-short/code/plot1.pdfbin17830 -> 0 bytes
-rw-r--r--report/pyp_clustering/acl09-short/code/plot2.eps552
-rw-r--r--report/pyp_clustering/acl09-short/code/plot2.pdfbin14992 -> 0 bytes
-rw-r--r--report/pyp_clustering/acl09-short/code/plot3.eps721
-rw-r--r--report/pyp_clustering/acl09-short/code/plot3.pdfbin19393 -> 0 bytes
-rw-r--r--report/pyp_clustering/acl09-short/code/pygibbs3.c198
-rwxr-xr-xreport/pyp_clustering/acl09-short/code/pygibbs_geombin9705 -> 0 bytes
-rw-r--r--report/pyp_clustering/acl09-short/code/pygibbs_geom.c212
-rwxr-xr-xreport/pyp_clustering/acl09-short/code/run-peak.prl8
-rwxr-xr-xreport/pyp_clustering/acl09-short/code/run.prl8
-rwxr-xr-xreport/pyp_clustering/acl09-short/code/word_lengths.prl21
-rw-r--r--report/pyp_clustering/acl09-short/code/wsjplots2.m99
-rw-r--r--report/pyp_clustering/acl09-short/code/wsjplots_acl.m74
-rw-r--r--report/pyp_clustering/acl09-short/code/wsjplots_acl_monkeys.m164
-rw-r--r--report/pyp_clustering/acl09-short/code/wsjplots_acl_pair.m117
-rw-r--r--report/pyp_clustering/acl09-short/code/wsjplots_acl_talk0.m54
-rw-r--r--report/pyp_clustering/acl09-short/code/wsjplots_acl_talk1.m59
-rw-r--r--report/pyp_clustering/acl09-short/code/wsjplots_acl_talk2.m58
-rw-r--r--report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m74
-rw-r--r--report/pyp_clustering/acl09-short/code/wsjplots_cl.m99
31 files changed, 0 insertions, 4891 deletions
diff --git a/report/pyp_clustering/acl09-short/code/antoniakpred.m b/report/pyp_clustering/acl09-short/code/antoniakpred.m
deleted file mode 100644
index c4153c04..00000000
--- a/report/pyp_clustering/acl09-short/code/antoniakpred.m
+++ /dev/null
@@ -1,12 +0,0 @@
-function output = antoniakpred(input,b)
-
-uniqin = unique(input);
-prediction = zeros(max(input),1);
-
-p0=1/30114;
-for i = 1:length(uniqin)
- prediction(uniqin(i)) = b*p0*log((b*p0+uniqin(i))/(b*p0));
-end
-
-output = prediction(input);
-
diff --git a/report/pyp_clustering/acl09-short/code/approximations.eps b/report/pyp_clustering/acl09-short/code/approximations.eps
deleted file mode 100644
index 67857497..00000000
--- a/report/pyp_clustering/acl09-short/code/approximations.eps
+++ /dev/null
@@ -1,897 +0,0 @@
-%!PS-Adobe-3.0 EPSF-3.0
-%%Creator: MATLAB, The Mathworks, Inc. Version 7.7.0.471 (R2008b). Operating System: Linux 2.6.18-92.1.13.el5.inf.1PAE #1 SMP Mon Oct 20 10:33:44 BST 2008 i686.
-%%Title: /afs/inf.ed.ac.uk/user/s/sgwater/research/papers/2009/acl09-short/code/approximations.eps
-%%CreationDate: 04/25/2009 11:31:18
-%%DocumentNeededFonts: Helvetica
-%%DocumentProcessColors: Cyan Magenta Yellow Black
-%%LanguageLevel: 2
-%%Pages: 1
-%%BoundingBox: 89 164 503 676
-%%EndComments
-
-%%BeginProlog
-% MathWorks dictionary
-/MathWorks 160 dict begin
-% definition operators
-/bdef {bind def} bind def
-/ldef {load def} bind def
-/xdef {exch def} bdef
-/xstore {exch store} bdef
-% operator abbreviations
-/c /clip ldef
-/cc /concat ldef
-/cp /closepath ldef
-/gr /grestore ldef
-/gs /gsave ldef
-/mt /moveto ldef
-/np /newpath ldef
-/cm /currentmatrix ldef
-/sm /setmatrix ldef
-/rm /rmoveto ldef
-/rl /rlineto ldef
-/s {show newpath} bdef
-/sc {setcmykcolor} bdef
-/sr /setrgbcolor ldef
-/sg /setgray ldef
-/w /setlinewidth ldef
-/j /setlinejoin ldef
-/cap /setlinecap ldef
-/rc {rectclip} bdef
-/rf {rectfill} bdef
-% page state control
-/pgsv () def
-/bpage {/pgsv save def} bdef
-/epage {pgsv restore} bdef
-/bplot /gsave ldef
-/eplot {stroke grestore} bdef
-% orientation switch
-/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
-% coordinate system mappings
-/dpi2point 0 def
-% font control
-/FontSize 0 def
-/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
- makefont setfont} bdef
-/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
- exch dup 3 1 roll findfont dup length dict begin
- { 1 index /FID ne {def}{pop pop} ifelse } forall
- /Encoding exch def currentdict end definefont pop} bdef
-/isroman {findfont /CharStrings get /Agrave known} bdef
-/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
- exch FMS} bdef
-/csm {1 dpi2point div -1 dpi2point div scale neg translate
- dup landscapeMode eq {pop -90 rotate}
- {rotateMode eq {90 rotate} if} ifelse} bdef
-% line types: solid, dotted, dashed, dotdash
-/SO { [] 0 setdash } bdef
-/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
-/DA { [6 dpi2point mul] 0 setdash } bdef
-/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
- dpi2point mul] 0 setdash } bdef
-% macros for lines and objects
-/L {lineto stroke} bdef
-/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
-/AP {{rlineto} repeat} bdef
-/PDlw -1 def
-/W {/PDlw currentlinewidth def setlinewidth} def
-/PP {closepath eofill} bdef
-/DP {closepath stroke} bdef
-/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto
- neg 0 exch rlineto closepath} bdef
-/FR {MR stroke} bdef
-/PR {MR fill} bdef
-/L1i {{currentfile picstr readhexstring pop} image} bdef
-/tMatrix matrix def
-/MakeOval {newpath tMatrix currentmatrix pop translate scale
-0 0 1 0 360 arc tMatrix setmatrix} bdef
-/FO {MakeOval stroke} bdef
-/PO {MakeOval fill} bdef
-/PD {currentlinewidth 2 div 0 360 arc fill
- PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
-/FA {newpath tMatrix currentmatrix pop translate scale
- 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
-/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
- 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
-/FAn {newpath tMatrix currentmatrix pop translate scale
- 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
-/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
- 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
-/vradius 0 def /hradius 0 def /lry 0 def
-/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
-/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
- /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
- vradius add translate hradius vradius scale 0 0 1 180 270 arc
- tMatrix setmatrix lrx hradius sub uly vradius add translate
- hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
- lrx hradius sub lry vradius sub translate hradius vradius scale
- 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
- translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
- closepath} bdef
-/FRR {MRR stroke } bdef
-/PRR {MRR fill } bdef
-/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
- newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
- rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
- sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
- closepath} bdef
-/FlrRR {MlrRR stroke } bdef
-/PlrRR {MlrRR fill } bdef
-/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
- newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
- rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
- sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
- closepath} bdef
-/FtbRR {MtbRR stroke } bdef
-/PtbRR {MtbRR fill } bdef
-/stri 6 array def /dtri 6 array def
-/smat 6 array def /dmat 6 array def
-/tmat1 6 array def /tmat2 6 array def /dif 3 array def
-/asub {/ind2 exch def /ind1 exch def dup dup
- ind1 get exch ind2 get sub exch } bdef
-/tri_to_matrix {
- 2 0 asub 3 1 asub 4 0 asub 5 1 asub
- dup 0 get exch 1 get 7 -1 roll astore } bdef
-/compute_transform {
- dmat dtri tri_to_matrix tmat1 invertmatrix
- smat stri tri_to_matrix tmat2 concatmatrix } bdef
-/ds {stri astore pop} bdef
-/dt {dtri astore pop} bdef
-/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
- currentfile
- 3 index 0 eq {/ASCIIHexDecode filter}
- {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
- ifelse exch readstring pop
- dup 0 3 index getinterval /rbmap xdef
- dup 2 index dup getinterval /gbmap xdef
- 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
-/it {gs np dtri aload pop moveto lineto lineto cp c
- cols rows 8 compute_transform
- rbmap gbmap bbmap true 3 colorimage gr}bdef
-/il {newpath moveto lineto stroke}bdef
-currentdict end def
-%%EndProlog
-
-%%BeginSetup
-MathWorks begin
-
-0 cap
-
-end
-%%EndSetup
-
-%%Page: 1 1
-%%BeginPageSetup
-%%PageBoundingBox: 89 164 503 676
-MathWorks begin
-bpage
-%%EndPageSetup
-
-%%BeginObject: obj1
-bplot
-
-/dpi2point 12 def
-portraitMode 1068 8112 csm
-
- 0 0 4976 6135 rc
-86 dict begin %Colortable dictionary
-/c0 { 0.000000 0.000000 0.000000 sr} bdef
-/c1 { 1.000000 1.000000 1.000000 sr} bdef
-/c2 { 0.900000 0.000000 0.000000 sr} bdef
-/c3 { 0.000000 0.820000 0.000000 sr} bdef
-/c4 { 0.000000 0.000000 0.800000 sr} bdef
-/c5 { 0.910000 0.820000 0.320000 sr} bdef
-/c6 { 1.000000 0.260000 0.820000 sr} bdef
-/c7 { 0.000000 0.820000 0.820000 sr} bdef
-c0
-1 j
-1 sg
- 0 0 4977 6136 rf
-6 w
-0 5000 3856 0 0 -5000 647 5460 4 MP
-PP
--3856 0 0 5000 3856 0 0 -5000 647 5460 5 MP stroke
-4 w
-DO
-SO
-6 w
-0 sg
- 647 5460 mt 4503 5460 L
- 647 460 mt 4503 460 L
- 647 5460 mt 647 460 L
-4503 5460 mt 4503 460 L
- 647 5460 mt 4503 5460 L
- 647 5460 mt 647 460 L
- 754 5460 mt 754 5410 L
- 754 460 mt 754 510 L
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 168 FMSR
-
- 708 5650 mt
-(1) s
-1076 5460 mt 1076 5410 L
-1076 460 mt 1076 510 L
-1053 5650 mt
-( ) s
-1265 5460 mt 1265 5410 L
-1265 460 mt 1265 510 L
-1242 5650 mt
-( ) s
-1398 5460 mt 1398 5410 L
-1398 460 mt 1398 510 L
-1375 5650 mt
-( ) s
-1502 5460 mt 1502 5410 L
-1502 460 mt 1502 510 L
-1479 5650 mt
-( ) s
-1587 5460 mt 1587 5410 L
-1587 460 mt 1587 510 L
-1564 5650 mt
-( ) s
-1659 5460 mt 1659 5410 L
-1659 460 mt 1659 510 L
-1636 5650 mt
-( ) s
-1721 5460 mt 1721 5410 L
-1721 460 mt 1721 510 L
-1698 5650 mt
-( ) s
-1776 5460 mt 1776 5410 L
-1776 460 mt 1776 510 L
-1753 5650 mt
-( ) s
-1825 5460 mt 1825 5410 L
-1825 460 mt 1825 510 L
-1732 5650 mt
-(10) s
-2147 5460 mt 2147 5410 L
-2147 460 mt 2147 510 L
-2124 5650 mt
-( ) s
-2336 5460 mt 2336 5410 L
-2336 460 mt 2336 510 L
-2313 5650 mt
-( ) s
-2470 5460 mt 2470 5410 L
-2470 460 mt 2470 510 L
-2447 5650 mt
-( ) s
-2573 5460 mt 2573 5410 L
-2573 460 mt 2573 510 L
-2550 5650 mt
-( ) s
-2658 5460 mt 2658 5410 L
-2658 460 mt 2658 510 L
-2635 5650 mt
-( ) s
-2730 5460 mt 2730 5410 L
-2730 460 mt 2730 510 L
-2707 5650 mt
-( ) s
-2792 5460 mt 2792 5410 L
-2792 460 mt 2792 510 L
-2769 5650 mt
-( ) s
-2847 5460 mt 2847 5410 L
-2847 460 mt 2847 510 L
-2824 5650 mt
-( ) s
-2896 5460 mt 2896 5410 L
-2896 460 mt 2896 510 L
-2756 5650 mt
-(100) s
-3218 5460 mt 3218 5410 L
-3218 460 mt 3218 510 L
-3195 5650 mt
-( ) s
-3407 5460 mt 3407 5410 L
-3407 460 mt 3407 510 L
-3384 5650 mt
-( ) s
-3541 5460 mt 3541 5410 L
-3541 460 mt 3541 510 L
-3518 5650 mt
-( ) s
-3645 5460 mt 3645 5410 L
-3645 460 mt 3645 510 L
-3622 5650 mt
-( ) s
-3729 5460 mt 3729 5410 L
-3729 460 mt 3729 510 L
-3706 5650 mt
-( ) s
-3801 5460 mt 3801 5410 L
-3801 460 mt 3801 510 L
-3778 5650 mt
-( ) s
-3863 5460 mt 3863 5410 L
-3863 460 mt 3863 510 L
-3840 5650 mt
-( ) s
-3918 5460 mt 3918 5410 L
-3918 460 mt 3918 510 L
-3895 5650 mt
-( ) s
-3967 5460 mt 3967 5410 L
-3967 460 mt 3967 510 L
-3781 5650 mt
-(1000) s
-4289 5460 mt 4289 5410 L
-4289 460 mt 4289 510 L
-4266 5650 mt
-( ) s
-4478 5460 mt 4478 5410 L
-4478 460 mt 4478 510 L
-4455 5650 mt
-( ) s
- 647 5321 mt 697 5321 L
-4503 5321 mt 4453 5321 L
- 379 5383 mt
-(0.1) s
- 647 4903 mt 697 4903 L
-4503 4903 mt 4453 4903 L
- 566 4965 mt
-( ) s
- 647 4658 mt 697 4658 L
-4503 4658 mt 4453 4658 L
- 566 4720 mt
-( ) s
- 647 4484 mt 697 4484 L
-4503 4484 mt 4453 4484 L
- 566 4546 mt
-( ) s
- 647 4350 mt 697 4350 L
-4503 4350 mt 4453 4350 L
- 566 4412 mt
-( ) s
- 647 4240 mt 697 4240 L
-4503 4240 mt 4453 4240 L
- 566 4302 mt
-( ) s
- 647 4147 mt 697 4147 L
-4503 4147 mt 4453 4147 L
- 566 4209 mt
-( ) s
- 647 4066 mt 697 4066 L
-4503 4066 mt 4453 4066 L
- 566 4128 mt
-( ) s
- 647 3995 mt 697 3995 L
-4503 3995 mt 4453 3995 L
- 566 4057 mt
-( ) s
- 647 3932 mt 697 3932 L
-4503 3932 mt 4453 3932 L
- 519 3994 mt
-(1) s
- 647 3514 mt 697 3514 L
-4503 3514 mt 4453 3514 L
- 566 3576 mt
-( ) s
- 647 3269 mt 697 3269 L
-4503 3269 mt 4453 3269 L
- 566 3331 mt
-( ) s
- 647 3096 mt 697 3096 L
-4503 3096 mt 4453 3096 L
- 566 3158 mt
-( ) s
- 647 2961 mt 697 2961 L
-4503 2961 mt 4453 2961 L
- 566 3023 mt
-( ) s
- 647 2851 mt 697 2851 L
-4503 2851 mt 4453 2851 L
- 566 2913 mt
-( ) s
- 647 2758 mt 697 2758 L
-4503 2758 mt 4453 2758 L
- 566 2820 mt
-( ) s
- 647 2677 mt 697 2677 L
-4503 2677 mt 4453 2677 L
- 566 2739 mt
-( ) s
- 647 2606 mt 697 2606 L
-4503 2606 mt 4453 2606 L
- 566 2668 mt
-( ) s
- 647 2543 mt 697 2543 L
-4503 2543 mt 4453 2543 L
- 426 2605 mt
-(10) s
- 647 2125 mt 697 2125 L
-4503 2125 mt 4453 2125 L
- 566 2187 mt
-( ) s
- 647 1880 mt 697 1880 L
-4503 1880 mt 4453 1880 L
- 566 1942 mt
-( ) s
- 647 1707 mt 697 1707 L
-4503 1707 mt 4453 1707 L
- 566 1769 mt
-( ) s
- 647 1572 mt 697 1572 L
-4503 1572 mt 4453 1572 L
- 566 1634 mt
-( ) s
- 647 1462 mt 697 1462 L
-4503 1462 mt 4453 1462 L
- 566 1524 mt
-( ) s
- 647 1369 mt 697 1369 L
-4503 1369 mt 4453 1369 L
- 566 1431 mt
-( ) s
- 647 1289 mt 697 1289 L
-4503 1289 mt 4453 1289 L
- 566 1351 mt
-( ) s
- 647 1217 mt 697 1217 L
-4503 1217 mt 4453 1217 L
- 566 1279 mt
-( ) s
- 647 1154 mt 697 1154 L
-4503 1154 mt 4453 1154 L
- 332 1216 mt
-(100) s
- 647 736 mt 697 736 L
-4503 736 mt 4453 736 L
- 566 798 mt
-( ) s
- 647 491 mt 697 491 L
-4503 491 mt 4453 491 L
- 566 553 mt
-( ) s
- 647 5460 mt 4503 5460 L
- 647 460 mt 4503 460 L
- 647 5460 mt 647 460 L
-4503 5460 mt 4503 460 L
-gs 647 460 3857 5001 rc
-18 w
-0.7 sg
-265 -2 266 -1 265 -1 265 -1 265 -1 266 -1 265 -1 265 -1
-266 -2 265 -1 265 -1 265 -1 266 -3 886 3932 14 MP stroke
-6 w
-gr
-
-0.7 sg
-0 sg
- 850 3932 mt 922 3932 L
- 886 3896 mt 886 3968 L
-1116 3929 mt 1188 3929 L
-1152 3893 mt 1152 3965 L
-1381 3928 mt 1453 3928 L
-1417 3892 mt 1417 3964 L
-1646 3927 mt 1718 3927 L
-1682 3891 mt 1682 3963 L
-1911 3926 mt 1983 3926 L
-1947 3890 mt 1947 3962 L
-2177 3924 mt 2249 3924 L
-2213 3888 mt 2213 3960 L
-2442 3923 mt 2514 3923 L
-2478 3887 mt 2478 3959 L
-2707 3922 mt 2779 3922 L
-2743 3886 mt 2743 3958 L
-2973 3922 mt 3045 3922 L
-3009 3886 mt 3009 3958 L
-3238 3919 mt 3310 3919 L
-3274 3883 mt 3274 3955 L
-3503 3919 mt 3575 3919 L
-3539 3883 mt 3539 3955 L
-3768 3919 mt 3840 3919 L
-3804 3883 mt 3804 3955 L
-4034 3917 mt 4106 3917 L
-4070 3881 mt 4070 3953 L
-4299 3918 mt 4371 3918 L
-4335 3882 mt 4335 3954 L
- 861 3907 mt 911 3957 L
- 911 3907 mt 861 3957 L
-1127 3904 mt 1177 3954 L
-1177 3904 mt 1127 3954 L
-1392 3903 mt 1442 3953 L
-1442 3903 mt 1392 3953 L
-1657 3902 mt 1707 3952 L
-1707 3902 mt 1657 3952 L
-1922 3901 mt 1972 3951 L
-1972 3901 mt 1922 3951 L
-2188 3899 mt 2238 3949 L
-2238 3899 mt 2188 3949 L
-2453 3898 mt 2503 3948 L
-2503 3898 mt 2453 3948 L
-2718 3897 mt 2768 3947 L
-2768 3897 mt 2718 3947 L
-2984 3897 mt 3034 3947 L
-3034 3897 mt 2984 3947 L
-3249 3894 mt 3299 3944 L
-3299 3894 mt 3249 3944 L
-3514 3894 mt 3564 3944 L
-3564 3894 mt 3514 3944 L
-3779 3894 mt 3829 3944 L
-3829 3894 mt 3779 3944 L
-4045 3892 mt 4095 3942 L
-4095 3892 mt 4045 3942 L
-4310 3893 mt 4360 3943 L
-4360 3893 mt 4310 3943 L
-gs 647 460 3857 5001 rc
-gr
-
- 36 36 886 3932 FO
- 36 36 1152 3929 FO
- 36 36 1417 3928 FO
- 36 36 1682 3927 FO
- 36 36 1947 3926 FO
- 36 36 2213 3924 FO
- 36 36 2478 3923 FO
- 36 36 2743 3922 FO
- 36 36 3009 3921 FO
- 36 36 3274 3919 FO
- 36 36 3539 3919 FO
- 36 36 3804 3918 FO
- 36 36 4070 3916 FO
- 36 36 4335 3920 FO
-gs 647 460 3857 5001 rc
-18 w
-0.7 sg
-265 -9 266 -9 265 -9 265 -10 265 -9 266 -10 265 -10 265 -10
-266 -11 265 -11 265 -10 265 -14 266 -23 886 3932 14 MP stroke
-DA
-265 -32 266 -32 265 -36 265 -37 265 -40 266 -43 265 -47 265 -50
-266 -56 265 -61 265 -57 265 -84 266 -129 886 5241 14 MP stroke
-SO
-6 w
-gr
-
-0.7 sg
-0 sg
- 850 3932 mt 922 3932 L
- 886 3896 mt 886 3968 L
-1116 3909 mt 1188 3909 L
-1152 3873 mt 1152 3945 L
-1381 3895 mt 1453 3895 L
-1417 3859 mt 1417 3931 L
-1646 3885 mt 1718 3885 L
-1682 3849 mt 1682 3921 L
-1911 3874 mt 1983 3874 L
-1947 3838 mt 1947 3910 L
-2177 3863 mt 2249 3863 L
-2213 3827 mt 2213 3899 L
-2442 3854 mt 2514 3854 L
-2478 3818 mt 2478 3890 L
-2707 3843 mt 2779 3843 L
-2743 3807 mt 2743 3879 L
-2973 3833 mt 3045 3833 L
-3009 3797 mt 3009 3869 L
-3238 3822 mt 3310 3822 L
-3274 3786 mt 3274 3858 L
-3503 3816 mt 3575 3816 L
-3539 3780 mt 3539 3852 L
-3768 3806 mt 3840 3806 L
-3804 3770 mt 3804 3842 L
-4034 3802 mt 4106 3802 L
-4070 3766 mt 4070 3838 L
-4299 3782 mt 4371 3782 L
-4335 3746 mt 4335 3818 L
- 861 3907 mt 911 3957 L
- 911 3907 mt 861 3957 L
-1127 3884 mt 1177 3934 L
-1177 3884 mt 1127 3934 L
-1392 3870 mt 1442 3920 L
-1442 3870 mt 1392 3920 L
-1657 3860 mt 1707 3910 L
-1707 3860 mt 1657 3910 L
-1922 3849 mt 1972 3899 L
-1972 3849 mt 1922 3899 L
-2188 3838 mt 2238 3888 L
-2238 3838 mt 2188 3888 L
-2453 3829 mt 2503 3879 L
-2503 3829 mt 2453 3879 L
-2718 3818 mt 2768 3868 L
-2768 3818 mt 2718 3868 L
-2984 3808 mt 3034 3858 L
-3034 3808 mt 2984 3858 L
-3249 3797 mt 3299 3847 L
-3299 3797 mt 3249 3847 L
-3514 3791 mt 3564 3841 L
-3564 3791 mt 3514 3841 L
-3779 3781 mt 3829 3831 L
-3829 3781 mt 3779 3831 L
-4045 3777 mt 4095 3827 L
-4095 3777 mt 4045 3827 L
-4310 3757 mt 4360 3807 L
-4360 3757 mt 4310 3807 L
-gs 647 460 3857 5001 rc
-gr
-
- 36 36 886 3932 FO
- 36 36 1152 3910 FO
- 36 36 1417 3895 FO
- 36 36 1682 3885 FO
- 36 36 1947 3872 FO
- 36 36 2213 3860 FO
- 36 36 2478 3848 FO
- 36 36 2743 3834 FO
- 36 36 3009 3821 FO
- 36 36 3274 3809 FO
- 36 36 3539 3798 FO
- 36 36 3804 3788 FO
- 36 36 4070 3771 FO
- 36 36 4335 3742 FO
-gs 647 460 3857 5001 rc
-18 w
-0.7 sg
-265 -33 266 -34 265 -36 265 -38 265 -42 266 -44 265 -49 265 -52
-266 -60 265 -66 265 -62 265 -96 266 -158 886 3932 14 MP stroke
-DA
-265 -40 266 -43 265 -46 265 -50 265 -55 266 -60 265 -67 265 -75
-266 -87 265 -100 265 -96 265 -148 266 -242 886 4398 14 MP stroke
-SO
-6 w
-gr
-
-0.7 sg
-0 sg
- 850 3932 mt 922 3932 L
- 886 3896 mt 886 3968 L
-1116 3774 mt 1188 3774 L
-1152 3738 mt 1152 3810 L
-1381 3678 mt 1453 3678 L
-1417 3642 mt 1417 3714 L
-1646 3616 mt 1718 3616 L
-1682 3580 mt 1682 3652 L
-1911 3549 mt 1983 3549 L
-1947 3513 mt 1947 3585 L
-2177 3490 mt 2249 3490 L
-2213 3454 mt 2213 3526 L
-2442 3438 mt 2514 3438 L
-2478 3402 mt 2478 3474 L
-2707 3390 mt 2779 3390 L
-2743 3354 mt 2743 3426 L
-2973 3344 mt 3045 3344 L
-3009 3308 mt 3009 3380 L
-3238 3303 mt 3310 3303 L
-3274 3267 mt 3274 3339 L
-3503 3267 mt 3575 3267 L
-3539 3231 mt 3539 3303 L
-3768 3228 mt 3840 3228 L
-3804 3192 mt 3804 3264 L
-4034 3193 mt 4106 3193 L
-4070 3157 mt 4070 3229 L
-4299 3160 mt 4371 3160 L
-4335 3124 mt 4335 3196 L
- 861 3907 mt 911 3957 L
- 911 3907 mt 861 3957 L
-1127 3749 mt 1177 3799 L
-1177 3749 mt 1127 3799 L
-1392 3653 mt 1442 3703 L
-1442 3653 mt 1392 3703 L
-1657 3591 mt 1707 3641 L
-1707 3591 mt 1657 3641 L
-1922 3524 mt 1972 3574 L
-1972 3524 mt 1922 3574 L
-2188 3465 mt 2238 3515 L
-2238 3465 mt 2188 3515 L
-2453 3413 mt 2503 3463 L
-2503 3413 mt 2453 3463 L
-2718 3365 mt 2768 3415 L
-2768 3365 mt 2718 3415 L
-2984 3319 mt 3034 3369 L
-3034 3319 mt 2984 3369 L
-3249 3278 mt 3299 3328 L
-3299 3278 mt 3249 3328 L
-3514 3242 mt 3564 3292 L
-3564 3242 mt 3514 3292 L
-3779 3203 mt 3829 3253 L
-3829 3203 mt 3779 3253 L
-4045 3168 mt 4095 3218 L
-4095 3168 mt 4045 3218 L
-4310 3135 mt 4360 3185 L
-4360 3135 mt 4310 3185 L
-gs 647 460 3857 5001 rc
-gr
-
- 36 36 886 3932 FO
- 36 36 1152 3825 FO
- 36 36 1417 3737 FO
- 36 36 1682 3663 FO
- 36 36 1947 3567 FO
- 36 36 2213 3455 FO
- 36 36 2478 3330 FO
- 36 36 2743 3183 FO
- 36 36 3009 3003 FO
- 36 36 3274 2790 FO
- 36 36 3539 2539 FO
- 36 36 3804 2234 FO
- 36 36 4070 1938 FO
- 36 36 4335 1575 FO
-gs 647 460 3857 5001 rc
-18 w
-0.7 sg
-265 -54 266 -58 265 -65 265 -72 265 -82 266 -95 265 -109 265 -127
-266 -155 265 -182 265 -180 265 -268 266 -415 886 3932 14 MP stroke
-DA
-265 -56 266 -59 265 -68 265 -74 265 -85 266 -99 265 -113 265 -133
-266 -162 265 -191 265 -187 265 -277 266 -425 886 4013 14 MP stroke
-SO
-6 w
-gr
-
-0.7 sg
-0 sg
- 850 3932 mt 922 3932 L
- 886 3896 mt 886 3968 L
-1116 3517 mt 1188 3517 L
-1152 3481 mt 1152 3553 L
-1381 3249 mt 1453 3249 L
-1417 3213 mt 1417 3285 L
-1646 3069 mt 1718 3069 L
-1682 3033 mt 1682 3105 L
-1911 2887 mt 1983 2887 L
-1947 2851 mt 1947 2923 L
-2177 2732 mt 2249 2732 L
-2213 2696 mt 2213 2768 L
-2442 2605 mt 2514 2605 L
-2478 2569 mt 2478 2641 L
-2707 2496 mt 2779 2496 L
-2743 2460 mt 2743 2532 L
-2973 2401 mt 3045 2401 L
-3009 2365 mt 3009 2437 L
-3238 2319 mt 3310 2319 L
-3274 2283 mt 3274 2355 L
-3503 2247 mt 3575 2247 L
-3539 2211 mt 3539 2283 L
-3768 2181 mt 3840 2181 L
-3804 2145 mt 3804 2217 L
-4034 2124 mt 4106 2124 L
-4070 2088 mt 4070 2160 L
-4299 2069 mt 4371 2069 L
-4335 2033 mt 4335 2105 L
- 861 3907 mt 911 3957 L
- 911 3907 mt 861 3957 L
-1127 3492 mt 1177 3542 L
-1177 3492 mt 1127 3542 L
-1392 3224 mt 1442 3274 L
-1442 3224 mt 1392 3274 L
-1657 3044 mt 1707 3094 L
-1707 3044 mt 1657 3094 L
-1922 2862 mt 1972 2912 L
-1972 2862 mt 1922 2912 L
-2188 2707 mt 2238 2757 L
-2238 2707 mt 2188 2757 L
-2453 2580 mt 2503 2630 L
-2503 2580 mt 2453 2630 L
-2718 2471 mt 2768 2521 L
-2768 2471 mt 2718 2521 L
-2984 2376 mt 3034 2426 L
-3034 2376 mt 2984 2426 L
-3249 2294 mt 3299 2344 L
-3299 2294 mt 3249 2344 L
-3514 2222 mt 3564 2272 L
-3564 2222 mt 3514 2272 L
-3779 2156 mt 3829 2206 L
-3829 2156 mt 3779 2206 L
-4045 2099 mt 4095 2149 L
-4095 2099 mt 4045 2149 L
-4310 2044 mt 4360 2094 L
-4360 2044 mt 4310 2094 L
-gs 647 460 3857 5001 rc
-gr
-
- 36 36 886 3932 FO
- 36 36 1152 3713 FO
- 36 36 1417 3510 FO
- 36 36 1682 3318 FO
- 36 36 1947 3048 FO
- 36 36 2213 2733 FO
- 36 36 2478 2401 FO
- 36 36 2743 2061 FO
- 36 36 3009 1720 FO
- 36 36 3274 1380 FO
- 36 36 3539 1045 FO
- 36 36 3804 746 FO
-gs 647 460 3857 5001 rc
-gr
-
- 240 4103 mt -90 rotate
-(Mean number of lexical entries) s
-90 rotate
-1812 5794 mt
-(Word frequency \(n) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 132 FMSR
-
-3188 5878 mt
-(w) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 168 FMSR
-
-3283 5794 mt
-(\)) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 120 FMSR
-
- 630 5503 mt
-( ) s
-4487 502 mt
-( ) s
-1 sg
-0 846 2267 0 0 -846 707 1366 4 MP
-PP
--2267 0 0 846 2267 0 0 -846 707 1366 5 MP stroke
-4 w
-DO
-SO
-6 w
-0 sg
- 707 1366 mt 2974 1366 L
- 707 520 mt 2974 520 L
- 707 1366 mt 707 520 L
-2974 1366 mt 2974 520 L
- 707 1366 mt 2974 1366 L
- 707 1366 mt 707 520 L
- 707 1366 mt 2974 1366 L
- 707 520 mt 2974 520 L
- 707 1366 mt 707 520 L
-2974 1366 mt 2974 520 L
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 168 FMSR
-
-1183 698 mt
-(Expectation) s
-gs 707 520 2268 847 rc
-18 w
-0.7 sg
-365 0 780 637 2 MP stroke
-gr
-
-18 w
-0.7 sg
-0 sg
-1183 902 mt
-(Antoniak approx.) s
-gs 707 520 2268 847 rc
-DA
-0.7 sg
-365 0 780 841 2 MP stroke
-SO
-gr
-
-0.7 sg
-0 sg
-1183 1105 mt
-(Empirical, fixed base) s
-gs 707 520 2268 847 rc
-6 w
-gs 889 971 147 147 rc
- 926 1044 mt 998 1044 L
- 962 1008 mt 962 1080 L
- 937 1019 mt 987 1069 L
- 987 1019 mt 937 1069 L
-gr
-
-gr
-
-6 w
-1183 1309 mt
-(Empirical, inferred base) s
-gs 707 520 2268 847 rc
-gs 889 1175 147 147 rc
- 36 36 962 1248 FO
-gr
-
-gr
-
-
-end %%Color Dict
-
-eplot
-%%EndObject
-
-epage
-end
-
-showpage
-
-%%Trailer
-%%EOF
diff --git a/report/pyp_clustering/acl09-short/code/cohnpred.m b/report/pyp_clustering/acl09-short/code/cohnpred.m
deleted file mode 100644
index 35a49605..00000000
--- a/report/pyp_clustering/acl09-short/code/cohnpred.m
+++ /dev/null
@@ -1,12 +0,0 @@
-function output = cohnpred(input,b)
-
-uniqin = unique(input);
-prediction = zeros(max(input),1);
-
-p0=1/30114;
-for i = 1:length(uniqin)
- prediction(uniqin(i)) = b*p0*(psi(b*p0+uniqin(i)) - psi(b*p0));
-end
-
-output = prediction(input);
-
diff --git a/report/pyp_clustering/acl09-short/code/cokus.c b/report/pyp_clustering/acl09-short/code/cokus.c
deleted file mode 100644
index 3a959c0f..00000000
--- a/report/pyp_clustering/acl09-short/code/cokus.c
+++ /dev/null
@@ -1,167 +0,0 @@
-// This is the ``Mersenne Twister'' random number generator MT19937, which
-// generates pseudorandom integers uniformly distributed in 0..(2^32 - 1)
-// starting from any odd seed in 0..(2^32 - 1). This version is a recode
-// by Shawn Cokus (Cokus@math.washington.edu) on March 8, 1998 of a version by
-// Takuji Nishimura (who had suggestions from Topher Cooper and Marc Rieffel in
-// July-August 1997).
-//
-// Effectiveness of the recoding (on Goedel2.math.washington.edu, a DEC Alpha
-// running OSF/1) using GCC -O3 as a compiler: before recoding: 51.6 sec. to
-// generate 300 million random numbers; after recoding: 24.0 sec. for the same
-// (i.e., 46.5% of original time), so speed is now about 12.5 million random
-// number generations per second on this machine.
-//
-// According to the URL <http://www.math.keio.ac.jp/~matumoto/emt.html>
-// (and paraphrasing a bit in places), the Mersenne Twister is ``designed
-// with consideration of the flaws of various existing generators,'' has
-// a period of 2^19937 - 1, gives a sequence that is 623-dimensionally
-// equidistributed, and ``has passed many stringent tests, including the
-// die-hard test of G. Marsaglia and the load test of P. Hellekalek and
-// S. Wegenkittl.'' It is efficient in memory usage (typically using 2506
-// to 5012 bytes of static data, depending on data type sizes, and the code
-// is quite short as well). It generates random numbers in batches of 624
-// at a time, so the caching and pipelining of modern systems is exploited.
-// It is also divide- and mod-free.
-//
-// This library is free software; you can redistribute it and/or modify it
-// under the terms of the GNU Library General Public License as published by
-// the Free Software Foundation (either version 2 of the License or, at your
-// option, any later version). This library is distributed in the hope that
-// it will be useful, but WITHOUT ANY WARRANTY, without even the implied
-// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
-// the GNU Library General Public License for more details. You should have
-// received a copy of the GNU Library General Public License along with this
-// library; if not, write to the Free Software Foundation, Inc., 59 Temple
-// Place, Suite 330, Boston, MA 02111-1307, USA.
-//
-// The code as Shawn received it included the following notice:
-//
-// Copyright (C) 1997 Makoto Matsumoto and Takuji Nishimura. When
-// you use this, send an e-mail to <matumoto@math.keio.ac.jp> with
-// an appropriate reference to your work.
-//
-// It would be nice to CC: <Cokus@math.washington.edu> when you write.
-//
-
-#include <stdio.h>
-#include <stdlib.h>
-
-//
-// uint32 must be an unsigned integer type capable of holding at least 32
-// bits; exactly 32 should be fastest, but 64 is better on an Alpha with
-// GCC at -O3 optimization so try your options and see what's best for you
-//
-
-typedef unsigned long uint32;
-
-#define N (624) // length of state vector
-#define M (397) // a period parameter
-#define K (0x9908B0DFU) // a magic constant
-#define hiBit(u) ((u) & 0x80000000U) // mask all but highest bit of u
-#define loBit(u) ((u) & 0x00000001U) // mask all but lowest bit of u
-#define loBits(u) ((u) & 0x7FFFFFFFU) // mask the highest bit of u
-#define mixBits(u, v) (hiBit(u)|loBits(v)) // move hi bit of u to hi bit of v
-
-static uint32 state[N+1]; // state vector + 1 extra to not violate ANSI C
-static uint32 *next; // next random value is computed from here
-static int left = -1; // can *next++ this many times before reloading
-
-
-void seedMT(uint32 seed)
- {
- //
- // We initialize state[0..(N-1)] via the generator
- //
- // x_new = (69069 * x_old) mod 2^32
- //
- // from Line 15 of Table 1, p. 106, Sec. 3.3.4 of Knuth's
- // _The Art of Computer Programming_, Volume 2, 3rd ed.
- //
- // Notes (SJC): I do not know what the initial state requirements
- // of the Mersenne Twister are, but it seems this seeding generator
- // could be better. It achieves the maximum period for its modulus
- // (2^30) iff x_initial is odd (p. 20-21, Sec. 3.2.1.2, Knuth); if
- // x_initial can be even, you have sequences like 0, 0, 0, ...;
- // 2^31, 2^31, 2^31, ...; 2^30, 2^30, 2^30, ...; 2^29, 2^29 + 2^31,
- // 2^29, 2^29 + 2^31, ..., etc. so I force seed to be odd below.
- //
- // Even if x_initial is odd, if x_initial is 1 mod 4 then
- //
- // the lowest bit of x is always 1,
- // the next-to-lowest bit of x is always 0,
- // the 2nd-from-lowest bit of x alternates ... 0 1 0 1 0 1 0 1 ... ,
- // the 3rd-from-lowest bit of x 4-cycles ... 0 1 1 0 0 1 1 0 ... ,
- // the 4th-from-lowest bit of x has the 8-cycle ... 0 0 0 1 1 1 1 0 ... ,
- // ...
- //
- // and if x_initial is 3 mod 4 then
- //
- // the lowest bit of x is always 1,
- // the next-to-lowest bit of x is always 1,
- // the 2nd-from-lowest bit of x alternates ... 0 1 0 1 0 1 0 1 ... ,
- // the 3rd-from-lowest bit of x 4-cycles ... 0 0 1 1 0 0 1 1 ... ,
- // the 4th-from-lowest bit of x has the 8-cycle ... 0 0 1 1 1 1 0 0 ... ,
- // ...
- //
- // The generator's potency (min. s>=0 with (69069-1)^s = 0 mod 2^32) is
- // 16, which seems to be alright by p. 25, Sec. 3.2.1.3 of Knuth. It
- // also does well in the dimension 2..5 spectral tests, but it could be
- // better in dimension 6 (Line 15, Table 1, p. 106, Sec. 3.3.4, Knuth).
- //
- // Note that the random number user does not see the values generated
- // here directly since reloadMT() will always munge them first, so maybe
- // none of all of this matters. In fact, the seed values made here could
- // even be extra-special desirable if the Mersenne Twister theory says
- // so-- that's why the only change I made is to restrict to odd seeds.
- //
-
- register uint32 x = (seed | 1U) & 0xFFFFFFFFU, *s = state;
- register int j;
-
- for(left=0, *s++=x, j=N; --j;
- *s++ = (x*=69069U) & 0xFFFFFFFFU);
- }
-
-
-uint32 reloadMT(void)
- {
- register uint32 *p0=state, *p2=state+2, *pM=state+M, s0, s1;
- register int j;
-
- if(left < -1)
- seedMT(4357U);
-
- left=N-1, next=state+1;
-
- for(s0=state[0], s1=state[1], j=N-M+1; --j; s0=s1, s1=*p2++)
- *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U);
-
- for(pM=state, j=M; --j; s0=s1, s1=*p2++)
- *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U);
-
- s1=state[0], *p0 = *pM ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U);
- s1 ^= (s1 >> 11);
- s1 ^= (s1 << 7) & 0x9D2C5680U;
- s1 ^= (s1 << 15) & 0xEFC60000U;
- return(s1 ^ (s1 >> 18));
- }
-
-
-inline uint32 randomMT(void)
- {
- uint32 y;
-
- if(--left < 0)
- return(reloadMT());
-
- y = *next++;
- y ^= (y >> 11);
- y ^= (y << 7) & 0x9D2C5680U;
- y ^= (y << 15) & 0xEFC60000U;
- y ^= (y >> 18);
- return(y);
- }
-
-
-
-
diff --git a/report/pyp_clustering/acl09-short/code/crppred.m b/report/pyp_clustering/acl09-short/code/crppred.m
deleted file mode 100644
index 17f22652..00000000
--- a/report/pyp_clustering/acl09-short/code/crppred.m
+++ /dev/null
@@ -1,12 +0,0 @@
-function output = crppred(input,b)
-% CRPPRED  For each count n in input, return a*sum_{k=1..n} 1/(a+k-1), where a = b*p0.
-uniqin = unique(input); % evaluate each distinct count only once
-prediction = zeros(max(input),1); % lookup table indexed by count, so input must hold positive integers
-
-p0=1/30114; % NOTE(review): hard-coded base probability; presumably 1/(number of word types in the data) -- confirm
-for i = 1:length(uniqin)
- prediction(uniqin(i)) = b*p0*sum(1./((1:uniqin(i))+b*p0-1));
-end
-
-output = prediction(input); % look each original entry (duplicates included) up in the table
-
diff --git a/report/pyp_clustering/acl09-short/code/crppred_geom.m b/report/pyp_clustering/acl09-short/code/crppred_geom.m
deleted file mode 100644
index e6869e4f..00000000
--- a/report/pyp_clustering/acl09-short/code/crppred_geom.m
+++ /dev/null
@@ -1,12 +0,0 @@
-function output = crppred_geom(input,lengths,b)
-% CRPPRED_GEOM  For each i, return a(i)*sum_{k=1..input(i)} 1/(a(i)+k-1), where a(i) = b*(1/52)^lengths(i).
-% NOTE(review): input and lengths are indexed in parallel, so they presumably have the same number of elements -- confirm.
-output = zeros(length(input),1); % column vector, one entry per element of input
-
-p0=(1/52).^lengths; % base probability decays geometrically with length; 52 is presumably the alphabet size (a-z, A-Z) -- confirm
-a=b*p0;
-for i = 1:length(input)
- output(i) = a(i)*sum(1./((1:input(i))+a(i)-1));
-end
-
-
diff --git a/report/pyp_clustering/acl09-short/code/logbinmean.m b/report/pyp_clustering/acl09-short/code/logbinmean.m
deleted file mode 100644
index 23dbb0ac..00000000
--- a/report/pyp_clustering/acl09-short/code/logbinmean.m
+++ /dev/null
@@ -1,38 +0,0 @@
-function [ logbinsvalid , meanval, seval ] = logbinmean( frequency, typecount, NBINS , MinCounts );
-% LOGBINMEAN  Mean and standard error of typecount within logarithmically spaced bins of frequency.
-% Bin edges are NBINS points evenly spaced in log10 between 1 and max(frequency), giving NBINS-1 bins.
-Maxfrequency = max( frequency );
-meanK = mean( frequency ); % NOTE(review): unused below
-linbins = linspace( log10(1) , log10( Maxfrequency ) , NBINS ); % bin edges in log10 units
-stepb = linbins( 2 ) - linbins( 1 ); % NOTE(review): unused below
-
-logbins = 10.^linbins; % the same edges on the original scale
-
-% One pass per bin: record its centre, the mean and standard error of typecount, and how many items it holds.
-LL = length( linbins ) - 1; % number of bins
-for i=1:LL
- lowb = linbins( i );
- highb = linbins( i+1 );
- linbinsout( i ) = (highb + lowb) / 2; % bin centre in log10 units
-
- lowb = logbins( i ); % from here on lowb/highb are the edges on the original scale
- highb = logbins( i+1 );
- step = highb - lowb; % NOTE(review): unused below
- logbinsout( i ) = 10^linbinsout( i ); % bin centre on the original scale (geometric midpoint of the two edges)
-
- indices = find( frequency >= lowb & frequency < highb); % half-open [lowb, highb); NOTE(review): items equal to the top edge (the maximum frequency) appear to land in no bin -- confirm
-
- meanval(i) = mean(typecount(indices));
- rawcounts(i) = length(indices); % bin occupancy, used by the MinCounts filter after the loop
- seval(i) = std(typecount(indices))./sqrt(rawcounts(i)); % standard error of the mean
-
-end
-
-valid = 1:LL;
-valid( find( rawcounts <= MinCounts )) = []; % drop bins holding MinCounts items or fewer
-
-linbinsvalid = linbinsout( valid ); % NOTE(review): computed but not returned
-logbinsvalid = logbinsout( valid );
-
-meanval = meanval( valid );
-seval = seval( valid );
diff --git a/report/pyp_clustering/acl09-short/code/noP0pred.m b/report/pyp_clustering/acl09-short/code/noP0pred.m
deleted file mode 100644
index f72f1432..00000000
--- a/report/pyp_clustering/acl09-short/code/noP0pred.m
+++ /dev/null
@@ -1,11 +0,0 @@
-function output = antoniakpred(input,b)
-% For each count n in input, return b*log((b+n)/b). NOTE(review): the declared name is antoniakpred but this file is noP0pred.m -- confirm which is intended.
-uniqin = unique(input); % evaluate each distinct count only once
-prediction = zeros(max(input),1); % lookup table indexed by count, so input must hold positive integers
-
-for i = 1:length(uniqin)
- prediction(uniqin(i)) = b*log((b+uniqin(i))/b);
-end
-
-output = prediction(input); % look each original entry (duplicates included) up in the table
-
diff --git a/report/pyp_clustering/acl09-short/code/plot0.eps b/report/pyp_clustering/acl09-short/code/plot0.eps
deleted file mode 100644
index 6094346a..00000000
--- a/report/pyp_clustering/acl09-short/code/plot0.eps
+++ /dev/null
@@ -1,633 +0,0 @@
-%!PS-Adobe-3.0 EPSF-3.0
-%%Creator: MATLAB, The Mathworks, Inc. Version 7.7.0.471 (R2008b). Operating System: Linux 2.6.18-128.1.6.el5.inf.1PAE #1 SMP Wed Apr 15 10:23:41 BST 2009 i686.
-%%Title: /afs/inf.ed.ac.uk/user/s/sgwater/research/papers/2009/acl09-short/code/plot0.eps
-%%CreationDate: 07/23/2009 17:36:19
-%%DocumentNeededFonts: Helvetica
-%%DocumentProcessColors: Cyan Magenta Yellow Black
-%%LanguageLevel: 2
-%%Pages: 1
-%%BoundingBox: -44 170 641 672
-%%EndComments
-
-%%BeginProlog
-% MathWorks dictionary
-/MathWorks 160 dict begin
-% definition operators
-/bdef {bind def} bind def
-/ldef {load def} bind def
-/xdef {exch def} bdef
-/xstore {exch store} bdef
-% operator abbreviations
-/c /clip ldef
-/cc /concat ldef
-/cp /closepath ldef
-/gr /grestore ldef
-/gs /gsave ldef
-/mt /moveto ldef
-/np /newpath ldef
-/cm /currentmatrix ldef
-/sm /setmatrix ldef
-/rm /rmoveto ldef
-/rl /rlineto ldef
-/s {show newpath} bdef
-/sc {setcmykcolor} bdef
-/sr /setrgbcolor ldef
-/sg /setgray ldef
-/w /setlinewidth ldef
-/j /setlinejoin ldef
-/cap /setlinecap ldef
-/rc {rectclip} bdef
-/rf {rectfill} bdef
-% page state control
-/pgsv () def
-/bpage {/pgsv save def} bdef
-/epage {pgsv restore} bdef
-/bplot /gsave ldef
-/eplot {stroke grestore} bdef
-% orientation switch
-/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
-% coordinate system mappings
-/dpi2point 0 def
-% font control
-/FontSize 0 def
-/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
- makefont setfont} bdef
-/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
- exch dup 3 1 roll findfont dup length dict begin
- { 1 index /FID ne {def}{pop pop} ifelse } forall
- /Encoding exch def currentdict end definefont pop} bdef
-/isroman {findfont /CharStrings get /Agrave known} bdef
-/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
- exch FMS} bdef
-/csm {1 dpi2point div -1 dpi2point div scale neg translate
- dup landscapeMode eq {pop -90 rotate}
- {rotateMode eq {90 rotate} if} ifelse} bdef
-% line types: solid, dotted, dashed, dotdash
-/SO { [] 0 setdash } bdef
-/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
-/DA { [6 dpi2point mul] 0 setdash } bdef
-/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
- dpi2point mul] 0 setdash } bdef
-% macros for lines and objects
-/L {lineto stroke} bdef
-/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
-/AP {{rlineto} repeat} bdef
-/PDlw -1 def
-/W {/PDlw currentlinewidth def setlinewidth} def
-/PP {closepath eofill} bdef
-/DP {closepath stroke} bdef
-/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto
- neg 0 exch rlineto closepath} bdef
-/FR {MR stroke} bdef
-/PR {MR fill} bdef
-/L1i {{currentfile picstr readhexstring pop} image} bdef
-/tMatrix matrix def
-/MakeOval {newpath tMatrix currentmatrix pop translate scale
-0 0 1 0 360 arc tMatrix setmatrix} bdef
-/FO {MakeOval stroke} bdef
-/PO {MakeOval fill} bdef
-/PD {currentlinewidth 2 div 0 360 arc fill
- PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
-/FA {newpath tMatrix currentmatrix pop translate scale
- 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
-/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
- 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
-/FAn {newpath tMatrix currentmatrix pop translate scale
- 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
-/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
- 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
-/vradius 0 def /hradius 0 def /lry 0 def
-/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
-/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
- /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
- vradius add translate hradius vradius scale 0 0 1 180 270 arc
- tMatrix setmatrix lrx hradius sub uly vradius add translate
- hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
- lrx hradius sub lry vradius sub translate hradius vradius scale
- 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
- translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
- closepath} bdef
-/FRR {MRR stroke } bdef
-/PRR {MRR fill } bdef
-/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
- newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
- rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
- sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
- closepath} bdef
-/FlrRR {MlrRR stroke } bdef
-/PlrRR {MlrRR fill } bdef
-/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
- newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
- rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
- sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
- closepath} bdef
-/FtbRR {MtbRR stroke } bdef
-/PtbRR {MtbRR fill } bdef
-/stri 6 array def /dtri 6 array def
-/smat 6 array def /dmat 6 array def
-/tmat1 6 array def /tmat2 6 array def /dif 3 array def
-/asub {/ind2 exch def /ind1 exch def dup dup
- ind1 get exch ind2 get sub exch } bdef
-/tri_to_matrix {
- 2 0 asub 3 1 asub 4 0 asub 5 1 asub
- dup 0 get exch 1 get 7 -1 roll astore } bdef
-/compute_transform {
- dmat dtri tri_to_matrix tmat1 invertmatrix
- smat stri tri_to_matrix tmat2 concatmatrix } bdef
-/ds {stri astore pop} bdef
-/dt {dtri astore pop} bdef
-/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
- currentfile
- 3 index 0 eq {/ASCIIHexDecode filter}
- {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
- ifelse exch readstring pop
- dup 0 3 index getinterval /rbmap xdef
- dup 2 index dup getinterval /gbmap xdef
- 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
-/it {gs np dtri aload pop moveto lineto lineto cp c
- cols rows 8 compute_transform
- rbmap gbmap bbmap true 3 colorimage gr}bdef
-/il {newpath moveto lineto stroke}bdef
-currentdict end def
-%%EndProlog
-
-%%BeginSetup
-MathWorks begin
-
-0 cap
-
-end
-%%EndSetup
-
-%%Page: 1 1
-%%BeginPageSetup
-%%PageBoundingBox: -44 170 641 672
-MathWorks begin
-bpage
-%%EndPageSetup
-
-%%BeginObject: obj1
-bplot
-
-/dpi2point 12 def
-portraitMode -0528 8064 csm
-
- 0 0 8231 6023 rc
-88 dict begin %Colortable dictionary
-/c0 { 0.000000 0.000000 0.000000 sr} bdef
-/c1 { 1.000000 1.000000 1.000000 sr} bdef
-/c2 { 0.900000 0.000000 0.000000 sr} bdef
-/c3 { 0.000000 0.820000 0.000000 sr} bdef
-/c4 { 0.000000 0.000000 0.800000 sr} bdef
-/c5 { 0.910000 0.820000 0.320000 sr} bdef
-/c6 { 1.000000 0.260000 0.820000 sr} bdef
-/c7 { 0.000000 0.820000 0.820000 sr} bdef
-c0
-1 j
-1 sg
- 0 0 8232 6024 rf
-6 w
-0 4908 6379 0 0 -4908 1070 5360 4 MP
-PP
--6379 0 0 4908 6379 0 0 -4908 1070 5360 5 MP stroke
-4 w
-DO
-SO
-6 w
-0 sg
-1070 5360 mt 7449 5360 L
-1070 452 mt 7449 452 L
-1070 5360 mt 1070 452 L
-7449 5360 mt 7449 452 L
-1070 5360 mt 7449 5360 L
-1070 5360 mt 1070 452 L
-1247 5360 mt 1247 5296 L
-1247 452 mt 1247 515 L
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 192 FMSR
-
-1194 5572 mt
-(1) s
-1780 5360 mt 1780 5296 L
-1780 452 mt 1780 515 L
-1754 5572 mt
-( ) s
-2092 5360 mt 2092 5296 L
-2092 452 mt 2092 515 L
-2066 5572 mt
-( ) s
-2314 5360 mt 2314 5296 L
-2314 452 mt 2314 515 L
-2288 5572 mt
-( ) s
-2485 5360 mt 2485 5296 L
-2485 452 mt 2485 515 L
-2459 5572 mt
-( ) s
-2626 5360 mt 2626 5296 L
-2626 452 mt 2626 515 L
-2600 5572 mt
-( ) s
-2744 5360 mt 2744 5296 L
-2744 452 mt 2744 515 L
-2718 5572 mt
-( ) s
-2847 5360 mt 2847 5296 L
-2847 452 mt 2847 515 L
-2821 5572 mt
-( ) s
-2938 5360 mt 2938 5296 L
-2938 452 mt 2938 515 L
-2912 5572 mt
-( ) s
-3019 5360 mt 3019 5296 L
-3019 452 mt 3019 515 L
-2913 5572 mt
-(10) s
-3552 5360 mt 3552 5296 L
-3552 452 mt 3552 515 L
-3526 5572 mt
-( ) s
-3864 5360 mt 3864 5296 L
-3864 452 mt 3864 515 L
-3838 5572 mt
-( ) s
-4085 5360 mt 4085 5296 L
-4085 452 mt 4085 515 L
-4059 5572 mt
-( ) s
-4257 5360 mt 4257 5296 L
-4257 452 mt 4257 515 L
-4231 5572 mt
-( ) s
-4397 5360 mt 4397 5296 L
-4397 452 mt 4397 515 L
-4371 5572 mt
-( ) s
-4516 5360 mt 4516 5296 L
-4516 452 mt 4516 515 L
-4490 5572 mt
-( ) s
-4619 5360 mt 4619 5296 L
-4619 452 mt 4619 515 L
-4593 5572 mt
-( ) s
-4710 5360 mt 4710 5296 L
-4710 452 mt 4710 515 L
-4684 5572 mt
-( ) s
-4791 5360 mt 4791 5296 L
-4791 452 mt 4791 515 L
-4631 5572 mt
-(100) s
-5324 5360 mt 5324 5296 L
-5324 452 mt 5324 515 L
-5298 5572 mt
-( ) s
-5636 5360 mt 5636 5296 L
-5636 452 mt 5636 515 L
-5610 5572 mt
-( ) s
-5857 5360 mt 5857 5296 L
-5857 452 mt 5857 515 L
-5831 5572 mt
-( ) s
-6029 5360 mt 6029 5296 L
-6029 452 mt 6029 515 L
-6003 5572 mt
-( ) s
-6169 5360 mt 6169 5296 L
-6169 452 mt 6169 515 L
-6143 5572 mt
-( ) s
-6288 5360 mt 6288 5296 L
-6288 452 mt 6288 515 L
-6262 5572 mt
-( ) s
-6391 5360 mt 6391 5296 L
-6391 452 mt 6391 515 L
-6365 5572 mt
-( ) s
-6481 5360 mt 6481 5296 L
-6481 452 mt 6481 515 L
-6455 5572 mt
-( ) s
-6563 5360 mt 6563 5296 L
-6563 452 mt 6563 515 L
-6350 5572 mt
-(1000) s
-7096 5360 mt 7096 5296 L
-7096 452 mt 7096 515 L
-7070 5572 mt
-( ) s
-7408 5360 mt 7408 5296 L
-7408 452 mt 7408 515 L
-7382 5572 mt
-( ) s
-1070 5352 mt 1133 5352 L
-7449 5352 mt 7385 5352 L
- 982 5423 mt
-( ) s
-1070 5233 mt 1133 5233 L
-7449 5233 mt 7385 5233 L
- 982 5304 mt
-( ) s
-1070 5126 mt 1133 5126 L
-7449 5126 mt 7385 5126 L
- 929 5197 mt
-(1) s
-1070 4422 mt 1133 4422 L
-7449 4422 mt 7385 4422 L
- 982 4493 mt
-( ) s
-1070 4011 mt 1133 4011 L
-7449 4011 mt 7385 4011 L
- 982 4082 mt
-( ) s
-1070 3719 mt 1133 3719 L
-7449 3719 mt 7385 3719 L
- 982 3790 mt
-( ) s
-1070 3492 mt 1133 3492 L
-7449 3492 mt 7385 3492 L
- 982 3563 mt
-( ) s
-1070 3307 mt 1133 3307 L
-7449 3307 mt 7385 3307 L
- 982 3378 mt
-( ) s
-1070 3151 mt 1133 3151 L
-7449 3151 mt 7385 3151 L
- 982 3222 mt
-( ) s
-1070 3015 mt 1133 3015 L
-7449 3015 mt 7385 3015 L
- 982 3086 mt
-( ) s
-1070 2896 mt 1133 2896 L
-7449 2896 mt 7385 2896 L
- 982 2967 mt
-( ) s
-1070 2789 mt 1133 2789 L
-7449 2789 mt 7385 2789 L
- 822 2860 mt
-(10) s
-1070 2085 mt 1133 2085 L
-7449 2085 mt 7385 2085 L
- 982 2156 mt
-( ) s
-1070 1674 mt 1133 1674 L
-7449 1674 mt 7385 1674 L
- 982 1745 mt
-( ) s
-1070 1382 mt 1133 1382 L
-7449 1382 mt 7385 1382 L
- 982 1453 mt
-( ) s
-1070 1155 mt 1133 1155 L
-7449 1155 mt 7385 1155 L
- 982 1226 mt
-( ) s
-1070 970 mt 1133 970 L
-7449 970 mt 7385 970 L
- 982 1041 mt
-( ) s
-1070 814 mt 1133 814 L
-7449 814 mt 7385 814 L
- 982 885 mt
-( ) s
-1070 678 mt 1133 678 L
-7449 678 mt 7385 678 L
- 982 749 mt
-( ) s
-1070 558 mt 1133 558 L
-7449 558 mt 7385 558 L
- 982 629 mt
-( ) s
-1070 452 mt 1133 452 L
-7449 452 mt 7385 452 L
- 715 523 mt
-(100) s
-1070 5360 mt 7449 5360 L
-1070 452 mt 7449 452 L
-1070 5360 mt 1070 452 L
-7449 5360 mt 7449 452 L
-gs 1070 452 6380 4909 rc
-24 w
-gr
-
-24 w
-/c8 { 0.000000 0.700000 0.500000 sr} bdef
-c8
- 60 60 1466 5126 FO
- 60 60 1905 4428 FO
- 60 60 2344 3977 FO
- 60 60 2783 3675 FO
- 60 60 3222 3368 FO
- 60 60 3660 3107 FO
- 60 60 4099 2893 FO
- 60 60 4538 2710 FO
- 60 60 4977 2550 FO
- 60 60 5416 2412 FO
- 60 60 5855 2292 FO
- 60 60 6294 2180 FO
- 60 60 6733 2083 FO
- 60 60 7171 1991 FO
-gs 1070 452 6380 4909 rc
-gr
-
-/c9 { 0.400000 0.400000 1.000000 sr} bdef
-c9
- 60 60 1466 5126 FO
- 60 60 1905 4860 FO
- 60 60 2344 4699 FO
- 60 60 2783 4594 FO
- 60 60 3222 4483 FO
- 60 60 3660 4383 FO
- 60 60 4099 4295 FO
- 60 60 4538 4214 FO
- 60 60 4977 4137 FO
- 60 60 5416 4068 FO
- 60 60 5855 4008 FO
- 60 60 6294 3941 FO
- 60 60 6733 3883 FO
- 60 60 7171 3827 FO
-gs 1070 452 6380 4909 rc
-gr
-
-/c10 { 1.000000 0.400000 0.200000 sr} bdef
-c10
- 60 60 1466 5126 FO
- 60 60 1905 5088 FO
- 60 60 2344 5064 FO
- 60 60 2783 5048 FO
- 60 60 3222 5029 FO
- 60 60 3660 5010 FO
- 60 60 4099 4994 FO
- 60 60 4538 4976 FO
- 60 60 4977 4959 FO
- 60 60 5416 4942 FO
- 60 60 5855 4930 FO
- 60 60 6294 4914 FO
- 60 60 6733 4908 FO
- 60 60 7171 4874 FO
-gs 1070 452 6380 4909 rc
-gr
-
-0 sg
- 60 60 1466 5126 FO
- 60 60 1905 5122 FO
- 60 60 2344 5119 FO
- 60 60 2783 5118 FO
- 60 60 3222 5116 FO
- 60 60 3660 5113 FO
- 60 60 4099 5112 FO
- 60 60 4538 5110 FO
- 60 60 4977 5109 FO
- 60 60 5416 5105 FO
- 60 60 5855 5105 FO
- 60 60 6294 5104 FO
- 60 60 6733 5101 FO
- 60 60 7171 5103 FO
-gs 1070 452 6380 4909 rc
-gr
-
- 617 4557 mt -90 rotate
-(Mean number of lexical entries \(tables\)) s
-90 rotate
-3390 5724 mt
-(Word frequency \(n) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 144 FMSR
-
-4963 5820 mt
-(w) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 192 FMSR
-
-5066 5724 mt
-(\)) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 120 FMSR
-
-1053 5403 mt
-( ) s
-7433 494 mt
-( ) s
-6 w
-1 sg
-0 1070 1481 0 0 -1070 1129 1582 4 MP
-PP
--1481 0 0 1070 1481 0 0 -1070 1129 1582 5 MP stroke
-4 w
-DO
-SO
-6 w
-0 sg
-1129 1582 mt 2610 1582 L
-1129 512 mt 2610 512 L
-1129 1582 mt 1129 512 L
-2610 1582 mt 2610 512 L
-1129 1582 mt 2610 1582 L
-1129 1582 mt 1129 512 L
-1129 1582 mt 2610 1582 L
-1129 512 mt 2610 512 L
-1129 1582 mt 1129 512 L
-2610 1582 mt 2610 512 L
-%%IncludeResource: font Symbol
-/Symbol /ISOLatin1Encoding 192 FMSR
-
-1601 728 mt
-(a) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 192 FMSR
-
-1722 728 mt
-( = 100000) s
-gs 1129 512 1482 1071 rc
-24 w
-gs 1236 513 293 293 rc
-c8
- 60 60 1382 659 FO
-gr
-
-c8
-gr
-
-24 w
-c8
-0 sg
-%%IncludeResource: font Symbol
-/Symbol /ISOLatin1Encoding 192 FMSR
-
-1601 987 mt
-(a) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 192 FMSR
-
-1722 987 mt
-( = 10000) s
-gs 1129 512 1482 1071 rc
-gs 1236 771 293 293 rc
-c9
- 60 60 1382 917 FO
-gr
-
-c9
-gr
-
-c9
-0 sg
-%%IncludeResource: font Symbol
-/Symbol /ISOLatin1Encoding 192 FMSR
-
-1601 1246 mt
-(a) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 192 FMSR
-
-1722 1246 mt
-( = 1000) s
-gs 1129 512 1482 1071 rc
-gs 1236 1030 293 293 rc
-c10
- 60 60 1382 1176 FO
-gr
-
-c10
-gr
-
-c10
-0 sg
-%%IncludeResource: font Symbol
-/Symbol /ISOLatin1Encoding 192 FMSR
-
-1601 1505 mt
-(a) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 192 FMSR
-
-1722 1505 mt
-( = 100) s
-gs 1129 512 1482 1071 rc
-gs 1236 1288 293 293 rc
- 60 60 1382 1434 FO
-gr
-
-6 w
-gr
-
-6 w
-
-end %%Color Dict
-
-eplot
-%%EndObject
-
-epage
-end
-
-showpage
-
-%%Trailer
-%%EOF
diff --git a/report/pyp_clustering/acl09-short/code/plot0.pdf b/report/pyp_clustering/acl09-short/code/plot0.pdf
deleted file mode 100644
index fd1b4595..00000000
--- a/report/pyp_clustering/acl09-short/code/plot0.pdf
+++ /dev/null
Binary files differ
diff --git a/report/pyp_clustering/acl09-short/code/plot1.eps b/report/pyp_clustering/acl09-short/code/plot1.eps
deleted file mode 100644
index ebb2f194..00000000
--- a/report/pyp_clustering/acl09-short/code/plot1.eps
+++ /dev/null
@@ -1,579 +0,0 @@
-%!PS-Adobe-3.0 EPSF-3.0
-%%Creator: MATLAB, The Mathworks, Inc. Version 7.7.0.471 (R2008b). Operating System: Linux 2.6.18-128.1.6.el5.inf.1PAE #1 SMP Wed Apr 15 10:23:41 BST 2009 i686.
-%%Title: /afs/inf.ed.ac.uk/user/s/sgwater/research/papers/2009/acl09-short/code/plot1.eps
-%%CreationDate: 07/23/2009 17:34:27
-%%DocumentNeededFonts: Helvetica
-%%DocumentProcessColors: Cyan Magenta Yellow Black
-%%LanguageLevel: 2
-%%Pages: 1
-%%BoundingBox: -44 170 641 672
-%%EndComments
-
-%%BeginProlog
-% MathWorks dictionary
-/MathWorks 160 dict begin
-% definition operators
-/bdef {bind def} bind def
-/ldef {load def} bind def
-/xdef {exch def} bdef
-/xstore {exch store} bdef
-% operator abbreviations
-/c /clip ldef
-/cc /concat ldef
-/cp /closepath ldef
-/gr /grestore ldef
-/gs /gsave ldef
-/mt /moveto ldef
-/np /newpath ldef
-/cm /currentmatrix ldef
-/sm /setmatrix ldef
-/rm /rmoveto ldef
-/rl /rlineto ldef
-/s {show newpath} bdef
-/sc {setcmykcolor} bdef
-/sr /setrgbcolor ldef
-/sg /setgray ldef
-/w /setlinewidth ldef
-/j /setlinejoin ldef
-/cap /setlinecap ldef
-/rc {rectclip} bdef
-/rf {rectfill} bdef
-% page state control
-/pgsv () def
-/bpage {/pgsv save def} bdef
-/epage {pgsv restore} bdef
-/bplot /gsave ldef
-/eplot {stroke grestore} bdef
-% orientation switch
-/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
-% coordinate system mappings
-/dpi2point 0 def
-% font control
-/FontSize 0 def
-/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
- makefont setfont} bdef
-/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
- exch dup 3 1 roll findfont dup length dict begin
- { 1 index /FID ne {def}{pop pop} ifelse } forall
- /Encoding exch def currentdict end definefont pop} bdef
-/isroman {findfont /CharStrings get /Agrave known} bdef
-/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
- exch FMS} bdef
-/csm {1 dpi2point div -1 dpi2point div scale neg translate
- dup landscapeMode eq {pop -90 rotate}
- {rotateMode eq {90 rotate} if} ifelse} bdef
-% line types: solid, dotted, dashed, dotdash
-/SO { [] 0 setdash } bdef
-/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
-/DA { [6 dpi2point mul] 0 setdash } bdef
-/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
- dpi2point mul] 0 setdash } bdef
-% macros for lines and objects
-/L {lineto stroke} bdef
-/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
-/AP {{rlineto} repeat} bdef
-/PDlw -1 def
-/W {/PDlw currentlinewidth def setlinewidth} def
-/PP {closepath eofill} bdef
-/DP {closepath stroke} bdef
-/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto
- neg 0 exch rlineto closepath} bdef
-/FR {MR stroke} bdef
-/PR {MR fill} bdef
-/L1i {{currentfile picstr readhexstring pop} image} bdef
-/tMatrix matrix def
-/MakeOval {newpath tMatrix currentmatrix pop translate scale
-0 0 1 0 360 arc tMatrix setmatrix} bdef
-/FO {MakeOval stroke} bdef
-/PO {MakeOval fill} bdef
-/PD {currentlinewidth 2 div 0 360 arc fill
- PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
-/FA {newpath tMatrix currentmatrix pop translate scale
- 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
-/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
- 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
-/FAn {newpath tMatrix currentmatrix pop translate scale
- 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
-/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
- 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
-/vradius 0 def /hradius 0 def /lry 0 def
-/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
-/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
- /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
- vradius add translate hradius vradius scale 0 0 1 180 270 arc
- tMatrix setmatrix lrx hradius sub uly vradius add translate
- hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
- lrx hradius sub lry vradius sub translate hradius vradius scale
- 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
- translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
- closepath} bdef
-/FRR {MRR stroke } bdef
-/PRR {MRR fill } bdef
-/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
- newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
- rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
- sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
- closepath} bdef
-/FlrRR {MlrRR stroke } bdef
-/PlrRR {MlrRR fill } bdef
-/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
- newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
- rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
- sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
- closepath} bdef
-/FtbRR {MtbRR stroke } bdef
-/PtbRR {MtbRR fill } bdef
-/stri 6 array def /dtri 6 array def
-/smat 6 array def /dmat 6 array def
-/tmat1 6 array def /tmat2 6 array def /dif 3 array def
-/asub {/ind2 exch def /ind1 exch def dup dup
- ind1 get exch ind2 get sub exch } bdef
-/tri_to_matrix {
- 2 0 asub 3 1 asub 4 0 asub 5 1 asub
- dup 0 get exch 1 get 7 -1 roll astore } bdef
-/compute_transform {
- dmat dtri tri_to_matrix tmat1 invertmatrix
- smat stri tri_to_matrix tmat2 concatmatrix } bdef
-/ds {stri astore pop} bdef
-/dt {dtri astore pop} bdef
-/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
- currentfile
- 3 index 0 eq {/ASCIIHexDecode filter}
- {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
- ifelse exch readstring pop
- dup 0 3 index getinterval /rbmap xdef
- dup 2 index dup getinterval /gbmap xdef
- 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
-/it {gs np dtri aload pop moveto lineto lineto cp c
- cols rows 8 compute_transform
- rbmap gbmap bbmap true 3 colorimage gr}bdef
-/il {newpath moveto lineto stroke}bdef
-currentdict end def
-%%EndProlog
-
-%%BeginSetup
-MathWorks begin
-
-0 cap
-
-end
-%%EndSetup
-
-%%Page: 1 1
-%%BeginPageSetup
-%%PageBoundingBox: -44 170 641 672
-MathWorks begin
-bpage
-%%EndPageSetup
-
-%%BeginObject: obj1
-bplot
-
-/dpi2point 12 def
-portraitMode -0528 8064 csm
-
- 0 0 8231 6023 rc
-88 dict begin %Colortable dictionary
-/c0 { 0.000000 0.000000 0.000000 sr} bdef
-/c1 { 1.000000 1.000000 1.000000 sr} bdef
-/c2 { 0.900000 0.000000 0.000000 sr} bdef
-/c3 { 0.000000 0.820000 0.000000 sr} bdef
-/c4 { 0.000000 0.000000 0.800000 sr} bdef
-/c5 { 0.910000 0.820000 0.320000 sr} bdef
-/c6 { 1.000000 0.260000 0.820000 sr} bdef
-/c7 { 0.000000 0.820000 0.820000 sr} bdef
-c0
-1 j
-1 sg
- 0 0 8232 6024 rf
-6 w
-0 4908 6379 0 0 -4908 1070 5360 4 MP
-PP
--6379 0 0 4908 6379 0 0 -4908 1070 5360 5 MP stroke
-4 w
-DO
-SO
-6 w
-0 sg
-1070 5360 mt 7449 5360 L
-1070 452 mt 7449 452 L
-1070 5360 mt 1070 452 L
-7449 5360 mt 7449 452 L
-1070 5360 mt 7449 5360 L
-1070 5360 mt 1070 452 L
-1247 5360 mt 1247 5296 L
-1247 452 mt 1247 515 L
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 192 FMSR
-
-1194 5572 mt
-(1) s
-1780 5360 mt 1780 5296 L
-1780 452 mt 1780 515 L
-1754 5572 mt
-( ) s
-2092 5360 mt 2092 5296 L
-2092 452 mt 2092 515 L
-2066 5572 mt
-( ) s
-2314 5360 mt 2314 5296 L
-2314 452 mt 2314 515 L
-2288 5572 mt
-( ) s
-2485 5360 mt 2485 5296 L
-2485 452 mt 2485 515 L
-2459 5572 mt
-( ) s
-2626 5360 mt 2626 5296 L
-2626 452 mt 2626 515 L
-2600 5572 mt
-( ) s
-2744 5360 mt 2744 5296 L
-2744 452 mt 2744 515 L
-2718 5572 mt
-( ) s
-2847 5360 mt 2847 5296 L
-2847 452 mt 2847 515 L
-2821 5572 mt
-( ) s
-2938 5360 mt 2938 5296 L
-2938 452 mt 2938 515 L
-2912 5572 mt
-( ) s
-3019 5360 mt 3019 5296 L
-3019 452 mt 3019 515 L
-2913 5572 mt
-(10) s
-3552 5360 mt 3552 5296 L
-3552 452 mt 3552 515 L
-3526 5572 mt
-( ) s
-3864 5360 mt 3864 5296 L
-3864 452 mt 3864 515 L
-3838 5572 mt
-( ) s
-4085 5360 mt 4085 5296 L
-4085 452 mt 4085 515 L
-4059 5572 mt
-( ) s
-4257 5360 mt 4257 5296 L
-4257 452 mt 4257 515 L
-4231 5572 mt
-( ) s
-4397 5360 mt 4397 5296 L
-4397 452 mt 4397 515 L
-4371 5572 mt
-( ) s
-4516 5360 mt 4516 5296 L
-4516 452 mt 4516 515 L
-4490 5572 mt
-( ) s
-4619 5360 mt 4619 5296 L
-4619 452 mt 4619 515 L
-4593 5572 mt
-( ) s
-4710 5360 mt 4710 5296 L
-4710 452 mt 4710 515 L
-4684 5572 mt
-( ) s
-4791 5360 mt 4791 5296 L
-4791 452 mt 4791 515 L
-4631 5572 mt
-(100) s
-5324 5360 mt 5324 5296 L
-5324 452 mt 5324 515 L
-5298 5572 mt
-( ) s
-5636 5360 mt 5636 5296 L
-5636 452 mt 5636 515 L
-5610 5572 mt
-( ) s
-5857 5360 mt 5857 5296 L
-5857 452 mt 5857 515 L
-5831 5572 mt
-( ) s
-6029 5360 mt 6029 5296 L
-6029 452 mt 6029 515 L
-6003 5572 mt
-( ) s
-6169 5360 mt 6169 5296 L
-6169 452 mt 6169 515 L
-6143 5572 mt
-( ) s
-6288 5360 mt 6288 5296 L
-6288 452 mt 6288 515 L
-6262 5572 mt
-( ) s
-6391 5360 mt 6391 5296 L
-6391 452 mt 6391 515 L
-6365 5572 mt
-( ) s
-6481 5360 mt 6481 5296 L
-6481 452 mt 6481 515 L
-6455 5572 mt
-( ) s
-6563 5360 mt 6563 5296 L
-6563 452 mt 6563 515 L
-6350 5572 mt
-(1000) s
-7096 5360 mt 7096 5296 L
-7096 452 mt 7096 515 L
-7070 5572 mt
-( ) s
-7408 5360 mt 7408 5296 L
-7408 452 mt 7408 515 L
-7382 5572 mt
-( ) s
-1070 5352 mt 1133 5352 L
-7449 5352 mt 7385 5352 L
- 982 5423 mt
-( ) s
-1070 5233 mt 1133 5233 L
-7449 5233 mt 7385 5233 L
- 982 5304 mt
-( ) s
-1070 5126 mt 1133 5126 L
-7449 5126 mt 7385 5126 L
- 929 5197 mt
-(1) s
-1070 4422 mt 1133 4422 L
-7449 4422 mt 7385 4422 L
- 982 4493 mt
-( ) s
-1070 4011 mt 1133 4011 L
-7449 4011 mt 7385 4011 L
- 982 4082 mt
-( ) s
-1070 3719 mt 1133 3719 L
-7449 3719 mt 7385 3719 L
- 982 3790 mt
-( ) s
-1070 3492 mt 1133 3492 L
-7449 3492 mt 7385 3492 L
- 982 3563 mt
-( ) s
-1070 3307 mt 1133 3307 L
-7449 3307 mt 7385 3307 L
- 982 3378 mt
-( ) s
-1070 3151 mt 1133 3151 L
-7449 3151 mt 7385 3151 L
- 982 3222 mt
-( ) s
-1070 3015 mt 1133 3015 L
-7449 3015 mt 7385 3015 L
- 982 3086 mt
-( ) s
-1070 2896 mt 1133 2896 L
-7449 2896 mt 7385 2896 L
- 982 2967 mt
-( ) s
-1070 2789 mt 1133 2789 L
-7449 2789 mt 7385 2789 L
- 822 2860 mt
-(10) s
-1070 2085 mt 1133 2085 L
-7449 2085 mt 7385 2085 L
- 982 2156 mt
-( ) s
-1070 1674 mt 1133 1674 L
-7449 1674 mt 7385 1674 L
- 982 1745 mt
-( ) s
-1070 1382 mt 1133 1382 L
-7449 1382 mt 7385 1382 L
- 982 1453 mt
-( ) s
-1070 1155 mt 1133 1155 L
-7449 1155 mt 7385 1155 L
- 982 1226 mt
-( ) s
-1070 970 mt 1133 970 L
-7449 970 mt 7385 970 L
- 982 1041 mt
-( ) s
-1070 814 mt 1133 814 L
-7449 814 mt 7385 814 L
- 982 885 mt
-( ) s
-1070 678 mt 1133 678 L
-7449 678 mt 7385 678 L
- 982 749 mt
-( ) s
-1070 558 mt 1133 558 L
-7449 558 mt 7385 558 L
- 982 629 mt
-( ) s
-1070 452 mt 1133 452 L
-7449 452 mt 7385 452 L
- 715 523 mt
-(100) s
-1070 5360 mt 7449 5360 L
-1070 452 mt 7449 452 L
-1070 5360 mt 1070 452 L
-7449 5360 mt 7449 452 L
-gs 1070 452 6380 4909 rc
-24 w
-gr
-
-24 w
- 48 48 1466 5126 FO
- 48 48 1905 5122 FO
- 48 48 2344 5119 FO
- 48 48 2783 5118 FO
- 48 48 3222 5116 FO
- 48 48 3660 5113 FO
- 48 48 4099 5112 FO
- 48 48 4538 5110 FO
- 48 48 4977 5109 FO
- 48 48 5416 5105 FO
- 48 48 5855 5105 FO
- 48 48 6294 5104 FO
- 48 48 6733 5101 FO
- 48 48 7171 5103 FO
-gs 1070 452 6380 4909 rc
-438 -2 439 -2 439 -2 439 -2 439 -2 439 -2 439 -2 439 -1
-438 -2 439 -3 439 -1 439 -3 439 -4 1466 5126 14 MP stroke
-gr
-
-/c8 { 1.000000 0.400000 0.200000 sr} bdef
-c8
- 48 48 1466 5126 FO
- 48 48 1905 5088 FO
- 48 48 2344 5064 FO
- 48 48 2783 5048 FO
- 48 48 3222 5029 FO
- 48 48 3660 5010 FO
- 48 48 4099 4994 FO
- 48 48 4538 4976 FO
- 48 48 4977 4959 FO
- 48 48 5416 4942 FO
- 48 48 5855 4930 FO
- 48 48 6294 4914 FO
- 48 48 6733 4908 FO
- 48 48 7171 4874 FO
-gs 1070 452 6380 4909 rc
-438 -16 439 -15 439 -15 439 -16 439 -16 439 -16 439 -17 439 -17
-438 -19 439 -18 439 -17 439 -24 439 -38 1466 5126 14 MP stroke
-gr
-
-/c9 { 0.400000 0.400000 1.000000 sr} bdef
-c9
- 48 48 1466 5126 FO
- 48 48 1905 4860 FO
- 48 48 2344 4699 FO
- 48 48 2783 4594 FO
- 48 48 3222 4483 FO
- 48 48 3660 4383 FO
- 48 48 4099 4295 FO
- 48 48 4538 4214 FO
- 48 48 4977 4137 FO
- 48 48 5416 4068 FO
- 48 48 5855 4008 FO
- 48 48 6294 3941 FO
- 48 48 6733 3883 FO
- 48 48 7171 3827 FO
-gs 1070 452 6380 4909 rc
-438 -55 439 -57 439 -61 439 -64 439 -70 439 -75 439 -81 439 -89
-438 -100 439 -111 439 -105 439 -161 439 -266 1466 5126 14 MP stroke
-gr
-
-/c10 { 0.000000 0.700000 0.500000 sr} bdef
-c10
- 48 48 1466 5126 FO
- 48 48 1905 4428 FO
- 48 48 2344 3977 FO
- 48 48 2783 3675 FO
- 48 48 3222 3368 FO
- 48 48 3660 3107 FO
- 48 48 4099 2893 FO
- 48 48 4538 2710 FO
- 48 48 4977 2550 FO
- 48 48 5416 2412 FO
- 48 48 5855 2292 FO
- 48 48 6294 2180 FO
- 48 48 6733 2083 FO
- 48 48 7171 1991 FO
-gs 1070 452 6380 4909 rc
-438 -91 439 -97 439 -110 439 -121 439 -138 439 -160 439 -183 439 -214
-438 -261 439 -307 439 -302 439 -451 439 -698 1466 5126 14 MP stroke
-gr
-
-0 sg
- 617 4557 mt -90 rotate
-(Mean number of lexical entries \(tables\)) s
-90 rotate
-3390 5724 mt
-(Word frequency \(n) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 144 FMSR
-
-4963 5820 mt
-(w) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 192 FMSR
-
-5066 5724 mt
-(\)) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 120 FMSR
-
-1053 5403 mt
-( ) s
-7433 494 mt
-( ) s
-6 w
-1 sg
-0 500 1510 0 0 -500 1129 1012 4 MP
-PP
--1510 0 0 500 1510 0 0 -500 1129 1012 5 MP stroke
-4 w
-DO
-SO
-6 w
-0 sg
-1129 1012 mt 2639 1012 L
-1129 512 mt 2639 512 L
-1129 1012 mt 1129 512 L
-2639 1012 mt 2639 512 L
-1129 1012 mt 2639 1012 L
-1129 1012 mt 1129 512 L
-1129 1012 mt 2639 1012 L
-1129 512 mt 2639 512 L
-1129 1012 mt 1129 512 L
-2639 1012 mt 2639 512 L
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 192 FMSR
-
-1594 713 mt
-(Empirical) s
-gs 1129 512 1511 501 rc
-24 w
-gs 1257 523 245 245 rc
- 48 48 1379 645 FO
-gr
-
-gr
-
-24 w
-1594 948 mt
-(Expectation) s
-gs 1129 512 1511 501 rc
-358 0 1200 878 2 MP stroke
-6 w
-gr
-
-6 w
-
-end %%Color Dict
-
-eplot
-%%EndObject
-
-epage
-end
-
-showpage
-
-%%Trailer
-%%EOF
diff --git a/report/pyp_clustering/acl09-short/code/plot1.pdf b/report/pyp_clustering/acl09-short/code/plot1.pdf
deleted file mode 100644
index 90fcd9ba..00000000
--- a/report/pyp_clustering/acl09-short/code/plot1.pdf
+++ /dev/null
Binary files differ
diff --git a/report/pyp_clustering/acl09-short/code/plot2.eps b/report/pyp_clustering/acl09-short/code/plot2.eps
deleted file mode 100644
index e5c5536a..00000000
--- a/report/pyp_clustering/acl09-short/code/plot2.eps
+++ /dev/null
@@ -1,552 +0,0 @@
-%!PS-Adobe-3.0 EPSF-3.0
-%%Creator: MATLAB, The Mathworks, Inc. Version 7.7.0.471 (R2008b). Operating System: Linux 2.6.18-128.1.6.el5.inf.1PAE #1 SMP Wed Apr 15 10:23:41 BST 2009 i686.
-%%Title: /afs/inf.ed.ac.uk/user/s/sgwater/research/papers/2009/acl09-short/code/plot2.eps
-%%CreationDate: 07/23/2009 17:33:05
-%%DocumentNeededFonts: Helvetica
-%%DocumentProcessColors: Cyan Magenta Yellow Black
-%%LanguageLevel: 2
-%%Pages: 1
-%%BoundingBox: -44 170 641 672
-%%EndComments
-
-%%BeginProlog
-% MathWorks dictionary
-/MathWorks 160 dict begin
-% definition operators
-/bdef {bind def} bind def
-/ldef {load def} bind def
-/xdef {exch def} bdef
-/xstore {exch store} bdef
-% operator abbreviations
-/c /clip ldef
-/cc /concat ldef
-/cp /closepath ldef
-/gr /grestore ldef
-/gs /gsave ldef
-/mt /moveto ldef
-/np /newpath ldef
-/cm /currentmatrix ldef
-/sm /setmatrix ldef
-/rm /rmoveto ldef
-/rl /rlineto ldef
-/s {show newpath} bdef
-/sc {setcmykcolor} bdef
-/sr /setrgbcolor ldef
-/sg /setgray ldef
-/w /setlinewidth ldef
-/j /setlinejoin ldef
-/cap /setlinecap ldef
-/rc {rectclip} bdef
-/rf {rectfill} bdef
-% page state control
-/pgsv () def
-/bpage {/pgsv save def} bdef
-/epage {pgsv restore} bdef
-/bplot /gsave ldef
-/eplot {stroke grestore} bdef
-% orientation switch
-/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
-% coordinate system mappings
-/dpi2point 0 def
-% font control
-/FontSize 0 def
-/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
- makefont setfont} bdef
-/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
- exch dup 3 1 roll findfont dup length dict begin
- { 1 index /FID ne {def}{pop pop} ifelse } forall
- /Encoding exch def currentdict end definefont pop} bdef
-/isroman {findfont /CharStrings get /Agrave known} bdef
-/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
- exch FMS} bdef
-/csm {1 dpi2point div -1 dpi2point div scale neg translate
- dup landscapeMode eq {pop -90 rotate}
- {rotateMode eq {90 rotate} if} ifelse} bdef
-% line types: solid, dotted, dashed, dotdash
-/SO { [] 0 setdash } bdef
-/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
-/DA { [6 dpi2point mul] 0 setdash } bdef
-/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
- dpi2point mul] 0 setdash } bdef
-% macros for lines and objects
-/L {lineto stroke} bdef
-/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
-/AP {{rlineto} repeat} bdef
-/PDlw -1 def
-/W {/PDlw currentlinewidth def setlinewidth} def
-/PP {closepath eofill} bdef
-/DP {closepath stroke} bdef
-/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto
- neg 0 exch rlineto closepath} bdef
-/FR {MR stroke} bdef
-/PR {MR fill} bdef
-/L1i {{currentfile picstr readhexstring pop} image} bdef
-/tMatrix matrix def
-/MakeOval {newpath tMatrix currentmatrix pop translate scale
-0 0 1 0 360 arc tMatrix setmatrix} bdef
-/FO {MakeOval stroke} bdef
-/PO {MakeOval fill} bdef
-/PD {currentlinewidth 2 div 0 360 arc fill
- PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
-/FA {newpath tMatrix currentmatrix pop translate scale
- 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
-/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
- 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
-/FAn {newpath tMatrix currentmatrix pop translate scale
- 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
-/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
- 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
-/vradius 0 def /hradius 0 def /lry 0 def
-/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
-/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
- /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
- vradius add translate hradius vradius scale 0 0 1 180 270 arc
- tMatrix setmatrix lrx hradius sub uly vradius add translate
- hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
- lrx hradius sub lry vradius sub translate hradius vradius scale
- 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
- translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
- closepath} bdef
-/FRR {MRR stroke } bdef
-/PRR {MRR fill } bdef
-/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
- newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
- rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
- sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
- closepath} bdef
-/FlrRR {MlrRR stroke } bdef
-/PlrRR {MlrRR fill } bdef
-/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
- newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
- rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
- sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
- closepath} bdef
-/FtbRR {MtbRR stroke } bdef
-/PtbRR {MtbRR fill } bdef
-/stri 6 array def /dtri 6 array def
-/smat 6 array def /dmat 6 array def
-/tmat1 6 array def /tmat2 6 array def /dif 3 array def
-/asub {/ind2 exch def /ind1 exch def dup dup
- ind1 get exch ind2 get sub exch } bdef
-/tri_to_matrix {
- 2 0 asub 3 1 asub 4 0 asub 5 1 asub
- dup 0 get exch 1 get 7 -1 roll astore } bdef
-/compute_transform {
- dmat dtri tri_to_matrix tmat1 invertmatrix
- smat stri tri_to_matrix tmat2 concatmatrix } bdef
-/ds {stri astore pop} bdef
-/dt {dtri astore pop} bdef
-/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
- currentfile
- 3 index 0 eq {/ASCIIHexDecode filter}
- {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
- ifelse exch readstring pop
- dup 0 3 index getinterval /rbmap xdef
- dup 2 index dup getinterval /gbmap xdef
- 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
-/it {gs np dtri aload pop moveto lineto lineto cp c
- cols rows 8 compute_transform
- rbmap gbmap bbmap true 3 colorimage gr}bdef
-/il {newpath moveto lineto stroke}bdef
-currentdict end def
-%%EndProlog
-
-%%BeginSetup
-MathWorks begin
-
-0 cap
-
-end
-%%EndSetup
-
-%%Page: 1 1
-%%BeginPageSetup
-%%PageBoundingBox: -44 170 641 672
-MathWorks begin
-bpage
-%%EndPageSetup
-
-%%BeginObject: obj1
-bplot
-
-/dpi2point 12 def
-portraitMode -0528 8064 csm
-
- 0 0 8231 6023 rc
-88 dict begin %Colortable dictionary
-/c0 { 0.000000 0.000000 0.000000 sr} bdef
-/c1 { 1.000000 1.000000 1.000000 sr} bdef
-/c2 { 0.900000 0.000000 0.000000 sr} bdef
-/c3 { 0.000000 0.820000 0.000000 sr} bdef
-/c4 { 0.000000 0.000000 0.800000 sr} bdef
-/c5 { 0.910000 0.820000 0.320000 sr} bdef
-/c6 { 1.000000 0.260000 0.820000 sr} bdef
-/c7 { 0.000000 0.820000 0.820000 sr} bdef
-c0
-1 j
-1 sg
- 0 0 8232 6024 rf
-6 w
-0 4908 6379 0 0 -4908 1070 5360 4 MP
-PP
--6379 0 0 4908 6379 0 0 -4908 1070 5360 5 MP stroke
-4 w
-DO
-SO
-6 w
-0 sg
-1070 5360 mt 7449 5360 L
-1070 452 mt 7449 452 L
-1070 5360 mt 1070 452 L
-7449 5360 mt 7449 452 L
-1070 5360 mt 7449 5360 L
-1070 5360 mt 1070 452 L
-1247 5360 mt 1247 5296 L
-1247 452 mt 1247 515 L
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 192 FMSR
-
-1194 5572 mt
-(1) s
-1780 5360 mt 1780 5296 L
-1780 452 mt 1780 515 L
-1754 5572 mt
-( ) s
-2092 5360 mt 2092 5296 L
-2092 452 mt 2092 515 L
-2066 5572 mt
-( ) s
-2314 5360 mt 2314 5296 L
-2314 452 mt 2314 515 L
-2288 5572 mt
-( ) s
-2485 5360 mt 2485 5296 L
-2485 452 mt 2485 515 L
-2459 5572 mt
-( ) s
-2626 5360 mt 2626 5296 L
-2626 452 mt 2626 515 L
-2600 5572 mt
-( ) s
-2744 5360 mt 2744 5296 L
-2744 452 mt 2744 515 L
-2718 5572 mt
-( ) s
-2847 5360 mt 2847 5296 L
-2847 452 mt 2847 515 L
-2821 5572 mt
-( ) s
-2938 5360 mt 2938 5296 L
-2938 452 mt 2938 515 L
-2912 5572 mt
-( ) s
-3019 5360 mt 3019 5296 L
-3019 452 mt 3019 515 L
-2913 5572 mt
-(10) s
-3552 5360 mt 3552 5296 L
-3552 452 mt 3552 515 L
-3526 5572 mt
-( ) s
-3864 5360 mt 3864 5296 L
-3864 452 mt 3864 515 L
-3838 5572 mt
-( ) s
-4085 5360 mt 4085 5296 L
-4085 452 mt 4085 515 L
-4059 5572 mt
-( ) s
-4257 5360 mt 4257 5296 L
-4257 452 mt 4257 515 L
-4231 5572 mt
-( ) s
-4397 5360 mt 4397 5296 L
-4397 452 mt 4397 515 L
-4371 5572 mt
-( ) s
-4516 5360 mt 4516 5296 L
-4516 452 mt 4516 515 L
-4490 5572 mt
-( ) s
-4619 5360 mt 4619 5296 L
-4619 452 mt 4619 515 L
-4593 5572 mt
-( ) s
-4710 5360 mt 4710 5296 L
-4710 452 mt 4710 515 L
-4684 5572 mt
-( ) s
-4791 5360 mt 4791 5296 L
-4791 452 mt 4791 515 L
-4631 5572 mt
-(100) s
-5324 5360 mt 5324 5296 L
-5324 452 mt 5324 515 L
-5298 5572 mt
-( ) s
-5636 5360 mt 5636 5296 L
-5636 452 mt 5636 515 L
-5610 5572 mt
-( ) s
-5857 5360 mt 5857 5296 L
-5857 452 mt 5857 515 L
-5831 5572 mt
-( ) s
-6029 5360 mt 6029 5296 L
-6029 452 mt 6029 515 L
-6003 5572 mt
-( ) s
-6169 5360 mt 6169 5296 L
-6169 452 mt 6169 515 L
-6143 5572 mt
-( ) s
-6288 5360 mt 6288 5296 L
-6288 452 mt 6288 515 L
-6262 5572 mt
-( ) s
-6391 5360 mt 6391 5296 L
-6391 452 mt 6391 515 L
-6365 5572 mt
-( ) s
-6481 5360 mt 6481 5296 L
-6481 452 mt 6481 515 L
-6455 5572 mt
-( ) s
-6563 5360 mt 6563 5296 L
-6563 452 mt 6563 515 L
-6350 5572 mt
-(1000) s
-7096 5360 mt 7096 5296 L
-7096 452 mt 7096 515 L
-7070 5572 mt
-( ) s
-7408 5360 mt 7408 5296 L
-7408 452 mt 7408 515 L
-7382 5572 mt
-( ) s
-1070 5201 mt 1133 5201 L
-7449 5201 mt 7385 5201 L
- 769 5272 mt
-(0.1) s
-1070 4725 mt 1133 4725 L
-7449 4725 mt 7385 4725 L
- 982 4796 mt
-( ) s
-1070 4446 mt 1133 4446 L
-7449 4446 mt 7385 4446 L
- 982 4517 mt
-( ) s
-1070 4248 mt 1133 4248 L
-7449 4248 mt 7385 4248 L
- 982 4319 mt
-( ) s
-1070 4095 mt 1133 4095 L
-7449 4095 mt 7385 4095 L
- 982 4166 mt
-( ) s
-1070 3969 mt 1133 3969 L
-7449 3969 mt 7385 3969 L
- 982 4040 mt
-( ) s
-1070 3863 mt 1133 3863 L
-7449 3863 mt 7385 3863 L
- 982 3934 mt
-( ) s
-1070 3771 mt 1133 3771 L
-7449 3771 mt 7385 3771 L
- 982 3842 mt
-( ) s
-1070 3690 mt 1133 3690 L
-7449 3690 mt 7385 3690 L
- 982 3761 mt
-( ) s
-1070 3618 mt 1133 3618 L
-7449 3618 mt 7385 3618 L
- 929 3689 mt
-(1) s
-1070 3141 mt 1133 3141 L
-7449 3141 mt 7385 3141 L
- 982 3212 mt
-( ) s
-1070 2863 mt 1133 2863 L
-7449 2863 mt 7385 2863 L
- 982 2934 mt
-( ) s
-1070 2665 mt 1133 2665 L
-7449 2665 mt 7385 2665 L
- 982 2736 mt
-( ) s
-1070 2511 mt 1133 2511 L
-7449 2511 mt 7385 2511 L
- 982 2582 mt
-( ) s
-1070 2386 mt 1133 2386 L
-7449 2386 mt 7385 2386 L
- 982 2457 mt
-( ) s
-1070 2280 mt 1133 2280 L
-7449 2280 mt 7385 2280 L
- 982 2351 mt
-( ) s
-1070 2188 mt 1133 2188 L
-7449 2188 mt 7385 2188 L
- 982 2259 mt
-( ) s
-1070 2107 mt 1133 2107 L
-7449 2107 mt 7385 2107 L
- 982 2178 mt
-( ) s
-1070 2035 mt 1133 2035 L
-7449 2035 mt 7385 2035 L
- 822 2106 mt
-(10) s
-1070 1558 mt 1133 1558 L
-7449 1558 mt 7385 1558 L
- 982 1629 mt
-( ) s
-1070 1279 mt 1133 1279 L
-7449 1279 mt 7385 1279 L
- 982 1350 mt
-( ) s
-1070 1082 mt 1133 1082 L
-7449 1082 mt 7385 1082 L
- 982 1153 mt
-( ) s
-1070 928 mt 1133 928 L
-7449 928 mt 7385 928 L
- 982 999 mt
-( ) s
-1070 803 mt 1133 803 L
-7449 803 mt 7385 803 L
- 982 874 mt
-( ) s
-1070 697 mt 1133 697 L
-7449 697 mt 7385 697 L
- 982 768 mt
-( ) s
-1070 605 mt 1133 605 L
-7449 605 mt 7385 605 L
- 982 676 mt
-( ) s
-1070 524 mt 1133 524 L
-7449 524 mt 7385 524 L
- 982 595 mt
-( ) s
-1070 452 mt 1133 452 L
-7449 452 mt 7385 452 L
- 715 523 mt
-(100) s
-1070 5360 mt 7449 5360 L
-1070 452 mt 7449 452 L
-1070 5360 mt 1070 452 L
-7449 5360 mt 7449 452 L
-gs 1070 452 6380 4909 rc
-24 w
-438 -2 439 -1 439 -1 439 -1 439 -2 439 -1 439 -1 439 -2
-438 -1 439 -1 439 -2 439 -1 439 -3 1466 3618 14 MP stroke
-/c8 { 1.000000 0.400000 0.200000 sr} bdef
-c8
-438 -10 439 -10 439 -11 439 -11 439 -11 439 -11 439 -11 439 -12
-438 -12 439 -13 439 -11 439 -16 439 -26 1466 3618 14 MP stroke
-DA
-438 -36 439 -37 439 -41 439 -42 439 -46 439 -49 439 -53 439 -57
-438 -64 439 -70 439 -65 439 -95 439 -147 1466 5110 14 MP stroke
-SO
-/c9 { 0.400000 0.400000 1.000000 sr} bdef
-c9
-438 -37 439 -38 439 -42 439 -43 439 -48 439 -51 439 -55 439 -60
-438 -68 439 -75 439 -71 439 -109 439 -180 1466 3618 14 MP stroke
-DA
-438 -46 439 -49 439 -53 439 -56 439 -63 439 -69 439 -76 439 -85
-438 -100 439 -113 439 -110 439 -169 439 -276 1466 4150 14 MP stroke
-SO
-/c10 { 0.000000 0.700000 0.500000 sr} bdef
-c10
-438 -61 439 -66 439 -75 439 -81 439 -94 439 -108 439 -125 439 -144
-438 -177 439 -208 439 -205 439 -305 439 -473 1466 3618 14 MP stroke
-DA
-438 -63 439 -68 439 -77 439 -84 439 -98 439 -112 439 -130 439 -151
-438 -185 439 -218 439 -213 439 -315 439 -484 1466 3710 14 MP stroke
-gr
-
-24 w
-c10
-DA
-0 sg
- 617 4557 mt -90 rotate
-(Mean number of lexical entries \(tables\)) s
-90 rotate
-3390 5724 mt
-(Word frequency \(n) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 144 FMSR
-
-4963 5820 mt
-(w) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 192 FMSR
-
-5066 5724 mt
-(\)) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 120 FMSR
-
-1053 5403 mt
-( ) s
-7433 494 mt
-( ) s
-SO
-6 w
-1 sg
-0 500 2507 0 0 -500 1129 1012 4 MP
-PP
--2507 0 0 500 2507 0 0 -500 1129 1012 5 MP stroke
-4 w
-DO
-SO
-6 w
-0 sg
-1129 1012 mt 3636 1012 L
-1129 512 mt 3636 512 L
-1129 1012 mt 1129 512 L
-3636 1012 mt 3636 512 L
-1129 1012 mt 3636 1012 L
-1129 1012 mt 1129 512 L
-1129 1012 mt 3636 1012 L
-1129 512 mt 3636 512 L
-1129 1012 mt 1129 512 L
-3636 1012 mt 3636 512 L
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 192 FMSR
-
-1603 713 mt
-(Expectation) s
-gs 1129 512 2508 501 rc
-24 w
-365 0 1201 645 2 MP stroke
-gr
-
-24 w
-1603 948 mt
-(Antoniak approximation) s
-gs 1129 512 2508 501 rc
-DA
-365 0 1201 878 2 MP stroke
-SO
-6 w
-gr
-
-6 w
-
-end %%Color Dict
-
-eplot
-%%EndObject
-
-epage
-end
-
-showpage
-
-%%Trailer
-%%EOF
diff --git a/report/pyp_clustering/acl09-short/code/plot2.pdf b/report/pyp_clustering/acl09-short/code/plot2.pdf
deleted file mode 100644
index d9783120..00000000
--- a/report/pyp_clustering/acl09-short/code/plot2.pdf
+++ /dev/null
Binary files differ
diff --git a/report/pyp_clustering/acl09-short/code/plot3.eps b/report/pyp_clustering/acl09-short/code/plot3.eps
deleted file mode 100644
index f4ffbb62..00000000
--- a/report/pyp_clustering/acl09-short/code/plot3.eps
+++ /dev/null
@@ -1,721 +0,0 @@
-%!PS-Adobe-3.0 EPSF-3.0
-%%Creator: MATLAB, The Mathworks, Inc. Version 7.7.0.471 (R2008b). Operating System: Linux 2.6.18-128.1.6.el5.inf.1PAE #1 SMP Wed Apr 15 10:23:41 BST 2009 i686.
-%%Title: /afs/inf.ed.ac.uk/user/s/sgwater/research/papers/2009/acl09-short/code/plot3.eps
-%%CreationDate: 07/23/2009 17:31:43
-%%DocumentNeededFonts: Helvetica
-%%DocumentProcessColors: Cyan Magenta Yellow Black
-%%LanguageLevel: 2
-%%Pages: 1
-%%BoundingBox: -44 170 641 672
-%%EndComments
-
-%%BeginProlog
-% MathWorks dictionary
-/MathWorks 160 dict begin
-% definition operators
-/bdef {bind def} bind def
-/ldef {load def} bind def
-/xdef {exch def} bdef
-/xstore {exch store} bdef
-% operator abbreviations
-/c /clip ldef
-/cc /concat ldef
-/cp /closepath ldef
-/gr /grestore ldef
-/gs /gsave ldef
-/mt /moveto ldef
-/np /newpath ldef
-/cm /currentmatrix ldef
-/sm /setmatrix ldef
-/rm /rmoveto ldef
-/rl /rlineto ldef
-/s {show newpath} bdef
-/sc {setcmykcolor} bdef
-/sr /setrgbcolor ldef
-/sg /setgray ldef
-/w /setlinewidth ldef
-/j /setlinejoin ldef
-/cap /setlinecap ldef
-/rc {rectclip} bdef
-/rf {rectfill} bdef
-% page state control
-/pgsv () def
-/bpage {/pgsv save def} bdef
-/epage {pgsv restore} bdef
-/bplot /gsave ldef
-/eplot {stroke grestore} bdef
-% orientation switch
-/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
-% coordinate system mappings
-/dpi2point 0 def
-% font control
-/FontSize 0 def
-/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
- makefont setfont} bdef
-/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
- exch dup 3 1 roll findfont dup length dict begin
- { 1 index /FID ne {def}{pop pop} ifelse } forall
- /Encoding exch def currentdict end definefont pop} bdef
-/isroman {findfont /CharStrings get /Agrave known} bdef
-/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
- exch FMS} bdef
-/csm {1 dpi2point div -1 dpi2point div scale neg translate
- dup landscapeMode eq {pop -90 rotate}
- {rotateMode eq {90 rotate} if} ifelse} bdef
-% line types: solid, dotted, dashed, dotdash
-/SO { [] 0 setdash } bdef
-/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
-/DA { [6 dpi2point mul] 0 setdash } bdef
-/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
- dpi2point mul] 0 setdash } bdef
-% macros for lines and objects
-/L {lineto stroke} bdef
-/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
-/AP {{rlineto} repeat} bdef
-/PDlw -1 def
-/W {/PDlw currentlinewidth def setlinewidth} def
-/PP {closepath eofill} bdef
-/DP {closepath stroke} bdef
-/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto
- neg 0 exch rlineto closepath} bdef
-/FR {MR stroke} bdef
-/PR {MR fill} bdef
-/L1i {{currentfile picstr readhexstring pop} image} bdef
-/tMatrix matrix def
-/MakeOval {newpath tMatrix currentmatrix pop translate scale
-0 0 1 0 360 arc tMatrix setmatrix} bdef
-/FO {MakeOval stroke} bdef
-/PO {MakeOval fill} bdef
-/PD {currentlinewidth 2 div 0 360 arc fill
- PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
-/FA {newpath tMatrix currentmatrix pop translate scale
- 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
-/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
- 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
-/FAn {newpath tMatrix currentmatrix pop translate scale
- 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
-/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
- 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
-/vradius 0 def /hradius 0 def /lry 0 def
-/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
-/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
- /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
- vradius add translate hradius vradius scale 0 0 1 180 270 arc
- tMatrix setmatrix lrx hradius sub uly vradius add translate
- hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
- lrx hradius sub lry vradius sub translate hradius vradius scale
- 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
- translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
- closepath} bdef
-/FRR {MRR stroke } bdef
-/PRR {MRR fill } bdef
-/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
- newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
- rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
- sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
- closepath} bdef
-/FlrRR {MlrRR stroke } bdef
-/PlrRR {MlrRR fill } bdef
-/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
- newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
- rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
- sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
- closepath} bdef
-/FtbRR {MtbRR stroke } bdef
-/PtbRR {MtbRR fill } bdef
-/stri 6 array def /dtri 6 array def
-/smat 6 array def /dmat 6 array def
-/tmat1 6 array def /tmat2 6 array def /dif 3 array def
-/asub {/ind2 exch def /ind1 exch def dup dup
- ind1 get exch ind2 get sub exch } bdef
-/tri_to_matrix {
- 2 0 asub 3 1 asub 4 0 asub 5 1 asub
- dup 0 get exch 1 get 7 -1 roll astore } bdef
-/compute_transform {
- dmat dtri tri_to_matrix tmat1 invertmatrix
- smat stri tri_to_matrix tmat2 concatmatrix } bdef
-/ds {stri astore pop} bdef
-/dt {dtri astore pop} bdef
-/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
- currentfile
- 3 index 0 eq {/ASCIIHexDecode filter}
- {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
- ifelse exch readstring pop
- dup 0 3 index getinterval /rbmap xdef
- dup 2 index dup getinterval /gbmap xdef
- 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
-/it {gs np dtri aload pop moveto lineto lineto cp c
- cols rows 8 compute_transform
- rbmap gbmap bbmap true 3 colorimage gr}bdef
-/il {newpath moveto lineto stroke}bdef
-currentdict end def
-%%EndProlog
-
-%%BeginSetup
-MathWorks begin
-
-0 cap
-
-end
-%%EndSetup
-
-%%Page: 1 1
-%%BeginPageSetup
-%%PageBoundingBox: -44 170 641 672
-MathWorks begin
-bpage
-%%EndPageSetup
-
-%%BeginObject: obj1
-bplot
-
-/dpi2point 12 def
-portraitMode -0528 8064 csm
-
- 0 0 8231 6023 rc
-88 dict begin %Colortable dictionary
-/c0 { 0.000000 0.000000 0.000000 sr} bdef
-/c1 { 1.000000 1.000000 1.000000 sr} bdef
-/c2 { 0.900000 0.000000 0.000000 sr} bdef
-/c3 { 0.000000 0.820000 0.000000 sr} bdef
-/c4 { 0.000000 0.000000 0.800000 sr} bdef
-/c5 { 0.910000 0.820000 0.320000 sr} bdef
-/c6 { 1.000000 0.260000 0.820000 sr} bdef
-/c7 { 0.000000 0.820000 0.820000 sr} bdef
-c0
-1 j
-1 sg
- 0 0 8232 6024 rf
-6 w
-0 4908 6379 0 0 -4908 1070 5360 4 MP
-PP
--6379 0 0 4908 6379 0 0 -4908 1070 5360 5 MP stroke
-4 w
-DO
-SO
-6 w
-0 sg
-1070 5360 mt 7449 5360 L
-1070 452 mt 7449 452 L
-1070 5360 mt 1070 452 L
-7449 5360 mt 7449 452 L
-1070 5360 mt 7449 5360 L
-1070 5360 mt 1070 452 L
-1247 5360 mt 1247 5296 L
-1247 452 mt 1247 515 L
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 192 FMSR
-
-1194 5572 mt
-(1) s
-1780 5360 mt 1780 5296 L
-1780 452 mt 1780 515 L
-1754 5572 mt
-( ) s
-2092 5360 mt 2092 5296 L
-2092 452 mt 2092 515 L
-2066 5572 mt
-( ) s
-2314 5360 mt 2314 5296 L
-2314 452 mt 2314 515 L
-2288 5572 mt
-( ) s
-2485 5360 mt 2485 5296 L
-2485 452 mt 2485 515 L
-2459 5572 mt
-( ) s
-2626 5360 mt 2626 5296 L
-2626 452 mt 2626 515 L
-2600 5572 mt
-( ) s
-2744 5360 mt 2744 5296 L
-2744 452 mt 2744 515 L
-2718 5572 mt
-( ) s
-2847 5360 mt 2847 5296 L
-2847 452 mt 2847 515 L
-2821 5572 mt
-( ) s
-2938 5360 mt 2938 5296 L
-2938 452 mt 2938 515 L
-2912 5572 mt
-( ) s
-3019 5360 mt 3019 5296 L
-3019 452 mt 3019 515 L
-2913 5572 mt
-(10) s
-3552 5360 mt 3552 5296 L
-3552 452 mt 3552 515 L
-3526 5572 mt
-( ) s
-3864 5360 mt 3864 5296 L
-3864 452 mt 3864 515 L
-3838 5572 mt
-( ) s
-4085 5360 mt 4085 5296 L
-4085 452 mt 4085 515 L
-4059 5572 mt
-( ) s
-4257 5360 mt 4257 5296 L
-4257 452 mt 4257 515 L
-4231 5572 mt
-( ) s
-4397 5360 mt 4397 5296 L
-4397 452 mt 4397 515 L
-4371 5572 mt
-( ) s
-4516 5360 mt 4516 5296 L
-4516 452 mt 4516 515 L
-4490 5572 mt
-( ) s
-4619 5360 mt 4619 5296 L
-4619 452 mt 4619 515 L
-4593 5572 mt
-( ) s
-4710 5360 mt 4710 5296 L
-4710 452 mt 4710 515 L
-4684 5572 mt
-( ) s
-4791 5360 mt 4791 5296 L
-4791 452 mt 4791 515 L
-4631 5572 mt
-(100) s
-5324 5360 mt 5324 5296 L
-5324 452 mt 5324 515 L
-5298 5572 mt
-( ) s
-5636 5360 mt 5636 5296 L
-5636 452 mt 5636 515 L
-5610 5572 mt
-( ) s
-5857 5360 mt 5857 5296 L
-5857 452 mt 5857 515 L
-5831 5572 mt
-( ) s
-6029 5360 mt 6029 5296 L
-6029 452 mt 6029 515 L
-6003 5572 mt
-( ) s
-6169 5360 mt 6169 5296 L
-6169 452 mt 6169 515 L
-6143 5572 mt
-( ) s
-6288 5360 mt 6288 5296 L
-6288 452 mt 6288 515 L
-6262 5572 mt
-( ) s
-6391 5360 mt 6391 5296 L
-6391 452 mt 6391 515 L
-6365 5572 mt
-( ) s
-6481 5360 mt 6481 5296 L
-6481 452 mt 6481 515 L
-6455 5572 mt
-( ) s
-6563 5360 mt 6563 5296 L
-6563 452 mt 6563 515 L
-6350 5572 mt
-(1000) s
-7096 5360 mt 7096 5296 L
-7096 452 mt 7096 515 L
-7070 5572 mt
-( ) s
-7408 5360 mt 7408 5296 L
-7408 452 mt 7408 515 L
-7382 5572 mt
-( ) s
-1070 5354 mt 1133 5354 L
-7449 5354 mt 7385 5354 L
- 982 5425 mt
-( ) s
-1070 5257 mt 1133 5257 L
-7449 5257 mt 7385 5257 L
- 982 5328 mt
-( ) s
-1070 5171 mt 1133 5171 L
-7449 5171 mt 7385 5171 L
- 929 5242 mt
-(1) s
-1070 4602 mt 1133 4602 L
-7449 4602 mt 7385 4602 L
- 982 4673 mt
-( ) s
-1070 4270 mt 1133 4270 L
-7449 4270 mt 7385 4270 L
- 982 4341 mt
-( ) s
-1070 4034 mt 1133 4034 L
-7449 4034 mt 7385 4034 L
- 982 4105 mt
-( ) s
-1070 3851 mt 1133 3851 L
-7449 3851 mt 7385 3851 L
- 982 3922 mt
-( ) s
-1070 3702 mt 1133 3702 L
-7449 3702 mt 7385 3702 L
- 982 3773 mt
-( ) s
-1070 3575 mt 1133 3575 L
-7449 3575 mt 7385 3575 L
- 982 3646 mt
-( ) s
-1070 3466 mt 1133 3466 L
-7449 3466 mt 7385 3466 L
- 982 3537 mt
-( ) s
-1070 3369 mt 1133 3369 L
-7449 3369 mt 7385 3369 L
- 982 3440 mt
-( ) s
-1070 3283 mt 1133 3283 L
-7449 3283 mt 7385 3283 L
- 822 3354 mt
-(10) s
-1070 2715 mt 1133 2715 L
-7449 2715 mt 7385 2715 L
- 982 2786 mt
-( ) s
-1070 2382 mt 1133 2382 L
-7449 2382 mt 7385 2382 L
- 982 2453 mt
-( ) s
-1070 2147 mt 1133 2147 L
-7449 2147 mt 7385 2147 L
- 982 2218 mt
-( ) s
-1070 1964 mt 1133 1964 L
-7449 1964 mt 7385 1964 L
- 982 2035 mt
-( ) s
-1070 1814 mt 1133 1814 L
-7449 1814 mt 7385 1814 L
- 982 1885 mt
-( ) s
-1070 1688 mt 1133 1688 L
-7449 1688 mt 7385 1688 L
- 982 1759 mt
-( ) s
-1070 1578 mt 1133 1578 L
-7449 1578 mt 7385 1578 L
- 982 1649 mt
-( ) s
-1070 1482 mt 1133 1482 L
-7449 1482 mt 7385 1482 L
- 982 1553 mt
-( ) s
-1070 1395 mt 1133 1395 L
-7449 1395 mt 7385 1395 L
- 715 1466 mt
-(100) s
-1070 827 mt 1133 827 L
-7449 827 mt 7385 827 L
- 982 898 mt
-( ) s
-1070 495 mt 1133 495 L
-7449 495 mt 7385 495 L
- 982 566 mt
-( ) s
-1070 5360 mt 7449 5360 L
-1070 452 mt 7449 452 L
-1070 5360 mt 1070 452 L
-7449 5360 mt 7449 452 L
-gs 1070 452 6380 4909 rc
-24 w
-438 -1 439 -2 439 -1 439 -2 439 -1 439 -2 439 -1 439 -2
-438 -1 439 -2 439 -1 439 -3 439 -3 1466 5171 14 MP stroke
-gr
-
-24 w
- 48 48 1466 5171 FO
- 48 48 1905 5168 FO
- 48 48 2344 5165 FO
- 48 48 2783 5164 FO
- 48 48 3222 5162 FO
- 48 48 3660 5161 FO
- 48 48 4099 5159 FO
- 48 48 4538 5158 FO
- 48 48 4977 5157 FO
- 48 48 5416 5154 FO
- 48 48 5855 5154 FO
- 48 48 6294 5153 FO
- 48 48 6733 5151 FO
- 48 48 7171 5153 FO
-gs 1070 452 6380 4909 rc
-gr
-
-0 j
--55 95 -55 -95 110 0 1411 5203 4 MP
-DP
--55 95 -55 -95 110 0 1850 5200 4 MP
-DP
--55 95 -55 -95 110 0 2289 5197 4 MP
-DP
--55 95 -55 -95 110 0 2728 5196 4 MP
-DP
--55 95 -55 -95 110 0 3167 5194 4 MP
-DP
--55 95 -55 -95 110 0 3605 5193 4 MP
-DP
--55 95 -55 -95 110 0 4044 5191 4 MP
-DP
--55 95 -55 -95 110 0 4483 5190 4 MP
-DP
--55 95 -55 -95 110 0 4922 5189 4 MP
-DP
--55 95 -55 -95 110 0 5361 5185 4 MP
-DP
--55 95 -55 -95 110 0 5800 5186 4 MP
-DP
--55 95 -55 -95 110 0 6239 5184 4 MP
-DP
--55 95 -55 -95 110 0 6678 5182 4 MP
-DP
--55 95 -55 -95 110 0 7116 5187 4 MP
-DP
-gs 1070 452 6380 4909 rc
-/c8 { 1.000000 0.400000 0.200000 sr} bdef
-c8
-438 -12 439 -12 439 -13 439 -13 439 -13 439 -13 439 -14 439 -13
-438 -15 439 -15 439 -14 439 -19 439 -31 1466 5171 14 MP stroke
-gr
-
-c8
- 48 48 1466 5171 FO
- 48 48 1905 5140 FO
- 48 48 2344 5121 FO
- 48 48 2783 5108 FO
- 48 48 3222 5092 FO
- 48 48 3660 5077 FO
- 48 48 4099 5065 FO
- 48 48 4538 5050 FO
- 48 48 4977 5036 FO
- 48 48 5416 5022 FO
- 48 48 5855 5013 FO
- 48 48 6294 4999 FO
- 48 48 6733 4995 FO
- 48 48 7171 4967 FO
-gs 1070 452 6380 4909 rc
-gr
-
--55 95 -55 -95 110 0 1411 5203 4 MP
-DP
--55 95 -55 -95 110 0 1850 5173 4 MP
-DP
--55 95 -55 -95 110 0 2289 5153 4 MP
-DP
--55 95 -55 -95 110 0 2728 5139 4 MP
-DP
--55 95 -55 -95 110 0 3167 5122 4 MP
-DP
--55 95 -55 -95 110 0 3605 5105 4 MP
-DP
--55 95 -55 -95 110 0 4044 5089 4 MP
-DP
--55 95 -55 -95 110 0 4483 5070 4 MP
-DP
--55 95 -55 -95 110 0 4922 5053 4 MP
-DP
--55 95 -55 -95 110 0 5361 5036 4 MP
-DP
--55 95 -55 -95 110 0 5800 5021 4 MP
-DP
--55 95 -55 -95 110 0 6239 5007 4 MP
-DP
--55 95 -55 -95 110 0 6678 4984 4 MP
-DP
--55 95 -55 -95 110 0 7116 4944 4 MP
-DP
-gs 1070 452 6380 4909 rc
-/c9 { 0.400000 0.400000 1.000000 sr} bdef
-c9
-438 -44 439 -46 439 -50 439 -51 439 -57 439 -61 439 -65 439 -72
-438 -81 439 -89 439 -85 439 -130 439 -215 1466 5171 14 MP stroke
-gr
-
-c9
- 48 48 1466 5171 FO
- 48 48 1905 4956 FO
- 48 48 2344 4826 FO
- 48 48 2783 4741 FO
- 48 48 3222 4651 FO
- 48 48 3660 4571 FO
- 48 48 4099 4500 FO
- 48 48 4538 4435 FO
- 48 48 4977 4372 FO
- 48 48 5416 4316 FO
- 48 48 5855 4268 FO
- 48 48 6294 4214 FO
- 48 48 6733 4167 FO
- 48 48 7171 4121 FO
-gs 1070 452 6380 4909 rc
-gr
-
--55 95 -55 -95 110 0 1411 5203 4 MP
-DP
--55 95 -55 -95 110 0 1850 5057 4 MP
-DP
--55 95 -55 -95 110 0 2289 4938 4 MP
-DP
--55 95 -55 -95 110 0 2728 4838 4 MP
-DP
--55 95 -55 -95 110 0 3167 4706 4 MP
-DP
--55 95 -55 -95 110 0 3605 4555 4 MP
-DP
--55 95 -55 -95 110 0 4044 4385 4 MP
-DP
--55 95 -55 -95 110 0 4483 4185 4 MP
-DP
--55 95 -55 -95 110 0 4922 3940 4 MP
-DP
--55 95 -55 -95 110 0 5361 3650 4 MP
-DP
--55 95 -55 -95 110 0 5800 3310 4 MP
-DP
--55 95 -55 -95 110 0 6239 2895 4 MP
-DP
--55 95 -55 -95 110 0 6678 2492 4 MP
-DP
--55 95 -55 -95 110 0 7116 2000 4 MP
-DP
-gs 1070 452 6380 4909 rc
-/c10 { 0.000000 0.700000 0.500000 sr} bdef
-c10
-438 -74 439 -78 439 -89 439 -98 439 -111 439 -129 439 -148 439 -173
-438 -211 439 -248 439 -244 439 -364 439 -564 1466 5171 14 MP stroke
-gr
-
-c10
- 48 48 1466 5171 FO
- 48 48 1905 4607 FO
- 48 48 2344 4243 FO
- 48 48 2783 3999 FO
- 48 48 3222 3751 FO
- 48 48 3660 3540 FO
- 48 48 4099 3368 FO
- 48 48 4538 3220 FO
- 48 48 4977 3090 FO
- 48 48 5416 2979 FO
- 48 48 5855 2882 FO
- 48 48 6294 2791 FO
- 48 48 6733 2713 FO
- 48 48 7171 2638 FO
-gs 1070 452 6380 4909 rc
-gr
-
--55 95 -55 -95 110 0 1411 5203 4 MP
-DP
--55 95 -55 -95 110 0 1850 4905 4 MP
-DP
--55 95 -55 -95 110 0 2289 4630 4 MP
-DP
--55 95 -55 -95 110 0 2728 4368 4 MP
-DP
--55 95 -55 -95 110 0 3167 4001 4 MP
-DP
--55 95 -55 -95 110 0 3605 3574 4 MP
-DP
--55 95 -55 -95 110 0 4044 3123 4 MP
-DP
--55 95 -55 -95 110 0 4483 2661 4 MP
-DP
--55 95 -55 -95 110 0 4922 2196 4 MP
-DP
--55 95 -55 -95 110 0 5361 1735 4 MP
-DP
--55 95 -55 -95 110 0 5800 1279 4 MP
-DP
--55 95 -55 -95 110 0 6239 873 4 MP
-DP
-gs 1070 452 6380 4909 rc
-gr
-
-0 sg
- 617 4557 mt -90 rotate
-(Mean number of lexical entries \(tables\)) s
-90 rotate
-3390 5724 mt
-(Word frequency \(n) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 144 FMSR
-
-4963 5820 mt
-(w) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 192 FMSR
-
-5066 5724 mt
-(\)) s
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 120 FMSR
-
-1053 5403 mt
-( ) s
-7433 494 mt
-( ) s
-6 w
-1 sg
-0 729 2519 0 0 -729 1129 1241 4 MP
-PP
--2519 0 0 729 2519 0 0 -729 1129 1241 5 MP stroke
-4 w
-DO
-SO
-6 w
-0 sg
-1129 1241 mt 3648 1241 L
-1129 512 mt 3648 512 L
-1129 1241 mt 1129 512 L
-3648 1241 mt 3648 512 L
-1129 1241 mt 3648 1241 L
-1129 1241 mt 1129 512 L
-1129 1241 mt 3648 1241 L
-1129 512 mt 3648 512 L
-1129 1241 mt 1129 512 L
-3648 1241 mt 3648 512 L
-%%IncludeResource: font Helvetica
-/Helvetica /ISOLatin1Encoding 192 FMSR
-
-1609 712 mt
-(Expectation) s
-gs 1129 512 2520 730 rc
-24 w
-370 0 1202 644 2 MP stroke
-gr
-
-24 w
-1609 945 mt
-(Empirical, fixed base) s
-gs 1129 512 2520 730 rc
-gs 1265 754 245 245 rc
- 48 48 1387 876 FO
-gr
-
-gr
-
-1609 1178 mt
-(Empirical, inferred base) s
-gs 1129 512 2520 730 rc
-gs 1265 986 245 245 rc
--55 95 -55 -95 110 0 1332 1140 4 MP
-DP
-gr
-
-6 w
-gr
-
-6 w
-
-end %%Color Dict
-
-eplot
-%%EndObject
-
-epage
-end
-
-showpage
-
-%%Trailer
-%%EOF
diff --git a/report/pyp_clustering/acl09-short/code/plot3.pdf b/report/pyp_clustering/acl09-short/code/plot3.pdf
deleted file mode 100644
index a3e81faa..00000000
--- a/report/pyp_clustering/acl09-short/code/plot3.pdf
+++ /dev/null
Binary files differ
diff --git a/report/pyp_clustering/acl09-short/code/pygibbs3.c b/report/pyp_clustering/acl09-short/code/pygibbs3.c
deleted file mode 100644
index 3c2240a1..00000000
--- a/report/pyp_clustering/acl09-short/code/pygibbs3.c
+++ /dev/null
@@ -1,198 +0,0 @@
-#include <stdio.h>
-#include <math.h>
-
-#define myrand() (double) (((unsigned long) randomMT()) / 4294967296.)
-
-#define W 30114
-#define N 831190
-#define KWMAX 1000
-
-#define NLOOPS 1000
-#define BURNIN 0
-#define SAMPLEFREQ 1
-
-#define ALPHA 0.0 // PYB a
-//#define GAMMA 1000000000.0
-#define GAMMA .01 // Dirichlet over multinomial P0
-
-double BETA; // CRP alpha (PYB b)
-int w[N], z[N]; // words, table assignments
-int typecount[W], typetot; //# of tables of each type, total # tables
-int usedcount[W];
-double ztot[W][KWMAX];
-double k; // total # tables
-int nactive;
-
-void initialise(void);
-void anderson(void);
-void fileread(void);
-
-void initialise(void)
-{
- int i,j;
-
- for (i = 1; i < W; i++) {
- typecount[i] = 0;
- usedcount[i] = 0;
- for (j = 0; j < KWMAX; j++) {
- ztot[i][j] = 0;
- }
- }
-
-}
-
-void anderson(void) //stochastic Anderson-style initialisation
-{
- int i,j, tag;
- double max, totprob, r, runtot;
- double probs[KWMAX];
- int ind, temp;
-
- ztot[w[0]][0] = 1;
- z[0] = 0;
- typecount[w[0]] = 1;
- usedcount[w[0]] = 1;
- k = 1;
- typetot = 1;
-
- for (i = 1; i < N; i++) {
- // printf("%5d\n", w[i]);
- max = 0; tag = 0; totprob = 0;
- for (j = 0; j < usedcount[w[i]]; j++) {
- probs[j] = ztot[w[i]][j] - ALPHA;
- totprob += probs[j];
- }
- probs[usedcount[w[i]]] = (ALPHA*k+BETA)*((double) typecount[w[i]]+GAMMA)/((double) typetot+W*GAMMA);
- totprob += probs[usedcount[w[i]]];
- // printf("%10.6lf\n",totprob);
- r = myrand()*totprob;
- max = probs[0];
- j = 0;
- while (r>max) {
- j++;
- max += probs[j];
- }
- // printf("%5d\n",j);
- z[i] = j;
- ztot[w[i]][j]++;
- if (ztot[w[i]][j]==1) {
- typecount[w[i]]++;
- usedcount[w[i]]++;
- if (usedcount[w[i]]==KWMAX) {
- printf("Maximum number of tables exceeded!!!\n");
- }
- typetot++;
- k++;
- }
- }
-}
-
-void fileread(void)
-{
- int i,j, wt;
- FILE *fileptr;
-
- fileptr = fopen("wsj.dat", "r");
-
- for (i = 1; i < N; i++) {
- fscanf(fileptr, "%d", &wt);
- w[i] = wt-1;
- z[i] = 0;
- }
- printf("Total cases: %10d\n", N);
- fclose(fileptr);
-}
-
-main(int argc, char* argv[])
-{
- int i,j,loop,run;
- int temp,ind, tag;
- double newprob, WBETA;
- double probs[KWMAX];
- double max, totprob, r;
- int sampcount;
- FILE *fileptr;
- char filename[30];
- double score;
-
- if (argc < 2) {
- printf("Please provide a value of b\n");
- exit(0);
- }
- BETA = strtol(argv[1]);
- printf("Basic initialising...\n");
-
- // you can seed with any uint32, but the best are odds in 0..(2^32 - 1)
- seedMT(4157U);
-
- sprintf(filename,"typecountrecordwsjpeak%0.1f.%0.1f.dat",ALPHA,BETA);
- fileptr = fopen(filename, "w");
-
- printf("Reading from file...\n");
- fileread();
-
- printf("Initialising...\n");
- initialise();
- printf("k = %1.0f, typetot = %d\n",k,typetot);
-
- printf("Finding start state...\n");
- anderson();
- printf("Beginning burnin...\n");
- for (loop = 0; loop < NLOOPS; loop++) {
- for (i = 0; i < N; i++) {
- j = z[i];
- ztot[w[i]][j]--;
- if (ztot[w[i]][j] == 0) {
- if (j==usedcount[w[i]]) {
- usedcount[w[i]]--;
- }
- typecount[w[i]]--;
- typetot--;
- k--;
- }
- max = 0; tag = 0; totprob = 0;
- for (j = 0; j <= usedcount[w[i]]; j++) {
- if (ztot[w[i]][j] > 0) {
- probs[j] = ztot[w[i]][j] - ALPHA;
- } else {
- probs[j] = 0;
- if (tag == 0) {
- probs[j] = (ALPHA*k+BETA)*(((double) typecount[w[i]])+GAMMA)/(((double) typetot)+((double) W)*GAMMA);
- tag = 1;
- }
- }
- totprob += probs[j];
- }
- r = myrand()*totprob;
- max = probs[0];
- j = 0;
- while (r>max) {
- j++;
- max += probs[j];
- }
- z[i] = j;
- ztot[w[i]][j]++;
- if (ztot[w[i]][j]==1) {
- if (j == usedcount[w[i]]) {
- usedcount[w[i]]++;
- if (usedcount[w[i]]==KWMAX) {
- printf("Maximum number of tables exceeded!!!\n");
- }
- }
- typecount[w[i]]++;
- typetot++;
- k++;
- }
- }
- printf("Completed sample # %5d\n", loop);
- if (k != typetot) printf("k = %1.0f, typetot = %d\n",k,typetot);
- if (loop >= BURNIN && loop % SAMPLEFREQ == 0) {
- for (i = 0; i < W; i++) {
- fprintf(fileptr," %d", typecount[i]); //print (table?) count for each word type
- }
- fprintf(fileptr,"\n");
- }
- }
- fclose(fileptr);
-}
-
diff --git a/report/pyp_clustering/acl09-short/code/pygibbs_geom b/report/pyp_clustering/acl09-short/code/pygibbs_geom
deleted file mode 100755
index 14ae82f1..00000000
--- a/report/pyp_clustering/acl09-short/code/pygibbs_geom
+++ /dev/null
Binary files differ
diff --git a/report/pyp_clustering/acl09-short/code/pygibbs_geom.c b/report/pyp_clustering/acl09-short/code/pygibbs_geom.c
deleted file mode 100644
index bafa0416..00000000
--- a/report/pyp_clustering/acl09-short/code/pygibbs_geom.c
+++ /dev/null
@@ -1,212 +0,0 @@
-#include <stdio.h>
-#include <math.h>
-
-#define myrand() (double) (((unsigned long) randomMT()) / 4294967296.)
-
-#define W 30114
-#define N 831190
-#define KWMAX 5000
-
-#define NLOOPS 11000
-#define BURNIN 1000
-#define SAMPLEFREQ 10
-
-#define ALPHA 0.0 // PYB a
-//#define GAMMA 1000000000.0
-#define GAMMA .01 // Dirichlet over multinomial P0
-
-double BETA; // CRP alpha (PYB b)
-int w[N], z[N]; // words, table assignments
-double base[N]; // base prob of word under geometric
-int typecount[W], typetot; //# of tables of each type, total # tables
-int usedcount[W];
-double ztot[W][KWMAX];
-double k; // total # tables
-int nactive;
-
-void initialise(void);
-void anderson(void);
-void fileread(void);
-
-void initialise(void)
-{
- int i,j;
-
- for (i = 1; i < W; i++) {
- typecount[i] = 0;
- usedcount[i] = 0;
- for (j = 0; j < KWMAX; j++) {
- ztot[i][j] = 0;
- }
- }
-
-}
-
-double base_p(int len) {
- double p = 1.0/26;
- return pow(p,len)*pow(.5,len); //assume p_# = .5
-}
-
-void anderson(void) //stochastic Anderson-style initialisation
-{
- int i,j, tag;
- double max, totprob, r, runtot;
- double probs[KWMAX];
- int ind, temp;
-
- ztot[w[0]][0] = 1;
- z[0] = 0;
- typecount[w[0]] = 1;
- usedcount[w[0]] = 1;
- k = 1;
- typetot = 1;
-
- for (i = 1; i < N; i++) {
- // printf("%5d\n", w[i]);
- max = 0; tag = 0; totprob = 0;
- for (j = 0; j < usedcount[w[i]]; j++) {
- probs[j] = ztot[w[i]][j] - ALPHA;
- totprob += probs[j];
- }
- probs[usedcount[w[i]]] = (ALPHA*k+BETA)*base[i];
- totprob += probs[usedcount[w[i]]];
- // printf("%10.6lf\n",totprob);
- r = myrand()*totprob;
- max = probs[0];
- j = 0;
- while (r>max) {
- j++;
- max += probs[j];
- }
- // printf("%5d\n",j);
- z[i] = j;
- ztot[w[i]][j]++;
- if (ztot[w[i]][j]==1) {
- typecount[w[i]]++;
- usedcount[w[i]]++;
- if (usedcount[w[i]]==KWMAX) {
- printf("Maximum number of tables exceeded!!!\n");
- }
- typetot++;
- k++;
- }
- }
-}
-
-void fileread(void)
-{
- int i,j, wt, len;
- FILE *fileptr;
-
- fileptr = fopen("wsj.dat", "r");
-
- for (i = 1; i < N; i++) {
- fscanf(fileptr, "%d", &wt);
- w[i] = wt-1;
- z[i] = 0;
- }
- printf("Total cases: %10d\n", N);
- fclose(fileptr);
-
- fileptr = fopen("wsj_lengths.dat", "r");
-
- for (i = 1; i < N; i++) {
- fscanf(fileptr, "%d", &len);
- base[i] = base_p(len);
- }
- fclose(fileptr);
-}
-
-main(int argc, char* argv[])
-{
- int i,j,loop,run;
- int temp,ind, tag;
- double newprob, WBETA;
- double probs[KWMAX];
- double max, totprob, r;
- int sampcount;
- FILE *fileptr;
- char filename[30];
- double score;
-
- if (argc < 2) {
- printf("Please provide a value of b\n");
- exit(0);
- }
- BETA = strtol(argv[1]);
- printf("Basic initialising...\n");
-
- // you can seed with any uint32, but the best are odds in 0..(2^32 - 1)
- seedMT(4157U);
-
- sprintf(filename,"typecountrecordwsjgeom%0.1f.%0.1f.dat",ALPHA,BETA);
- fileptr = fopen(filename, "w");
-
- printf("Reading from file...\n");
- fileread();
-
- printf("Initialising...\n");
- initialise();
- printf("k = %1.0f, typetot = %d\n",k,typetot);
-
- printf("Finding start state...\n");
- anderson();
- printf("Beginning burnin...\n");
- for (loop = 0; loop < NLOOPS; loop++) {
- for (i = 0; i < N; i++) {
- j = z[i];
- ztot[w[i]][j]--;
- if (ztot[w[i]][j] == 0) {
- if (j==usedcount[w[i]]) {
- usedcount[w[i]]--;
- }
- typecount[w[i]]--;
- typetot--;
- k--;
- }
- max = 0; tag = 0; totprob = 0;
- for (j = 0; j <= usedcount[w[i]]; j++) {
- if (ztot[w[i]][j] > 0) {
- probs[j] = ztot[w[i]][j] - ALPHA;
- } else {
- probs[j] = 0;
- if (tag == 0) {
- probs[j] = (ALPHA*k+BETA)*base[i];
- tag = 1;
- }
- }
- totprob += probs[j];
- }
- r = myrand()*totprob;
- max = probs[0];
- j = 0;
- while (r>max) {
- j++;
- max += probs[j];
- }
- z[i] = j;
- ztot[w[i]][j]++;
- if (ztot[w[i]][j]==1) {
- if (j == usedcount[w[i]]) {
- usedcount[w[i]]++;
- if (usedcount[w[i]]==KWMAX) {
- printf("Maximum number of tables exceeded!!!\n");
- }
- }
- typecount[w[i]]++;
- typetot++;
- k++;
- }
- }
- printf("Completed sample # %5d\n", loop);
- if (k != typetot) printf("k = %1.0f, typetot = %d\n",k,typetot);
- if (loop >= BURNIN && loop % SAMPLEFREQ == 0) {
- for (i = 0; i < W; i++) {
- fprintf(fileptr," %d", typecount[i]); //print (table?) count for each word type
- }
- fprintf(fileptr,"\n");
- }
- }
- fclose(fileptr);
-}
-
diff --git a/report/pyp_clustering/acl09-short/code/run-peak.prl b/report/pyp_clustering/acl09-short/code/run-peak.prl
deleted file mode 100755
index fb1e798a..00000000
--- a/report/pyp_clustering/acl09-short/code/run-peak.prl
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/usr/bin/perl
-
-for $i (0..5) {
-$beta = 10**$i;
-$cmd = "pygibbs_peak $beta\n";
-print $cmd;
-`$cmd`;
-}
diff --git a/report/pyp_clustering/acl09-short/code/run.prl b/report/pyp_clustering/acl09-short/code/run.prl
deleted file mode 100755
index ac69559c..00000000
--- a/report/pyp_clustering/acl09-short/code/run.prl
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/usr/bin/perl
-
-for $i (0..5) {
-$beta = 10**$i;
-$cmd = "pygibbs_geom $beta\n";
-print $cmd;
-`$cmd`;
-}
diff --git a/report/pyp_clustering/acl09-short/code/word_lengths.prl b/report/pyp_clustering/acl09-short/code/word_lengths.prl
deleted file mode 100755
index 4b4ed03b..00000000
--- a/report/pyp_clustering/acl09-short/code/word_lengths.prl
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/perl -w
-use Getopt::Std;
-use File::Basename;
-use List::Util qw(max maxstr min minstr reduce shuffle sum);
-use lib "$ENV{HOME}/src/perl/";
-use sg_utils;
-use strict;
-use vars qw();
-
-my $usage = "Usage: $0 \n";
-
-getopts('');
-
-die $usage unless (1);
-
-while (<>) {
-chomp;
-print length;
-print "\n";
-}
-
diff --git a/report/pyp_clustering/acl09-short/code/wsjplots2.m b/report/pyp_clustering/acl09-short/code/wsjplots2.m
deleted file mode 100644
index eed41846..00000000
--- a/report/pyp_clustering/acl09-short/code/wsjplots2.m
+++ /dev/null
@@ -1,99 +0,0 @@
-
-load wsj
-
-figure(1)
-clf
-subplot(1,2,2)
-hold on
-
-for i = 1:9
- a = i/10;
- [logbins predicted dummy] = logbinmean(counts,counts.^a,20,20);
- ph = plot(log10(logbins),log10(predicted),'k');
- set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
-end
-
-for i = 1:9
- a = i/10;
- disp(['Loading results for a = ' num2str(a) ]);
-
- typecountrecord= load([ 'typecountrecordwsjflat' num2str(a) '.1.0.dat']);
-
- typecountrecordmean = mean(typecountrecord(500:1000,:));
-
- save([ 'typecountrecordmeanwsjflat' num2str(a) '.1.0.mat'],'typecountrecordmean');
-
- [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20)
- errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
- drawnow
-end
-
-
-
-
-[logbins meanval seval] = logbinmean(counts,counts,20,20)
-[logbins predicted dummy] = logbinmean(counts,counts,20,20)
-ph = plot(log10(logbins),log10(predicted),'r');
-hold on
-errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
-
-set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
-
-set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'xlim',[-0.1 3.5])
-set(gca,'ylim',[-0.1 3.5])
-set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-set(gca,'yticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-
-title('Pitman-Yor process adaptor')
-ylabel('Mean number of lexical entries')
-xlabel('Word frequency (n_w)')
-box on
-
-subplot(1,2,1)
-
-for i = 1:5
-
- b = 10^(i-1)
-
- disp(['Loading results for b = ' num2str(b) ]);
- typecountrecord= load([ 'typecountrecordwsjflat0.0.' num2str(b) '.0.dat']);
-
- typecountrecordmean = mean(typecountrecord(500:1000,:));
- save([ 'typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
-
- [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20)
- [logbins predicted dummy] = logbinmean(counts,crppred(counts,b),20,20)
-% errorbar(log10(logbins),meanval,seval,'k.');
- hold on
- ph = plot(log10(logbins),log10(predicted),'r');
- % ph = plot(log10(logbins),predicted,'r');
- set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
- errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
-end
-
-set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'xlim',[-0.1 3.5])
-set(gca,'ylim',[-0.1 1.5])
-set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-set(gca,'yticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-title('Chinese restaurant process adaptor')
-ylabel('Mean number of lexical entries')
-xlabel('Word frequency (n_w)')
-box on
-
-
diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl.m
deleted file mode 100644
index 50582e7f..00000000
--- a/report/pyp_clustering/acl09-short/code/wsjplots_acl.m
+++ /dev/null
@@ -1,74 +0,0 @@
-%wsj_lengths = load([ 'wsj_lengths.dat']);
-%save([ 'wsj_lengths.mat'],'wsj_lengths');
-load wsj
-load wsj_lengths
-
-figure(1)
-clf
-
-hold on
-
-for i = 3:6
-
- b = 10^(i-1)
-
- % plot lines for CRP exact prediction using summation
- [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20);
- ph = plot(log10(logbins),log10(predicted),'r');
- set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
-
- % plot lines for CRP Antoniak prediction
- [logbins predicted dummy] = logbinmean(counts, antoniakpred(counts,b),20,20);
- ph = plot(log10(logbins),log10(predicted),'r');
- set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','--')
-
- % plot lines for CRP Cohn prediction
- %[logbins predicted dummy] = logbinmean(counts, cohnpred(counts,b),20,20);
- %ph = plot(log10(logbins),log10(predicted),'r');
- %set(ph,'color',[0.2 0.2 1],'linewidth',1.5,'linestyle','.')
-
- disp(['Loading results for b = ' num2str(b) ]);
- %%% uncomment these lines if .mat file is not yet generated. %%%
- %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']);
- %typecountrecordmean = mean(typecountrecord(:,:));
- %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
- load([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat']);
-
- %plot emprical counts with error bars
- [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
- plot(log10(logbins),log10(meanval),'k*');
- %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
-
- disp(['Loading results for b = ' num2str(b) ]);
- %%% uncomment these lines if .mat file is not yet generated. %%%
- %typecountrecord= load([ 'outputs/typecountrecordwsjpeak0.0.' num2str(b) '.0.dat']);
- %typecountrecordmean = mean(typecountrecord(:,:));
- %save([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
- load([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat']);
-
- %plot emprical counts with error bars
- [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
- plot(log10(logbins),log10(meanval),'ko');
- %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'ko');
-
-end
-
-set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'xlim',[-0.1 3.5])
-set(gca,'ylim',[-1.1 2.5])
-set(gca,'FontSize',14)
-set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-%title('Chinese restaurant process adaptor')
-ylabel('Mean number of lexical entries')
-xlabel('Word frequency (n_w)')
-legend('Expectation','Antoniak approx.','Empirical, fixed base','Empirical, inferred base','Location','NorthWest')
-box on
diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_monkeys.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_monkeys.m
deleted file mode 100644
index 33419845..00000000
--- a/report/pyp_clustering/acl09-short/code/wsjplots_acl_monkeys.m
+++ /dev/null
@@ -1,164 +0,0 @@
-%wsj_lengths = load([ 'wsj_lengths.dat']);
-%save([ 'wsj_lengths.mat'],'wsj_lengths');
-load wsj
-load wsj_lengths
-
-figure(1)
-clf
-
-subplot(1,3,1);
-hold on
-
-for i = 2:6
-
- b = 10^(i-1)
-
- disp(['Loading results for b = ' num2str(b) ]);
-%%% uncomment these lines if .mat file is not yet generated. %%%
- %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']);
- %typecountrecordmean = mean(typecountrecord(500:999,:));
- %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
- load([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat']);
-
- % plot lines for CRP exact prediction using summation
- [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20);
- ph = plot(log10(logbins),log10(predicted),'r');
- set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
-
- % plot lines for CRP Antoniak prediction
- [logbins predicted dummy] = logbinmean(counts, antoniakpred(counts,b),20,20);
- ph = plot(log10(logbins),log10(predicted),'r');
- set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','--')
-
- % plot lines for incorrect CRP Antoniak prediction (ACL07)
- %[logbins predicted dummy] = logbinmean(counts, noP0pred(counts,b),20,20);
- %ph = plot(log10(logbins),log10(predicted),'r');
- %set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','-.')
-
- % plot lines for CRP Cohn prediction
- %[logbins predicted dummy] = logbinmean(counts, cohnpred(counts,b),20,20);
- %ph = plot(log10(logbins),log10(predicted),'r');
- %set(ph,'color',[0.2 0.2 1],'linewidth',1.5,'linestyle','.')
-
- %plot emprical counts with error bars
- [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
- errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
-end
-
-set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'xlim',[-0.1 3.5])
-set(gca,'ylim',[-1.1 1.5])
-set(gca,'FontSize',14)
-set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-%title('Chinese restaurant process adaptor')
-ylabel('Mean number of lexical entries')
-xlabel('Word frequency (n_w)')
-legend('Expectation','Antoniak approx.','Empirical','Location','NorthWest')
-box on
-
-
-subplot(1,3,2);
-hold on
-
-for i =2:6
-
- b = 10^(i-1)
-
- disp(['Loading results for b = ' num2str(b) ]);
-%%% uncomment these lines if .mat file is not yet generated. %%%
- %typecountrecord= load([ 'outputs/typecountrecordwsjpeak0.0.' num2str(b) '.0.dat']);
- %typecountrecordmean = mean(typecountrecord(500:999,:));
- %save([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
- load([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat']);
-
- % plot lines for CRP exact prediction using summation
- [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20);
- ph = plot(log10(logbins),log10(predicted),'r');
- set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
-
- %plot emprical counts with error bars
- [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
- errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
-end
-
-set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'xlim',[-0.1 3.5])
-set(gca,'ylim',[-.1 2.5])
-set(gca,'FontSize',14)
-set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-set(gca,'yticklabel', {...%'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-%title('Chinese restaurant process adaptor')
-ylabel('Mean number of lexical entries')
-xlabel('Word frequency (n_w)')
-legend('Expectation','Location','NorthWest')
-box on
-%axis square
-
-
-subplot(1,3,3);
-hold on
-
-for i =2:6
-
- b = 10^(i-1)
-
- disp(['Loading results for b = ' num2str(b) ]);
-%%% uncomment these lines if .mat file is not yet generated. %%%
- %typecountrecord= load([ 'outputs/typecountrecordwsjgeom0.0.' num2str(b) '.0.dat']);
- %typecountrecordmean = mean(typecountrecord(500:999,:));
- %save([ 'outputs/typecountrecordmeanwsjgeom0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
- load([ 'outputs/typecountrecordmeanwsjgeom0.0.' num2str(b) '.0.mat']);
-
- % plot lines for CRP exact prediction using summation
-% [logbins meaneval seval] = logbinmean(counts, crppred_geom(counts,wsj_lengths,b),20,20)
-[logbins meaneval seval] = logbinmean(counts, crppred(counts,b),20,20)
- plot(log10(logbins),log10(meaneval),'r.');
-%errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'r.');
-% ph = plot(log10(logbins),log10(meaneval),'r');
-% set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
-
- %plot emprical counts with error bars
- [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
- errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
-end
-
-set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'xlim',[-0.1 3.5])
-set(gca,'ylim',[-.1 2.5])
-set(gca,'FontSize',14)
-set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-set(gca,'yticklabel', {...%'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-%title('Chinese restaurant process adaptor')
-ylabel('Mean number of lexical entries')
-xlabel('Word frequency (n_w)')
-legend('Expectation','Location','NorthWest')
-box on
-hold off
-%axis square
-
-
diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_pair.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_pair.m
deleted file mode 100644
index 1d07e54c..00000000
--- a/report/pyp_clustering/acl09-short/code/wsjplots_acl_pair.m
+++ /dev/null
@@ -1,117 +0,0 @@
-%wsj_lengths = load([ 'wsj_lengths.dat']);
-%save([ 'wsj_lengths.mat'],'wsj_lengths');
-load wsj
-load wsj_lengths
-
-figure(1)
-clf
-
-subplot(1,2,1);
-hold on
-
-for i = 3:6
-
- b = 10^(i-1)
-
- disp(['Loading results for b = ' num2str(b) ]);
-%%% uncomment these lines if .mat file is not yet generated. %%%
- %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']);
- %typecountrecordmean = mean(typecountrecord(:,:));
- %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
- load([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat']);
-
- % plot lines for CRP exact prediction using summation
- [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20);
- ph = plot(log10(logbins),log10(predicted),'r');
- set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
-
- % plot lines for CRP Antoniak prediction
- [logbins predicted dummy] = logbinmean(counts, antoniakpred(counts,b),20,20);
- ph = plot(log10(logbins),log10(predicted),'r');
- set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','--')
-
- %plot lines for incorrect CRP Antoniak prediction (ACL07)
- %[logbins predicted dummy] = logbinmean(counts, noP0pred(counts,b),20,20);
- %ph = plot(log10(logbins),log10(predicted),'r');
- %set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle',':')
-
- % plot lines for CRP Cohn prediction
- %[logbins predicted dummy] = logbinmean(counts, cohnpred(counts,b),20,20);
- %ph = plot(log10(logbins),log10(predicted),'r');
- %set(ph,'color',[0.2 0.2 1],'linewidth',1.5,'linestyle','.')
-
- %plot emprical counts with error bars
- [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
- errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
-end
-
-set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'xlim',[-0.1 3.5])
-set(gca,'ylim',[-1.1 1.5])
-set(gca,'FontSize',14)
-set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-%title('Chinese restaurant process adaptor')
-ylabel('Mean number of lexical entries')
-xlabel('Word frequency (n_w)')
-legend('Expectation','Antoniak approx.','Empirical','Location','NorthWest')
-box on
-
-
-subplot(1,2,2);
-hold on
-
-for i =3:6
-
- b = 10^(i-1)
-
- disp(['Loading results for b = ' num2str(b) ]);
-%%% uncomment these lines if .mat file is not yet generated. %%%
- %typecountrecord= load([ 'outputs/typecountrecordwsjpeak0.0.' num2str(b) '.0.dat']);
- %typecountrecordmean = mean(typecountrecord(:,:));
- %save([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
- load([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat']);
-
- % plot lines for CRP exact prediction using summation
- [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20);
- ph = plot(log10(logbins),log10(predicted),'r');
- set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
-
- %plot lines for incorrect CRP Antoniak prediction (ACL07)
- [logbins predicted dummy] = logbinmean(counts, noP0pred(counts,b),20,20);
- ph = plot(log10(logbins),log10(predicted),'r');
- set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','-.')
-
- %plot emprical counts with error bars
- [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
- errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
-end
-
-set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'xlim',[-0.1 3.5])
-set(gca,'ylim',[-.1 2.5])
-set(gca,'FontSize',14)
-set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-set(gca,'yticklabel', {...%'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-%title('Chinese restaurant process adaptor')
-ylabel('Mean number of lexical entries')
-xlabel('Word frequency (n_w)')
-legend('Expectation','GGJ07 approx.','Empirical','Location','NorthWest')
-box on
-%axis square \ No newline at end of file
diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk0.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk0.m
deleted file mode 100644
index dc54dea4..00000000
--- a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk0.m
+++ /dev/null
@@ -1,54 +0,0 @@
-%wsj_lengths = load([ 'wsj_lengths.dat']);
-%save([ 'wsj_lengths.mat'],'wsj_lengths');
-load wsj
-load wsj_lengths
-
-figure(1)
-clf
-
-hold on
-
-%colors = [0 0 0; 0 0 1; 1 0 0; 0 1 0]; %pure black, red, blue, green
-colors = [0 0 0; 1 .4 .2; .4 .4 1; 0 .7 .5]; %same but less garish
-%colors = [0 0 0; .6 .4 .4; .9 .6 .6; 1 .8 .8]; %shades of pink
-%colors = [0 0 0; .3 .3 1; .4 .8 1; .5 1 .8]; %blue/green
-
-for i = 9-[3:6]
-
- b = 10^(i-1)
-
- disp(['Loading results for b = ' num2str(b) ]);
- %%% uncomment these lines if .mat file is not yet generated. %%%
- %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']);
- %typecountrecordmean = mean(typecountrecord(:,:));
- %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
- load([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat']);
-
- %plot emprical counts with error bars
- [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
- ph = plot(log10(logbins),log10(meanval));
- set(ph,'color',colors(i-2,:),'linestyle','o','linewidth',2,'markersize',10);
- %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
-
-end
-
-set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'xlim',[-0.1 3.5])
-set(gca,'ylim',[-.1 2])
-set(gca,'FontSize',16)
-set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-%title('Chinese restaurant process adaptor')
-ylabel('Mean number of lexical entries (tables)')
-xlabel('Word frequency (n_w)')
-labs = {'\alpha = 100000','\alpha = 10000','\alpha = 1000','\alpha = 100'};
-legend(labs,'Location','NorthWest')
-box on
diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk1.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk1.m
deleted file mode 100644
index dd3615ac..00000000
--- a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk1.m
+++ /dev/null
@@ -1,59 +0,0 @@
-%wsj_lengths = load([ 'wsj_lengths.dat']);
-%save([ 'wsj_lengths.mat'],'wsj_lengths');
-load wsj
-load wsj_lengths
-
-figure(1)
-clf
-
-hold on
-
-%colors = [0 0 0; 0 0 1; 1 0 0; 0 1 0]; %pure black, red, blue, green
-colors = [0 0 0; 1 .4 .2; .4 .4 1; 0 .7 .5]; %same but less garish
-%colors = [0 0 0; .6 .4 .4; .9 .6 .6; 1 .8 .8]; %shades of pink
-%colors = [0 0 0; .3 .3 1; .4 .8 1; .5 1 .8]; %blue/green
-
-for i = 3:6
-
- b = 10^(i-1)
-
- disp(['Loading results for b = ' num2str(b) ]);
- %%% uncomment these lines if .mat file is not yet generated. %%%
- %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']);
- %typecountrecordmean = mean(typecountrecord(:,:));
- %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
- load([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat']);
-
- %plot emprical counts with error bars
- [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
- ph = plot(log10(logbins),log10(meanval));
- set(ph,'color',colors(i-2,:),'linestyle','o','linewidth',2,'markersize',8);
- %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
-
- % plot lines for CRP exact prediction using summation
- [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20);
- ph = plot(log10(logbins),log10(predicted),'r');
- set(ph,'color',colors(i-2,:),'linewidth',2);
-
-end
-
-set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'xlim',[-0.1 3.5])
-set(gca,'ylim',[-.1 2])
-set(gca,'FontSize',16)
-set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-%title('Chinese restaurant process adaptor')
-ylabel('Mean number of lexical entries (tables)')
-xlabel('Word frequency (n_w)')
-labs = {'Empirical','Expectation'};
-legend(labs,'Location','NorthWest')
-box on
diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk2.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk2.m
deleted file mode 100644
index dd039289..00000000
--- a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk2.m
+++ /dev/null
@@ -1,58 +0,0 @@
-%wsj_lengths = load([ 'wsj_lengths.dat']);
-%save([ 'wsj_lengths.mat'],'wsj_lengths');
-load wsj
-load wsj_lengths
-
-figure(1)
-clf
-
-hold on
-
-%colors = [0 0 0; 0 0 1; 1 0 0; 0 1 0]; %pure black, red, blue, green
-colors = [0 0 0; 1 .4 .2; .4 .4 1; 0 .7 .5]; %same but less garish
-%colors = [0 0 0; .6 .4 .4; .9 .6 .6; 1 .8 .8]; %shades of pink
-%colors = [0 0 0; .3 .3 1; .4 .8 1; .5 1 .8]; %blue/green
-
-for i = 3:6
-
- b = 10^(i-1)
-
- disp(['Loading results for b = ' num2str(b) ]);
- %%% uncomment these lines if .mat file is not yet generated. %%%
- %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']);
- %typecountrecordmean = mean(typecountrecord(:,:));
- %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
- load([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat']);
-
- % plot lines for CRP exact prediction using summation
- [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20);
- ph = plot(log10(logbins),log10(predicted),'r');
- set(ph,'color',colors(i-2,:),'linewidth',2);
-
- % plot lines for CRP Antoniak prediction
- [logbins predicted dummy] = logbinmean(counts, antoniakpred(counts,b),20,20);
- ph = plot(log10(logbins),log10(predicted),'r');
- set(ph,'color',colors(i-2,:),'linewidth',2,'linestyle','--')
-
-end
-
-set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'xlim',[-0.1 3.5])
-set(gca,'ylim',[-1.1 2])
-set(gca,'FontSize',16)
-set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-%title('Chinese restaurant process adaptor')
-ylabel('Mean number of lexical entries (tables)')
-xlabel('Word frequency (n_w)')
-labs = {'Expectation','Antoniak approximation'};
-legend(labs,'Location','NorthWest')
-box on
diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m
deleted file mode 100644
index 8d570b7a..00000000
--- a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m
+++ /dev/null
@@ -1,74 +0,0 @@
-%wsj_lengths = load([ 'wsj_lengths.dat']);
-%save([ 'wsj_lengths.mat'],'wsj_lengths');
-load wsj
-load wsj_lengths
-
-figure(1)
-clf
-
-hold on
-
-%colors = [0 0 0; 0 0 1; 1 0 0; 0 1 0]; %pure black, red, blue, green
-colors = [0 0 0; 1 .4 .2; .4 .4 1; 0 .7 .5]; %similar but less garish
-%colors = [0 0 0; .6 .4 .4; .9 .6 .6; 1 .8 .8]; %shades of pink
-%colors = [0 0 0; .3 .3 1; .4 .8 1; .5 1 .8]; %blue/green
-
-for i = 3:6
- col = colors(i-2,:);
- b = 10^(i-1)
-
- % plot lines for CRP exact prediction using summation
- [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20);
- ph = plot(log10(logbins),log10(predicted));
- set(ph,'color',col,'linewidth',2);
-
- disp(['Loading results for b = ' num2str(b) ]);
- %%% uncomment these lines if .mat file is not yet generated. %%%
- %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']);
- %typecountrecordmean = mean(typecountrecord(:,:));
- %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
- load([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat']);
-
- %plot emprical counts with error bars
- [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
- ph = plot(log10(logbins),log10(meanval));
- %set(ph,'color',col,'linestyle','o','markerfacecolor',col,'markersize',8);
- set(ph,'color',col,'linestyle','o','linewidth',2,'markersize',8);
- %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
-
- disp(['Loading results for b = ' num2str(b) ]);
- %%% uncomment these lines if .mat file is not yet generated. %%%
- %typecountrecord= load([ 'outputs/typecountrecordwsjpeak0.0.' num2str(b) '.0.dat']);
- %typecountrecordmean = mean(typecountrecord(:,:));
- %save([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
- load([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat']);
-
- %plot emprical counts with error bars
- [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
- ph = plot(log10(logbins),log10(meanval));
- %set(ph,'color',col,'linestyle','^','markerfacecolor',col,'markersize',8);
- set(ph,'color',col,'linestyle','^','linewidth',2,'markersize',8);
- %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'ko');
-
-end
-
-set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'xlim',[-0.1 3.5])
-set(gca,'ylim',[-.1 2.5])
-set(gca,'FontSize',16)
-set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-%title('Chinese restaurant process adaptor')
-ylabel('Mean number of lexical entries (tables)')
-xlabel('Word frequency (n_w)')
-labs = {'Expectation','Empirical, fixed base','Empirical, inferred base'};
-legend(labs,'Location','NorthWest')
-box on
diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_cl.m b/report/pyp_clustering/acl09-short/code/wsjplots_cl.m
deleted file mode 100644
index eed41846..00000000
--- a/report/pyp_clustering/acl09-short/code/wsjplots_cl.m
+++ /dev/null
@@ -1,99 +0,0 @@
-
-load wsj
-
-figure(1)
-clf
-subplot(1,2,2)
-hold on
-
-for i = 1:9
- a = i/10;
- [logbins predicted dummy] = logbinmean(counts,counts.^a,20,20);
- ph = plot(log10(logbins),log10(predicted),'k');
- set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
-end
-
-for i = 1:9
- a = i/10;
- disp(['Loading results for a = ' num2str(a) ]);
-
- typecountrecord= load([ 'typecountrecordwsjflat' num2str(a) '.1.0.dat']);
-
- typecountrecordmean = mean(typecountrecord(500:1000,:));
-
- save([ 'typecountrecordmeanwsjflat' num2str(a) '.1.0.mat'],'typecountrecordmean');
-
- [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20)
- errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
- drawnow
-end
-
-
-
-
-[logbins meanval seval] = logbinmean(counts,counts,20,20)
-[logbins predicted dummy] = logbinmean(counts,counts,20,20)
-ph = plot(log10(logbins),log10(predicted),'r');
-hold on
-errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
-
-set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
-
-set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'xlim',[-0.1 3.5])
-set(gca,'ylim',[-0.1 3.5])
-set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-set(gca,'yticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-
-title('Pitman-Yor process adaptor')
-ylabel('Mean number of lexical entries')
-xlabel('Word frequency (n_w)')
-box on
-
-subplot(1,2,1)
-
-for i = 1:5
-
- b = 10^(i-1)
-
- disp(['Loading results for b = ' num2str(b) ]);
- typecountrecord= load([ 'typecountrecordwsjflat0.0.' num2str(b) '.0.dat']);
-
- typecountrecordmean = mean(typecountrecord(500:1000,:));
- save([ 'typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
-
- [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20)
- [logbins predicted dummy] = logbinmean(counts,crppred(counts,b),20,20)
-% errorbar(log10(logbins),meanval,seval,'k.');
- hold on
- ph = plot(log10(logbins),log10(predicted),'r');
- % ph = plot(log10(logbins),predicted,'r');
- set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
- errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
-end
-
-set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
-set(gca,'xlim',[-0.1 3.5])
-set(gca,'ylim',[-0.1 1.5])
-set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-set(gca,'yticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
- '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
- ' ', ' ', ' ', ' '});
-title('Chinese restaurant process adaptor')
-ylabel('Mean number of lexical entries')
-xlabel('Word frequency (n_w)')
-box on
-
-