diff options
| author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-27 16:13:19 +0000 | 
|---|---|---|
| committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-27 16:13:19 +0000 | 
| commit | fd519b0e45c857b266814994ba8c1421f508e522 (patch) | |
| tree | 6d50c9b954e3c13e9df627c1ecc25c53544a5f58 /report/pyp_clustering/acl09-short/code | |
| parent | 4c5df460c9da5c935438850ef7993463a9113286 (diff) | |
preso
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@435 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'report/pyp_clustering/acl09-short/code')
31 files changed, 4891 insertions, 0 deletions
diff --git a/report/pyp_clustering/acl09-short/code/antoniakpred.m b/report/pyp_clustering/acl09-short/code/antoniakpred.m new file mode 100644 index 00000000..c4153c04 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/antoniakpred.m @@ -0,0 +1,12 @@ +function output = antoniakpred(input,b) + +uniqin = unique(input); +prediction = zeros(max(input),1); + +p0=1/30114; +for i = 1:length(uniqin) +  prediction(uniqin(i)) = b*p0*log((b*p0+uniqin(i))/(b*p0)); +end + +output = prediction(input); + diff --git a/report/pyp_clustering/acl09-short/code/approximations.eps b/report/pyp_clustering/acl09-short/code/approximations.eps new file mode 100644 index 00000000..67857497 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/approximations.eps @@ -0,0 +1,897 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The Mathworks, Inc. Version 7.7.0.471 (R2008b). Operating System: Linux 2.6.18-92.1.13.el5.inf.1PAE #1 SMP Mon Oct 20 10:33:44 BST 2008 i686. +%%Title: /afs/inf.ed.ac.uk/user/s/sgwater/research/papers/2009/acl09-short/code/approximations.eps +%%CreationDate: 04/25/2009  11:31:18 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox:    89   164   503   676 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c  /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} 
bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] +  makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse +  exch dup 3 1 roll findfont dup length dict begin +  { 1 index /FID ne {def}{pop pop} ifelse } forall +  /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse +  exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} +  {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 +  dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto +  neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill +   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale +  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale 
+  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale +  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale +  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef +  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly +  vradius add translate hradius vradius scale 0 0 1 180 270 arc  +  tMatrix setmatrix lrx hradius sub uly vradius add translate +  hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix +  lrx hradius sub lry vradius sub translate hradius vradius scale +  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub +  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix +  closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def +  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate +  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad +  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix +  closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def +  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate +  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad +  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix +  closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup +  ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { +  2 0 asub 3 1 asub 4 0 asub 5 1 asub +  dup 0 get exch 
1 get 7 -1 roll astore } bdef +/compute_transform { +  dmat dtri tri_to_matrix tmat1 invertmatrix  +  smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string +  currentfile  +  3 index 0 eq {/ASCIIHexDecode filter} +  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } +  ifelse exch readstring pop +  dup 0 3 index getinterval /rbmap xdef +  dup 2 index dup getinterval /gbmap xdef +  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c +  cols rows 8 compute_transform  +  rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox:    89   164   503   676 +MathWorks begin +bpage +%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode 1068 8112 csm + +    0     0  4976  6135 rc +86 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg +   0    0 4977 6136 rf +6 w +0 5000 3856 0 0 -5000 647 5460 4 MP +PP +-3856 0 0 5000 3856 0 0 -5000 647 5460 5 MP stroke +4 w +DO +SO +6 w +0 sg + 647 5460 mt 4503 5460 L + 647  460 mt 4503  460 L + 647 5460 mt  647  460 L +4503 5460 mt 4503  460 L + 647 5460 mt 4503 5460 L + 647 5460 mt  647  460 L + 754 5460 mt  754 5410 L + 754  460 mt  754  510 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 708 5650 mt  +(1) s +1076 5460 mt 1076 5410 L +1076  460 mt 1076  510 L +1053 5650 mt  +( ) s +1265 5460 
mt 1265 5410 L +1265  460 mt 1265  510 L +1242 5650 mt  +( ) s +1398 5460 mt 1398 5410 L +1398  460 mt 1398  510 L +1375 5650 mt  +( ) s +1502 5460 mt 1502 5410 L +1502  460 mt 1502  510 L +1479 5650 mt  +( ) s +1587 5460 mt 1587 5410 L +1587  460 mt 1587  510 L +1564 5650 mt  +( ) s +1659 5460 mt 1659 5410 L +1659  460 mt 1659  510 L +1636 5650 mt  +( ) s +1721 5460 mt 1721 5410 L +1721  460 mt 1721  510 L +1698 5650 mt  +( ) s +1776 5460 mt 1776 5410 L +1776  460 mt 1776  510 L +1753 5650 mt  +( ) s +1825 5460 mt 1825 5410 L +1825  460 mt 1825  510 L +1732 5650 mt  +(10) s +2147 5460 mt 2147 5410 L +2147  460 mt 2147  510 L +2124 5650 mt  +( ) s +2336 5460 mt 2336 5410 L +2336  460 mt 2336  510 L +2313 5650 mt  +( ) s +2470 5460 mt 2470 5410 L +2470  460 mt 2470  510 L +2447 5650 mt  +( ) s +2573 5460 mt 2573 5410 L +2573  460 mt 2573  510 L +2550 5650 mt  +( ) s +2658 5460 mt 2658 5410 L +2658  460 mt 2658  510 L +2635 5650 mt  +( ) s +2730 5460 mt 2730 5410 L +2730  460 mt 2730  510 L +2707 5650 mt  +( ) s +2792 5460 mt 2792 5410 L +2792  460 mt 2792  510 L +2769 5650 mt  +( ) s +2847 5460 mt 2847 5410 L +2847  460 mt 2847  510 L +2824 5650 mt  +( ) s +2896 5460 mt 2896 5410 L +2896  460 mt 2896  510 L +2756 5650 mt  +(100) s +3218 5460 mt 3218 5410 L +3218  460 mt 3218  510 L +3195 5650 mt  +( ) s +3407 5460 mt 3407 5410 L +3407  460 mt 3407  510 L +3384 5650 mt  +( ) s +3541 5460 mt 3541 5410 L +3541  460 mt 3541  510 L +3518 5650 mt  +( ) s +3645 5460 mt 3645 5410 L +3645  460 mt 3645  510 L +3622 5650 mt  +( ) s +3729 5460 mt 3729 5410 L +3729  460 mt 3729  510 L +3706 5650 mt  +( ) s +3801 5460 mt 3801 5410 L +3801  460 mt 3801  510 L +3778 5650 mt  +( ) s +3863 5460 mt 3863 5410 L +3863  460 mt 3863  510 L +3840 5650 mt  +( ) s +3918 5460 mt 3918 5410 L +3918  460 mt 3918  510 L +3895 5650 mt  +( ) s +3967 5460 mt 3967 5410 L +3967  460 mt 3967  510 L +3781 5650 mt  +(1000) s +4289 5460 mt 4289 5410 L +4289  460 mt 4289  510 L +4266 5650 mt  +( ) s +4478 
5460 mt 4478 5410 L +4478  460 mt 4478  510 L +4455 5650 mt  +( ) s + 647 5321 mt  697 5321 L +4503 5321 mt 4453 5321 L + 379 5383 mt  +(0.1) s + 647 4903 mt  697 4903 L +4503 4903 mt 4453 4903 L + 566 4965 mt  +( ) s + 647 4658 mt  697 4658 L +4503 4658 mt 4453 4658 L + 566 4720 mt  +( ) s + 647 4484 mt  697 4484 L +4503 4484 mt 4453 4484 L + 566 4546 mt  +( ) s + 647 4350 mt  697 4350 L +4503 4350 mt 4453 4350 L + 566 4412 mt  +( ) s + 647 4240 mt  697 4240 L +4503 4240 mt 4453 4240 L + 566 4302 mt  +( ) s + 647 4147 mt  697 4147 L +4503 4147 mt 4453 4147 L + 566 4209 mt  +( ) s + 647 4066 mt  697 4066 L +4503 4066 mt 4453 4066 L + 566 4128 mt  +( ) s + 647 3995 mt  697 3995 L +4503 3995 mt 4453 3995 L + 566 4057 mt  +( ) s + 647 3932 mt  697 3932 L +4503 3932 mt 4453 3932 L + 519 3994 mt  +(1) s + 647 3514 mt  697 3514 L +4503 3514 mt 4453 3514 L + 566 3576 mt  +( ) s + 647 3269 mt  697 3269 L +4503 3269 mt 4453 3269 L + 566 3331 mt  +( ) s + 647 3096 mt  697 3096 L +4503 3096 mt 4453 3096 L + 566 3158 mt  +( ) s + 647 2961 mt  697 2961 L +4503 2961 mt 4453 2961 L + 566 3023 mt  +( ) s + 647 2851 mt  697 2851 L +4503 2851 mt 4453 2851 L + 566 2913 mt  +( ) s + 647 2758 mt  697 2758 L +4503 2758 mt 4453 2758 L + 566 2820 mt  +( ) s + 647 2677 mt  697 2677 L +4503 2677 mt 4453 2677 L + 566 2739 mt  +( ) s + 647 2606 mt  697 2606 L +4503 2606 mt 4453 2606 L + 566 2668 mt  +( ) s + 647 2543 mt  697 2543 L +4503 2543 mt 4453 2543 L + 426 2605 mt  +(10) s + 647 2125 mt  697 2125 L +4503 2125 mt 4453 2125 L + 566 2187 mt  +( ) s + 647 1880 mt  697 1880 L +4503 1880 mt 4453 1880 L + 566 1942 mt  +( ) s + 647 1707 mt  697 1707 L +4503 1707 mt 4453 1707 L + 566 1769 mt  +( ) s + 647 1572 mt  697 1572 L +4503 1572 mt 4453 1572 L + 566 1634 mt  +( ) s + 647 1462 mt  697 1462 L +4503 1462 mt 4453 1462 L + 566 1524 mt  +( ) s + 647 1369 mt  697 1369 L +4503 1369 mt 4453 1369 L + 566 1431 mt  +( ) s + 647 1289 mt  697 1289 L +4503 1289 mt 4453 1289 L + 566 1351 mt  +( ) s + 
647 1217 mt  697 1217 L +4503 1217 mt 4453 1217 L + 566 1279 mt  +( ) s + 647 1154 mt  697 1154 L +4503 1154 mt 4453 1154 L + 332 1216 mt  +(100) s + 647  736 mt  697  736 L +4503  736 mt 4453  736 L + 566  798 mt  +( ) s + 647  491 mt  697  491 L +4503  491 mt 4453  491 L + 566  553 mt  +( ) s + 647 5460 mt 4503 5460 L + 647  460 mt 4503  460 L + 647 5460 mt  647  460 L +4503 5460 mt 4503  460 L +gs 647 460 3857 5001 rc +18 w +0.7 sg +265 -2 266 -1 265 -1 265 -1 265 -1 266 -1 265 -1 265 -1  +266 -2 265 -1 265 -1 265 -1 266 -3 886 3932 14 MP stroke +6 w +gr + +0.7 sg +0 sg + 850 3932 mt  922 3932 L + 886 3896 mt  886 3968 L +1116 3929 mt 1188 3929 L +1152 3893 mt 1152 3965 L +1381 3928 mt 1453 3928 L +1417 3892 mt 1417 3964 L +1646 3927 mt 1718 3927 L +1682 3891 mt 1682 3963 L +1911 3926 mt 1983 3926 L +1947 3890 mt 1947 3962 L +2177 3924 mt 2249 3924 L +2213 3888 mt 2213 3960 L +2442 3923 mt 2514 3923 L +2478 3887 mt 2478 3959 L +2707 3922 mt 2779 3922 L +2743 3886 mt 2743 3958 L +2973 3922 mt 3045 3922 L +3009 3886 mt 3009 3958 L +3238 3919 mt 3310 3919 L +3274 3883 mt 3274 3955 L +3503 3919 mt 3575 3919 L +3539 3883 mt 3539 3955 L +3768 3919 mt 3840 3919 L +3804 3883 mt 3804 3955 L +4034 3917 mt 4106 3917 L +4070 3881 mt 4070 3953 L +4299 3918 mt 4371 3918 L +4335 3882 mt 4335 3954 L + 861 3907 mt  911 3957 L + 911 3907 mt  861 3957 L +1127 3904 mt 1177 3954 L +1177 3904 mt 1127 3954 L +1392 3903 mt 1442 3953 L +1442 3903 mt 1392 3953 L +1657 3902 mt 1707 3952 L +1707 3902 mt 1657 3952 L +1922 3901 mt 1972 3951 L +1972 3901 mt 1922 3951 L +2188 3899 mt 2238 3949 L +2238 3899 mt 2188 3949 L +2453 3898 mt 2503 3948 L +2503 3898 mt 2453 3948 L +2718 3897 mt 2768 3947 L +2768 3897 mt 2718 3947 L +2984 3897 mt 3034 3947 L +3034 3897 mt 2984 3947 L +3249 3894 mt 3299 3944 L +3299 3894 mt 3249 3944 L +3514 3894 mt 3564 3944 L +3564 3894 mt 3514 3944 L +3779 3894 mt 3829 3944 L +3829 3894 mt 3779 3944 L +4045 3892 mt 4095 3942 L +4095 3892 mt 4045 3942 L +4310 3893 mt 
4360 3943 L +4360 3893 mt 4310 3943 L +gs 647 460 3857 5001 rc +gr + +  36   36  886 3932 FO +  36   36 1152 3929 FO +  36   36 1417 3928 FO +  36   36 1682 3927 FO +  36   36 1947 3926 FO +  36   36 2213 3924 FO +  36   36 2478 3923 FO +  36   36 2743 3922 FO +  36   36 3009 3921 FO +  36   36 3274 3919 FO +  36   36 3539 3919 FO +  36   36 3804 3918 FO +  36   36 4070 3916 FO +  36   36 4335 3920 FO +gs 647 460 3857 5001 rc +18 w +0.7 sg +265 -9 266 -9 265 -9 265 -10 265 -9 266 -10 265 -10 265 -10  +266 -11 265 -11 265 -10 265 -14 266 -23 886 3932 14 MP stroke +DA +265 -32 266 -32 265 -36 265 -37 265 -40 266 -43 265 -47 265 -50  +266 -56 265 -61 265 -57 265 -84 266 -129 886 5241 14 MP stroke +SO +6 w +gr + +0.7 sg +0 sg + 850 3932 mt  922 3932 L + 886 3896 mt  886 3968 L +1116 3909 mt 1188 3909 L +1152 3873 mt 1152 3945 L +1381 3895 mt 1453 3895 L +1417 3859 mt 1417 3931 L +1646 3885 mt 1718 3885 L +1682 3849 mt 1682 3921 L +1911 3874 mt 1983 3874 L +1947 3838 mt 1947 3910 L +2177 3863 mt 2249 3863 L +2213 3827 mt 2213 3899 L +2442 3854 mt 2514 3854 L +2478 3818 mt 2478 3890 L +2707 3843 mt 2779 3843 L +2743 3807 mt 2743 3879 L +2973 3833 mt 3045 3833 L +3009 3797 mt 3009 3869 L +3238 3822 mt 3310 3822 L +3274 3786 mt 3274 3858 L +3503 3816 mt 3575 3816 L +3539 3780 mt 3539 3852 L +3768 3806 mt 3840 3806 L +3804 3770 mt 3804 3842 L +4034 3802 mt 4106 3802 L +4070 3766 mt 4070 3838 L +4299 3782 mt 4371 3782 L +4335 3746 mt 4335 3818 L + 861 3907 mt  911 3957 L + 911 3907 mt  861 3957 L +1127 3884 mt 1177 3934 L +1177 3884 mt 1127 3934 L +1392 3870 mt 1442 3920 L +1442 3870 mt 1392 3920 L +1657 3860 mt 1707 3910 L +1707 3860 mt 1657 3910 L +1922 3849 mt 1972 3899 L +1972 3849 mt 1922 3899 L +2188 3838 mt 2238 3888 L +2238 3838 mt 2188 3888 L +2453 3829 mt 2503 3879 L +2503 3829 mt 2453 3879 L +2718 3818 mt 2768 3868 L +2768 3818 mt 2718 3868 L +2984 3808 mt 3034 3858 L +3034 3808 mt 2984 3858 L +3249 3797 mt 3299 3847 L +3299 3797 mt 3249 3847 L +3514 3791 mt 3564 
3841 L +3564 3791 mt 3514 3841 L +3779 3781 mt 3829 3831 L +3829 3781 mt 3779 3831 L +4045 3777 mt 4095 3827 L +4095 3777 mt 4045 3827 L +4310 3757 mt 4360 3807 L +4360 3757 mt 4310 3807 L +gs 647 460 3857 5001 rc +gr + +  36   36  886 3932 FO +  36   36 1152 3910 FO +  36   36 1417 3895 FO +  36   36 1682 3885 FO +  36   36 1947 3872 FO +  36   36 2213 3860 FO +  36   36 2478 3848 FO +  36   36 2743 3834 FO +  36   36 3009 3821 FO +  36   36 3274 3809 FO +  36   36 3539 3798 FO +  36   36 3804 3788 FO +  36   36 4070 3771 FO +  36   36 4335 3742 FO +gs 647 460 3857 5001 rc +18 w +0.7 sg +265 -33 266 -34 265 -36 265 -38 265 -42 266 -44 265 -49 265 -52  +266 -60 265 -66 265 -62 265 -96 266 -158 886 3932 14 MP stroke +DA +265 -40 266 -43 265 -46 265 -50 265 -55 266 -60 265 -67 265 -75  +266 -87 265 -100 265 -96 265 -148 266 -242 886 4398 14 MP stroke +SO +6 w +gr + +0.7 sg +0 sg + 850 3932 mt  922 3932 L + 886 3896 mt  886 3968 L +1116 3774 mt 1188 3774 L +1152 3738 mt 1152 3810 L +1381 3678 mt 1453 3678 L +1417 3642 mt 1417 3714 L +1646 3616 mt 1718 3616 L +1682 3580 mt 1682 3652 L +1911 3549 mt 1983 3549 L +1947 3513 mt 1947 3585 L +2177 3490 mt 2249 3490 L +2213 3454 mt 2213 3526 L +2442 3438 mt 2514 3438 L +2478 3402 mt 2478 3474 L +2707 3390 mt 2779 3390 L +2743 3354 mt 2743 3426 L +2973 3344 mt 3045 3344 L +3009 3308 mt 3009 3380 L +3238 3303 mt 3310 3303 L +3274 3267 mt 3274 3339 L +3503 3267 mt 3575 3267 L +3539 3231 mt 3539 3303 L +3768 3228 mt 3840 3228 L +3804 3192 mt 3804 3264 L +4034 3193 mt 4106 3193 L +4070 3157 mt 4070 3229 L +4299 3160 mt 4371 3160 L +4335 3124 mt 4335 3196 L + 861 3907 mt  911 3957 L + 911 3907 mt  861 3957 L +1127 3749 mt 1177 3799 L +1177 3749 mt 1127 3799 L +1392 3653 mt 1442 3703 L +1442 3653 mt 1392 3703 L +1657 3591 mt 1707 3641 L +1707 3591 mt 1657 3641 L +1922 3524 mt 1972 3574 L +1972 3524 mt 1922 3574 L +2188 3465 mt 2238 3515 L +2238 3465 mt 2188 3515 L +2453 3413 mt 2503 3463 L +2503 3413 mt 2453 3463 L +2718 3365 mt 
2768 3415 L +2768 3365 mt 2718 3415 L +2984 3319 mt 3034 3369 L +3034 3319 mt 2984 3369 L +3249 3278 mt 3299 3328 L +3299 3278 mt 3249 3328 L +3514 3242 mt 3564 3292 L +3564 3242 mt 3514 3292 L +3779 3203 mt 3829 3253 L +3829 3203 mt 3779 3253 L +4045 3168 mt 4095 3218 L +4095 3168 mt 4045 3218 L +4310 3135 mt 4360 3185 L +4360 3135 mt 4310 3185 L +gs 647 460 3857 5001 rc +gr + +  36   36  886 3932 FO +  36   36 1152 3825 FO +  36   36 1417 3737 FO +  36   36 1682 3663 FO +  36   36 1947 3567 FO +  36   36 2213 3455 FO +  36   36 2478 3330 FO +  36   36 2743 3183 FO +  36   36 3009 3003 FO +  36   36 3274 2790 FO +  36   36 3539 2539 FO +  36   36 3804 2234 FO +  36   36 4070 1938 FO +  36   36 4335 1575 FO +gs 647 460 3857 5001 rc +18 w +0.7 sg +265 -54 266 -58 265 -65 265 -72 265 -82 266 -95 265 -109 265 -127  +266 -155 265 -182 265 -180 265 -268 266 -415 886 3932 14 MP stroke +DA +265 -56 266 -59 265 -68 265 -74 265 -85 266 -99 265 -113 265 -133  +266 -162 265 -191 265 -187 265 -277 266 -425 886 4013 14 MP stroke +SO +6 w +gr + +0.7 sg +0 sg + 850 3932 mt  922 3932 L + 886 3896 mt  886 3968 L +1116 3517 mt 1188 3517 L +1152 3481 mt 1152 3553 L +1381 3249 mt 1453 3249 L +1417 3213 mt 1417 3285 L +1646 3069 mt 1718 3069 L +1682 3033 mt 1682 3105 L +1911 2887 mt 1983 2887 L +1947 2851 mt 1947 2923 L +2177 2732 mt 2249 2732 L +2213 2696 mt 2213 2768 L +2442 2605 mt 2514 2605 L +2478 2569 mt 2478 2641 L +2707 2496 mt 2779 2496 L +2743 2460 mt 2743 2532 L +2973 2401 mt 3045 2401 L +3009 2365 mt 3009 2437 L +3238 2319 mt 3310 2319 L +3274 2283 mt 3274 2355 L +3503 2247 mt 3575 2247 L +3539 2211 mt 3539 2283 L +3768 2181 mt 3840 2181 L +3804 2145 mt 3804 2217 L +4034 2124 mt 4106 2124 L +4070 2088 mt 4070 2160 L +4299 2069 mt 4371 2069 L +4335 2033 mt 4335 2105 L + 861 3907 mt  911 3957 L + 911 3907 mt  861 3957 L +1127 3492 mt 1177 3542 L +1177 3492 mt 1127 3542 L +1392 3224 mt 1442 3274 L +1442 3224 mt 1392 3274 L +1657 3044 mt 1707 3094 L +1707 3044 mt 1657 3094 L 
+1922 2862 mt 1972 2912 L +1972 2862 mt 1922 2912 L +2188 2707 mt 2238 2757 L +2238 2707 mt 2188 2757 L +2453 2580 mt 2503 2630 L +2503 2580 mt 2453 2630 L +2718 2471 mt 2768 2521 L +2768 2471 mt 2718 2521 L +2984 2376 mt 3034 2426 L +3034 2376 mt 2984 2426 L +3249 2294 mt 3299 2344 L +3299 2294 mt 3249 2344 L +3514 2222 mt 3564 2272 L +3564 2222 mt 3514 2272 L +3779 2156 mt 3829 2206 L +3829 2156 mt 3779 2206 L +4045 2099 mt 4095 2149 L +4095 2099 mt 4045 2149 L +4310 2044 mt 4360 2094 L +4360 2044 mt 4310 2094 L +gs 647 460 3857 5001 rc +gr + +  36   36  886 3932 FO +  36   36 1152 3713 FO +  36   36 1417 3510 FO +  36   36 1682 3318 FO +  36   36 1947 3048 FO +  36   36 2213 2733 FO +  36   36 2478 2401 FO +  36   36 2743 2061 FO +  36   36 3009 1720 FO +  36   36 3274 1380 FO +  36   36 3539 1045 FO +  36   36 3804  746 FO +gs 647 460 3857 5001 rc +gr + + 240 4103 mt  -90 rotate +(Mean number of lexical entries) s +90 rotate +1812 5794 mt  +(Word frequency \(n) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 132 FMSR + +3188 5878 mt  +(w) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +3283 5794 mt  +(\)) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + + 630 5503 mt  +( ) s +4487  502 mt  +( ) s +1 sg +0 846 2267 0 0 -846 707 1366 4 MP +PP +-2267 0 0 846 2267 0 0 -846 707 1366 5 MP stroke +4 w +DO +SO +6 w +0 sg + 707 1366 mt 2974 1366 L + 707  520 mt 2974  520 L + 707 1366 mt  707  520 L +2974 1366 mt 2974  520 L + 707 1366 mt 2974 1366 L + 707 1366 mt  707  520 L + 707 1366 mt 2974 1366 L + 707  520 mt 2974  520 L + 707 1366 mt  707  520 L +2974 1366 mt 2974  520 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +1183  698 mt  +(Expectation) s +gs 707 520 2268 847 rc +18 w +0.7 sg +365 0 780 637 2 MP stroke +gr + +18 w +0.7 sg +0 sg +1183  902 mt  +(Antoniak approx.) 
s +gs 707 520 2268 847 rc +DA +0.7 sg +365 0 780 841 2 MP stroke +SO +gr + +0.7 sg +0 sg +1183 1105 mt  +(Empirical, fixed base) s +gs 707 520 2268 847 rc +6 w +gs 889 971 147 147 rc + 926 1044 mt  998 1044 L + 962 1008 mt  962 1080 L + 937 1019 mt  987 1069 L + 987 1019 mt  937 1069 L +gr + +gr + +6 w +1183 1309 mt  +(Empirical, inferred base) s +gs 707 520 2268 847 rc +gs 889 1175 147 147 rc +  36   36  962 1248 FO +gr + +gr + + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/report/pyp_clustering/acl09-short/code/cohnpred.m b/report/pyp_clustering/acl09-short/code/cohnpred.m new file mode 100644 index 00000000..35a49605 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/cohnpred.m @@ -0,0 +1,12 @@ +function output = cohnpred(input,b) + +uniqin = unique(input); +prediction = zeros(max(input),1); + +p0=1/30114; +for i = 1:length(uniqin) +  prediction(uniqin(i)) = b*p0*(psi(b*p0+uniqin(i)) - psi(b*p0)); +end + +output = prediction(input); + diff --git a/report/pyp_clustering/acl09-short/code/cokus.c b/report/pyp_clustering/acl09-short/code/cokus.c new file mode 100644 index 00000000..3a959c0f --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/cokus.c @@ -0,0 +1,167 @@ +// This is the ``Mersenne Twister'' random number generator MT19937, which +// generates pseudorandom integers uniformly distributed in 0..(2^32 - 1) +// starting from any odd seed in 0..(2^32 - 1).  This version is a recode +// by Shawn Cokus (Cokus@math.washington.edu) on March 8, 1998 of a version by +// Takuji Nishimura (who had suggestions from Topher Cooper and Marc Rieffel in +// July-August 1997). +// +// Effectiveness of the recoding (on Goedel2.math.washington.edu, a DEC Alpha +// running OSF/1) using GCC -O3 as a compiler: before recoding: 51.6 sec. to +// generate 300 million random numbers; after recoding: 24.0 sec. 
for the same +// (i.e., 46.5% of original time), so speed is now about 12.5 million random +// number generations per second on this machine. +// +// According to the URL <http://www.math.keio.ac.jp/~matumoto/emt.html> +// (and paraphrasing a bit in places), the Mersenne Twister is ``designed +// with consideration of the flaws of various existing generators,'' has +// a period of 2^19937 - 1, gives a sequence that is 623-dimensionally +// equidistributed, and ``has passed many stringent tests, including the +// die-hard test of G. Marsaglia and the load test of P. Hellekalek and +// S. Wegenkittl.''  It is efficient in memory usage (typically using 2506 +// to 5012 bytes of static data, depending on data type sizes, and the code +// is quite short as well).  It generates random numbers in batches of 624 +// at a time, so the caching and pipelining of modern systems is exploited. +// It is also divide- and mod-free. +// +// This library is free software; you can redistribute it and/or modify it +// under the terms of the GNU Library General Public License as published by +// the Free Software Foundation (either version 2 of the License or, at your +// option, any later version).  This library is distributed in the hope that +// it will be useful, but WITHOUT ANY WARRANTY, without even the implied +// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See +// the GNU Library General Public License for more details.  You should have +// received a copy of the GNU Library General Public License along with this +// library; if not, write to the Free Software Foundation, Inc., 59 Temple +// Place, Suite 330, Boston, MA 02111-1307, USA. +// +// The code as Shawn received it included the following notice: +// +//   Copyright (C) 1997 Makoto Matsumoto and Takuji Nishimura.  When +//   you use this, send an e-mail to <matumoto@math.keio.ac.jp> with +//   an appropriate reference to your work. 
+// +// It would be nice to CC: <Cokus@math.washington.edu> when you write. +// + +#include <stdio.h> +#include <stdlib.h> + +// +// uint32 must be an unsigned integer type capable of holding at least 32 +// bits; exactly 32 should be fastest, but 64 is better on an Alpha with +// GCC at -O3 optimization so try your options and see what's best for you +// + +typedef unsigned long uint32; + +#define N              (624)                 // length of state vector +#define M              (397)                 // a period parameter +#define K              (0x9908B0DFU)         // a magic constant +#define hiBit(u)       ((u) & 0x80000000U)   // mask all but highest   bit of u +#define loBit(u)       ((u) & 0x00000001U)   // mask all but lowest    bit of u +#define loBits(u)      ((u) & 0x7FFFFFFFU)   // mask     the highest   bit of u +#define mixBits(u, v)  (hiBit(u)|loBits(v))  // move hi bit of u to hi bit of v + +static uint32   state[N+1];     // state vector + 1 extra to not violate ANSI C +static uint32   *next;          // next random value is computed from here +static int      left = -1;      // can *next++ this many times before reloading + + +void seedMT(uint32 seed) + { +    // +    // We initialize state[0..(N-1)] via the generator +    // +    //   x_new = (69069 * x_old) mod 2^32 +    // +    // from Line 15 of Table 1, p. 106, Sec. 3.3.4 of Knuth's +    // _The Art of Computer Programming_, Volume 2, 3rd ed. +    // +    // Notes (SJC): I do not know what the initial state requirements +    // of the Mersenne Twister are, but it seems this seeding generator +    // could be better.  It achieves the maximum period for its modulus +    // (2^30) iff x_initial is odd (p. 20-21, Sec. 3.2.1.2, Knuth); if +    // x_initial can be even, you have sequences like 0, 0, 0, ...; +    // 2^31, 2^31, 2^31, ...; 2^30, 2^30, 2^30, ...; 2^29, 2^29 + 2^31, +    // 2^29, 2^29 + 2^31, ..., etc. so I force seed to be odd below. 
+    // +    // Even if x_initial is odd, if x_initial is 1 mod 4 then +    // +    //   the          lowest bit of x is always 1, +    //   the  next-to-lowest bit of x is always 0, +    //   the 2nd-from-lowest bit of x alternates      ... 0 1 0 1 0 1 0 1 ... , +    //   the 3rd-from-lowest bit of x 4-cycles        ... 0 1 1 0 0 1 1 0 ... , +    //   the 4th-from-lowest bit of x has the 8-cycle ... 0 0 0 1 1 1 1 0 ... , +    //    ... +    // +    // and if x_initial is 3 mod 4 then +    // +    //   the          lowest bit of x is always 1, +    //   the  next-to-lowest bit of x is always 1, +    //   the 2nd-from-lowest bit of x alternates      ... 0 1 0 1 0 1 0 1 ... , +    //   the 3rd-from-lowest bit of x 4-cycles        ... 0 0 1 1 0 0 1 1 ... , +    //   the 4th-from-lowest bit of x has the 8-cycle ... 0 0 1 1 1 1 0 0 ... , +    //    ... +    // +    // The generator's potency (min. s>=0 with (69069-1)^s = 0 mod 2^32) is +    // 16, which seems to be alright by p. 25, Sec. 3.2.1.3 of Knuth.  It +    // also does well in the dimension 2..5 spectral tests, but it could be +    // better in dimension 6 (Line 15, Table 1, p. 106, Sec. 3.3.4, Knuth). +    // +    // Note that the random number user does not see the values generated +    // here directly since reloadMT() will always munge them first, so maybe +    // none of all of this matters.  In fact, the seed values made here could +    // even be extra-special desirable if the Mersenne Twister theory says +    // so-- that's why the only change I made is to restrict to odd seeds. 
+    // + +    register uint32 x = (seed | 1U) & 0xFFFFFFFFU, *s = state; +    register int    j; + +    for(left=0, *s++=x, j=N; --j; +        *s++ = (x*=69069U) & 0xFFFFFFFFU); + } + + +uint32 reloadMT(void) + { +    register uint32 *p0=state, *p2=state+2, *pM=state+M, s0, s1; +    register int    j; + +    if(left < -1) +        seedMT(4357U); + +    left=N-1, next=state+1; + +    for(s0=state[0], s1=state[1], j=N-M+1; --j; s0=s1, s1=*p2++) +        *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U); + +    for(pM=state, j=M; --j; s0=s1, s1=*p2++) +        *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U); + +    s1=state[0], *p0 = *pM ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U); +    s1 ^= (s1 >> 11); +    s1 ^= (s1 <<  7) & 0x9D2C5680U; +    s1 ^= (s1 << 15) & 0xEFC60000U; +    return(s1 ^ (s1 >> 18)); + } + + +inline uint32 randomMT(void) + { +    uint32 y; + +    if(--left < 0) +        return(reloadMT()); + +    y  = *next++; +    y ^= (y >> 11); +    y ^= (y <<  7) & 0x9D2C5680U; +    y ^= (y << 15) & 0xEFC60000U; +    y ^= (y >> 18); +    return(y); + } + + + + diff --git a/report/pyp_clustering/acl09-short/code/crppred.m b/report/pyp_clustering/acl09-short/code/crppred.m new file mode 100644 index 00000000..17f22652 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/crppred.m @@ -0,0 +1,12 @@ +function output = crppred(input,b) + +uniqin = unique(input); +prediction = zeros(max(input),1); + +p0=1/30114; +for i = 1:length(uniqin) +  prediction(uniqin(i)) = b*p0*sum(1./((1:uniqin(i))+b*p0-1)); +end + +output = prediction(input); + diff --git a/report/pyp_clustering/acl09-short/code/crppred_geom.m b/report/pyp_clustering/acl09-short/code/crppred_geom.m new file mode 100644 index 00000000..e6869e4f --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/crppred_geom.m @@ -0,0 +1,12 @@ +function output = crppred_geom(input,lengths,b) + + +output = zeros(length(input),1); + +p0=(1/52).^lengths; +a=b*p0; +for i = 
1:length(input) +  output(i) = a(i)*sum(1./((1:input(i))+a(i)-1)); +end + + diff --git a/report/pyp_clustering/acl09-short/code/logbinmean.m b/report/pyp_clustering/acl09-short/code/logbinmean.m new file mode 100644 index 00000000..23dbb0ac --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/logbinmean.m @@ -0,0 +1,38 @@ +function [ logbinsvalid , meanval, seval ] = logbinmean( frequency, typecount, NBINS , MinCounts ); + +% calculate distribution of frequency +Maxfrequency = max( frequency ); +meanK  = mean( frequency ); +linbins = linspace( log10(1) , log10( Maxfrequency ) , NBINS ); +stepb   = linbins( 2 ) - linbins( 1 ); + +logbins  = 10.^linbins; + +% !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +LL       = length( linbins ) - 1; +for i=1:LL +  lowb  = linbins( i   ); +  highb = linbins( i+1 ); +  linbinsout( i ) = (highb + lowb) / 2; +   +  lowb  = logbins( i   ); +  highb = logbins( i+1 ); +  step  = highb - lowb; +  logbinsout( i ) = 10^linbinsout( i ); +   +  indices = find( frequency >= lowb & frequency < highb); +   +  meanval(i) = mean(typecount(indices)); +  rawcounts(i) = length(indices); +  seval(i) = std(typecount(indices))./sqrt(rawcounts(i)); +   +end + +valid = 1:LL; +valid( find( rawcounts <= MinCounts )) = []; + +linbinsvalid   = linbinsout( valid ); +logbinsvalid   = logbinsout( valid ); + +meanval     =  meanval( valid ); +seval       =  seval( valid ); diff --git a/report/pyp_clustering/acl09-short/code/noP0pred.m b/report/pyp_clustering/acl09-short/code/noP0pred.m new file mode 100644 index 00000000..f72f1432 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/noP0pred.m @@ -0,0 +1,11 @@ +function output = antoniakpred(input,b) + +uniqin = unique(input); +prediction = zeros(max(input),1); + +for i = 1:length(uniqin) +  prediction(uniqin(i)) = b*log((b+uniqin(i))/b); +end + +output = prediction(input); + diff --git a/report/pyp_clustering/acl09-short/code/plot0.eps b/report/pyp_clustering/acl09-short/code/plot0.eps new file 
mode 100644 index 00000000..6094346a --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/plot0.eps @@ -0,0 +1,633 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The Mathworks, Inc. Version 7.7.0.471 (R2008b). Operating System: Linux 2.6.18-128.1.6.el5.inf.1PAE #1 SMP Wed Apr 15 10:23:41 BST 2009 i686. +%%Title: /afs/inf.ed.ac.uk/user/s/sgwater/research/papers/2009/acl09-short/code/plot0.eps +%%CreationDate: 07/23/2009  17:36:19 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox:   -44   170   641   672 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c  /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] +  makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse +  exch dup 3 1 roll findfont dup length dict begin +  { 1 index /FID ne {def}{pop pop} ifelse } forall +  /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} 
ifelse +  exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} +  {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 +  dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto +  neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill +   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale +  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale +  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale +  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale +  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef +  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly +  vradius add translate hradius vradius scale 0 0 1 180 270 arc  +  tMatrix setmatrix lrx hradius sub uly vradius add translate +  
hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix +  lrx hradius sub lry vradius sub translate hradius vradius scale +  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub +  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix +  closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def +  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate +  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad +  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix +  closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def +  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate +  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad +  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix +  closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup +  ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { +  2 0 asub 3 1 asub 4 0 asub 5 1 asub +  dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { +  dmat dtri tri_to_matrix tmat1 invertmatrix  +  smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string +  currentfile  +  3 index 0 eq {/ASCIIHexDecode filter} +  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } +  ifelse exch readstring pop +  dup 0 3 index getinterval /rbmap xdef +  dup 2 index dup getinterval /gbmap xdef +  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c +  cols rows 8 compute_transform  + 
 rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox:   -44   170   641   672 +MathWorks begin +bpage +%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode -0528 8064 csm + +    0     0  8231  6023 rc +88 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg +   0    0 8232 6024 rf +6 w +0 4908 6379 0 0 -4908 1070 5360 4 MP +PP +-6379 0 0 4908 6379 0 0 -4908 1070 5360 5 MP stroke +4 w +DO +SO +6 w +0 sg +1070 5360 mt 7449 5360 L +1070  452 mt 7449  452 L +1070 5360 mt 1070  452 L +7449 5360 mt 7449  452 L +1070 5360 mt 7449 5360 L +1070 5360 mt 1070  452 L +1247 5360 mt 1247 5296 L +1247  452 mt 1247  515 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1194 5572 mt  +(1) s +1780 5360 mt 1780 5296 L +1780  452 mt 1780  515 L +1754 5572 mt  +( ) s +2092 5360 mt 2092 5296 L +2092  452 mt 2092  515 L +2066 5572 mt  +( ) s +2314 5360 mt 2314 5296 L +2314  452 mt 2314  515 L +2288 5572 mt  +( ) s +2485 5360 mt 2485 5296 L +2485  452 mt 2485  515 L +2459 5572 mt  +( ) s +2626 5360 mt 2626 5296 L +2626  452 mt 2626  515 L +2600 5572 mt  +( ) s +2744 5360 mt 2744 5296 L +2744  452 mt 2744  515 L +2718 5572 mt  +( ) s +2847 5360 mt 2847 5296 L +2847  452 mt 2847  515 L +2821 5572 mt  +( ) s +2938 5360 mt 2938 5296 L +2938  452 mt 2938  515 L +2912 5572 mt  +( ) s +3019 5360 mt 3019 5296 L +3019  452 mt 3019  515 L +2913 5572 mt  +(10) s +3552 5360 mt 3552 5296 L +3552  452 mt 3552  515 L +3526 5572 mt  +( ) s 
+3864 5360 mt 3864 5296 L +3864  452 mt 3864  515 L +3838 5572 mt  +( ) s +4085 5360 mt 4085 5296 L +4085  452 mt 4085  515 L +4059 5572 mt  +( ) s +4257 5360 mt 4257 5296 L +4257  452 mt 4257  515 L +4231 5572 mt  +( ) s +4397 5360 mt 4397 5296 L +4397  452 mt 4397  515 L +4371 5572 mt  +( ) s +4516 5360 mt 4516 5296 L +4516  452 mt 4516  515 L +4490 5572 mt  +( ) s +4619 5360 mt 4619 5296 L +4619  452 mt 4619  515 L +4593 5572 mt  +( ) s +4710 5360 mt 4710 5296 L +4710  452 mt 4710  515 L +4684 5572 mt  +( ) s +4791 5360 mt 4791 5296 L +4791  452 mt 4791  515 L +4631 5572 mt  +(100) s +5324 5360 mt 5324 5296 L +5324  452 mt 5324  515 L +5298 5572 mt  +( ) s +5636 5360 mt 5636 5296 L +5636  452 mt 5636  515 L +5610 5572 mt  +( ) s +5857 5360 mt 5857 5296 L +5857  452 mt 5857  515 L +5831 5572 mt  +( ) s +6029 5360 mt 6029 5296 L +6029  452 mt 6029  515 L +6003 5572 mt  +( ) s +6169 5360 mt 6169 5296 L +6169  452 mt 6169  515 L +6143 5572 mt  +( ) s +6288 5360 mt 6288 5296 L +6288  452 mt 6288  515 L +6262 5572 mt  +( ) s +6391 5360 mt 6391 5296 L +6391  452 mt 6391  515 L +6365 5572 mt  +( ) s +6481 5360 mt 6481 5296 L +6481  452 mt 6481  515 L +6455 5572 mt  +( ) s +6563 5360 mt 6563 5296 L +6563  452 mt 6563  515 L +6350 5572 mt  +(1000) s +7096 5360 mt 7096 5296 L +7096  452 mt 7096  515 L +7070 5572 mt  +( ) s +7408 5360 mt 7408 5296 L +7408  452 mt 7408  515 L +7382 5572 mt  +( ) s +1070 5352 mt 1133 5352 L +7449 5352 mt 7385 5352 L + 982 5423 mt  +( ) s +1070 5233 mt 1133 5233 L +7449 5233 mt 7385 5233 L + 982 5304 mt  +( ) s +1070 5126 mt 1133 5126 L +7449 5126 mt 7385 5126 L + 929 5197 mt  +(1) s +1070 4422 mt 1133 4422 L +7449 4422 mt 7385 4422 L + 982 4493 mt  +( ) s +1070 4011 mt 1133 4011 L +7449 4011 mt 7385 4011 L + 982 4082 mt  +( ) s +1070 3719 mt 1133 3719 L +7449 3719 mt 7385 3719 L + 982 3790 mt  +( ) s +1070 3492 mt 1133 3492 L +7449 3492 mt 7385 3492 L + 982 3563 mt  +( ) s +1070 3307 mt 1133 3307 L +7449 3307 mt 7385 3307 L + 982 3378 mt  +( 
) s +1070 3151 mt 1133 3151 L +7449 3151 mt 7385 3151 L + 982 3222 mt  +( ) s +1070 3015 mt 1133 3015 L +7449 3015 mt 7385 3015 L + 982 3086 mt  +( ) s +1070 2896 mt 1133 2896 L +7449 2896 mt 7385 2896 L + 982 2967 mt  +( ) s +1070 2789 mt 1133 2789 L +7449 2789 mt 7385 2789 L + 822 2860 mt  +(10) s +1070 2085 mt 1133 2085 L +7449 2085 mt 7385 2085 L + 982 2156 mt  +( ) s +1070 1674 mt 1133 1674 L +7449 1674 mt 7385 1674 L + 982 1745 mt  +( ) s +1070 1382 mt 1133 1382 L +7449 1382 mt 7385 1382 L + 982 1453 mt  +( ) s +1070 1155 mt 1133 1155 L +7449 1155 mt 7385 1155 L + 982 1226 mt  +( ) s +1070  970 mt 1133  970 L +7449  970 mt 7385  970 L + 982 1041 mt  +( ) s +1070  814 mt 1133  814 L +7449  814 mt 7385  814 L + 982  885 mt  +( ) s +1070  678 mt 1133  678 L +7449  678 mt 7385  678 L + 982  749 mt  +( ) s +1070  558 mt 1133  558 L +7449  558 mt 7385  558 L + 982  629 mt  +( ) s +1070  452 mt 1133  452 L +7449  452 mt 7385  452 L + 715  523 mt  +(100) s +1070 5360 mt 7449 5360 L +1070  452 mt 7449  452 L +1070 5360 mt 1070  452 L +7449 5360 mt 7449  452 L +gs 1070 452 6380 4909 rc +24 w +gr + +24 w +/c8 { 0.000000 0.700000 0.500000 sr} bdef +c8 +  60   60 1466 5126 FO +  60   60 1905 4428 FO +  60   60 2344 3977 FO +  60   60 2783 3675 FO +  60   60 3222 3368 FO +  60   60 3660 3107 FO +  60   60 4099 2893 FO +  60   60 4538 2710 FO +  60   60 4977 2550 FO +  60   60 5416 2412 FO +  60   60 5855 2292 FO +  60   60 6294 2180 FO +  60   60 6733 2083 FO +  60   60 7171 1991 FO +gs 1070 452 6380 4909 rc +gr + +/c9 { 0.400000 0.400000 1.000000 sr} bdef +c9 +  60   60 1466 5126 FO +  60   60 1905 4860 FO +  60   60 2344 4699 FO +  60   60 2783 4594 FO +  60   60 3222 4483 FO +  60   60 3660 4383 FO +  60   60 4099 4295 FO +  60   60 4538 4214 FO +  60   60 4977 4137 FO +  60   60 5416 4068 FO +  60   60 5855 4008 FO +  60   60 6294 3941 FO +  60   60 6733 3883 FO +  60   60 7171 3827 FO +gs 1070 452 6380 4909 rc +gr + +/c10 { 1.000000 0.400000 0.200000 sr} bdef +c10 +  
60   60 1466 5126 FO +  60   60 1905 5088 FO +  60   60 2344 5064 FO +  60   60 2783 5048 FO +  60   60 3222 5029 FO +  60   60 3660 5010 FO +  60   60 4099 4994 FO +  60   60 4538 4976 FO +  60   60 4977 4959 FO +  60   60 5416 4942 FO +  60   60 5855 4930 FO +  60   60 6294 4914 FO +  60   60 6733 4908 FO +  60   60 7171 4874 FO +gs 1070 452 6380 4909 rc +gr + +0 sg +  60   60 1466 5126 FO +  60   60 1905 5122 FO +  60   60 2344 5119 FO +  60   60 2783 5118 FO +  60   60 3222 5116 FO +  60   60 3660 5113 FO +  60   60 4099 5112 FO +  60   60 4538 5110 FO +  60   60 4977 5109 FO +  60   60 5416 5105 FO +  60   60 5855 5105 FO +  60   60 6294 5104 FO +  60   60 6733 5101 FO +  60   60 7171 5103 FO +gs 1070 452 6380 4909 rc +gr + + 617 4557 mt  -90 rotate +(Mean number of lexical entries \(tables\)) s +90 rotate +3390 5724 mt  +(Word frequency \(n) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + +4963 5820 mt  +(w) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +5066 5724 mt  +(\)) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +1053 5403 mt  +( ) s +7433  494 mt  +( ) s +6 w +1 sg +0 1070 1481 0 0 -1070 1129 1582 4 MP +PP +-1481 0 0 1070 1481 0 0 -1070 1129 1582 5 MP stroke +4 w +DO +SO +6 w +0 sg +1129 1582 mt 2610 1582 L +1129  512 mt 2610  512 L +1129 1582 mt 1129  512 L +2610 1582 mt 2610  512 L +1129 1582 mt 2610 1582 L +1129 1582 mt 1129  512 L +1129 1582 mt 2610 1582 L +1129  512 mt 2610  512 L +1129 1582 mt 1129  512 L +2610 1582 mt 2610  512 L +%%IncludeResource: font Symbol +/Symbol /ISOLatin1Encoding 192 FMSR + +1601  728 mt  +(a) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1722  728 mt  +( = 100000) s +gs 1129 512 1482 1071 rc +24 w +gs 1236 513 293 293 rc +c8 +  60   60 1382  659 FO +gr + +c8 +gr + +24 w +c8 +0 sg +%%IncludeResource: font Symbol +/Symbol /ISOLatin1Encoding 192 FMSR + +1601  987 mt  +(a) s 
+%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1722  987 mt  +( = 10000) s +gs 1129 512 1482 1071 rc +gs 1236 771 293 293 rc +c9 +  60   60 1382  917 FO +gr + +c9 +gr + +c9 +0 sg +%%IncludeResource: font Symbol +/Symbol /ISOLatin1Encoding 192 FMSR + +1601 1246 mt  +(a) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1722 1246 mt  +( = 1000) s +gs 1129 512 1482 1071 rc +gs 1236 1030 293 293 rc +c10 +  60   60 1382 1176 FO +gr + +c10 +gr + +c10 +0 sg +%%IncludeResource: font Symbol +/Symbol /ISOLatin1Encoding 192 FMSR + +1601 1505 mt  +(a) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1722 1505 mt  +( = 100) s +gs 1129 512 1482 1071 rc +gs 1236 1288 293 293 rc +  60   60 1382 1434 FO +gr + +6 w +gr + +6 w + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/report/pyp_clustering/acl09-short/code/plot0.pdf b/report/pyp_clustering/acl09-short/code/plot0.pdf Binary files differnew file mode 100644 index 00000000..fd1b4595 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/plot0.pdf diff --git a/report/pyp_clustering/acl09-short/code/plot1.eps b/report/pyp_clustering/acl09-short/code/plot1.eps new file mode 100644 index 00000000..ebb2f194 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/plot1.eps @@ -0,0 +1,579 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The Mathworks, Inc. Version 7.7.0.471 (R2008b). Operating System: Linux 2.6.18-128.1.6.el5.inf.1PAE #1 SMP Wed Apr 15 10:23:41 BST 2009 i686. 
+%%Title: /afs/inf.ed.ac.uk/user/s/sgwater/research/papers/2009/acl09-short/code/plot1.eps +%%CreationDate: 07/23/2009  17:34:27 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox:   -44   170   641   672 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c  /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] +  makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse +  exch dup 3 1 roll findfont dup length dict begin +  { 1 index /FID ne {def}{pop pop} ifelse } forall +  /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse +  exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} +  {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 
dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 +  dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto +  neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill +   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale +  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale +  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale +  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale +  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef +  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly +  vradius add translate hradius vradius scale 0 0 1 180 270 arc  +  tMatrix setmatrix lrx hradius sub uly vradius add translate +  hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix +  lrx hradius sub lry vradius sub translate hradius vradius scale +  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub +  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix +  closepath} bdef +/FRR {MRR stroke } bdef +/PRR 
{MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def +  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate +  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad +  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix +  closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def +  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate +  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad +  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix +  closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup +  ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { +  2 0 asub 3 1 asub 4 0 asub 5 1 asub +  dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { +  dmat dtri tri_to_matrix tmat1 invertmatrix  +  smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string +  currentfile  +  3 index 0 eq {/ASCIIHexDecode filter} +  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } +  ifelse exch readstring pop +  dup 0 3 index getinterval /rbmap xdef +  dup 2 index dup getinterval /gbmap xdef +  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c +  cols rows 8 compute_transform  +  rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox:   -44   170   641   672 +MathWorks begin +bpage +%%EndPageSetup + 
+%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode -0528 8064 csm + +    0     0  8231  6023 rc +88 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg +   0    0 8232 6024 rf +6 w +0 4908 6379 0 0 -4908 1070 5360 4 MP +PP +-6379 0 0 4908 6379 0 0 -4908 1070 5360 5 MP stroke +4 w +DO +SO +6 w +0 sg +1070 5360 mt 7449 5360 L +1070  452 mt 7449  452 L +1070 5360 mt 1070  452 L +7449 5360 mt 7449  452 L +1070 5360 mt 7449 5360 L +1070 5360 mt 1070  452 L +1247 5360 mt 1247 5296 L +1247  452 mt 1247  515 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1194 5572 mt  +(1) s +1780 5360 mt 1780 5296 L +1780  452 mt 1780  515 L +1754 5572 mt  +( ) s +2092 5360 mt 2092 5296 L +2092  452 mt 2092  515 L +2066 5572 mt  +( ) s +2314 5360 mt 2314 5296 L +2314  452 mt 2314  515 L +2288 5572 mt  +( ) s +2485 5360 mt 2485 5296 L +2485  452 mt 2485  515 L +2459 5572 mt  +( ) s +2626 5360 mt 2626 5296 L +2626  452 mt 2626  515 L +2600 5572 mt  +( ) s +2744 5360 mt 2744 5296 L +2744  452 mt 2744  515 L +2718 5572 mt  +( ) s +2847 5360 mt 2847 5296 L +2847  452 mt 2847  515 L +2821 5572 mt  +( ) s +2938 5360 mt 2938 5296 L +2938  452 mt 2938  515 L +2912 5572 mt  +( ) s +3019 5360 mt 3019 5296 L +3019  452 mt 3019  515 L +2913 5572 mt  +(10) s +3552 5360 mt 3552 5296 L +3552  452 mt 3552  515 L +3526 5572 mt  +( ) s +3864 5360 mt 3864 5296 L +3864  452 mt 3864  515 L +3838 5572 mt  +( ) s +4085 5360 mt 4085 5296 L +4085  452 mt 4085  515 L +4059 5572 mt  +( ) s +4257 5360 mt 4257 5296 L +4257  452 mt 4257  515 L +4231 5572 mt  +( ) s +4397 5360 mt 4397 5296 L +4397  452 mt 4397  515 L +4371 5572 mt  +( ) s 
+4516 5360 mt 4516 5296 L +4516  452 mt 4516  515 L +4490 5572 mt  +( ) s +4619 5360 mt 4619 5296 L +4619  452 mt 4619  515 L +4593 5572 mt  +( ) s +4710 5360 mt 4710 5296 L +4710  452 mt 4710  515 L +4684 5572 mt  +( ) s +4791 5360 mt 4791 5296 L +4791  452 mt 4791  515 L +4631 5572 mt  +(100) s +5324 5360 mt 5324 5296 L +5324  452 mt 5324  515 L +5298 5572 mt  +( ) s +5636 5360 mt 5636 5296 L +5636  452 mt 5636  515 L +5610 5572 mt  +( ) s +5857 5360 mt 5857 5296 L +5857  452 mt 5857  515 L +5831 5572 mt  +( ) s +6029 5360 mt 6029 5296 L +6029  452 mt 6029  515 L +6003 5572 mt  +( ) s +6169 5360 mt 6169 5296 L +6169  452 mt 6169  515 L +6143 5572 mt  +( ) s +6288 5360 mt 6288 5296 L +6288  452 mt 6288  515 L +6262 5572 mt  +( ) s +6391 5360 mt 6391 5296 L +6391  452 mt 6391  515 L +6365 5572 mt  +( ) s +6481 5360 mt 6481 5296 L +6481  452 mt 6481  515 L +6455 5572 mt  +( ) s +6563 5360 mt 6563 5296 L +6563  452 mt 6563  515 L +6350 5572 mt  +(1000) s +7096 5360 mt 7096 5296 L +7096  452 mt 7096  515 L +7070 5572 mt  +( ) s +7408 5360 mt 7408 5296 L +7408  452 mt 7408  515 L +7382 5572 mt  +( ) s +1070 5352 mt 1133 5352 L +7449 5352 mt 7385 5352 L + 982 5423 mt  +( ) s +1070 5233 mt 1133 5233 L +7449 5233 mt 7385 5233 L + 982 5304 mt  +( ) s +1070 5126 mt 1133 5126 L +7449 5126 mt 7385 5126 L + 929 5197 mt  +(1) s +1070 4422 mt 1133 4422 L +7449 4422 mt 7385 4422 L + 982 4493 mt  +( ) s +1070 4011 mt 1133 4011 L +7449 4011 mt 7385 4011 L + 982 4082 mt  +( ) s +1070 3719 mt 1133 3719 L +7449 3719 mt 7385 3719 L + 982 3790 mt  +( ) s +1070 3492 mt 1133 3492 L +7449 3492 mt 7385 3492 L + 982 3563 mt  +( ) s +1070 3307 mt 1133 3307 L +7449 3307 mt 7385 3307 L + 982 3378 mt  +( ) s +1070 3151 mt 1133 3151 L +7449 3151 mt 7385 3151 L + 982 3222 mt  +( ) s +1070 3015 mt 1133 3015 L +7449 3015 mt 7385 3015 L + 982 3086 mt  +( ) s +1070 2896 mt 1133 2896 L +7449 2896 mt 7385 2896 L + 982 2967 mt  +( ) s +1070 2789 mt 1133 2789 L +7449 2789 mt 7385 2789 L + 822 2860 mt  
+(10) s +1070 2085 mt 1133 2085 L +7449 2085 mt 7385 2085 L + 982 2156 mt  +( ) s +1070 1674 mt 1133 1674 L +7449 1674 mt 7385 1674 L + 982 1745 mt  +( ) s +1070 1382 mt 1133 1382 L +7449 1382 mt 7385 1382 L + 982 1453 mt  +( ) s +1070 1155 mt 1133 1155 L +7449 1155 mt 7385 1155 L + 982 1226 mt  +( ) s +1070  970 mt 1133  970 L +7449  970 mt 7385  970 L + 982 1041 mt  +( ) s +1070  814 mt 1133  814 L +7449  814 mt 7385  814 L + 982  885 mt  +( ) s +1070  678 mt 1133  678 L +7449  678 mt 7385  678 L + 982  749 mt  +( ) s +1070  558 mt 1133  558 L +7449  558 mt 7385  558 L + 982  629 mt  +( ) s +1070  452 mt 1133  452 L +7449  452 mt 7385  452 L + 715  523 mt  +(100) s +1070 5360 mt 7449 5360 L +1070  452 mt 7449  452 L +1070 5360 mt 1070  452 L +7449 5360 mt 7449  452 L +gs 1070 452 6380 4909 rc +24 w +gr + +24 w +  48   48 1466 5126 FO +  48   48 1905 5122 FO +  48   48 2344 5119 FO +  48   48 2783 5118 FO +  48   48 3222 5116 FO +  48   48 3660 5113 FO +  48   48 4099 5112 FO +  48   48 4538 5110 FO +  48   48 4977 5109 FO +  48   48 5416 5105 FO +  48   48 5855 5105 FO +  48   48 6294 5104 FO +  48   48 6733 5101 FO +  48   48 7171 5103 FO +gs 1070 452 6380 4909 rc +438 -2 439 -2 439 -2 439 -2 439 -2 439 -2 439 -2 439 -1  +438 -2 439 -3 439 -1 439 -3 439 -4 1466 5126 14 MP stroke +gr + +/c8 { 1.000000 0.400000 0.200000 sr} bdef +c8 +  48   48 1466 5126 FO +  48   48 1905 5088 FO +  48   48 2344 5064 FO +  48   48 2783 5048 FO +  48   48 3222 5029 FO +  48   48 3660 5010 FO +  48   48 4099 4994 FO +  48   48 4538 4976 FO +  48   48 4977 4959 FO +  48   48 5416 4942 FO +  48   48 5855 4930 FO +  48   48 6294 4914 FO +  48   48 6733 4908 FO +  48   48 7171 4874 FO +gs 1070 452 6380 4909 rc +438 -16 439 -15 439 -15 439 -16 439 -16 439 -16 439 -17 439 -17  +438 -19 439 -18 439 -17 439 -24 439 -38 1466 5126 14 MP stroke +gr + +/c9 { 0.400000 0.400000 1.000000 sr} bdef +c9 +  48   48 1466 5126 FO +  48   48 1905 4860 FO +  48   48 2344 4699 FO +  48   48 2783 4594 FO +  
48   48 3222 4483 FO +  48   48 3660 4383 FO +  48   48 4099 4295 FO +  48   48 4538 4214 FO +  48   48 4977 4137 FO +  48   48 5416 4068 FO +  48   48 5855 4008 FO +  48   48 6294 3941 FO +  48   48 6733 3883 FO +  48   48 7171 3827 FO +gs 1070 452 6380 4909 rc +438 -55 439 -57 439 -61 439 -64 439 -70 439 -75 439 -81 439 -89  +438 -100 439 -111 439 -105 439 -161 439 -266 1466 5126 14 MP stroke +gr + +/c10 { 0.000000 0.700000 0.500000 sr} bdef +c10 +  48   48 1466 5126 FO +  48   48 1905 4428 FO +  48   48 2344 3977 FO +  48   48 2783 3675 FO +  48   48 3222 3368 FO +  48   48 3660 3107 FO +  48   48 4099 2893 FO +  48   48 4538 2710 FO +  48   48 4977 2550 FO +  48   48 5416 2412 FO +  48   48 5855 2292 FO +  48   48 6294 2180 FO +  48   48 6733 2083 FO +  48   48 7171 1991 FO +gs 1070 452 6380 4909 rc +438 -91 439 -97 439 -110 439 -121 439 -138 439 -160 439 -183 439 -214  +438 -261 439 -307 439 -302 439 -451 439 -698 1466 5126 14 MP stroke +gr + +0 sg + 617 4557 mt  -90 rotate +(Mean number of lexical entries \(tables\)) s +90 rotate +3390 5724 mt  +(Word frequency \(n) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + +4963 5820 mt  +(w) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +5066 5724 mt  +(\)) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +1053 5403 mt  +( ) s +7433  494 mt  +( ) s +6 w +1 sg +0 500 1510 0 0 -500 1129 1012 4 MP +PP +-1510 0 0 500 1510 0 0 -500 1129 1012 5 MP stroke +4 w +DO +SO +6 w +0 sg +1129 1012 mt 2639 1012 L +1129  512 mt 2639  512 L +1129 1012 mt 1129  512 L +2639 1012 mt 2639  512 L +1129 1012 mt 2639 1012 L +1129 1012 mt 1129  512 L +1129 1012 mt 2639 1012 L +1129  512 mt 2639  512 L +1129 1012 mt 1129  512 L +2639 1012 mt 2639  512 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1594  713 mt  +(Empirical) s +gs 1129 512 1511 501 rc +24 w +gs 1257 523 245 245 rc +  48   48 1379  645 FO +gr + +gr + +24 
w +1594  948 mt  +(Expectation) s +gs 1129 512 1511 501 rc +358 0 1200 878 2 MP stroke +6 w +gr + +6 w + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/report/pyp_clustering/acl09-short/code/plot1.pdf b/report/pyp_clustering/acl09-short/code/plot1.pdf Binary files differnew file mode 100644 index 00000000..90fcd9ba --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/plot1.pdf diff --git a/report/pyp_clustering/acl09-short/code/plot2.eps b/report/pyp_clustering/acl09-short/code/plot2.eps new file mode 100644 index 00000000..e5c5536a --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/plot2.eps @@ -0,0 +1,552 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The Mathworks, Inc. Version 7.7.0.471 (R2008b). Operating System: Linux 2.6.18-128.1.6.el5.inf.1PAE #1 SMP Wed Apr 15 10:23:41 BST 2009 i686. +%%Title: /afs/inf.ed.ac.uk/user/s/sgwater/research/papers/2009/acl09-short/code/plot2.eps +%%CreationDate: 07/23/2009  17:33:05 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox:   -44   170   641   672 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c  /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def 
/landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] +  makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse +  exch dup 3 1 roll findfont dup length dict begin +  { 1 index /FID ne {def}{pop pop} ifelse } forall +  /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse +  exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} +  {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 +  dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto +  neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill +   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale +  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale +  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix 
pop translate scale +  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale +  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef +  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly +  vradius add translate hradius vradius scale 0 0 1 180 270 arc  +  tMatrix setmatrix lrx hradius sub uly vradius add translate +  hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix +  lrx hradius sub lry vradius sub translate hradius vradius scale +  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub +  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix +  closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def +  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate +  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad +  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix +  closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def +  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate +  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad +  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix +  closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup +  ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { +  2 0 asub 3 1 asub 4 0 asub 5 1 asub +  dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { +  dmat dtri tri_to_matrix tmat1 invertmatrix  
+  smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string +  currentfile  +  3 index 0 eq {/ASCIIHexDecode filter} +  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } +  ifelse exch readstring pop +  dup 0 3 index getinterval /rbmap xdef +  dup 2 index dup getinterval /gbmap xdef +  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c +  cols rows 8 compute_transform  +  rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox:   -44   170   641   672 +MathWorks begin +bpage +%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode -0528 8064 csm + +    0     0  8231  6023 rc +88 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg +   0    0 8232 6024 rf +6 w +0 4908 6379 0 0 -4908 1070 5360 4 MP +PP +-6379 0 0 4908 6379 0 0 -4908 1070 5360 5 MP stroke +4 w +DO +SO +6 w +0 sg +1070 5360 mt 7449 5360 L +1070  452 mt 7449  452 L +1070 5360 mt 1070  452 L +7449 5360 mt 7449  452 L +1070 5360 mt 7449 5360 L +1070 5360 mt 1070  452 L +1247 5360 mt 1247 5296 L +1247  452 mt 1247  515 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1194 5572 mt  +(1) s +1780 5360 mt 1780 5296 L +1780  452 mt 1780  515 L +1754 5572 mt  +( ) s +2092 5360 mt 2092 5296 L +2092  452 mt 2092  515 L +2066 5572 mt  +( ) s +2314 5360 mt 2314 5296 L +2314  
452 mt 2314  515 L +2288 5572 mt  +( ) s +2485 5360 mt 2485 5296 L +2485  452 mt 2485  515 L +2459 5572 mt  +( ) s +2626 5360 mt 2626 5296 L +2626  452 mt 2626  515 L +2600 5572 mt  +( ) s +2744 5360 mt 2744 5296 L +2744  452 mt 2744  515 L +2718 5572 mt  +( ) s +2847 5360 mt 2847 5296 L +2847  452 mt 2847  515 L +2821 5572 mt  +( ) s +2938 5360 mt 2938 5296 L +2938  452 mt 2938  515 L +2912 5572 mt  +( ) s +3019 5360 mt 3019 5296 L +3019  452 mt 3019  515 L +2913 5572 mt  +(10) s +3552 5360 mt 3552 5296 L +3552  452 mt 3552  515 L +3526 5572 mt  +( ) s +3864 5360 mt 3864 5296 L +3864  452 mt 3864  515 L +3838 5572 mt  +( ) s +4085 5360 mt 4085 5296 L +4085  452 mt 4085  515 L +4059 5572 mt  +( ) s +4257 5360 mt 4257 5296 L +4257  452 mt 4257  515 L +4231 5572 mt  +( ) s +4397 5360 mt 4397 5296 L +4397  452 mt 4397  515 L +4371 5572 mt  +( ) s +4516 5360 mt 4516 5296 L +4516  452 mt 4516  515 L +4490 5572 mt  +( ) s +4619 5360 mt 4619 5296 L +4619  452 mt 4619  515 L +4593 5572 mt  +( ) s +4710 5360 mt 4710 5296 L +4710  452 mt 4710  515 L +4684 5572 mt  +( ) s +4791 5360 mt 4791 5296 L +4791  452 mt 4791  515 L +4631 5572 mt  +(100) s +5324 5360 mt 5324 5296 L +5324  452 mt 5324  515 L +5298 5572 mt  +( ) s +5636 5360 mt 5636 5296 L +5636  452 mt 5636  515 L +5610 5572 mt  +( ) s +5857 5360 mt 5857 5296 L +5857  452 mt 5857  515 L +5831 5572 mt  +( ) s +6029 5360 mt 6029 5296 L +6029  452 mt 6029  515 L +6003 5572 mt  +( ) s +6169 5360 mt 6169 5296 L +6169  452 mt 6169  515 L +6143 5572 mt  +( ) s +6288 5360 mt 6288 5296 L +6288  452 mt 6288  515 L +6262 5572 mt  +( ) s +6391 5360 mt 6391 5296 L +6391  452 mt 6391  515 L +6365 5572 mt  +( ) s +6481 5360 mt 6481 5296 L +6481  452 mt 6481  515 L +6455 5572 mt  +( ) s +6563 5360 mt 6563 5296 L +6563  452 mt 6563  515 L +6350 5572 mt  +(1000) s +7096 5360 mt 7096 5296 L +7096  452 mt 7096  515 L +7070 5572 mt  +( ) s +7408 5360 mt 7408 5296 L +7408  452 mt 7408  515 L +7382 5572 mt  +( ) s +1070 5201 mt 1133 5201 L 
+7449 5201 mt 7385 5201 L + 769 5272 mt  +(0.1) s +1070 4725 mt 1133 4725 L +7449 4725 mt 7385 4725 L + 982 4796 mt  +( ) s +1070 4446 mt 1133 4446 L +7449 4446 mt 7385 4446 L + 982 4517 mt  +( ) s +1070 4248 mt 1133 4248 L +7449 4248 mt 7385 4248 L + 982 4319 mt  +( ) s +1070 4095 mt 1133 4095 L +7449 4095 mt 7385 4095 L + 982 4166 mt  +( ) s +1070 3969 mt 1133 3969 L +7449 3969 mt 7385 3969 L + 982 4040 mt  +( ) s +1070 3863 mt 1133 3863 L +7449 3863 mt 7385 3863 L + 982 3934 mt  +( ) s +1070 3771 mt 1133 3771 L +7449 3771 mt 7385 3771 L + 982 3842 mt  +( ) s +1070 3690 mt 1133 3690 L +7449 3690 mt 7385 3690 L + 982 3761 mt  +( ) s +1070 3618 mt 1133 3618 L +7449 3618 mt 7385 3618 L + 929 3689 mt  +(1) s +1070 3141 mt 1133 3141 L +7449 3141 mt 7385 3141 L + 982 3212 mt  +( ) s +1070 2863 mt 1133 2863 L +7449 2863 mt 7385 2863 L + 982 2934 mt  +( ) s +1070 2665 mt 1133 2665 L +7449 2665 mt 7385 2665 L + 982 2736 mt  +( ) s +1070 2511 mt 1133 2511 L +7449 2511 mt 7385 2511 L + 982 2582 mt  +( ) s +1070 2386 mt 1133 2386 L +7449 2386 mt 7385 2386 L + 982 2457 mt  +( ) s +1070 2280 mt 1133 2280 L +7449 2280 mt 7385 2280 L + 982 2351 mt  +( ) s +1070 2188 mt 1133 2188 L +7449 2188 mt 7385 2188 L + 982 2259 mt  +( ) s +1070 2107 mt 1133 2107 L +7449 2107 mt 7385 2107 L + 982 2178 mt  +( ) s +1070 2035 mt 1133 2035 L +7449 2035 mt 7385 2035 L + 822 2106 mt  +(10) s +1070 1558 mt 1133 1558 L +7449 1558 mt 7385 1558 L + 982 1629 mt  +( ) s +1070 1279 mt 1133 1279 L +7449 1279 mt 7385 1279 L + 982 1350 mt  +( ) s +1070 1082 mt 1133 1082 L +7449 1082 mt 7385 1082 L + 982 1153 mt  +( ) s +1070  928 mt 1133  928 L +7449  928 mt 7385  928 L + 982  999 mt  +( ) s +1070  803 mt 1133  803 L +7449  803 mt 7385  803 L + 982  874 mt  +( ) s +1070  697 mt 1133  697 L +7449  697 mt 7385  697 L + 982  768 mt  +( ) s +1070  605 mt 1133  605 L +7449  605 mt 7385  605 L + 982  676 mt  +( ) s +1070  524 mt 1133  524 L +7449  524 mt 7385  524 L + 982  595 mt  +( ) s +1070  452 mt 1133  452 
L +7449  452 mt 7385  452 L + 715  523 mt  +(100) s +1070 5360 mt 7449 5360 L +1070  452 mt 7449  452 L +1070 5360 mt 1070  452 L +7449 5360 mt 7449  452 L +gs 1070 452 6380 4909 rc +24 w +438 -2 439 -1 439 -1 439 -1 439 -2 439 -1 439 -1 439 -2  +438 -1 439 -1 439 -2 439 -1 439 -3 1466 3618 14 MP stroke +/c8 { 1.000000 0.400000 0.200000 sr} bdef +c8 +438 -10 439 -10 439 -11 439 -11 439 -11 439 -11 439 -11 439 -12  +438 -12 439 -13 439 -11 439 -16 439 -26 1466 3618 14 MP stroke +DA +438 -36 439 -37 439 -41 439 -42 439 -46 439 -49 439 -53 439 -57  +438 -64 439 -70 439 -65 439 -95 439 -147 1466 5110 14 MP stroke +SO +/c9 { 0.400000 0.400000 1.000000 sr} bdef +c9 +438 -37 439 -38 439 -42 439 -43 439 -48 439 -51 439 -55 439 -60  +438 -68 439 -75 439 -71 439 -109 439 -180 1466 3618 14 MP stroke +DA +438 -46 439 -49 439 -53 439 -56 439 -63 439 -69 439 -76 439 -85  +438 -100 439 -113 439 -110 439 -169 439 -276 1466 4150 14 MP stroke +SO +/c10 { 0.000000 0.700000 0.500000 sr} bdef +c10 +438 -61 439 -66 439 -75 439 -81 439 -94 439 -108 439 -125 439 -144  +438 -177 439 -208 439 -205 439 -305 439 -473 1466 3618 14 MP stroke +DA +438 -63 439 -68 439 -77 439 -84 439 -98 439 -112 439 -130 439 -151  +438 -185 439 -218 439 -213 439 -315 439 -484 1466 3710 14 MP stroke +gr + +24 w +c10 +DA +0 sg + 617 4557 mt  -90 rotate +(Mean number of lexical entries \(tables\)) s +90 rotate +3390 5724 mt  +(Word frequency \(n) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + +4963 5820 mt  +(w) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +5066 5724 mt  +(\)) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +1053 5403 mt  +( ) s +7433  494 mt  +( ) s +SO +6 w +1 sg +0 500 2507 0 0 -500 1129 1012 4 MP +PP +-2507 0 0 500 2507 0 0 -500 1129 1012 5 MP stroke +4 w +DO +SO +6 w +0 sg +1129 1012 mt 3636 1012 L +1129  512 mt 3636  512 L +1129 1012 mt 1129  512 L +3636 1012 mt 3636  512 L +1129 1012 mt 3636 1012 
L +1129 1012 mt 1129  512 L +1129 1012 mt 3636 1012 L +1129  512 mt 3636  512 L +1129 1012 mt 1129  512 L +3636 1012 mt 3636  512 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1603  713 mt  +(Expectation) s +gs 1129 512 2508 501 rc +24 w +365 0 1201 645 2 MP stroke +gr + +24 w +1603  948 mt  +(Antoniak approximation) s +gs 1129 512 2508 501 rc +DA +365 0 1201 878 2 MP stroke +SO +6 w +gr + +6 w + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/report/pyp_clustering/acl09-short/code/plot2.pdf b/report/pyp_clustering/acl09-short/code/plot2.pdf Binary files differnew file mode 100644 index 00000000..d9783120 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/plot2.pdf diff --git a/report/pyp_clustering/acl09-short/code/plot3.eps b/report/pyp_clustering/acl09-short/code/plot3.eps new file mode 100644 index 00000000..f4ffbb62 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/plot3.eps @@ -0,0 +1,721 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The Mathworks, Inc. Version 7.7.0.471 (R2008b). Operating System: Linux 2.6.18-128.1.6.el5.inf.1PAE #1 SMP Wed Apr 15 10:23:41 BST 2009 i686. 
+%%Title: /afs/inf.ed.ac.uk/user/s/sgwater/research/papers/2009/acl09-short/code/plot3.eps +%%CreationDate: 07/23/2009  17:31:43 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox:   -44   170   641   672 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c  /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] +  makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse +  exch dup 3 1 roll findfont dup length dict begin +  { 1 index /FID ne {def}{pop pop} ifelse } forall +  /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse +  exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} +  {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 
dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 +  dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto +  neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill +   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale +  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale +  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale +  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale +  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef +  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly +  vradius add translate hradius vradius scale 0 0 1 180 270 arc  +  tMatrix setmatrix lrx hradius sub uly vradius add translate +  hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix +  lrx hradius sub lry vradius sub translate hradius vradius scale +  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub +  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix +  closepath} bdef +/FRR {MRR stroke } bdef +/PRR 
{MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def +  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate +  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad +  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix +  closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def +  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate +  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad +  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix +  closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup +  ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { +  2 0 asub 3 1 asub 4 0 asub 5 1 asub +  dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { +  dmat dtri tri_to_matrix tmat1 invertmatrix  +  smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string +  currentfile  +  3 index 0 eq {/ASCIIHexDecode filter} +  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } +  ifelse exch readstring pop +  dup 0 3 index getinterval /rbmap xdef +  dup 2 index dup getinterval /gbmap xdef +  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c +  cols rows 8 compute_transform  +  rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox:   -44   170   641   672 +MathWorks begin +bpage +%%EndPageSetup + 
+%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode -0528 8064 csm + +    0     0  8231  6023 rc +88 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg +   0    0 8232 6024 rf +6 w +0 4908 6379 0 0 -4908 1070 5360 4 MP +PP +-6379 0 0 4908 6379 0 0 -4908 1070 5360 5 MP stroke +4 w +DO +SO +6 w +0 sg +1070 5360 mt 7449 5360 L +1070  452 mt 7449  452 L +1070 5360 mt 1070  452 L +7449 5360 mt 7449  452 L +1070 5360 mt 7449 5360 L +1070 5360 mt 1070  452 L +1247 5360 mt 1247 5296 L +1247  452 mt 1247  515 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1194 5572 mt  +(1) s +1780 5360 mt 1780 5296 L +1780  452 mt 1780  515 L +1754 5572 mt  +( ) s +2092 5360 mt 2092 5296 L +2092  452 mt 2092  515 L +2066 5572 mt  +( ) s +2314 5360 mt 2314 5296 L +2314  452 mt 2314  515 L +2288 5572 mt  +( ) s +2485 5360 mt 2485 5296 L +2485  452 mt 2485  515 L +2459 5572 mt  +( ) s +2626 5360 mt 2626 5296 L +2626  452 mt 2626  515 L +2600 5572 mt  +( ) s +2744 5360 mt 2744 5296 L +2744  452 mt 2744  515 L +2718 5572 mt  +( ) s +2847 5360 mt 2847 5296 L +2847  452 mt 2847  515 L +2821 5572 mt  +( ) s +2938 5360 mt 2938 5296 L +2938  452 mt 2938  515 L +2912 5572 mt  +( ) s +3019 5360 mt 3019 5296 L +3019  452 mt 3019  515 L +2913 5572 mt  +(10) s +3552 5360 mt 3552 5296 L +3552  452 mt 3552  515 L +3526 5572 mt  +( ) s +3864 5360 mt 3864 5296 L +3864  452 mt 3864  515 L +3838 5572 mt  +( ) s +4085 5360 mt 4085 5296 L +4085  452 mt 4085  515 L +4059 5572 mt  +( ) s +4257 5360 mt 4257 5296 L +4257  452 mt 4257  515 L +4231 5572 mt  +( ) s +4397 5360 mt 4397 5296 L +4397  452 mt 4397  515 L +4371 5572 mt  +( ) s 
+4516 5360 mt 4516 5296 L +4516  452 mt 4516  515 L +4490 5572 mt  +( ) s +4619 5360 mt 4619 5296 L +4619  452 mt 4619  515 L +4593 5572 mt  +( ) s +4710 5360 mt 4710 5296 L +4710  452 mt 4710  515 L +4684 5572 mt  +( ) s +4791 5360 mt 4791 5296 L +4791  452 mt 4791  515 L +4631 5572 mt  +(100) s +5324 5360 mt 5324 5296 L +5324  452 mt 5324  515 L +5298 5572 mt  +( ) s +5636 5360 mt 5636 5296 L +5636  452 mt 5636  515 L +5610 5572 mt  +( ) s +5857 5360 mt 5857 5296 L +5857  452 mt 5857  515 L +5831 5572 mt  +( ) s +6029 5360 mt 6029 5296 L +6029  452 mt 6029  515 L +6003 5572 mt  +( ) s +6169 5360 mt 6169 5296 L +6169  452 mt 6169  515 L +6143 5572 mt  +( ) s +6288 5360 mt 6288 5296 L +6288  452 mt 6288  515 L +6262 5572 mt  +( ) s +6391 5360 mt 6391 5296 L +6391  452 mt 6391  515 L +6365 5572 mt  +( ) s +6481 5360 mt 6481 5296 L +6481  452 mt 6481  515 L +6455 5572 mt  +( ) s +6563 5360 mt 6563 5296 L +6563  452 mt 6563  515 L +6350 5572 mt  +(1000) s +7096 5360 mt 7096 5296 L +7096  452 mt 7096  515 L +7070 5572 mt  +( ) s +7408 5360 mt 7408 5296 L +7408  452 mt 7408  515 L +7382 5572 mt  +( ) s +1070 5354 mt 1133 5354 L +7449 5354 mt 7385 5354 L + 982 5425 mt  +( ) s +1070 5257 mt 1133 5257 L +7449 5257 mt 7385 5257 L + 982 5328 mt  +( ) s +1070 5171 mt 1133 5171 L +7449 5171 mt 7385 5171 L + 929 5242 mt  +(1) s +1070 4602 mt 1133 4602 L +7449 4602 mt 7385 4602 L + 982 4673 mt  +( ) s +1070 4270 mt 1133 4270 L +7449 4270 mt 7385 4270 L + 982 4341 mt  +( ) s +1070 4034 mt 1133 4034 L +7449 4034 mt 7385 4034 L + 982 4105 mt  +( ) s +1070 3851 mt 1133 3851 L +7449 3851 mt 7385 3851 L + 982 3922 mt  +( ) s +1070 3702 mt 1133 3702 L +7449 3702 mt 7385 3702 L + 982 3773 mt  +( ) s +1070 3575 mt 1133 3575 L +7449 3575 mt 7385 3575 L + 982 3646 mt  +( ) s +1070 3466 mt 1133 3466 L +7449 3466 mt 7385 3466 L + 982 3537 mt  +( ) s +1070 3369 mt 1133 3369 L +7449 3369 mt 7385 3369 L + 982 3440 mt  +( ) s +1070 3283 mt 1133 3283 L +7449 3283 mt 7385 3283 L + 822 3354 mt  
+(10) s +1070 2715 mt 1133 2715 L +7449 2715 mt 7385 2715 L + 982 2786 mt  +( ) s +1070 2382 mt 1133 2382 L +7449 2382 mt 7385 2382 L + 982 2453 mt  +( ) s +1070 2147 mt 1133 2147 L +7449 2147 mt 7385 2147 L + 982 2218 mt  +( ) s +1070 1964 mt 1133 1964 L +7449 1964 mt 7385 1964 L + 982 2035 mt  +( ) s +1070 1814 mt 1133 1814 L +7449 1814 mt 7385 1814 L + 982 1885 mt  +( ) s +1070 1688 mt 1133 1688 L +7449 1688 mt 7385 1688 L + 982 1759 mt  +( ) s +1070 1578 mt 1133 1578 L +7449 1578 mt 7385 1578 L + 982 1649 mt  +( ) s +1070 1482 mt 1133 1482 L +7449 1482 mt 7385 1482 L + 982 1553 mt  +( ) s +1070 1395 mt 1133 1395 L +7449 1395 mt 7385 1395 L + 715 1466 mt  +(100) s +1070  827 mt 1133  827 L +7449  827 mt 7385  827 L + 982  898 mt  +( ) s +1070  495 mt 1133  495 L +7449  495 mt 7385  495 L + 982  566 mt  +( ) s +1070 5360 mt 7449 5360 L +1070  452 mt 7449  452 L +1070 5360 mt 1070  452 L +7449 5360 mt 7449  452 L +gs 1070 452 6380 4909 rc +24 w +438 -1 439 -2 439 -1 439 -2 439 -1 439 -2 439 -1 439 -2  +438 -1 439 -2 439 -1 439 -3 439 -3 1466 5171 14 MP stroke +gr + +24 w +  48   48 1466 5171 FO +  48   48 1905 5168 FO +  48   48 2344 5165 FO +  48   48 2783 5164 FO +  48   48 3222 5162 FO +  48   48 3660 5161 FO +  48   48 4099 5159 FO +  48   48 4538 5158 FO +  48   48 4977 5157 FO +  48   48 5416 5154 FO +  48   48 5855 5154 FO +  48   48 6294 5153 FO +  48   48 6733 5151 FO +  48   48 7171 5153 FO +gs 1070 452 6380 4909 rc +gr + +0 j +-55 95 -55 -95 110 0 1411 5203 4 MP +DP +-55 95 -55 -95 110 0 1850 5200 4 MP +DP +-55 95 -55 -95 110 0 2289 5197 4 MP +DP +-55 95 -55 -95 110 0 2728 5196 4 MP +DP +-55 95 -55 -95 110 0 3167 5194 4 MP +DP +-55 95 -55 -95 110 0 3605 5193 4 MP +DP +-55 95 -55 -95 110 0 4044 5191 4 MP +DP +-55 95 -55 -95 110 0 4483 5190 4 MP +DP +-55 95 -55 -95 110 0 4922 5189 4 MP +DP +-55 95 -55 -95 110 0 5361 5185 4 MP +DP +-55 95 -55 -95 110 0 5800 5186 4 MP +DP +-55 95 -55 -95 110 0 6239 5184 4 MP +DP +-55 95 -55 -95 110 0 6678 5182 4 MP +DP +-55 
95 -55 -95 110 0 7116 5187 4 MP +DP +gs 1070 452 6380 4909 rc +/c8 { 1.000000 0.400000 0.200000 sr} bdef +c8 +438 -12 439 -12 439 -13 439 -13 439 -13 439 -13 439 -14 439 -13  +438 -15 439 -15 439 -14 439 -19 439 -31 1466 5171 14 MP stroke +gr + +c8 +  48   48 1466 5171 FO +  48   48 1905 5140 FO +  48   48 2344 5121 FO +  48   48 2783 5108 FO +  48   48 3222 5092 FO +  48   48 3660 5077 FO +  48   48 4099 5065 FO +  48   48 4538 5050 FO +  48   48 4977 5036 FO +  48   48 5416 5022 FO +  48   48 5855 5013 FO +  48   48 6294 4999 FO +  48   48 6733 4995 FO +  48   48 7171 4967 FO +gs 1070 452 6380 4909 rc +gr + +-55 95 -55 -95 110 0 1411 5203 4 MP +DP +-55 95 -55 -95 110 0 1850 5173 4 MP +DP +-55 95 -55 -95 110 0 2289 5153 4 MP +DP +-55 95 -55 -95 110 0 2728 5139 4 MP +DP +-55 95 -55 -95 110 0 3167 5122 4 MP +DP +-55 95 -55 -95 110 0 3605 5105 4 MP +DP +-55 95 -55 -95 110 0 4044 5089 4 MP +DP +-55 95 -55 -95 110 0 4483 5070 4 MP +DP +-55 95 -55 -95 110 0 4922 5053 4 MP +DP +-55 95 -55 -95 110 0 5361 5036 4 MP +DP +-55 95 -55 -95 110 0 5800 5021 4 MP +DP +-55 95 -55 -95 110 0 6239 5007 4 MP +DP +-55 95 -55 -95 110 0 6678 4984 4 MP +DP +-55 95 -55 -95 110 0 7116 4944 4 MP +DP +gs 1070 452 6380 4909 rc +/c9 { 0.400000 0.400000 1.000000 sr} bdef +c9 +438 -44 439 -46 439 -50 439 -51 439 -57 439 -61 439 -65 439 -72  +438 -81 439 -89 439 -85 439 -130 439 -215 1466 5171 14 MP stroke +gr + +c9 +  48   48 1466 5171 FO +  48   48 1905 4956 FO +  48   48 2344 4826 FO +  48   48 2783 4741 FO +  48   48 3222 4651 FO +  48   48 3660 4571 FO +  48   48 4099 4500 FO +  48   48 4538 4435 FO +  48   48 4977 4372 FO +  48   48 5416 4316 FO +  48   48 5855 4268 FO +  48   48 6294 4214 FO +  48   48 6733 4167 FO +  48   48 7171 4121 FO +gs 1070 452 6380 4909 rc +gr + +-55 95 -55 -95 110 0 1411 5203 4 MP +DP +-55 95 -55 -95 110 0 1850 5057 4 MP +DP +-55 95 -55 -95 110 0 2289 4938 4 MP +DP +-55 95 -55 -95 110 0 2728 4838 4 MP +DP +-55 95 -55 -95 110 0 3167 4706 4 MP +DP +-55 95 -55 -95 110 
0 3605 4555 4 MP +DP +-55 95 -55 -95 110 0 4044 4385 4 MP +DP +-55 95 -55 -95 110 0 4483 4185 4 MP +DP +-55 95 -55 -95 110 0 4922 3940 4 MP +DP +-55 95 -55 -95 110 0 5361 3650 4 MP +DP +-55 95 -55 -95 110 0 5800 3310 4 MP +DP +-55 95 -55 -95 110 0 6239 2895 4 MP +DP +-55 95 -55 -95 110 0 6678 2492 4 MP +DP +-55 95 -55 -95 110 0 7116 2000 4 MP +DP +gs 1070 452 6380 4909 rc +/c10 { 0.000000 0.700000 0.500000 sr} bdef +c10 +438 -74 439 -78 439 -89 439 -98 439 -111 439 -129 439 -148 439 -173  +438 -211 439 -248 439 -244 439 -364 439 -564 1466 5171 14 MP stroke +gr + +c10 +  48   48 1466 5171 FO +  48   48 1905 4607 FO +  48   48 2344 4243 FO +  48   48 2783 3999 FO +  48   48 3222 3751 FO +  48   48 3660 3540 FO +  48   48 4099 3368 FO +  48   48 4538 3220 FO +  48   48 4977 3090 FO +  48   48 5416 2979 FO +  48   48 5855 2882 FO +  48   48 6294 2791 FO +  48   48 6733 2713 FO +  48   48 7171 2638 FO +gs 1070 452 6380 4909 rc +gr + +-55 95 -55 -95 110 0 1411 5203 4 MP +DP +-55 95 -55 -95 110 0 1850 4905 4 MP +DP +-55 95 -55 -95 110 0 2289 4630 4 MP +DP +-55 95 -55 -95 110 0 2728 4368 4 MP +DP +-55 95 -55 -95 110 0 3167 4001 4 MP +DP +-55 95 -55 -95 110 0 3605 3574 4 MP +DP +-55 95 -55 -95 110 0 4044 3123 4 MP +DP +-55 95 -55 -95 110 0 4483 2661 4 MP +DP +-55 95 -55 -95 110 0 4922 2196 4 MP +DP +-55 95 -55 -95 110 0 5361 1735 4 MP +DP +-55 95 -55 -95 110 0 5800 1279 4 MP +DP +-55 95 -55 -95 110 0 6239 873 4 MP +DP +gs 1070 452 6380 4909 rc +gr + +0 sg + 617 4557 mt  -90 rotate +(Mean number of lexical entries \(tables\)) s +90 rotate +3390 5724 mt  +(Word frequency \(n) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + +4963 5820 mt  +(w) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +5066 5724 mt  +(\)) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +1053 5403 mt  +( ) s +7433  494 mt  +( ) s +6 w +1 sg +0 729 2519 0 0 -729 1129 1241 4 MP +PP +-2519 0 0 729 2519 0 0 -729 1129 
1241 5 MP stroke +4 w +DO +SO +6 w +0 sg +1129 1241 mt 3648 1241 L +1129  512 mt 3648  512 L +1129 1241 mt 1129  512 L +3648 1241 mt 3648  512 L +1129 1241 mt 3648 1241 L +1129 1241 mt 1129  512 L +1129 1241 mt 3648 1241 L +1129  512 mt 3648  512 L +1129 1241 mt 1129  512 L +3648 1241 mt 3648  512 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1609  712 mt  +(Expectation) s +gs 1129 512 2520 730 rc +24 w +370 0 1202 644 2 MP stroke +gr + +24 w +1609  945 mt  +(Empirical, fixed base) s +gs 1129 512 2520 730 rc +gs 1265 754 245 245 rc +  48   48 1387  876 FO +gr + +gr + +1609 1178 mt  +(Empirical, inferred base) s +gs 1129 512 2520 730 rc +gs 1265 986 245 245 rc +-55 95 -55 -95 110 0 1332 1140 4 MP +DP +gr + +6 w +gr + +6 w + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/report/pyp_clustering/acl09-short/code/plot3.pdf b/report/pyp_clustering/acl09-short/code/plot3.pdf Binary files differnew file mode 100644 index 00000000..a3e81faa --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/plot3.pdf diff --git a/report/pyp_clustering/acl09-short/code/pygibbs3.c b/report/pyp_clustering/acl09-short/code/pygibbs3.c new file mode 100644 index 00000000..3c2240a1 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/pygibbs3.c @@ -0,0 +1,198 @@ +#include <stdio.h> +#include <math.h> + +#define myrand() (double) (((unsigned long) randomMT()) / 4294967296.) 
+ +#define W 30114 +#define N 831190  +#define KWMAX 1000 + +#define NLOOPS 1000 +#define BURNIN 0 +#define SAMPLEFREQ 1 + +#define ALPHA 0.0 // PYB a +//#define GAMMA 1000000000.0 +#define GAMMA .01 // Dirichlet over multinomial P0 + +double BETA; // CRP alpha (PYB b) +int w[N], z[N]; // words, table assignments +int typecount[W], typetot; //# of tables of each type, total # tables +int usedcount[W]; +double ztot[W][KWMAX]; +double k; // total # tables +int nactive; + +void initialise(void); +void anderson(void); +void fileread(void); + +void initialise(void) +{ +  int i,j;  + +  for (i = 1; i < W; i++) { +    typecount[i] = 0; +    usedcount[i] = 0; +    for (j = 0; j < KWMAX; j++) { +      ztot[i][j] = 0; +    } +  } + +} + +void anderson(void)  //stochastic Anderson-style initialisation +{ +  int i,j, tag; +  double max, totprob, r, runtot; +  double probs[KWMAX]; +  int ind, temp; +   +  ztot[w[0]][0] = 1; +  z[0] = 0; +  typecount[w[0]] = 1; +  usedcount[w[0]] = 1; +  k = 1;				        +  typetot = 1; + +  for (i = 1; i < N; i++) { +    //    printf("%5d\n", w[i]); +    max = 0; tag = 0; totprob = 0; +    for (j = 0; j < usedcount[w[i]]; j++) { +      probs[j] = ztot[w[i]][j] - ALPHA; +	totprob += probs[j]; +    }  +    probs[usedcount[w[i]]] = (ALPHA*k+BETA)*((double) typecount[w[i]]+GAMMA)/((double) typetot+W*GAMMA); +    totprob += probs[usedcount[w[i]]]; +    //    printf("%10.6lf\n",totprob); +    r = myrand()*totprob; +    max = probs[0]; +    j = 0; +    while (r>max) { +      j++; +      max += probs[j]; +    } +    //    printf("%5d\n",j); +    z[i] = j; +    ztot[w[i]][j]++; +    if (ztot[w[i]][j]==1) { +      typecount[w[i]]++; +      usedcount[w[i]]++; +      if (usedcount[w[i]]==KWMAX) { +	printf("Maximum number of tables exceeded!!!\n"); +      } +      typetot++; +      k++; +    } +  } +} + +void fileread(void)  +{ +  int i,j, wt; +  FILE *fileptr; +   +  fileptr = fopen("wsj.dat", "r");  + +  for (i = 1; i < N; i++) { +    fscanf(fileptr, 
"%d", &wt); +    w[i] = wt-1; +    z[i] = 0; +  } +  printf("Total cases: %10d\n", N); +  fclose(fileptr); +} + +main(int argc, char* argv[]) +{ +  int i,j,loop,run; +  int temp,ind, tag; +  double newprob, WBETA; +  double probs[KWMAX]; +  double max, totprob, r; +  int sampcount; +  FILE *fileptr; +  char filename[30]; +  double score; + +  if (argc < 2) { +    printf("Please provide a value of b\n"); +    exit(0); +  } +  BETA = strtol(argv[1]); +  printf("Basic initialising...\n"); + +   // you can seed with any uint32, but the best are odds in 0..(2^32 - 1) +  seedMT(4157U); + +  sprintf(filename,"typecountrecordwsjpeak%0.1f.%0.1f.dat",ALPHA,BETA); +  fileptr = fopen(filename, "w"); + +  printf("Reading from file...\n"); +  fileread(); + +  printf("Initialising...\n"); +  initialise(); +    printf("k = %1.0f, typetot = %d\n",k,typetot); + +  printf("Finding start state...\n"); +  anderson(); +  printf("Beginning burnin...\n"); +  for (loop = 0; loop < NLOOPS; loop++) { +    for (i = 0; i < N; i++) { +      j = z[i]; +      ztot[w[i]][j]--; +      if (ztot[w[i]][j] == 0) { +	if (j==usedcount[w[i]]) { +	  usedcount[w[i]]--; +	} +	typecount[w[i]]--; +	typetot--; +	k--; +      } +      max = 0; tag = 0; totprob = 0; +      for (j = 0; j <= usedcount[w[i]]; j++) { +	if (ztot[w[i]][j] > 0) { +	  probs[j] = ztot[w[i]][j] - ALPHA; +	} else { +	  probs[j] = 0;  +	  if (tag == 0) { +	    probs[j] = (ALPHA*k+BETA)*(((double) typecount[w[i]])+GAMMA)/(((double) typetot)+((double) W)*GAMMA); +	    tag = 1; +	  } +	} +	totprob += probs[j]; +      } +      r = myrand()*totprob; +      max = probs[0]; +      j = 0; +      while (r>max) { +	j++; +	max += probs[j]; +      } +      z[i] = j; +      ztot[w[i]][j]++; +      if (ztot[w[i]][j]==1) { +	if (j == usedcount[w[i]]) { +	  usedcount[w[i]]++; +	  if (usedcount[w[i]]==KWMAX) { +	    printf("Maximum number of tables exceeded!!!\n"); +	  } +	} +	typecount[w[i]]++; +	typetot++; +	k++; +      }       +    } +    printf("Completed 
sample # %5d\n", loop); +    if (k != typetot)  printf("k = %1.0f, typetot = %d\n",k,typetot); +    if (loop >= BURNIN && loop % SAMPLEFREQ == 0) { +      for (i = 0; i < W; i++) { +	fprintf(fileptr," %d", typecount[i]); //print (table?) count for each word type +      } +      fprintf(fileptr,"\n"); +    } +  } +  fclose(fileptr); +} +   diff --git a/report/pyp_clustering/acl09-short/code/pygibbs_geom b/report/pyp_clustering/acl09-short/code/pygibbs_geom Binary files differnew file mode 100755 index 00000000..14ae82f1 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/pygibbs_geom diff --git a/report/pyp_clustering/acl09-short/code/pygibbs_geom.c b/report/pyp_clustering/acl09-short/code/pygibbs_geom.c new file mode 100644 index 00000000..bafa0416 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/pygibbs_geom.c @@ -0,0 +1,212 @@ +#include <stdio.h> +#include <math.h> + +#define myrand() (double) (((unsigned long) randomMT()) / 4294967296.) + +#define W 30114 +#define N 831190  +#define KWMAX 5000 + +#define NLOOPS 11000 +#define BURNIN 1000 +#define SAMPLEFREQ 10 + +#define ALPHA 0.0 // PYB a +//#define GAMMA 1000000000.0 +#define GAMMA .01 // Dirichlet over multinomial P0 + +double BETA; // CRP alpha (PYB b) +int w[N], z[N]; // words, table assignments +double base[N]; // base prob of word under geometric +int typecount[W], typetot; //# of tables of each type, total # tables +int usedcount[W]; +double ztot[W][KWMAX]; +double k; // total # tables +int nactive; + +void initialise(void); +void anderson(void); +void fileread(void); + +void initialise(void) +{ +  int i,j;  + +  for (i = 1; i < W; i++) { +    typecount[i] = 0; +    usedcount[i] = 0; +    for (j = 0; j < KWMAX; j++) { +      ztot[i][j] = 0; +    } +  } + +} + +double base_p(int len) { +  double p = 1.0/26; +  return pow(p,len)*pow(.5,len); //assume p_# = .5 +} + +void anderson(void)  //stochastic Anderson-style initialisation +{ +  int i,j, tag; +  double max, totprob, r, runtot; +  
double probs[KWMAX]; +  int ind, temp; +   +  ztot[w[0]][0] = 1; +  z[0] = 0; +  typecount[w[0]] = 1; +  usedcount[w[0]] = 1; +  k = 1;				        +  typetot = 1; + +  for (i = 1; i < N; i++) { +    //    printf("%5d\n", w[i]); +    max = 0; tag = 0; totprob = 0; +    for (j = 0; j < usedcount[w[i]]; j++) { +      probs[j] = ztot[w[i]][j] - ALPHA; +	totprob += probs[j]; +    }  +    probs[usedcount[w[i]]] = (ALPHA*k+BETA)*base[i]; +    totprob += probs[usedcount[w[i]]]; +    //    printf("%10.6lf\n",totprob); +    r = myrand()*totprob; +    max = probs[0]; +    j = 0; +    while (r>max) { +      j++; +      max += probs[j]; +    } +    //    printf("%5d\n",j); +    z[i] = j; +    ztot[w[i]][j]++; +    if (ztot[w[i]][j]==1) { +      typecount[w[i]]++; +      usedcount[w[i]]++; +      if (usedcount[w[i]]==KWMAX) { +	printf("Maximum number of tables exceeded!!!\n"); +      } +      typetot++; +      k++; +    } +  } +} + +void fileread(void)  +{ +  int i,j, wt, len; +  FILE *fileptr; +   +  fileptr = fopen("wsj.dat", "r");  + +  for (i = 1; i < N; i++) { +    fscanf(fileptr, "%d", &wt); +    w[i] = wt-1; +    z[i] = 0; +  } +  printf("Total cases: %10d\n", N); +  fclose(fileptr); + +  fileptr = fopen("wsj_lengths.dat", "r");  + +  for (i = 1; i < N; i++) { +    fscanf(fileptr, "%d", &len); +    base[i] = base_p(len); +  } +  fclose(fileptr); +} + +main(int argc, char* argv[]) +{ +  int i,j,loop,run; +  int temp,ind, tag; +  double newprob, WBETA; +  double probs[KWMAX]; +  double max, totprob, r; +  int sampcount; +  FILE *fileptr; +  char filename[30]; +  double score; + +  if (argc < 2) { +    printf("Please provide a value of b\n"); +    exit(0); +  } +  BETA = strtol(argv[1]); +  printf("Basic initialising...\n"); + +   // you can seed with any uint32, but the best are odds in 0..(2^32 - 1) +  seedMT(4157U); + +  sprintf(filename,"typecountrecordwsjgeom%0.1f.%0.1f.dat",ALPHA,BETA); +  fileptr = fopen(filename, "w"); + +  printf("Reading from file...\n"); +  
fileread(); + +  printf("Initialising...\n"); +  initialise(); +    printf("k = %1.0f, typetot = %d\n",k,typetot); + +  printf("Finding start state...\n"); +  anderson(); +  printf("Beginning burnin...\n"); +  for (loop = 0; loop < NLOOPS; loop++) { +    for (i = 0; i < N; i++) { +      j = z[i]; +      ztot[w[i]][j]--; +      if (ztot[w[i]][j] == 0) { +	if (j==usedcount[w[i]]) { +	  usedcount[w[i]]--; +	} +	typecount[w[i]]--; +	typetot--; +	k--; +      } +      max = 0; tag = 0; totprob = 0; +      for (j = 0; j <= usedcount[w[i]]; j++) { +	if (ztot[w[i]][j] > 0) { +	  probs[j] = ztot[w[i]][j] - ALPHA; +	} else { +	  probs[j] = 0;  +	  if (tag == 0) { +	    probs[j] = (ALPHA*k+BETA)*base[i]; +	    tag = 1; +	  } +	} +	totprob += probs[j]; +      } +      r = myrand()*totprob; +      max = probs[0]; +      j = 0; +      while (r>max) { +	j++; +	max += probs[j]; +      } +      z[i] = j; +      ztot[w[i]][j]++; +      if (ztot[w[i]][j]==1) { +	if (j == usedcount[w[i]]) { +	  usedcount[w[i]]++; +	  if (usedcount[w[i]]==KWMAX) { +	    printf("Maximum number of tables exceeded!!!\n"); +	  } +	} +	typecount[w[i]]++; +	typetot++; +	k++; +      }       +    } +    printf("Completed sample # %5d\n", loop); +    if (k != typetot)  printf("k = %1.0f, typetot = %d\n",k,typetot); +    if (loop >= BURNIN && loop % SAMPLEFREQ == 0) { +      for (i = 0; i < W; i++) { +	fprintf(fileptr," %d", typecount[i]); //print (table?) 
count for each word type +      } +      fprintf(fileptr,"\n"); +    } +  } +  fclose(fileptr); +} +   diff --git a/report/pyp_clustering/acl09-short/code/run-peak.prl b/report/pyp_clustering/acl09-short/code/run-peak.prl new file mode 100755 index 00000000..fb1e798a --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/run-peak.prl @@ -0,0 +1,8 @@ +#!/usr/bin/perl + +for $i (0..5) { +$beta = 10**$i; +$cmd = "pygibbs_peak $beta\n"; +print $cmd; +`$cmd`; +} diff --git a/report/pyp_clustering/acl09-short/code/run.prl b/report/pyp_clustering/acl09-short/code/run.prl new file mode 100755 index 00000000..ac69559c --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/run.prl @@ -0,0 +1,8 @@ +#!/usr/bin/perl + +for $i (0..5) { +$beta = 10**$i; +$cmd = "pygibbs_geom $beta\n"; +print $cmd; +`$cmd`; +} diff --git a/report/pyp_clustering/acl09-short/code/word_lengths.prl b/report/pyp_clustering/acl09-short/code/word_lengths.prl new file mode 100755 index 00000000..4b4ed03b --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/word_lengths.prl @@ -0,0 +1,21 @@ +#!/usr/bin/perl -w +use Getopt::Std; +use File::Basename; +use List::Util qw(max maxstr min minstr reduce shuffle sum); +use lib "$ENV{HOME}/src/perl/"; +use sg_utils; +use strict; +use vars qw(); + +my $usage = "Usage: $0 \n"; + +getopts(''); + +die $usage unless (1); + +while (<>) { +chomp; +print length; +print "\n"; +} + diff --git a/report/pyp_clustering/acl09-short/code/wsjplots2.m b/report/pyp_clustering/acl09-short/code/wsjplots2.m new file mode 100644 index 00000000..eed41846 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/wsjplots2.m @@ -0,0 +1,99 @@ + +load wsj + +figure(1) +clf  +subplot(1,2,2) +hold on + +for i = 1:9 +  a = i/10; +  [logbins predicted dummy] = logbinmean(counts,counts.^a,20,20); +  ph = plot(log10(logbins),log10(predicted),'k'); +  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) +end + +for i = 1:9 +  a = i/10;   +  disp(['Loading results for a = ' num2str(a) ]); + + 
 typecountrecord= load([ 'typecountrecordwsjflat' num2str(a) '.1.0.dat']); +   +  typecountrecordmean = mean(typecountrecord(500:1000,:)); +   +  save([ 'typecountrecordmeanwsjflat' num2str(a) '.1.0.mat'],'typecountrecordmean'); +   +  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20) +  errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); +  drawnow +end + + + + +[logbins meanval seval] = logbinmean(counts,counts,20,20) +[logbins predicted dummy] = logbinmean(counts,counts,20,20) +ph = plot(log10(logbins),log10(predicted),'r'); +hold on +errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); + +set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-0.1 3.5]) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); + +title('Pitman-Yor process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +box on + +subplot(1,2,1) + +for i = 1:5 + +  b = 10^(i-1) + +  disp(['Loading results for b = ' num2str(b) ]); +  typecountrecord= load([ 'typecountrecordwsjflat0.0.' num2str(b) '.0.dat']); + +  typecountrecordmean = mean(typecountrecord(500:1000,:)); +  save([ 'typecountrecordmeanwsjflat0.0.' 
num2str(b) '.0.mat'],'typecountrecordmean'); +   +  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20) +  [logbins predicted dummy] = logbinmean(counts,crppred(counts,b),20,20) +%  errorbar(log10(logbins),meanval,seval,'k.'); +  hold on +  ph = plot(log10(logbins),log10(predicted),'r'); +  %  ph = plot(log10(logbins),predicted,'r'); +  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) +  errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-0.1 1.5]) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+		    ' ', ' ', ' ', ' '}); +title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +box on + + diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl.m new file mode 100644 index 00000000..50582e7f --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/wsjplots_acl.m @@ -0,0 +1,74 @@ +%wsj_lengths = load([ 'wsj_lengths.dat']); +%save([ 'wsj_lengths.mat'],'wsj_lengths'); +load wsj +load wsj_lengths + +figure(1) +clf  + +hold on + +for i = 3:6 + +    b = 10^(i-1) +    +  % plot lines for CRP exact prediction using summation +  [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20); +  ph = plot(log10(logbins),log10(predicted),'r'); +  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) + +  % plot lines for CRP Antoniak prediction +  [logbins predicted dummy] = logbinmean(counts, antoniakpred(counts,b),20,20); +  ph = plot(log10(logbins),log10(predicted),'r'); +  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','--') + +  % plot lines for CRP Cohn prediction +  %[logbins predicted dummy] = logbinmean(counts, cohnpred(counts,b),20,20); +  %ph = plot(log10(logbins),log10(predicted),'r'); +  %set(ph,'color',[0.2 0.2 1],'linewidth',1.5,'linestyle','.') + +  disp(['Loading results for b = ' num2str(b) ]); +  %%%  uncomment these lines if .mat file is not yet generated. %%% +  %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']); +  %typecountrecordmean = mean(typecountrecord(:,:)); +  %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); +  load([ 'outputs/typecountrecordmeanwsjflat0.0.' 
num2str(b) '.0.mat']); +   +  %plot emprical counts with error bars +  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); +  plot(log10(logbins),log10(meanval),'k*'); +  %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); + +  disp(['Loading results for b = ' num2str(b) ]); +  %%%  uncomment these lines if .mat file is not yet generated. %%% +  %typecountrecord= load([ 'outputs/typecountrecordwsjpeak0.0.' num2str(b) '.0.dat']); +  %typecountrecordmean = mean(typecountrecord(:,:)); +  %save([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); +  load([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat']); + +  %plot emprical counts with error bars +  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); +  plot(log10(logbins),log10(meanval),'ko'); +  %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'ko'); + +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-1.1 2.5]) +set(gca,'FontSize',14) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +    '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+		    ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +legend('Expectation','Antoniak approx.','Empirical, fixed base','Empirical, inferred base','Location','NorthWest') +box on diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_monkeys.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_monkeys.m new file mode 100644 index 00000000..33419845 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/wsjplots_acl_monkeys.m @@ -0,0 +1,164 @@ +%wsj_lengths = load([ 'wsj_lengths.dat']); +%save([ 'wsj_lengths.mat'],'wsj_lengths'); +load wsj +load wsj_lengths + +figure(1) +clf  + +subplot(1,3,1); +hold on + +for i = 2:6 + +    b = 10^(i-1) + +  disp(['Loading results for b = ' num2str(b) ]); +%%%  uncomment these lines if .mat file is not yet generated. %%% +  %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']); +  %typecountrecordmean = mean(typecountrecord(500:999,:)); +  %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); +  load([ 'outputs/typecountrecordmeanwsjflat0.0.' 
num2str(b) '.0.mat']); +    +  % plot lines for CRP exact prediction using summation +  [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20); +  ph = plot(log10(logbins),log10(predicted),'r'); +  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) + +  % plot lines for CRP Antoniak prediction +  [logbins predicted dummy] = logbinmean(counts, antoniakpred(counts,b),20,20); +  ph = plot(log10(logbins),log10(predicted),'r'); +  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','--') + +  % plot lines for incorrect CRP Antoniak prediction (ACL07) +  %[logbins predicted dummy] = logbinmean(counts, noP0pred(counts,b),20,20); +  %ph = plot(log10(logbins),log10(predicted),'r'); +  %set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','-.') + +  % plot lines for CRP Cohn prediction +  %[logbins predicted dummy] = logbinmean(counts, cohnpred(counts,b),20,20); +  %ph = plot(log10(logbins),log10(predicted),'r'); +  %set(ph,'color',[0.2 0.2 1],'linewidth',1.5,'linestyle','.') + +   %plot emprical counts with error bars +  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); +  errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-1.1 1.5]) +set(gca,'FontSize',14) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +    '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+		    ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +legend('Expectation','Antoniak approx.','Empirical','Location','NorthWest') +box on + + +subplot(1,3,2); +hold on + +for i =2:6 + +  b = 10^(i-1) + +  disp(['Loading results for b = ' num2str(b) ]); +%%%  uncomment these lines if .mat file is not yet generated. %%% +  %typecountrecord= load([ 'outputs/typecountrecordwsjpeak0.0.' num2str(b) '.0.dat']); +  %typecountrecordmean = mean(typecountrecord(500:999,:)); +  %save([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); +   load([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat']); +    +  % plot lines for CRP exact prediction using summation +  [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20); +  ph = plot(log10(logbins),log10(predicted),'r'); +  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) + +  %plot emprical counts with error bars +  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); +  errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-.1 2.5]) +set(gca,'FontSize',14) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {...%'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +    '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+		    ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +legend('Expectation','Location','NorthWest') +box on +%axis square + + +subplot(1,3,3); +hold on + +for i =2:6 + +  b = 10^(i-1) + +  disp(['Loading results for b = ' num2str(b) ]); +%%%  uncomment these lines if .mat file is not yet generated. %%% +  %typecountrecord= load([ 'outputs/typecountrecordwsjgeom0.0.' num2str(b) '.0.dat']); +  %typecountrecordmean = mean(typecountrecord(500:999,:)); +  %save([ 'outputs/typecountrecordmeanwsjgeom0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); +  load([ 'outputs/typecountrecordmeanwsjgeom0.0.' num2str(b) '.0.mat']); +    +  % plot lines for CRP exact prediction using summation +%  [logbins meaneval seval] = logbinmean(counts, crppred_geom(counts,wsj_lengths,b),20,20) +[logbins meaneval seval] = logbinmean(counts, crppred(counts,b),20,20) + plot(log10(logbins),log10(meaneval),'r.'); +%errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'r.'); +%  ph = plot(log10(logbins),log10(meaneval),'r'); +%  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) + +  %plot emprical counts with error bars +  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); +  errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-.1 2.5]) +set(gca,'FontSize',14) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {...%'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... 
+    '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +legend('Expectation','Location','NorthWest') +box on +hold off +%axis square + + diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_pair.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_pair.m new file mode 100644 index 00000000..1d07e54c --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/wsjplots_acl_pair.m @@ -0,0 +1,117 @@ +%wsj_lengths = load([ 'wsj_lengths.dat']); +%save([ 'wsj_lengths.mat'],'wsj_lengths'); +load wsj +load wsj_lengths + +figure(1) +clf  + +subplot(1,2,1); +hold on + +for i = 3:6 + +    b = 10^(i-1) + +  disp(['Loading results for b = ' num2str(b) ]); +%%%  uncomment these lines if .mat file is not yet generated. %%% +  %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']); +  %typecountrecordmean = mean(typecountrecord(:,:)); +  %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); +  load([ 'outputs/typecountrecordmeanwsjflat0.0.' 
num2str(b) '.0.mat']); +    +  % plot lines for CRP exact prediction using summation +  [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20); +  ph = plot(log10(logbins),log10(predicted),'r'); +  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) + +  % plot lines for CRP Antoniak prediction +  [logbins predicted dummy] = logbinmean(counts, antoniakpred(counts,b),20,20); +  ph = plot(log10(logbins),log10(predicted),'r'); +  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','--') + +  %plot lines for incorrect CRP Antoniak prediction (ACL07) +  %[logbins predicted dummy] = logbinmean(counts, noP0pred(counts,b),20,20); +  %ph = plot(log10(logbins),log10(predicted),'r'); +  %set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle',':') + +  % plot lines for CRP Cohn prediction +  %[logbins predicted dummy] = logbinmean(counts, cohnpred(counts,b),20,20); +  %ph = plot(log10(logbins),log10(predicted),'r'); +  %set(ph,'color',[0.2 0.2 1],'linewidth',1.5,'linestyle','.') + +   %plot emprical counts with error bars +  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); +  errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-1.1 1.5]) +set(gca,'FontSize',14) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +    '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+		    ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +legend('Expectation','Antoniak approx.','Empirical','Location','NorthWest') +box on + + +subplot(1,2,2); +hold on + +for i =3:6 + +  b = 10^(i-1) + +  disp(['Loading results for b = ' num2str(b) ]); +%%%  uncomment these lines if .mat file is not yet generated. %%% +  %typecountrecord= load([ 'outputs/typecountrecordwsjpeak0.0.' num2str(b) '.0.dat']); +  %typecountrecordmean = mean(typecountrecord(:,:)); +  %save([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); +  load([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat']); +    +  % plot lines for CRP exact prediction using summation +  [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20); +  ph = plot(log10(logbins),log10(predicted),'r'); +  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) + +   %plot lines for incorrect CRP Antoniak prediction (ACL07) +  [logbins predicted dummy] = logbinmean(counts, noP0pred(counts,b),20,20); +  ph = plot(log10(logbins),log10(predicted),'r'); +  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','-.') + +  %plot emprical counts with error bars +  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); +  errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-.1 2.5]) +set(gca,'FontSize',14) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {...%'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... 
+    '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +legend('Expectation','GGJ07 approx.','Empirical','Location','NorthWest') +box on +%axis square
\ No newline at end of file diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk0.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk0.m new file mode 100644 index 00000000..dc54dea4 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk0.m @@ -0,0 +1,54 @@ +%wsj_lengths = load([ 'wsj_lengths.dat']); +%save([ 'wsj_lengths.mat'],'wsj_lengths'); +load wsj +load wsj_lengths + +figure(1) +clf  + +hold on + +%colors = [0 0 0; 0 0 1; 1 0 0; 0 1 0]; %pure black, red, blue, green +colors = [0 0 0; 1 .4 .2; .4 .4 1; 0 .7 .5]; %same but less garish +%colors = [0 0 0; .6 .4 .4; .9 .6 .6; 1 .8 .8]; %shades of pink +%colors = [0 0 0; .3 .3 1; .4 .8 1; .5 1 .8]; %blue/green + +for i = 9-[3:6] + +  b = 10^(i-1) +    +  disp(['Loading results for b = ' num2str(b) ]); +  %%%  uncomment these lines if .mat file is not yet generated. %%% +  %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']); +  %typecountrecordmean = mean(typecountrecord(:,:)); +  %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); +  load([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat']); +   +  %plot emprical counts with error bars +  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); +  ph = plot(log10(logbins),log10(meanval)); +  set(ph,'color',colors(i-2,:),'linestyle','o','linewidth',2,'markersize',10); +  %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); + +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-.1 2]) +set(gca,'FontSize',16) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+		    ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +    '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries (tables)') +xlabel('Word frequency (n_w)') +labs = {'\alpha = 100000','\alpha = 10000','\alpha = 1000','\alpha = 100'}; +legend(labs,'Location','NorthWest') +box on diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk1.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk1.m new file mode 100644 index 00000000..dd3615ac --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk1.m @@ -0,0 +1,59 @@ +%wsj_lengths = load([ 'wsj_lengths.dat']); +%save([ 'wsj_lengths.mat'],'wsj_lengths'); +load wsj +load wsj_lengths + +figure(1) +clf  + +hold on + +%colors = [0 0 0; 0 0 1; 1 0 0; 0 1 0]; %pure black, red, blue, green +colors = [0 0 0; 1 .4 .2; .4 .4 1; 0 .7 .5]; %same but less garish +%colors = [0 0 0; .6 .4 .4; .9 .6 .6; 1 .8 .8]; %shades of pink +%colors = [0 0 0; .3 .3 1; .4 .8 1; .5 1 .8]; %blue/green + +for i = 3:6 + +  b = 10^(i-1) +    +  disp(['Loading results for b = ' num2str(b) ]); +  %%%  uncomment these lines if .mat file is not yet generated. %%% +  %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']); +  %typecountrecordmean = mean(typecountrecord(:,:)); +  %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); +  load([ 'outputs/typecountrecordmeanwsjflat0.0.' 
num2str(b) '.0.mat']); +   +  %plot emprical counts with error bars +  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); +  ph = plot(log10(logbins),log10(meanval)); +  set(ph,'color',colors(i-2,:),'linestyle','o','linewidth',2,'markersize',8); +  %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); + +    % plot lines for CRP exact prediction using summation +  [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20); +  ph = plot(log10(logbins),log10(predicted),'r'); +  set(ph,'color',colors(i-2,:),'linewidth',2); + +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-.1 2]) +set(gca,'FontSize',16) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +    '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+		    ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries (tables)') +xlabel('Word frequency (n_w)') +labs = {'Empirical','Expectation'}; +legend(labs,'Location','NorthWest') +box on diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk2.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk2.m new file mode 100644 index 00000000..dd039289 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk2.m @@ -0,0 +1,58 @@ +%wsj_lengths = load([ 'wsj_lengths.dat']); +%save([ 'wsj_lengths.mat'],'wsj_lengths'); +load wsj +load wsj_lengths + +figure(1) +clf  + +hold on + +%colors = [0 0 0; 0 0 1; 1 0 0; 0 1 0]; %pure black, red, blue, green +colors = [0 0 0; 1 .4 .2; .4 .4 1; 0 .7 .5]; %same but less garish +%colors = [0 0 0; .6 .4 .4; .9 .6 .6; 1 .8 .8]; %shades of pink +%colors = [0 0 0; .3 .3 1; .4 .8 1; .5 1 .8]; %blue/green + +for i = 3:6 + +  b = 10^(i-1) +    +  disp(['Loading results for b = ' num2str(b) ]); +  %%%  uncomment these lines if .mat file is not yet generated. %%% +  %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']); +  %typecountrecordmean = mean(typecountrecord(:,:)); +  %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); +  load([ 'outputs/typecountrecordmeanwsjflat0.0.' 
num2str(b) '.0.mat']); +   +    % plot lines for CRP exact prediction using summation +  [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20); +  ph = plot(log10(logbins),log10(predicted),'r'); +  set(ph,'color',colors(i-2,:),'linewidth',2); + +  % plot lines for CRP Antoniak prediction +  [logbins predicted dummy] = logbinmean(counts, antoniakpred(counts,b),20,20); +  ph = plot(log10(logbins),log10(predicted),'r'); +  set(ph,'color',colors(i-2,:),'linewidth',2,'linestyle','--') + +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-1.1 2]) +set(gca,'FontSize',16) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +    '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+		    ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries (tables)') +xlabel('Word frequency (n_w)') +labs = {'Expectation','Antoniak approximation'}; +legend(labs,'Location','NorthWest') +box on diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m new file mode 100644 index 00000000..8d570b7a --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m @@ -0,0 +1,74 @@ +%wsj_lengths = load([ 'wsj_lengths.dat']); +%save([ 'wsj_lengths.mat'],'wsj_lengths'); +load wsj +load wsj_lengths + +figure(1) +clf  + +hold on + +%colors = [0 0 0; 0 0 1; 1 0 0; 0 1 0]; %pure black, red, blue, green +colors = [0 0 0; 1 .4 .2; .4 .4 1; 0 .7 .5]; %similar but less garish +%colors = [0 0 0; .6 .4 .4; .9 .6 .6; 1 .8 .8]; %shades of pink +%colors = [0 0 0; .3 .3 1; .4 .8 1; .5 1 .8]; %blue/green + +for i = 3:6 +  col = colors(i-2,:); +  b = 10^(i-1) +    +  % plot lines for CRP exact prediction using summation +  [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20); +  ph = plot(log10(logbins),log10(predicted)); +  set(ph,'color',col,'linewidth',2); + +  disp(['Loading results for b = ' num2str(b) ]); +  %%%  uncomment these lines if .mat file is not yet generated. %%% +  %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']); +  %typecountrecordmean = mean(typecountrecord(:,:)); +  %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); +  load([ 'outputs/typecountrecordmeanwsjflat0.0.' 
num2str(b) '.0.mat']); +   +  %plot empirical counts with error bars +  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); +  ph = plot(log10(logbins),log10(meanval)); +  %set(ph,'color',col,'linestyle','o','markerfacecolor',col,'markersize',8); +  set(ph,'color',col,'linestyle','o','linewidth',2,'markersize',8); +  %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); + +  disp(['Loading results for b = ' num2str(b) ]); +  %%%  uncomment these lines if .mat file is not yet generated. %%% +  %typecountrecord= load([ 'outputs/typecountrecordwsjpeak0.0.' num2str(b) '.0.dat']); +  %typecountrecordmean = mean(typecountrecord(:,:)); +  %save([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); +  load([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat']); + +  %plot empirical counts with error bars +  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); +  ph = plot(log10(logbins),log10(meanval)); +  %set(ph,'color',col,'linestyle','^','markerfacecolor',col,'markersize',8); +  set(ph,'color',col,'linestyle','^','linewidth',2,'markersize',8); +  %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'ko'); + +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-.1 2.5]) +set(gca,'FontSize',16) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +    '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... 
+		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries (tables)') +xlabel('Word frequency (n_w)') +labs = {'Expectation','Empirical, fixed base','Empirical, inferred base'}; +legend(labs,'Location','NorthWest') +box on diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_cl.m b/report/pyp_clustering/acl09-short/code/wsjplots_cl.m new file mode 100644 index 00000000..eed41846 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/wsjplots_cl.m @@ -0,0 +1,99 @@ + +load wsj + +figure(1) +clf  +subplot(1,2,2) +hold on + +for i = 1:9 +  a = i/10; +  [logbins predicted dummy] = logbinmean(counts,counts.^a,20,20); +  ph = plot(log10(logbins),log10(predicted),'k'); +  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) +end + +for i = 1:9 +  a = i/10;   +  disp(['Loading results for a = ' num2str(a) ]); + +  typecountrecord= load([ 'typecountrecordwsjflat' num2str(a) '.1.0.dat']); +   +  typecountrecordmean = mean(typecountrecord(500:1000,:)); +   +  save([ 'typecountrecordmeanwsjflat' num2str(a) '.1.0.mat'],'typecountrecordmean'); +   +  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20) +  errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); +  drawnow +end + + + + +[logbins meanval seval] = logbinmean(counts,counts,20,20) +[logbins predicted dummy] = logbinmean(counts,counts,20,20) +ph = plot(log10(logbins),log10(predicted),'r'); +hold on +errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); + +set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-0.1 3.5]) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 
 ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); + +title('Pitman-Yor process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +box on + +subplot(1,2,1) + +for i = 1:5 + +  b = 10^(i-1) + +  disp(['Loading results for b = ' num2str(b) ]); +  typecountrecord= load([ 'typecountrecordwsjflat0.0.' num2str(b) '.0.dat']); + +  typecountrecordmean = mean(typecountrecord(500:1000,:)); +  save([ 'typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); +   +  [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20) +  [logbins predicted dummy] = logbinmean(counts,crppred(counts,b),20,20) +%  errorbar(log10(logbins),meanval,seval,'k.'); +  hold on +  ph = plot(log10(logbins),log10(predicted),'r'); +  %  ph = plot(log10(logbins),predicted,'r'); +  set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) +  errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-0.1 1.5]) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... +		    ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',  ... +		    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... +		    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+		    ' ', ' ', ' ', ' '}); +title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +box on + +  | 
