diff options
Diffstat (limited to 'report/pyp_clustering/acl09-short/code')
31 files changed, 4891 insertions, 0 deletions
diff --git a/report/pyp_clustering/acl09-short/code/antoniakpred.m b/report/pyp_clustering/acl09-short/code/antoniakpred.m new file mode 100644 index 00000000..c4153c04 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/antoniakpred.m @@ -0,0 +1,12 @@ +function output = antoniakpred(input,b) + +uniqin = unique(input); +prediction = zeros(max(input),1); + +p0=1/30114; +for i = 1:length(uniqin) + prediction(uniqin(i)) = b*p0*log((b*p0+uniqin(i))/(b*p0)); +end + +output = prediction(input); + diff --git a/report/pyp_clustering/acl09-short/code/approximations.eps b/report/pyp_clustering/acl09-short/code/approximations.eps new file mode 100644 index 00000000..67857497 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/approximations.eps @@ -0,0 +1,897 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The Mathworks, Inc. Version 7.7.0.471 (R2008b). Operating System: Linux 2.6.18-92.1.13.el5.inf.1PAE #1 SMP Mon Oct 20 10:33:44 BST 2008 i686. +%%Title: /afs/inf.ed.ac.uk/user/s/sgwater/research/papers/2009/acl09-short/code/approximations.eps +%%CreationDate: 04/25/2009 11:31:18 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox: 89 164 503 676 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef 
+/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] + makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse + exch dup 3 1 roll findfont dup length dict begin + { 1 index /FID ne {def}{pop pop} ifelse } forall + /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse + exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} + {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 + dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto + neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill + PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll 
arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef + /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly + vradius add translate hradius vradius scale 0 0 1 180 270 arc + tMatrix setmatrix lrx hradius sub uly vradius add translate + hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix + lrx hradius sub lry vradius sub translate hradius vradius scale + 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub + translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix + closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix + closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix + closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup + ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { + 2 0 asub 3 1 asub 4 0 asub 5 1 asub + dup 0 get exch 1 get 7 -1 roll astore } bdef 
+/compute_transform { + dmat dtri tri_to_matrix tmat1 invertmatrix + smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string + currentfile + 3 index 0 eq {/ASCIIHexDecode filter} + {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } + ifelse exch readstring pop + dup 0 3 index getinterval /rbmap xdef + dup 2 index dup getinterval /gbmap xdef + 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c + cols rows 8 compute_transform + rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: 89 164 503 676 +MathWorks begin +bpage +%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode 1068 8112 csm + + 0 0 4976 6135 rc +86 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg + 0 0 4977 6136 rf +6 w +0 5000 3856 0 0 -5000 647 5460 4 MP +PP +-3856 0 0 5000 3856 0 0 -5000 647 5460 5 MP stroke +4 w +DO +SO +6 w +0 sg + 647 5460 mt 4503 5460 L + 647 460 mt 4503 460 L + 647 5460 mt 647 460 L +4503 5460 mt 4503 460 L + 647 5460 mt 4503 5460 L + 647 5460 mt 647 460 L + 754 5460 mt 754 5410 L + 754 460 mt 754 510 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 708 5650 mt +(1) s +1076 5460 mt 1076 5410 L +1076 460 mt 1076 510 L +1053 5650 mt +( ) s +1265 5460 mt 1265 5410 L +1265 460 mt 1265 510 L +1242 5650 mt +( ) s +1398 5460 mt 1398 5410 
L +1398 460 mt 1398 510 L +1375 5650 mt +( ) s +1502 5460 mt 1502 5410 L +1502 460 mt 1502 510 L +1479 5650 mt +( ) s +1587 5460 mt 1587 5410 L +1587 460 mt 1587 510 L +1564 5650 mt +( ) s +1659 5460 mt 1659 5410 L +1659 460 mt 1659 510 L +1636 5650 mt +( ) s +1721 5460 mt 1721 5410 L +1721 460 mt 1721 510 L +1698 5650 mt +( ) s +1776 5460 mt 1776 5410 L +1776 460 mt 1776 510 L +1753 5650 mt +( ) s +1825 5460 mt 1825 5410 L +1825 460 mt 1825 510 L +1732 5650 mt +(10) s +2147 5460 mt 2147 5410 L +2147 460 mt 2147 510 L +2124 5650 mt +( ) s +2336 5460 mt 2336 5410 L +2336 460 mt 2336 510 L +2313 5650 mt +( ) s +2470 5460 mt 2470 5410 L +2470 460 mt 2470 510 L +2447 5650 mt +( ) s +2573 5460 mt 2573 5410 L +2573 460 mt 2573 510 L +2550 5650 mt +( ) s +2658 5460 mt 2658 5410 L +2658 460 mt 2658 510 L +2635 5650 mt +( ) s +2730 5460 mt 2730 5410 L +2730 460 mt 2730 510 L +2707 5650 mt +( ) s +2792 5460 mt 2792 5410 L +2792 460 mt 2792 510 L +2769 5650 mt +( ) s +2847 5460 mt 2847 5410 L +2847 460 mt 2847 510 L +2824 5650 mt +( ) s +2896 5460 mt 2896 5410 L +2896 460 mt 2896 510 L +2756 5650 mt +(100) s +3218 5460 mt 3218 5410 L +3218 460 mt 3218 510 L +3195 5650 mt +( ) s +3407 5460 mt 3407 5410 L +3407 460 mt 3407 510 L +3384 5650 mt +( ) s +3541 5460 mt 3541 5410 L +3541 460 mt 3541 510 L +3518 5650 mt +( ) s +3645 5460 mt 3645 5410 L +3645 460 mt 3645 510 L +3622 5650 mt +( ) s +3729 5460 mt 3729 5410 L +3729 460 mt 3729 510 L +3706 5650 mt +( ) s +3801 5460 mt 3801 5410 L +3801 460 mt 3801 510 L +3778 5650 mt +( ) s +3863 5460 mt 3863 5410 L +3863 460 mt 3863 510 L +3840 5650 mt +( ) s +3918 5460 mt 3918 5410 L +3918 460 mt 3918 510 L +3895 5650 mt +( ) s +3967 5460 mt 3967 5410 L +3967 460 mt 3967 510 L +3781 5650 mt +(1000) s +4289 5460 mt 4289 5410 L +4289 460 mt 4289 510 L +4266 5650 mt +( ) s +4478 5460 mt 4478 5410 L +4478 460 mt 4478 510 L +4455 5650 mt +( ) s + 647 5321 mt 697 5321 L +4503 5321 mt 4453 5321 L + 379 5383 mt +(0.1) s + 647 4903 mt 697 4903 L 
+4503 4903 mt 4453 4903 L + 566 4965 mt +( ) s + 647 4658 mt 697 4658 L +4503 4658 mt 4453 4658 L + 566 4720 mt +( ) s + 647 4484 mt 697 4484 L +4503 4484 mt 4453 4484 L + 566 4546 mt +( ) s + 647 4350 mt 697 4350 L +4503 4350 mt 4453 4350 L + 566 4412 mt +( ) s + 647 4240 mt 697 4240 L +4503 4240 mt 4453 4240 L + 566 4302 mt +( ) s + 647 4147 mt 697 4147 L +4503 4147 mt 4453 4147 L + 566 4209 mt +( ) s + 647 4066 mt 697 4066 L +4503 4066 mt 4453 4066 L + 566 4128 mt +( ) s + 647 3995 mt 697 3995 L +4503 3995 mt 4453 3995 L + 566 4057 mt +( ) s + 647 3932 mt 697 3932 L +4503 3932 mt 4453 3932 L + 519 3994 mt +(1) s + 647 3514 mt 697 3514 L +4503 3514 mt 4453 3514 L + 566 3576 mt +( ) s + 647 3269 mt 697 3269 L +4503 3269 mt 4453 3269 L + 566 3331 mt +( ) s + 647 3096 mt 697 3096 L +4503 3096 mt 4453 3096 L + 566 3158 mt +( ) s + 647 2961 mt 697 2961 L +4503 2961 mt 4453 2961 L + 566 3023 mt +( ) s + 647 2851 mt 697 2851 L +4503 2851 mt 4453 2851 L + 566 2913 mt +( ) s + 647 2758 mt 697 2758 L +4503 2758 mt 4453 2758 L + 566 2820 mt +( ) s + 647 2677 mt 697 2677 L +4503 2677 mt 4453 2677 L + 566 2739 mt +( ) s + 647 2606 mt 697 2606 L +4503 2606 mt 4453 2606 L + 566 2668 mt +( ) s + 647 2543 mt 697 2543 L +4503 2543 mt 4453 2543 L + 426 2605 mt +(10) s + 647 2125 mt 697 2125 L +4503 2125 mt 4453 2125 L + 566 2187 mt +( ) s + 647 1880 mt 697 1880 L +4503 1880 mt 4453 1880 L + 566 1942 mt +( ) s + 647 1707 mt 697 1707 L +4503 1707 mt 4453 1707 L + 566 1769 mt +( ) s + 647 1572 mt 697 1572 L +4503 1572 mt 4453 1572 L + 566 1634 mt +( ) s + 647 1462 mt 697 1462 L +4503 1462 mt 4453 1462 L + 566 1524 mt +( ) s + 647 1369 mt 697 1369 L +4503 1369 mt 4453 1369 L + 566 1431 mt +( ) s + 647 1289 mt 697 1289 L +4503 1289 mt 4453 1289 L + 566 1351 mt +( ) s + 647 1217 mt 697 1217 L +4503 1217 mt 4453 1217 L + 566 1279 mt +( ) s + 647 1154 mt 697 1154 L +4503 1154 mt 4453 1154 L + 332 1216 mt +(100) s + 647 736 mt 697 736 L +4503 736 mt 4453 736 L + 566 798 mt +( ) s + 647 491 
mt 697 491 L +4503 491 mt 4453 491 L + 566 553 mt +( ) s + 647 5460 mt 4503 5460 L + 647 460 mt 4503 460 L + 647 5460 mt 647 460 L +4503 5460 mt 4503 460 L +gs 647 460 3857 5001 rc +18 w +0.7 sg +265 -2 266 -1 265 -1 265 -1 265 -1 266 -1 265 -1 265 -1 +266 -2 265 -1 265 -1 265 -1 266 -3 886 3932 14 MP stroke +6 w +gr + +0.7 sg +0 sg + 850 3932 mt 922 3932 L + 886 3896 mt 886 3968 L +1116 3929 mt 1188 3929 L +1152 3893 mt 1152 3965 L +1381 3928 mt 1453 3928 L +1417 3892 mt 1417 3964 L +1646 3927 mt 1718 3927 L +1682 3891 mt 1682 3963 L +1911 3926 mt 1983 3926 L +1947 3890 mt 1947 3962 L +2177 3924 mt 2249 3924 L +2213 3888 mt 2213 3960 L +2442 3923 mt 2514 3923 L +2478 3887 mt 2478 3959 L +2707 3922 mt 2779 3922 L +2743 3886 mt 2743 3958 L +2973 3922 mt 3045 3922 L +3009 3886 mt 3009 3958 L +3238 3919 mt 3310 3919 L +3274 3883 mt 3274 3955 L +3503 3919 mt 3575 3919 L +3539 3883 mt 3539 3955 L +3768 3919 mt 3840 3919 L +3804 3883 mt 3804 3955 L +4034 3917 mt 4106 3917 L +4070 3881 mt 4070 3953 L +4299 3918 mt 4371 3918 L +4335 3882 mt 4335 3954 L + 861 3907 mt 911 3957 L + 911 3907 mt 861 3957 L +1127 3904 mt 1177 3954 L +1177 3904 mt 1127 3954 L +1392 3903 mt 1442 3953 L +1442 3903 mt 1392 3953 L +1657 3902 mt 1707 3952 L +1707 3902 mt 1657 3952 L +1922 3901 mt 1972 3951 L +1972 3901 mt 1922 3951 L +2188 3899 mt 2238 3949 L +2238 3899 mt 2188 3949 L +2453 3898 mt 2503 3948 L +2503 3898 mt 2453 3948 L +2718 3897 mt 2768 3947 L +2768 3897 mt 2718 3947 L +2984 3897 mt 3034 3947 L +3034 3897 mt 2984 3947 L +3249 3894 mt 3299 3944 L +3299 3894 mt 3249 3944 L +3514 3894 mt 3564 3944 L +3564 3894 mt 3514 3944 L +3779 3894 mt 3829 3944 L +3829 3894 mt 3779 3944 L +4045 3892 mt 4095 3942 L +4095 3892 mt 4045 3942 L +4310 3893 mt 4360 3943 L +4360 3893 mt 4310 3943 L +gs 647 460 3857 5001 rc +gr + + 36 36 886 3932 FO + 36 36 1152 3929 FO + 36 36 1417 3928 FO + 36 36 1682 3927 FO + 36 36 1947 3926 FO + 36 36 2213 3924 FO + 36 36 2478 3923 FO + 36 36 2743 3922 FO + 36 36 3009 
3921 FO + 36 36 3274 3919 FO + 36 36 3539 3919 FO + 36 36 3804 3918 FO + 36 36 4070 3916 FO + 36 36 4335 3920 FO +gs 647 460 3857 5001 rc +18 w +0.7 sg +265 -9 266 -9 265 -9 265 -10 265 -9 266 -10 265 -10 265 -10 +266 -11 265 -11 265 -10 265 -14 266 -23 886 3932 14 MP stroke +DA +265 -32 266 -32 265 -36 265 -37 265 -40 266 -43 265 -47 265 -50 +266 -56 265 -61 265 -57 265 -84 266 -129 886 5241 14 MP stroke +SO +6 w +gr + +0.7 sg +0 sg + 850 3932 mt 922 3932 L + 886 3896 mt 886 3968 L +1116 3909 mt 1188 3909 L +1152 3873 mt 1152 3945 L +1381 3895 mt 1453 3895 L +1417 3859 mt 1417 3931 L +1646 3885 mt 1718 3885 L +1682 3849 mt 1682 3921 L +1911 3874 mt 1983 3874 L +1947 3838 mt 1947 3910 L +2177 3863 mt 2249 3863 L +2213 3827 mt 2213 3899 L +2442 3854 mt 2514 3854 L +2478 3818 mt 2478 3890 L +2707 3843 mt 2779 3843 L +2743 3807 mt 2743 3879 L +2973 3833 mt 3045 3833 L +3009 3797 mt 3009 3869 L +3238 3822 mt 3310 3822 L +3274 3786 mt 3274 3858 L +3503 3816 mt 3575 3816 L +3539 3780 mt 3539 3852 L +3768 3806 mt 3840 3806 L +3804 3770 mt 3804 3842 L +4034 3802 mt 4106 3802 L +4070 3766 mt 4070 3838 L +4299 3782 mt 4371 3782 L +4335 3746 mt 4335 3818 L + 861 3907 mt 911 3957 L + 911 3907 mt 861 3957 L +1127 3884 mt 1177 3934 L +1177 3884 mt 1127 3934 L +1392 3870 mt 1442 3920 L +1442 3870 mt 1392 3920 L +1657 3860 mt 1707 3910 L +1707 3860 mt 1657 3910 L +1922 3849 mt 1972 3899 L +1972 3849 mt 1922 3899 L +2188 3838 mt 2238 3888 L +2238 3838 mt 2188 3888 L +2453 3829 mt 2503 3879 L +2503 3829 mt 2453 3879 L +2718 3818 mt 2768 3868 L +2768 3818 mt 2718 3868 L +2984 3808 mt 3034 3858 L +3034 3808 mt 2984 3858 L +3249 3797 mt 3299 3847 L +3299 3797 mt 3249 3847 L +3514 3791 mt 3564 3841 L +3564 3791 mt 3514 3841 L +3779 3781 mt 3829 3831 L +3829 3781 mt 3779 3831 L +4045 3777 mt 4095 3827 L +4095 3777 mt 4045 3827 L +4310 3757 mt 4360 3807 L +4360 3757 mt 4310 3807 L +gs 647 460 3857 5001 rc +gr + + 36 36 886 3932 FO + 36 36 1152 3910 FO + 36 36 1417 3895 FO + 36 36 1682 
3885 FO + 36 36 1947 3872 FO + 36 36 2213 3860 FO + 36 36 2478 3848 FO + 36 36 2743 3834 FO + 36 36 3009 3821 FO + 36 36 3274 3809 FO + 36 36 3539 3798 FO + 36 36 3804 3788 FO + 36 36 4070 3771 FO + 36 36 4335 3742 FO +gs 647 460 3857 5001 rc +18 w +0.7 sg +265 -33 266 -34 265 -36 265 -38 265 -42 266 -44 265 -49 265 -52 +266 -60 265 -66 265 -62 265 -96 266 -158 886 3932 14 MP stroke +DA +265 -40 266 -43 265 -46 265 -50 265 -55 266 -60 265 -67 265 -75 +266 -87 265 -100 265 -96 265 -148 266 -242 886 4398 14 MP stroke +SO +6 w +gr + +0.7 sg +0 sg + 850 3932 mt 922 3932 L + 886 3896 mt 886 3968 L +1116 3774 mt 1188 3774 L +1152 3738 mt 1152 3810 L +1381 3678 mt 1453 3678 L +1417 3642 mt 1417 3714 L +1646 3616 mt 1718 3616 L +1682 3580 mt 1682 3652 L +1911 3549 mt 1983 3549 L +1947 3513 mt 1947 3585 L +2177 3490 mt 2249 3490 L +2213 3454 mt 2213 3526 L +2442 3438 mt 2514 3438 L +2478 3402 mt 2478 3474 L +2707 3390 mt 2779 3390 L +2743 3354 mt 2743 3426 L +2973 3344 mt 3045 3344 L +3009 3308 mt 3009 3380 L +3238 3303 mt 3310 3303 L +3274 3267 mt 3274 3339 L +3503 3267 mt 3575 3267 L +3539 3231 mt 3539 3303 L +3768 3228 mt 3840 3228 L +3804 3192 mt 3804 3264 L +4034 3193 mt 4106 3193 L +4070 3157 mt 4070 3229 L +4299 3160 mt 4371 3160 L +4335 3124 mt 4335 3196 L + 861 3907 mt 911 3957 L + 911 3907 mt 861 3957 L +1127 3749 mt 1177 3799 L +1177 3749 mt 1127 3799 L +1392 3653 mt 1442 3703 L +1442 3653 mt 1392 3703 L +1657 3591 mt 1707 3641 L +1707 3591 mt 1657 3641 L +1922 3524 mt 1972 3574 L +1972 3524 mt 1922 3574 L +2188 3465 mt 2238 3515 L +2238 3465 mt 2188 3515 L +2453 3413 mt 2503 3463 L +2503 3413 mt 2453 3463 L +2718 3365 mt 2768 3415 L +2768 3365 mt 2718 3415 L +2984 3319 mt 3034 3369 L +3034 3319 mt 2984 3369 L +3249 3278 mt 3299 3328 L +3299 3278 mt 3249 3328 L +3514 3242 mt 3564 3292 L +3564 3242 mt 3514 3292 L +3779 3203 mt 3829 3253 L +3829 3203 mt 3779 3253 L +4045 3168 mt 4095 3218 L +4095 3168 mt 4045 3218 L +4310 3135 mt 4360 3185 L +4360 3135 mt 4310 3185 
L +gs 647 460 3857 5001 rc +gr + + 36 36 886 3932 FO + 36 36 1152 3825 FO + 36 36 1417 3737 FO + 36 36 1682 3663 FO + 36 36 1947 3567 FO + 36 36 2213 3455 FO + 36 36 2478 3330 FO + 36 36 2743 3183 FO + 36 36 3009 3003 FO + 36 36 3274 2790 FO + 36 36 3539 2539 FO + 36 36 3804 2234 FO + 36 36 4070 1938 FO + 36 36 4335 1575 FO +gs 647 460 3857 5001 rc +18 w +0.7 sg +265 -54 266 -58 265 -65 265 -72 265 -82 266 -95 265 -109 265 -127 +266 -155 265 -182 265 -180 265 -268 266 -415 886 3932 14 MP stroke +DA +265 -56 266 -59 265 -68 265 -74 265 -85 266 -99 265 -113 265 -133 +266 -162 265 -191 265 -187 265 -277 266 -425 886 4013 14 MP stroke +SO +6 w +gr + +0.7 sg +0 sg + 850 3932 mt 922 3932 L + 886 3896 mt 886 3968 L +1116 3517 mt 1188 3517 L +1152 3481 mt 1152 3553 L +1381 3249 mt 1453 3249 L +1417 3213 mt 1417 3285 L +1646 3069 mt 1718 3069 L +1682 3033 mt 1682 3105 L +1911 2887 mt 1983 2887 L +1947 2851 mt 1947 2923 L +2177 2732 mt 2249 2732 L +2213 2696 mt 2213 2768 L +2442 2605 mt 2514 2605 L +2478 2569 mt 2478 2641 L +2707 2496 mt 2779 2496 L +2743 2460 mt 2743 2532 L +2973 2401 mt 3045 2401 L +3009 2365 mt 3009 2437 L +3238 2319 mt 3310 2319 L +3274 2283 mt 3274 2355 L +3503 2247 mt 3575 2247 L +3539 2211 mt 3539 2283 L +3768 2181 mt 3840 2181 L +3804 2145 mt 3804 2217 L +4034 2124 mt 4106 2124 L +4070 2088 mt 4070 2160 L +4299 2069 mt 4371 2069 L +4335 2033 mt 4335 2105 L + 861 3907 mt 911 3957 L + 911 3907 mt 861 3957 L +1127 3492 mt 1177 3542 L +1177 3492 mt 1127 3542 L +1392 3224 mt 1442 3274 L +1442 3224 mt 1392 3274 L +1657 3044 mt 1707 3094 L +1707 3044 mt 1657 3094 L +1922 2862 mt 1972 2912 L +1972 2862 mt 1922 2912 L +2188 2707 mt 2238 2757 L +2238 2707 mt 2188 2757 L +2453 2580 mt 2503 2630 L +2503 2580 mt 2453 2630 L +2718 2471 mt 2768 2521 L +2768 2471 mt 2718 2521 L +2984 2376 mt 3034 2426 L +3034 2376 mt 2984 2426 L +3249 2294 mt 3299 2344 L +3299 2294 mt 3249 2344 L +3514 2222 mt 3564 2272 L +3564 2222 mt 3514 2272 L +3779 2156 mt 3829 2206 L +3829 
2156 mt 3779 2206 L +4045 2099 mt 4095 2149 L +4095 2099 mt 4045 2149 L +4310 2044 mt 4360 2094 L +4360 2044 mt 4310 2094 L +gs 647 460 3857 5001 rc +gr + + 36 36 886 3932 FO + 36 36 1152 3713 FO + 36 36 1417 3510 FO + 36 36 1682 3318 FO + 36 36 1947 3048 FO + 36 36 2213 2733 FO + 36 36 2478 2401 FO + 36 36 2743 2061 FO + 36 36 3009 1720 FO + 36 36 3274 1380 FO + 36 36 3539 1045 FO + 36 36 3804 746 FO +gs 647 460 3857 5001 rc +gr + + 240 4103 mt -90 rotate +(Mean number of lexical entries) s +90 rotate +1812 5794 mt +(Word frequency \(n) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 132 FMSR + +3188 5878 mt +(w) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +3283 5794 mt +(\)) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + + 630 5503 mt +( ) s +4487 502 mt +( ) s +1 sg +0 846 2267 0 0 -846 707 1366 4 MP +PP +-2267 0 0 846 2267 0 0 -846 707 1366 5 MP stroke +4 w +DO +SO +6 w +0 sg + 707 1366 mt 2974 1366 L + 707 520 mt 2974 520 L + 707 1366 mt 707 520 L +2974 1366 mt 2974 520 L + 707 1366 mt 2974 1366 L + 707 1366 mt 707 520 L + 707 1366 mt 2974 1366 L + 707 520 mt 2974 520 L + 707 1366 mt 707 520 L +2974 1366 mt 2974 520 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +1183 698 mt +(Expectation) s +gs 707 520 2268 847 rc +18 w +0.7 sg +365 0 780 637 2 MP stroke +gr + +18 w +0.7 sg +0 sg +1183 902 mt +(Antoniak approx.) 
s +gs 707 520 2268 847 rc +DA +0.7 sg +365 0 780 841 2 MP stroke +SO +gr + +0.7 sg +0 sg +1183 1105 mt +(Empirical, fixed base) s +gs 707 520 2268 847 rc +6 w +gs 889 971 147 147 rc + 926 1044 mt 998 1044 L + 962 1008 mt 962 1080 L + 937 1019 mt 987 1069 L + 987 1019 mt 937 1069 L +gr + +gr + +6 w +1183 1309 mt +(Empirical, inferred base) s +gs 707 520 2268 847 rc +gs 889 1175 147 147 rc + 36 36 962 1248 FO +gr + +gr + + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/report/pyp_clustering/acl09-short/code/cohnpred.m b/report/pyp_clustering/acl09-short/code/cohnpred.m new file mode 100644 index 00000000..35a49605 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/cohnpred.m @@ -0,0 +1,12 @@ +function output = cohnpred(input,b) + +uniqin = unique(input); +prediction = zeros(max(input),1); + +p0=1/30114; +for i = 1:length(uniqin) + prediction(uniqin(i)) = b*p0*(psi(b*p0+uniqin(i)) - psi(b*p0)); +end + +output = prediction(input); + diff --git a/report/pyp_clustering/acl09-short/code/cokus.c b/report/pyp_clustering/acl09-short/code/cokus.c new file mode 100644 index 00000000..3a959c0f --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/cokus.c @@ -0,0 +1,167 @@ +// This is the ``Mersenne Twister'' random number generator MT19937, which +// generates pseudorandom integers uniformly distributed in 0..(2^32 - 1) +// starting from any odd seed in 0..(2^32 - 1). This version is a recode +// by Shawn Cokus (Cokus@math.washington.edu) on March 8, 1998 of a version by +// Takuji Nishimura (who had suggestions from Topher Cooper and Marc Rieffel in +// July-August 1997). +// +// Effectiveness of the recoding (on Goedel2.math.washington.edu, a DEC Alpha +// running OSF/1) using GCC -O3 as a compiler: before recoding: 51.6 sec. to +// generate 300 million random numbers; after recoding: 24.0 sec. 
for the same +// (i.e., 46.5% of original time), so speed is now about 12.5 million random +// number generations per second on this machine. +// +// According to the URL <http://www.math.keio.ac.jp/~matumoto/emt.html> +// (and paraphrasing a bit in places), the Mersenne Twister is ``designed +// with consideration of the flaws of various existing generators,'' has +// a period of 2^19937 - 1, gives a sequence that is 623-dimensionally +// equidistributed, and ``has passed many stringent tests, including the +// die-hard test of G. Marsaglia and the load test of P. Hellekalek and +// S. Wegenkittl.'' It is efficient in memory usage (typically using 2506 +// to 5012 bytes of static data, depending on data type sizes, and the code +// is quite short as well). It generates random numbers in batches of 624 +// at a time, so the caching and pipelining of modern systems is exploited. +// It is also divide- and mod-free. +// +// This library is free software; you can redistribute it and/or modify it +// under the terms of the GNU Library General Public License as published by +// the Free Software Foundation (either version 2 of the License or, at your +// option, any later version). This library is distributed in the hope that +// it will be useful, but WITHOUT ANY WARRANTY, without even the implied +// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See +// the GNU Library General Public License for more details. You should have +// received a copy of the GNU Library General Public License along with this +// library; if not, write to the Free Software Foundation, Inc., 59 Temple +// Place, Suite 330, Boston, MA 02111-1307, USA. +// +// The code as Shawn received it included the following notice: +// +// Copyright (C) 1997 Makoto Matsumoto and Takuji Nishimura. When +// you use this, send an e-mail to <matumoto@math.keio.ac.jp> with +// an appropriate reference to your work. +// +// It would be nice to CC: <Cokus@math.washington.edu> when you write. 
+// + +#include <stdio.h> +#include <stdlib.h> + +// +// uint32 must be an unsigned integer type capable of holding at least 32 +// bits; exactly 32 should be fastest, but 64 is better on an Alpha with +// GCC at -O3 optimization so try your options and see what's best for you +// + +typedef unsigned long uint32; + +#define N (624) // length of state vector +#define M (397) // a period parameter +#define K (0x9908B0DFU) // a magic constant +#define hiBit(u) ((u) & 0x80000000U) // mask all but highest bit of u +#define loBit(u) ((u) & 0x00000001U) // mask all but lowest bit of u +#define loBits(u) ((u) & 0x7FFFFFFFU) // mask the highest bit of u +#define mixBits(u, v) (hiBit(u)|loBits(v)) // move hi bit of u to hi bit of v + +static uint32 state[N+1]; // state vector + 1 extra to not violate ANSI C +static uint32 *next; // next random value is computed from here +static int left = -1; // can *next++ this many times before reloading + + +void seedMT(uint32 seed) + { + // + // We initialize state[0..(N-1)] via the generator + // + // x_new = (69069 * x_old) mod 2^32 + // + // from Line 15 of Table 1, p. 106, Sec. 3.3.4 of Knuth's + // _The Art of Computer Programming_, Volume 2, 3rd ed. + // + // Notes (SJC): I do not know what the initial state requirements + // of the Mersenne Twister are, but it seems this seeding generator + // could be better. It achieves the maximum period for its modulus + // (2^30) iff x_initial is odd (p. 20-21, Sec. 3.2.1.2, Knuth); if + // x_initial can be even, you have sequences like 0, 0, 0, ...; + // 2^31, 2^31, 2^31, ...; 2^30, 2^30, 2^30, ...; 2^29, 2^29 + 2^31, + // 2^29, 2^29 + 2^31, ..., etc. so I force seed to be odd below. + // + // Even if x_initial is odd, if x_initial is 1 mod 4 then + // + // the lowest bit of x is always 1, + // the next-to-lowest bit of x is always 0, + // the 2nd-from-lowest bit of x alternates ... 0 1 0 1 0 1 0 1 ... , + // the 3rd-from-lowest bit of x 4-cycles ... 0 1 1 0 0 1 1 0 ... 
, + // the 4th-from-lowest bit of x has the 8-cycle ... 0 0 0 1 1 1 1 0 ... , + // ... + // + // and if x_initial is 3 mod 4 then + // + // the lowest bit of x is always 1, + // the next-to-lowest bit of x is always 1, + // the 2nd-from-lowest bit of x alternates ... 0 1 0 1 0 1 0 1 ... , + // the 3rd-from-lowest bit of x 4-cycles ... 0 0 1 1 0 0 1 1 ... , + // the 4th-from-lowest bit of x has the 8-cycle ... 0 0 1 1 1 1 0 0 ... , + // ... + // + // The generator's potency (min. s>=0 with (69069-1)^s = 0 mod 2^32) is + // 16, which seems to be alright by p. 25, Sec. 3.2.1.3 of Knuth. It + // also does well in the dimension 2..5 spectral tests, but it could be + // better in dimension 6 (Line 15, Table 1, p. 106, Sec. 3.3.4, Knuth). + // + // Note that the random number user does not see the values generated + // here directly since reloadMT() will always munge them first, so maybe + // none of all of this matters. In fact, the seed values made here could + // even be extra-special desirable if the Mersenne Twister theory says + // so-- that's why the only change I made is to restrict to odd seeds. + // + + register uint32 x = (seed | 1U) & 0xFFFFFFFFU, *s = state; + register int j; + + for(left=0, *s++=x, j=N; --j; + *s++ = (x*=69069U) & 0xFFFFFFFFU); + } + + +uint32 reloadMT(void) + { + register uint32 *p0=state, *p2=state+2, *pM=state+M, s0, s1; + register int j; + + if(left < -1) + seedMT(4357U); + + left=N-1, next=state+1; + + for(s0=state[0], s1=state[1], j=N-M+1; --j; s0=s1, s1=*p2++) + *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U); + + for(pM=state, j=M; --j; s0=s1, s1=*p2++) + *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U); + + s1=state[0], *p0 = *pM ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? 
K : 0U); + s1 ^= (s1 >> 11); + s1 ^= (s1 << 7) & 0x9D2C5680U; + s1 ^= (s1 << 15) & 0xEFC60000U; + return(s1 ^ (s1 >> 18)); + } + + +inline uint32 randomMT(void) + { + uint32 y; + + if(--left < 0) + return(reloadMT()); + + y = *next++; + y ^= (y >> 11); + y ^= (y << 7) & 0x9D2C5680U; + y ^= (y << 15) & 0xEFC60000U; + y ^= (y >> 18); + return(y); + } + + + + diff --git a/report/pyp_clustering/acl09-short/code/crppred.m b/report/pyp_clustering/acl09-short/code/crppred.m new file mode 100644 index 00000000..17f22652 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/crppred.m @@ -0,0 +1,12 @@ +function output = crppred(input,b) + +uniqin = unique(input); +prediction = zeros(max(input),1); + +p0=1/30114; +for i = 1:length(uniqin) + prediction(uniqin(i)) = b*p0*sum(1./((1:uniqin(i))+b*p0-1)); +end + +output = prediction(input); + diff --git a/report/pyp_clustering/acl09-short/code/crppred_geom.m b/report/pyp_clustering/acl09-short/code/crppred_geom.m new file mode 100644 index 00000000..e6869e4f --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/crppred_geom.m @@ -0,0 +1,12 @@ +function output = crppred_geom(input,lengths,b) + + +output = zeros(length(input),1); + +p0=(1/52).^lengths; +a=b*p0; +for i = 1:length(input) + output(i) = a(i)*sum(1./((1:input(i))+a(i)-1)); +end + + diff --git a/report/pyp_clustering/acl09-short/code/logbinmean.m b/report/pyp_clustering/acl09-short/code/logbinmean.m new file mode 100644 index 00000000..23dbb0ac --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/logbinmean.m @@ -0,0 +1,38 @@ +function [ logbinsvalid , meanval, seval ] = logbinmean( frequency, typecount, NBINS , MinCounts ); + +% calculate distribution of frequency +Maxfrequency = max( frequency ); +meanK = mean( frequency ); +linbins = linspace( log10(1) , log10( Maxfrequency ) , NBINS ); +stepb = linbins( 2 ) - linbins( 1 ); + +logbins = 10.^linbins; + +% !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+LL = length( linbins ) - 1; +for i=1:LL + lowb = linbins( i ); + highb = linbins( i+1 ); + linbinsout( i ) = (highb + lowb) / 2; + + lowb = logbins( i ); + highb = logbins( i+1 ); + step = highb - lowb; + logbinsout( i ) = 10^linbinsout( i ); + + indices = find( frequency >= lowb & frequency < highb); + + meanval(i) = mean(typecount(indices)); + rawcounts(i) = length(indices); + seval(i) = std(typecount(indices))./sqrt(rawcounts(i)); + +end + +valid = 1:LL; +valid( find( rawcounts <= MinCounts )) = []; + +linbinsvalid = linbinsout( valid ); +logbinsvalid = logbinsout( valid ); + +meanval = meanval( valid ); +seval = seval( valid ); diff --git a/report/pyp_clustering/acl09-short/code/noP0pred.m b/report/pyp_clustering/acl09-short/code/noP0pred.m new file mode 100644 index 00000000..f72f1432 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/noP0pred.m @@ -0,0 +1,11 @@ +function output = antoniakpred(input,b) + +uniqin = unique(input); +prediction = zeros(max(input),1); + +for i = 1:length(uniqin) + prediction(uniqin(i)) = b*log((b+uniqin(i))/b); +end + +output = prediction(input); + diff --git a/report/pyp_clustering/acl09-short/code/plot0.eps b/report/pyp_clustering/acl09-short/code/plot0.eps new file mode 100644 index 00000000..6094346a --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/plot0.eps @@ -0,0 +1,633 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The Mathworks, Inc. Version 7.7.0.471 (R2008b). Operating System: Linux 2.6.18-128.1.6.el5.inf.1PAE #1 SMP Wed Apr 15 10:23:41 BST 2009 i686. 
+%%Title: /afs/inf.ed.ac.uk/user/s/sgwater/research/papers/2009/acl09-short/code/plot0.eps +%%CreationDate: 07/23/2009 17:36:19 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox: -44 170 641 672 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] + makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse + exch dup 3 1 roll findfont dup length dict begin + { 1 index /FID ne {def}{pop pop} ifelse } forall + /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse + exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} + {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 
setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 + dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto + neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill + PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef + /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly + vradius add translate hradius vradius scale 0 0 1 180 270 arc + tMatrix setmatrix lrx hradius sub uly vradius add translate + hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix + lrx hradius sub lry vradius sub translate hradius vradius scale + 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub + translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix + closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef 
/lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix + closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix + closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup + ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { + 2 0 asub 3 1 asub 4 0 asub 5 1 asub + dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { + dmat dtri tri_to_matrix tmat1 invertmatrix + smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string + currentfile + 3 index 0 eq {/ASCIIHexDecode filter} + {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } + ifelse exch readstring pop + dup 0 3 index getinterval /rbmap xdef + dup 2 index dup getinterval /gbmap xdef + 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c + cols rows 8 compute_transform + rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: -44 170 641 672 +MathWorks begin +bpage +%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode -0528 8064 csm + + 
0 0 8231 6023 rc +88 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg + 0 0 8232 6024 rf +6 w +0 4908 6379 0 0 -4908 1070 5360 4 MP +PP +-6379 0 0 4908 6379 0 0 -4908 1070 5360 5 MP stroke +4 w +DO +SO +6 w +0 sg +1070 5360 mt 7449 5360 L +1070 452 mt 7449 452 L +1070 5360 mt 1070 452 L +7449 5360 mt 7449 452 L +1070 5360 mt 7449 5360 L +1070 5360 mt 1070 452 L +1247 5360 mt 1247 5296 L +1247 452 mt 1247 515 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1194 5572 mt +(1) s +1780 5360 mt 1780 5296 L +1780 452 mt 1780 515 L +1754 5572 mt +( ) s +2092 5360 mt 2092 5296 L +2092 452 mt 2092 515 L +2066 5572 mt +( ) s +2314 5360 mt 2314 5296 L +2314 452 mt 2314 515 L +2288 5572 mt +( ) s +2485 5360 mt 2485 5296 L +2485 452 mt 2485 515 L +2459 5572 mt +( ) s +2626 5360 mt 2626 5296 L +2626 452 mt 2626 515 L +2600 5572 mt +( ) s +2744 5360 mt 2744 5296 L +2744 452 mt 2744 515 L +2718 5572 mt +( ) s +2847 5360 mt 2847 5296 L +2847 452 mt 2847 515 L +2821 5572 mt +( ) s +2938 5360 mt 2938 5296 L +2938 452 mt 2938 515 L +2912 5572 mt +( ) s +3019 5360 mt 3019 5296 L +3019 452 mt 3019 515 L +2913 5572 mt +(10) s +3552 5360 mt 3552 5296 L +3552 452 mt 3552 515 L +3526 5572 mt +( ) s +3864 5360 mt 3864 5296 L +3864 452 mt 3864 515 L +3838 5572 mt +( ) s +4085 5360 mt 4085 5296 L +4085 452 mt 4085 515 L +4059 5572 mt +( ) s +4257 5360 mt 4257 5296 L +4257 452 mt 4257 515 L +4231 5572 mt +( ) s +4397 5360 mt 4397 5296 L +4397 452 mt 4397 515 L +4371 5572 mt +( ) s +4516 5360 mt 4516 5296 L +4516 452 mt 4516 515 L +4490 5572 mt +( ) s +4619 5360 mt 4619 5296 L +4619 452 mt 4619 515 L +4593 5572 mt +( ) s +4710 
5360 mt 4710 5296 L +4710 452 mt 4710 515 L +4684 5572 mt +( ) s +4791 5360 mt 4791 5296 L +4791 452 mt 4791 515 L +4631 5572 mt +(100) s +5324 5360 mt 5324 5296 L +5324 452 mt 5324 515 L +5298 5572 mt +( ) s +5636 5360 mt 5636 5296 L +5636 452 mt 5636 515 L +5610 5572 mt +( ) s +5857 5360 mt 5857 5296 L +5857 452 mt 5857 515 L +5831 5572 mt +( ) s +6029 5360 mt 6029 5296 L +6029 452 mt 6029 515 L +6003 5572 mt +( ) s +6169 5360 mt 6169 5296 L +6169 452 mt 6169 515 L +6143 5572 mt +( ) s +6288 5360 mt 6288 5296 L +6288 452 mt 6288 515 L +6262 5572 mt +( ) s +6391 5360 mt 6391 5296 L +6391 452 mt 6391 515 L +6365 5572 mt +( ) s +6481 5360 mt 6481 5296 L +6481 452 mt 6481 515 L +6455 5572 mt +( ) s +6563 5360 mt 6563 5296 L +6563 452 mt 6563 515 L +6350 5572 mt +(1000) s +7096 5360 mt 7096 5296 L +7096 452 mt 7096 515 L +7070 5572 mt +( ) s +7408 5360 mt 7408 5296 L +7408 452 mt 7408 515 L +7382 5572 mt +( ) s +1070 5352 mt 1133 5352 L +7449 5352 mt 7385 5352 L + 982 5423 mt +( ) s +1070 5233 mt 1133 5233 L +7449 5233 mt 7385 5233 L + 982 5304 mt +( ) s +1070 5126 mt 1133 5126 L +7449 5126 mt 7385 5126 L + 929 5197 mt +(1) s +1070 4422 mt 1133 4422 L +7449 4422 mt 7385 4422 L + 982 4493 mt +( ) s +1070 4011 mt 1133 4011 L +7449 4011 mt 7385 4011 L + 982 4082 mt +( ) s +1070 3719 mt 1133 3719 L +7449 3719 mt 7385 3719 L + 982 3790 mt +( ) s +1070 3492 mt 1133 3492 L +7449 3492 mt 7385 3492 L + 982 3563 mt +( ) s +1070 3307 mt 1133 3307 L +7449 3307 mt 7385 3307 L + 982 3378 mt +( ) s +1070 3151 mt 1133 3151 L +7449 3151 mt 7385 3151 L + 982 3222 mt +( ) s +1070 3015 mt 1133 3015 L +7449 3015 mt 7385 3015 L + 982 3086 mt +( ) s +1070 2896 mt 1133 2896 L +7449 2896 mt 7385 2896 L + 982 2967 mt +( ) s +1070 2789 mt 1133 2789 L +7449 2789 mt 7385 2789 L + 822 2860 mt +(10) s +1070 2085 mt 1133 2085 L +7449 2085 mt 7385 2085 L + 982 2156 mt +( ) s +1070 1674 mt 1133 1674 L +7449 1674 mt 7385 1674 L + 982 1745 mt +( ) s +1070 1382 mt 1133 1382 L +7449 1382 mt 7385 1382 L + 
982 1453 mt +( ) s +1070 1155 mt 1133 1155 L +7449 1155 mt 7385 1155 L + 982 1226 mt +( ) s +1070 970 mt 1133 970 L +7449 970 mt 7385 970 L + 982 1041 mt +( ) s +1070 814 mt 1133 814 L +7449 814 mt 7385 814 L + 982 885 mt +( ) s +1070 678 mt 1133 678 L +7449 678 mt 7385 678 L + 982 749 mt +( ) s +1070 558 mt 1133 558 L +7449 558 mt 7385 558 L + 982 629 mt +( ) s +1070 452 mt 1133 452 L +7449 452 mt 7385 452 L + 715 523 mt +(100) s +1070 5360 mt 7449 5360 L +1070 452 mt 7449 452 L +1070 5360 mt 1070 452 L +7449 5360 mt 7449 452 L +gs 1070 452 6380 4909 rc +24 w +gr + +24 w +/c8 { 0.000000 0.700000 0.500000 sr} bdef +c8 + 60 60 1466 5126 FO + 60 60 1905 4428 FO + 60 60 2344 3977 FO + 60 60 2783 3675 FO + 60 60 3222 3368 FO + 60 60 3660 3107 FO + 60 60 4099 2893 FO + 60 60 4538 2710 FO + 60 60 4977 2550 FO + 60 60 5416 2412 FO + 60 60 5855 2292 FO + 60 60 6294 2180 FO + 60 60 6733 2083 FO + 60 60 7171 1991 FO +gs 1070 452 6380 4909 rc +gr + +/c9 { 0.400000 0.400000 1.000000 sr} bdef +c9 + 60 60 1466 5126 FO + 60 60 1905 4860 FO + 60 60 2344 4699 FO + 60 60 2783 4594 FO + 60 60 3222 4483 FO + 60 60 3660 4383 FO + 60 60 4099 4295 FO + 60 60 4538 4214 FO + 60 60 4977 4137 FO + 60 60 5416 4068 FO + 60 60 5855 4008 FO + 60 60 6294 3941 FO + 60 60 6733 3883 FO + 60 60 7171 3827 FO +gs 1070 452 6380 4909 rc +gr + +/c10 { 1.000000 0.400000 0.200000 sr} bdef +c10 + 60 60 1466 5126 FO + 60 60 1905 5088 FO + 60 60 2344 5064 FO + 60 60 2783 5048 FO + 60 60 3222 5029 FO + 60 60 3660 5010 FO + 60 60 4099 4994 FO + 60 60 4538 4976 FO + 60 60 4977 4959 FO + 60 60 5416 4942 FO + 60 60 5855 4930 FO + 60 60 6294 4914 FO + 60 60 6733 4908 FO + 60 60 7171 4874 FO +gs 1070 452 6380 4909 rc +gr + +0 sg + 60 60 1466 5126 FO + 60 60 1905 5122 FO + 60 60 2344 5119 FO + 60 60 2783 5118 FO + 60 60 3222 5116 FO + 60 60 3660 5113 FO + 60 60 4099 5112 FO + 60 60 4538 5110 FO + 60 60 4977 5109 FO + 60 60 5416 5105 FO + 60 60 5855 5105 FO + 60 60 6294 5104 FO + 60 60 6733 5101 FO + 60 60 7171 5103 FO 
+gs 1070 452 6380 4909 rc +gr + + 617 4557 mt -90 rotate +(Mean number of lexical entries \(tables\)) s +90 rotate +3390 5724 mt +(Word frequency \(n) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + +4963 5820 mt +(w) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +5066 5724 mt +(\)) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +1053 5403 mt +( ) s +7433 494 mt +( ) s +6 w +1 sg +0 1070 1481 0 0 -1070 1129 1582 4 MP +PP +-1481 0 0 1070 1481 0 0 -1070 1129 1582 5 MP stroke +4 w +DO +SO +6 w +0 sg +1129 1582 mt 2610 1582 L +1129 512 mt 2610 512 L +1129 1582 mt 1129 512 L +2610 1582 mt 2610 512 L +1129 1582 mt 2610 1582 L +1129 1582 mt 1129 512 L +1129 1582 mt 2610 1582 L +1129 512 mt 2610 512 L +1129 1582 mt 1129 512 L +2610 1582 mt 2610 512 L +%%IncludeResource: font Symbol +/Symbol /ISOLatin1Encoding 192 FMSR + +1601 728 mt +(a) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1722 728 mt +( = 100000) s +gs 1129 512 1482 1071 rc +24 w +gs 1236 513 293 293 rc +c8 + 60 60 1382 659 FO +gr + +c8 +gr + +24 w +c8 +0 sg +%%IncludeResource: font Symbol +/Symbol /ISOLatin1Encoding 192 FMSR + +1601 987 mt +(a) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1722 987 mt +( = 10000) s +gs 1129 512 1482 1071 rc +gs 1236 771 293 293 rc +c9 + 60 60 1382 917 FO +gr + +c9 +gr + +c9 +0 sg +%%IncludeResource: font Symbol +/Symbol /ISOLatin1Encoding 192 FMSR + +1601 1246 mt +(a) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1722 1246 mt +( = 1000) s +gs 1129 512 1482 1071 rc +gs 1236 1030 293 293 rc +c10 + 60 60 1382 1176 FO +gr + +c10 +gr + +c10 +0 sg +%%IncludeResource: font Symbol +/Symbol /ISOLatin1Encoding 192 FMSR + +1601 1505 mt +(a) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1722 1505 mt +( = 100) s +gs 1129 512 1482 1071 rc +gs 1236 1288 293 293 rc 
+ 60 60 1382 1434 FO +gr + +6 w +gr + +6 w + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/report/pyp_clustering/acl09-short/code/plot0.pdf b/report/pyp_clustering/acl09-short/code/plot0.pdf Binary files differnew file mode 100644 index 00000000..fd1b4595 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/plot0.pdf diff --git a/report/pyp_clustering/acl09-short/code/plot1.eps b/report/pyp_clustering/acl09-short/code/plot1.eps new file mode 100644 index 00000000..ebb2f194 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/plot1.eps @@ -0,0 +1,579 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The Mathworks, Inc. Version 7.7.0.471 (R2008b). Operating System: Linux 2.6.18-128.1.6.el5.inf.1PAE #1 SMP Wed Apr 15 10:23:41 BST 2009 i686. +%%Title: /afs/inf.ed.ac.uk/user/s/sgwater/research/papers/2009/acl09-short/code/plot1.eps +%%CreationDate: 07/23/2009 17:34:27 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox: -44 170 641 672 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings 
+/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] + makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse + exch dup 3 1 roll findfont dup length dict begin + { 1 index /FID ne {def}{pop pop} ifelse } forall + /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse + exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} + {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 + dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto + neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill + PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn 
{newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef + /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly + vradius add translate hradius vradius scale 0 0 1 180 270 arc + tMatrix setmatrix lrx hradius sub uly vradius add translate + hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix + lrx hradius sub lry vradius sub translate hradius vradius scale + 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub + translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix + closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix + closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix + closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup + ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { + 2 0 asub 3 1 asub 4 0 asub 5 1 asub + dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { + dmat dtri tri_to_matrix tmat1 invertmatrix + smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} 
bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string + currentfile + 3 index 0 eq {/ASCIIHexDecode filter} + {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } + ifelse exch readstring pop + dup 0 3 index getinterval /rbmap xdef + dup 2 index dup getinterval /gbmap xdef + 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c + cols rows 8 compute_transform + rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: -44 170 641 672 +MathWorks begin +bpage +%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode -0528 8064 csm + + 0 0 8231 6023 rc +88 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg + 0 0 8232 6024 rf +6 w +0 4908 6379 0 0 -4908 1070 5360 4 MP +PP +-6379 0 0 4908 6379 0 0 -4908 1070 5360 5 MP stroke +4 w +DO +SO +6 w +0 sg +1070 5360 mt 7449 5360 L +1070 452 mt 7449 452 L +1070 5360 mt 1070 452 L +7449 5360 mt 7449 452 L +1070 5360 mt 7449 5360 L +1070 5360 mt 1070 452 L +1247 5360 mt 1247 5296 L +1247 452 mt 1247 515 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1194 5572 mt +(1) s +1780 5360 mt 1780 5296 L +1780 452 mt 1780 515 L +1754 5572 mt +( ) s +2092 5360 mt 2092 5296 L +2092 452 mt 2092 515 L +2066 5572 mt +( ) s +2314 5360 mt 2314 5296 L +2314 452 mt 2314 515 L +2288 5572 mt +( ) s +2485 5360 mt 2485 5296 L +2485 452 mt 2485 515 L +2459 5572 mt +( ) s +2626 5360 mt 2626 5296 L +2626 452 mt 2626 
515 L +2600 5572 mt +( ) s +2744 5360 mt 2744 5296 L +2744 452 mt 2744 515 L +2718 5572 mt +( ) s +2847 5360 mt 2847 5296 L +2847 452 mt 2847 515 L +2821 5572 mt +( ) s +2938 5360 mt 2938 5296 L +2938 452 mt 2938 515 L +2912 5572 mt +( ) s +3019 5360 mt 3019 5296 L +3019 452 mt 3019 515 L +2913 5572 mt +(10) s +3552 5360 mt 3552 5296 L +3552 452 mt 3552 515 L +3526 5572 mt +( ) s +3864 5360 mt 3864 5296 L +3864 452 mt 3864 515 L +3838 5572 mt +( ) s +4085 5360 mt 4085 5296 L +4085 452 mt 4085 515 L +4059 5572 mt +( ) s +4257 5360 mt 4257 5296 L +4257 452 mt 4257 515 L +4231 5572 mt +( ) s +4397 5360 mt 4397 5296 L +4397 452 mt 4397 515 L +4371 5572 mt +( ) s +4516 5360 mt 4516 5296 L +4516 452 mt 4516 515 L +4490 5572 mt +( ) s +4619 5360 mt 4619 5296 L +4619 452 mt 4619 515 L +4593 5572 mt +( ) s +4710 5360 mt 4710 5296 L +4710 452 mt 4710 515 L +4684 5572 mt +( ) s +4791 5360 mt 4791 5296 L +4791 452 mt 4791 515 L +4631 5572 mt +(100) s +5324 5360 mt 5324 5296 L +5324 452 mt 5324 515 L +5298 5572 mt +( ) s +5636 5360 mt 5636 5296 L +5636 452 mt 5636 515 L +5610 5572 mt +( ) s +5857 5360 mt 5857 5296 L +5857 452 mt 5857 515 L +5831 5572 mt +( ) s +6029 5360 mt 6029 5296 L +6029 452 mt 6029 515 L +6003 5572 mt +( ) s +6169 5360 mt 6169 5296 L +6169 452 mt 6169 515 L +6143 5572 mt +( ) s +6288 5360 mt 6288 5296 L +6288 452 mt 6288 515 L +6262 5572 mt +( ) s +6391 5360 mt 6391 5296 L +6391 452 mt 6391 515 L +6365 5572 mt +( ) s +6481 5360 mt 6481 5296 L +6481 452 mt 6481 515 L +6455 5572 mt +( ) s +6563 5360 mt 6563 5296 L +6563 452 mt 6563 515 L +6350 5572 mt +(1000) s +7096 5360 mt 7096 5296 L +7096 452 mt 7096 515 L +7070 5572 mt +( ) s +7408 5360 mt 7408 5296 L +7408 452 mt 7408 515 L +7382 5572 mt +( ) s +1070 5352 mt 1133 5352 L +7449 5352 mt 7385 5352 L + 982 5423 mt +( ) s +1070 5233 mt 1133 5233 L +7449 5233 mt 7385 5233 L + 982 5304 mt +( ) s +1070 5126 mt 1133 5126 L +7449 5126 mt 7385 5126 L + 929 5197 mt +(1) s +1070 4422 mt 1133 4422 L +7449 4422 mt 
7385 4422 L + 982 4493 mt +( ) s +1070 4011 mt 1133 4011 L +7449 4011 mt 7385 4011 L + 982 4082 mt +( ) s +1070 3719 mt 1133 3719 L +7449 3719 mt 7385 3719 L + 982 3790 mt +( ) s +1070 3492 mt 1133 3492 L +7449 3492 mt 7385 3492 L + 982 3563 mt +( ) s +1070 3307 mt 1133 3307 L +7449 3307 mt 7385 3307 L + 982 3378 mt +( ) s +1070 3151 mt 1133 3151 L +7449 3151 mt 7385 3151 L + 982 3222 mt +( ) s +1070 3015 mt 1133 3015 L +7449 3015 mt 7385 3015 L + 982 3086 mt +( ) s +1070 2896 mt 1133 2896 L +7449 2896 mt 7385 2896 L + 982 2967 mt +( ) s +1070 2789 mt 1133 2789 L +7449 2789 mt 7385 2789 L + 822 2860 mt +(10) s +1070 2085 mt 1133 2085 L +7449 2085 mt 7385 2085 L + 982 2156 mt +( ) s +1070 1674 mt 1133 1674 L +7449 1674 mt 7385 1674 L + 982 1745 mt +( ) s +1070 1382 mt 1133 1382 L +7449 1382 mt 7385 1382 L + 982 1453 mt +( ) s +1070 1155 mt 1133 1155 L +7449 1155 mt 7385 1155 L + 982 1226 mt +( ) s +1070 970 mt 1133 970 L +7449 970 mt 7385 970 L + 982 1041 mt +( ) s +1070 814 mt 1133 814 L +7449 814 mt 7385 814 L + 982 885 mt +( ) s +1070 678 mt 1133 678 L +7449 678 mt 7385 678 L + 982 749 mt +( ) s +1070 558 mt 1133 558 L +7449 558 mt 7385 558 L + 982 629 mt +( ) s +1070 452 mt 1133 452 L +7449 452 mt 7385 452 L + 715 523 mt +(100) s +1070 5360 mt 7449 5360 L +1070 452 mt 7449 452 L +1070 5360 mt 1070 452 L +7449 5360 mt 7449 452 L +gs 1070 452 6380 4909 rc +24 w +gr + +24 w + 48 48 1466 5126 FO + 48 48 1905 5122 FO + 48 48 2344 5119 FO + 48 48 2783 5118 FO + 48 48 3222 5116 FO + 48 48 3660 5113 FO + 48 48 4099 5112 FO + 48 48 4538 5110 FO + 48 48 4977 5109 FO + 48 48 5416 5105 FO + 48 48 5855 5105 FO + 48 48 6294 5104 FO + 48 48 6733 5101 FO + 48 48 7171 5103 FO +gs 1070 452 6380 4909 rc +438 -2 439 -2 439 -2 439 -2 439 -2 439 -2 439 -2 439 -1 +438 -2 439 -3 439 -1 439 -3 439 -4 1466 5126 14 MP stroke +gr + +/c8 { 1.000000 0.400000 0.200000 sr} bdef +c8 + 48 48 1466 5126 FO + 48 48 1905 5088 FO + 48 48 2344 5064 FO + 48 48 2783 5048 FO + 48 48 3222 5029 FO + 48 48 
3660 5010 FO + 48 48 4099 4994 FO + 48 48 4538 4976 FO + 48 48 4977 4959 FO + 48 48 5416 4942 FO + 48 48 5855 4930 FO + 48 48 6294 4914 FO + 48 48 6733 4908 FO + 48 48 7171 4874 FO +gs 1070 452 6380 4909 rc +438 -16 439 -15 439 -15 439 -16 439 -16 439 -16 439 -17 439 -17 +438 -19 439 -18 439 -17 439 -24 439 -38 1466 5126 14 MP stroke +gr + +/c9 { 0.400000 0.400000 1.000000 sr} bdef +c9 + 48 48 1466 5126 FO + 48 48 1905 4860 FO + 48 48 2344 4699 FO + 48 48 2783 4594 FO + 48 48 3222 4483 FO + 48 48 3660 4383 FO + 48 48 4099 4295 FO + 48 48 4538 4214 FO + 48 48 4977 4137 FO + 48 48 5416 4068 FO + 48 48 5855 4008 FO + 48 48 6294 3941 FO + 48 48 6733 3883 FO + 48 48 7171 3827 FO +gs 1070 452 6380 4909 rc +438 -55 439 -57 439 -61 439 -64 439 -70 439 -75 439 -81 439 -89 +438 -100 439 -111 439 -105 439 -161 439 -266 1466 5126 14 MP stroke +gr + +/c10 { 0.000000 0.700000 0.500000 sr} bdef +c10 + 48 48 1466 5126 FO + 48 48 1905 4428 FO + 48 48 2344 3977 FO + 48 48 2783 3675 FO + 48 48 3222 3368 FO + 48 48 3660 3107 FO + 48 48 4099 2893 FO + 48 48 4538 2710 FO + 48 48 4977 2550 FO + 48 48 5416 2412 FO + 48 48 5855 2292 FO + 48 48 6294 2180 FO + 48 48 6733 2083 FO + 48 48 7171 1991 FO +gs 1070 452 6380 4909 rc +438 -91 439 -97 439 -110 439 -121 439 -138 439 -160 439 -183 439 -214 +438 -261 439 -307 439 -302 439 -451 439 -698 1466 5126 14 MP stroke +gr + +0 sg + 617 4557 mt -90 rotate +(Mean number of lexical entries \(tables\)) s +90 rotate +3390 5724 mt +(Word frequency \(n) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + +4963 5820 mt +(w) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +5066 5724 mt +(\)) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +1053 5403 mt +( ) s +7433 494 mt +( ) s +6 w +1 sg +0 500 1510 0 0 -500 1129 1012 4 MP +PP +-1510 0 0 500 1510 0 0 -500 1129 1012 5 MP stroke +4 w +DO +SO +6 w +0 sg +1129 1012 mt 2639 1012 L +1129 512 mt 2639 512 L +1129 1012 mt 
1129 512 L +2639 1012 mt 2639 512 L +1129 1012 mt 2639 1012 L +1129 1012 mt 1129 512 L +1129 1012 mt 2639 1012 L +1129 512 mt 2639 512 L +1129 1012 mt 1129 512 L +2639 1012 mt 2639 512 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1594 713 mt +(Empirical) s +gs 1129 512 1511 501 rc +24 w +gs 1257 523 245 245 rc + 48 48 1379 645 FO +gr + +gr + +24 w +1594 948 mt +(Expectation) s +gs 1129 512 1511 501 rc +358 0 1200 878 2 MP stroke +6 w +gr + +6 w + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/report/pyp_clustering/acl09-short/code/plot1.pdf b/report/pyp_clustering/acl09-short/code/plot1.pdf Binary files differnew file mode 100644 index 00000000..90fcd9ba --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/plot1.pdf diff --git a/report/pyp_clustering/acl09-short/code/plot2.eps b/report/pyp_clustering/acl09-short/code/plot2.eps new file mode 100644 index 00000000..e5c5536a --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/plot2.eps @@ -0,0 +1,552 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The Mathworks, Inc. Version 7.7.0.471 (R2008b). Operating System: Linux 2.6.18-128.1.6.el5.inf.1PAE #1 SMP Wed Apr 15 10:23:41 BST 2009 i686. 
+%%Title: /afs/inf.ed.ac.uk/user/s/sgwater/research/papers/2009/acl09-short/code/plot2.eps +%%CreationDate: 07/23/2009 17:33:05 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox: -44 170 641 672 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] + makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse + exch dup 3 1 roll findfont dup length dict begin + { 1 index /FID ne {def}{pop pop} ifelse } forall + /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse + exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} + {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 
setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 + dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto + neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill + PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef + /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly + vradius add translate hradius vradius scale 0 0 1 180 270 arc + tMatrix setmatrix lrx hradius sub uly vradius add translate + hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix + lrx hradius sub lry vradius sub translate hradius vradius scale + 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub + translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix + closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef 
/lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix + closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix + closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup + ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { + 2 0 asub 3 1 asub 4 0 asub 5 1 asub + dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { + dmat dtri tri_to_matrix tmat1 invertmatrix + smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string + currentfile + 3 index 0 eq {/ASCIIHexDecode filter} + {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } + ifelse exch readstring pop + dup 0 3 index getinterval /rbmap xdef + dup 2 index dup getinterval /gbmap xdef + 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c + cols rows 8 compute_transform + rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: -44 170 641 672 +MathWorks begin +bpage +%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode -0528 8064 csm + + 
0 0 8231 6023 rc +88 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg + 0 0 8232 6024 rf +6 w +0 4908 6379 0 0 -4908 1070 5360 4 MP +PP +-6379 0 0 4908 6379 0 0 -4908 1070 5360 5 MP stroke +4 w +DO +SO +6 w +0 sg +1070 5360 mt 7449 5360 L +1070 452 mt 7449 452 L +1070 5360 mt 1070 452 L +7449 5360 mt 7449 452 L +1070 5360 mt 7449 5360 L +1070 5360 mt 1070 452 L +1247 5360 mt 1247 5296 L +1247 452 mt 1247 515 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1194 5572 mt +(1) s +1780 5360 mt 1780 5296 L +1780 452 mt 1780 515 L +1754 5572 mt +( ) s +2092 5360 mt 2092 5296 L +2092 452 mt 2092 515 L +2066 5572 mt +( ) s +2314 5360 mt 2314 5296 L +2314 452 mt 2314 515 L +2288 5572 mt +( ) s +2485 5360 mt 2485 5296 L +2485 452 mt 2485 515 L +2459 5572 mt +( ) s +2626 5360 mt 2626 5296 L +2626 452 mt 2626 515 L +2600 5572 mt +( ) s +2744 5360 mt 2744 5296 L +2744 452 mt 2744 515 L +2718 5572 mt +( ) s +2847 5360 mt 2847 5296 L +2847 452 mt 2847 515 L +2821 5572 mt +( ) s +2938 5360 mt 2938 5296 L +2938 452 mt 2938 515 L +2912 5572 mt +( ) s +3019 5360 mt 3019 5296 L +3019 452 mt 3019 515 L +2913 5572 mt +(10) s +3552 5360 mt 3552 5296 L +3552 452 mt 3552 515 L +3526 5572 mt +( ) s +3864 5360 mt 3864 5296 L +3864 452 mt 3864 515 L +3838 5572 mt +( ) s +4085 5360 mt 4085 5296 L +4085 452 mt 4085 515 L +4059 5572 mt +( ) s +4257 5360 mt 4257 5296 L +4257 452 mt 4257 515 L +4231 5572 mt +( ) s +4397 5360 mt 4397 5296 L +4397 452 mt 4397 515 L +4371 5572 mt +( ) s +4516 5360 mt 4516 5296 L +4516 452 mt 4516 515 L +4490 5572 mt +( ) s +4619 5360 mt 4619 5296 L +4619 452 mt 4619 515 L +4593 5572 mt +( ) s +4710 
5360 mt 4710 5296 L +4710 452 mt 4710 515 L +4684 5572 mt +( ) s +4791 5360 mt 4791 5296 L +4791 452 mt 4791 515 L +4631 5572 mt +(100) s +5324 5360 mt 5324 5296 L +5324 452 mt 5324 515 L +5298 5572 mt +( ) s +5636 5360 mt 5636 5296 L +5636 452 mt 5636 515 L +5610 5572 mt +( ) s +5857 5360 mt 5857 5296 L +5857 452 mt 5857 515 L +5831 5572 mt +( ) s +6029 5360 mt 6029 5296 L +6029 452 mt 6029 515 L +6003 5572 mt +( ) s +6169 5360 mt 6169 5296 L +6169 452 mt 6169 515 L +6143 5572 mt +( ) s +6288 5360 mt 6288 5296 L +6288 452 mt 6288 515 L +6262 5572 mt +( ) s +6391 5360 mt 6391 5296 L +6391 452 mt 6391 515 L +6365 5572 mt +( ) s +6481 5360 mt 6481 5296 L +6481 452 mt 6481 515 L +6455 5572 mt +( ) s +6563 5360 mt 6563 5296 L +6563 452 mt 6563 515 L +6350 5572 mt +(1000) s +7096 5360 mt 7096 5296 L +7096 452 mt 7096 515 L +7070 5572 mt +( ) s +7408 5360 mt 7408 5296 L +7408 452 mt 7408 515 L +7382 5572 mt +( ) s +1070 5201 mt 1133 5201 L +7449 5201 mt 7385 5201 L + 769 5272 mt +(0.1) s +1070 4725 mt 1133 4725 L +7449 4725 mt 7385 4725 L + 982 4796 mt +( ) s +1070 4446 mt 1133 4446 L +7449 4446 mt 7385 4446 L + 982 4517 mt +( ) s +1070 4248 mt 1133 4248 L +7449 4248 mt 7385 4248 L + 982 4319 mt +( ) s +1070 4095 mt 1133 4095 L +7449 4095 mt 7385 4095 L + 982 4166 mt +( ) s +1070 3969 mt 1133 3969 L +7449 3969 mt 7385 3969 L + 982 4040 mt +( ) s +1070 3863 mt 1133 3863 L +7449 3863 mt 7385 3863 L + 982 3934 mt +( ) s +1070 3771 mt 1133 3771 L +7449 3771 mt 7385 3771 L + 982 3842 mt +( ) s +1070 3690 mt 1133 3690 L +7449 3690 mt 7385 3690 L + 982 3761 mt +( ) s +1070 3618 mt 1133 3618 L +7449 3618 mt 7385 3618 L + 929 3689 mt +(1) s +1070 3141 mt 1133 3141 L +7449 3141 mt 7385 3141 L + 982 3212 mt +( ) s +1070 2863 mt 1133 2863 L +7449 2863 mt 7385 2863 L + 982 2934 mt +( ) s +1070 2665 mt 1133 2665 L +7449 2665 mt 7385 2665 L + 982 2736 mt +( ) s +1070 2511 mt 1133 2511 L +7449 2511 mt 7385 2511 L + 982 2582 mt +( ) s +1070 2386 mt 1133 2386 L +7449 2386 mt 7385 2386 L + 
982 2457 mt +( ) s +1070 2280 mt 1133 2280 L +7449 2280 mt 7385 2280 L + 982 2351 mt +( ) s +1070 2188 mt 1133 2188 L +7449 2188 mt 7385 2188 L + 982 2259 mt +( ) s +1070 2107 mt 1133 2107 L +7449 2107 mt 7385 2107 L + 982 2178 mt +( ) s +1070 2035 mt 1133 2035 L +7449 2035 mt 7385 2035 L + 822 2106 mt +(10) s +1070 1558 mt 1133 1558 L +7449 1558 mt 7385 1558 L + 982 1629 mt +( ) s +1070 1279 mt 1133 1279 L +7449 1279 mt 7385 1279 L + 982 1350 mt +( ) s +1070 1082 mt 1133 1082 L +7449 1082 mt 7385 1082 L + 982 1153 mt +( ) s +1070 928 mt 1133 928 L +7449 928 mt 7385 928 L + 982 999 mt +( ) s +1070 803 mt 1133 803 L +7449 803 mt 7385 803 L + 982 874 mt +( ) s +1070 697 mt 1133 697 L +7449 697 mt 7385 697 L + 982 768 mt +( ) s +1070 605 mt 1133 605 L +7449 605 mt 7385 605 L + 982 676 mt +( ) s +1070 524 mt 1133 524 L +7449 524 mt 7385 524 L + 982 595 mt +( ) s +1070 452 mt 1133 452 L +7449 452 mt 7385 452 L + 715 523 mt +(100) s +1070 5360 mt 7449 5360 L +1070 452 mt 7449 452 L +1070 5360 mt 1070 452 L +7449 5360 mt 7449 452 L +gs 1070 452 6380 4909 rc +24 w +438 -2 439 -1 439 -1 439 -1 439 -2 439 -1 439 -1 439 -2 +438 -1 439 -1 439 -2 439 -1 439 -3 1466 3618 14 MP stroke +/c8 { 1.000000 0.400000 0.200000 sr} bdef +c8 +438 -10 439 -10 439 -11 439 -11 439 -11 439 -11 439 -11 439 -12 +438 -12 439 -13 439 -11 439 -16 439 -26 1466 3618 14 MP stroke +DA +438 -36 439 -37 439 -41 439 -42 439 -46 439 -49 439 -53 439 -57 +438 -64 439 -70 439 -65 439 -95 439 -147 1466 5110 14 MP stroke +SO +/c9 { 0.400000 0.400000 1.000000 sr} bdef +c9 +438 -37 439 -38 439 -42 439 -43 439 -48 439 -51 439 -55 439 -60 +438 -68 439 -75 439 -71 439 -109 439 -180 1466 3618 14 MP stroke +DA +438 -46 439 -49 439 -53 439 -56 439 -63 439 -69 439 -76 439 -85 +438 -100 439 -113 439 -110 439 -169 439 -276 1466 4150 14 MP stroke +SO +/c10 { 0.000000 0.700000 0.500000 sr} bdef +c10 +438 -61 439 -66 439 -75 439 -81 439 -94 439 -108 439 -125 439 -144 +438 -177 439 -208 439 -205 439 -305 439 -473 1466 3618 14 
MP stroke +DA +438 -63 439 -68 439 -77 439 -84 439 -98 439 -112 439 -130 439 -151 +438 -185 439 -218 439 -213 439 -315 439 -484 1466 3710 14 MP stroke +gr + +24 w +c10 +DA +0 sg + 617 4557 mt -90 rotate +(Mean number of lexical entries \(tables\)) s +90 rotate +3390 5724 mt +(Word frequency \(n) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + +4963 5820 mt +(w) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +5066 5724 mt +(\)) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +1053 5403 mt +( ) s +7433 494 mt +( ) s +SO +6 w +1 sg +0 500 2507 0 0 -500 1129 1012 4 MP +PP +-2507 0 0 500 2507 0 0 -500 1129 1012 5 MP stroke +4 w +DO +SO +6 w +0 sg +1129 1012 mt 3636 1012 L +1129 512 mt 3636 512 L +1129 1012 mt 1129 512 L +3636 1012 mt 3636 512 L +1129 1012 mt 3636 1012 L +1129 1012 mt 1129 512 L +1129 1012 mt 3636 1012 L +1129 512 mt 3636 512 L +1129 1012 mt 1129 512 L +3636 1012 mt 3636 512 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1603 713 mt +(Expectation) s +gs 1129 512 2508 501 rc +24 w +365 0 1201 645 2 MP stroke +gr + +24 w +1603 948 mt +(Antoniak approximation) s +gs 1129 512 2508 501 rc +DA +365 0 1201 878 2 MP stroke +SO +6 w +gr + +6 w + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/report/pyp_clustering/acl09-short/code/plot2.pdf b/report/pyp_clustering/acl09-short/code/plot2.pdf Binary files differnew file mode 100644 index 00000000..d9783120 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/plot2.pdf diff --git a/report/pyp_clustering/acl09-short/code/plot3.eps b/report/pyp_clustering/acl09-short/code/plot3.eps new file mode 100644 index 00000000..f4ffbb62 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/plot3.eps @@ -0,0 +1,721 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The Mathworks, Inc. Version 7.7.0.471 (R2008b). 
Operating System: Linux 2.6.18-128.1.6.el5.inf.1PAE #1 SMP Wed Apr 15 10:23:41 BST 2009 i686. +%%Title: /afs/inf.ed.ac.uk/user/s/sgwater/research/papers/2009/acl09-short/code/plot3.eps +%%CreationDate: 07/23/2009 17:31:43 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox: -44 170 641 672 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] + makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse + exch dup 3 1 roll findfont dup length dict begin + { 1 index /FID ne {def}{pop pop} ifelse } forall + /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse + exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} + {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash 
} bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 + dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto + neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill + PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef + /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly + vradius add translate hradius vradius scale 0 0 1 180 270 arc + tMatrix setmatrix lrx hradius sub uly vradius add translate + hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix + lrx hradius sub lry vradius sub translate hradius vradius scale + 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub + translate hradius vradius scale 0 0 1 90 180 arc tMatrix 
setmatrix + closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix + closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix + closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup + ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { + 2 0 asub 3 1 asub 4 0 asub 5 1 asub + dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { + dmat dtri tri_to_matrix tmat1 invertmatrix + smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string + currentfile + 3 index 0 eq {/ASCIIHexDecode filter} + {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } + ifelse exch readstring pop + dup 0 3 index getinterval /rbmap xdef + dup 2 index dup getinterval /gbmap xdef + 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c + cols rows 8 compute_transform + rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: -44 170 641 672 +MathWorks begin +bpage 
+%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode -0528 8064 csm + + 0 0 8231 6023 rc +88 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg + 0 0 8232 6024 rf +6 w +0 4908 6379 0 0 -4908 1070 5360 4 MP +PP +-6379 0 0 4908 6379 0 0 -4908 1070 5360 5 MP stroke +4 w +DO +SO +6 w +0 sg +1070 5360 mt 7449 5360 L +1070 452 mt 7449 452 L +1070 5360 mt 1070 452 L +7449 5360 mt 7449 452 L +1070 5360 mt 7449 5360 L +1070 5360 mt 1070 452 L +1247 5360 mt 1247 5296 L +1247 452 mt 1247 515 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1194 5572 mt +(1) s +1780 5360 mt 1780 5296 L +1780 452 mt 1780 515 L +1754 5572 mt +( ) s +2092 5360 mt 2092 5296 L +2092 452 mt 2092 515 L +2066 5572 mt +( ) s +2314 5360 mt 2314 5296 L +2314 452 mt 2314 515 L +2288 5572 mt +( ) s +2485 5360 mt 2485 5296 L +2485 452 mt 2485 515 L +2459 5572 mt +( ) s +2626 5360 mt 2626 5296 L +2626 452 mt 2626 515 L +2600 5572 mt +( ) s +2744 5360 mt 2744 5296 L +2744 452 mt 2744 515 L +2718 5572 mt +( ) s +2847 5360 mt 2847 5296 L +2847 452 mt 2847 515 L +2821 5572 mt +( ) s +2938 5360 mt 2938 5296 L +2938 452 mt 2938 515 L +2912 5572 mt +( ) s +3019 5360 mt 3019 5296 L +3019 452 mt 3019 515 L +2913 5572 mt +(10) s +3552 5360 mt 3552 5296 L +3552 452 mt 3552 515 L +3526 5572 mt +( ) s +3864 5360 mt 3864 5296 L +3864 452 mt 3864 515 L +3838 5572 mt +( ) s +4085 5360 mt 4085 5296 L +4085 452 mt 4085 515 L +4059 5572 mt +( ) s +4257 5360 mt 4257 5296 L +4257 452 mt 4257 515 L +4231 5572 mt +( ) s +4397 5360 mt 4397 5296 L +4397 452 mt 4397 515 L +4371 5572 mt +( ) s +4516 5360 mt 4516 5296 L +4516 452 mt 4516 515 
L +4490 5572 mt +( ) s +4619 5360 mt 4619 5296 L +4619 452 mt 4619 515 L +4593 5572 mt +( ) s +4710 5360 mt 4710 5296 L +4710 452 mt 4710 515 L +4684 5572 mt +( ) s +4791 5360 mt 4791 5296 L +4791 452 mt 4791 515 L +4631 5572 mt +(100) s +5324 5360 mt 5324 5296 L +5324 452 mt 5324 515 L +5298 5572 mt +( ) s +5636 5360 mt 5636 5296 L +5636 452 mt 5636 515 L +5610 5572 mt +( ) s +5857 5360 mt 5857 5296 L +5857 452 mt 5857 515 L +5831 5572 mt +( ) s +6029 5360 mt 6029 5296 L +6029 452 mt 6029 515 L +6003 5572 mt +( ) s +6169 5360 mt 6169 5296 L +6169 452 mt 6169 515 L +6143 5572 mt +( ) s +6288 5360 mt 6288 5296 L +6288 452 mt 6288 515 L +6262 5572 mt +( ) s +6391 5360 mt 6391 5296 L +6391 452 mt 6391 515 L +6365 5572 mt +( ) s +6481 5360 mt 6481 5296 L +6481 452 mt 6481 515 L +6455 5572 mt +( ) s +6563 5360 mt 6563 5296 L +6563 452 mt 6563 515 L +6350 5572 mt +(1000) s +7096 5360 mt 7096 5296 L +7096 452 mt 7096 515 L +7070 5572 mt +( ) s +7408 5360 mt 7408 5296 L +7408 452 mt 7408 515 L +7382 5572 mt +( ) s +1070 5354 mt 1133 5354 L +7449 5354 mt 7385 5354 L + 982 5425 mt +( ) s +1070 5257 mt 1133 5257 L +7449 5257 mt 7385 5257 L + 982 5328 mt +( ) s +1070 5171 mt 1133 5171 L +7449 5171 mt 7385 5171 L + 929 5242 mt +(1) s +1070 4602 mt 1133 4602 L +7449 4602 mt 7385 4602 L + 982 4673 mt +( ) s +1070 4270 mt 1133 4270 L +7449 4270 mt 7385 4270 L + 982 4341 mt +( ) s +1070 4034 mt 1133 4034 L +7449 4034 mt 7385 4034 L + 982 4105 mt +( ) s +1070 3851 mt 1133 3851 L +7449 3851 mt 7385 3851 L + 982 3922 mt +( ) s +1070 3702 mt 1133 3702 L +7449 3702 mt 7385 3702 L + 982 3773 mt +( ) s +1070 3575 mt 1133 3575 L +7449 3575 mt 7385 3575 L + 982 3646 mt +( ) s +1070 3466 mt 1133 3466 L +7449 3466 mt 7385 3466 L + 982 3537 mt +( ) s +1070 3369 mt 1133 3369 L +7449 3369 mt 7385 3369 L + 982 3440 mt +( ) s +1070 3283 mt 1133 3283 L +7449 3283 mt 7385 3283 L + 822 3354 mt +(10) s +1070 2715 mt 1133 2715 L +7449 2715 mt 7385 2715 L + 982 2786 mt +( ) s +1070 2382 mt 1133 2382 L 
+7449 2382 mt 7385 2382 L + 982 2453 mt +( ) s +1070 2147 mt 1133 2147 L +7449 2147 mt 7385 2147 L + 982 2218 mt +( ) s +1070 1964 mt 1133 1964 L +7449 1964 mt 7385 1964 L + 982 2035 mt +( ) s +1070 1814 mt 1133 1814 L +7449 1814 mt 7385 1814 L + 982 1885 mt +( ) s +1070 1688 mt 1133 1688 L +7449 1688 mt 7385 1688 L + 982 1759 mt +( ) s +1070 1578 mt 1133 1578 L +7449 1578 mt 7385 1578 L + 982 1649 mt +( ) s +1070 1482 mt 1133 1482 L +7449 1482 mt 7385 1482 L + 982 1553 mt +( ) s +1070 1395 mt 1133 1395 L +7449 1395 mt 7385 1395 L + 715 1466 mt +(100) s +1070 827 mt 1133 827 L +7449 827 mt 7385 827 L + 982 898 mt +( ) s +1070 495 mt 1133 495 L +7449 495 mt 7385 495 L + 982 566 mt +( ) s +1070 5360 mt 7449 5360 L +1070 452 mt 7449 452 L +1070 5360 mt 1070 452 L +7449 5360 mt 7449 452 L +gs 1070 452 6380 4909 rc +24 w +438 -1 439 -2 439 -1 439 -2 439 -1 439 -2 439 -1 439 -2 +438 -1 439 -2 439 -1 439 -3 439 -3 1466 5171 14 MP stroke +gr + +24 w + 48 48 1466 5171 FO + 48 48 1905 5168 FO + 48 48 2344 5165 FO + 48 48 2783 5164 FO + 48 48 3222 5162 FO + 48 48 3660 5161 FO + 48 48 4099 5159 FO + 48 48 4538 5158 FO + 48 48 4977 5157 FO + 48 48 5416 5154 FO + 48 48 5855 5154 FO + 48 48 6294 5153 FO + 48 48 6733 5151 FO + 48 48 7171 5153 FO +gs 1070 452 6380 4909 rc +gr + +0 j +-55 95 -55 -95 110 0 1411 5203 4 MP +DP +-55 95 -55 -95 110 0 1850 5200 4 MP +DP +-55 95 -55 -95 110 0 2289 5197 4 MP +DP +-55 95 -55 -95 110 0 2728 5196 4 MP +DP +-55 95 -55 -95 110 0 3167 5194 4 MP +DP +-55 95 -55 -95 110 0 3605 5193 4 MP +DP +-55 95 -55 -95 110 0 4044 5191 4 MP +DP +-55 95 -55 -95 110 0 4483 5190 4 MP +DP +-55 95 -55 -95 110 0 4922 5189 4 MP +DP +-55 95 -55 -95 110 0 5361 5185 4 MP +DP +-55 95 -55 -95 110 0 5800 5186 4 MP +DP +-55 95 -55 -95 110 0 6239 5184 4 MP +DP +-55 95 -55 -95 110 0 6678 5182 4 MP +DP +-55 95 -55 -95 110 0 7116 5187 4 MP +DP +gs 1070 452 6380 4909 rc +/c8 { 1.000000 0.400000 0.200000 sr} bdef +c8 +438 -12 439 -12 439 -13 439 -13 439 -13 439 -13 439 -14 439 -13 
+438 -15 439 -15 439 -14 439 -19 439 -31 1466 5171 14 MP stroke +gr + +c8 + 48 48 1466 5171 FO + 48 48 1905 5140 FO + 48 48 2344 5121 FO + 48 48 2783 5108 FO + 48 48 3222 5092 FO + 48 48 3660 5077 FO + 48 48 4099 5065 FO + 48 48 4538 5050 FO + 48 48 4977 5036 FO + 48 48 5416 5022 FO + 48 48 5855 5013 FO + 48 48 6294 4999 FO + 48 48 6733 4995 FO + 48 48 7171 4967 FO +gs 1070 452 6380 4909 rc +gr + +-55 95 -55 -95 110 0 1411 5203 4 MP +DP +-55 95 -55 -95 110 0 1850 5173 4 MP +DP +-55 95 -55 -95 110 0 2289 5153 4 MP +DP +-55 95 -55 -95 110 0 2728 5139 4 MP +DP +-55 95 -55 -95 110 0 3167 5122 4 MP +DP +-55 95 -55 -95 110 0 3605 5105 4 MP +DP +-55 95 -55 -95 110 0 4044 5089 4 MP +DP +-55 95 -55 -95 110 0 4483 5070 4 MP +DP +-55 95 -55 -95 110 0 4922 5053 4 MP +DP +-55 95 -55 -95 110 0 5361 5036 4 MP +DP +-55 95 -55 -95 110 0 5800 5021 4 MP +DP +-55 95 -55 -95 110 0 6239 5007 4 MP +DP +-55 95 -55 -95 110 0 6678 4984 4 MP +DP +-55 95 -55 -95 110 0 7116 4944 4 MP +DP +gs 1070 452 6380 4909 rc +/c9 { 0.400000 0.400000 1.000000 sr} bdef +c9 +438 -44 439 -46 439 -50 439 -51 439 -57 439 -61 439 -65 439 -72 +438 -81 439 -89 439 -85 439 -130 439 -215 1466 5171 14 MP stroke +gr + +c9 + 48 48 1466 5171 FO + 48 48 1905 4956 FO + 48 48 2344 4826 FO + 48 48 2783 4741 FO + 48 48 3222 4651 FO + 48 48 3660 4571 FO + 48 48 4099 4500 FO + 48 48 4538 4435 FO + 48 48 4977 4372 FO + 48 48 5416 4316 FO + 48 48 5855 4268 FO + 48 48 6294 4214 FO + 48 48 6733 4167 FO + 48 48 7171 4121 FO +gs 1070 452 6380 4909 rc +gr + +-55 95 -55 -95 110 0 1411 5203 4 MP +DP +-55 95 -55 -95 110 0 1850 5057 4 MP +DP +-55 95 -55 -95 110 0 2289 4938 4 MP +DP +-55 95 -55 -95 110 0 2728 4838 4 MP +DP +-55 95 -55 -95 110 0 3167 4706 4 MP +DP +-55 95 -55 -95 110 0 3605 4555 4 MP +DP +-55 95 -55 -95 110 0 4044 4385 4 MP +DP +-55 95 -55 -95 110 0 4483 4185 4 MP +DP +-55 95 -55 -95 110 0 4922 3940 4 MP +DP +-55 95 -55 -95 110 0 5361 3650 4 MP +DP +-55 95 -55 -95 110 0 5800 3310 4 MP +DP +-55 95 -55 -95 110 0 6239 2895 4 
MP +DP +-55 95 -55 -95 110 0 6678 2492 4 MP +DP +-55 95 -55 -95 110 0 7116 2000 4 MP +DP +gs 1070 452 6380 4909 rc +/c10 { 0.000000 0.700000 0.500000 sr} bdef +c10 +438 -74 439 -78 439 -89 439 -98 439 -111 439 -129 439 -148 439 -173 +438 -211 439 -248 439 -244 439 -364 439 -564 1466 5171 14 MP stroke +gr + +c10 + 48 48 1466 5171 FO + 48 48 1905 4607 FO + 48 48 2344 4243 FO + 48 48 2783 3999 FO + 48 48 3222 3751 FO + 48 48 3660 3540 FO + 48 48 4099 3368 FO + 48 48 4538 3220 FO + 48 48 4977 3090 FO + 48 48 5416 2979 FO + 48 48 5855 2882 FO + 48 48 6294 2791 FO + 48 48 6733 2713 FO + 48 48 7171 2638 FO +gs 1070 452 6380 4909 rc +gr + +-55 95 -55 -95 110 0 1411 5203 4 MP +DP +-55 95 -55 -95 110 0 1850 4905 4 MP +DP +-55 95 -55 -95 110 0 2289 4630 4 MP +DP +-55 95 -55 -95 110 0 2728 4368 4 MP +DP +-55 95 -55 -95 110 0 3167 4001 4 MP +DP +-55 95 -55 -95 110 0 3605 3574 4 MP +DP +-55 95 -55 -95 110 0 4044 3123 4 MP +DP +-55 95 -55 -95 110 0 4483 2661 4 MP +DP +-55 95 -55 -95 110 0 4922 2196 4 MP +DP +-55 95 -55 -95 110 0 5361 1735 4 MP +DP +-55 95 -55 -95 110 0 5800 1279 4 MP +DP +-55 95 -55 -95 110 0 6239 873 4 MP +DP +gs 1070 452 6380 4909 rc +gr + +0 sg + 617 4557 mt -90 rotate +(Mean number of lexical entries \(tables\)) s +90 rotate +3390 5724 mt +(Word frequency \(n) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + +4963 5820 mt +(w) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +5066 5724 mt +(\)) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +1053 5403 mt +( ) s +7433 494 mt +( ) s +6 w +1 sg +0 729 2519 0 0 -729 1129 1241 4 MP +PP +-2519 0 0 729 2519 0 0 -729 1129 1241 5 MP stroke +4 w +DO +SO +6 w +0 sg +1129 1241 mt 3648 1241 L +1129 512 mt 3648 512 L +1129 1241 mt 1129 512 L +3648 1241 mt 3648 512 L +1129 1241 mt 3648 1241 L +1129 1241 mt 1129 512 L +1129 1241 mt 3648 1241 L +1129 512 mt 3648 512 L +1129 1241 mt 1129 512 L +3648 1241 mt 3648 512 L 
+%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 192 FMSR + +1609 712 mt +(Expectation) s +gs 1129 512 2520 730 rc +24 w +370 0 1202 644 2 MP stroke +gr + +24 w +1609 945 mt +(Empirical, fixed base) s +gs 1129 512 2520 730 rc +gs 1265 754 245 245 rc + 48 48 1387 876 FO +gr + +gr + +1609 1178 mt +(Empirical, inferred base) s +gs 1129 512 2520 730 rc +gs 1265 986 245 245 rc +-55 95 -55 -95 110 0 1332 1140 4 MP +DP +gr + +6 w +gr + +6 w + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/report/pyp_clustering/acl09-short/code/plot3.pdf b/report/pyp_clustering/acl09-short/code/plot3.pdf Binary files differnew file mode 100644 index 00000000..a3e81faa --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/plot3.pdf diff --git a/report/pyp_clustering/acl09-short/code/pygibbs3.c b/report/pyp_clustering/acl09-short/code/pygibbs3.c new file mode 100644 index 00000000..3c2240a1 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/pygibbs3.c @@ -0,0 +1,198 @@ +#include <stdio.h> +#include <math.h> + +#define myrand() (double) (((unsigned long) randomMT()) / 4294967296.) 
+ +#define W 30114 +#define N 831190 +#define KWMAX 1000 + +#define NLOOPS 1000 +#define BURNIN 0 +#define SAMPLEFREQ 1 + +#define ALPHA 0.0 // PYB a +//#define GAMMA 1000000000.0 +#define GAMMA .01 // Dirichlet over multinomial P0 + +double BETA; // CRP alpha (PYB b) +int w[N], z[N]; // words, table assignments +int typecount[W], typetot; //# of tables of each type, total # tables +int usedcount[W]; +double ztot[W][KWMAX]; +double k; // total # tables +int nactive; + +void initialise(void); +void anderson(void); +void fileread(void); + +void initialise(void) +{ + int i,j; + + for (i = 1; i < W; i++) { + typecount[i] = 0; + usedcount[i] = 0; + for (j = 0; j < KWMAX; j++) { + ztot[i][j] = 0; + } + } + +} + +void anderson(void) //stochastic Anderson-style initialisation +{ + int i,j, tag; + double max, totprob, r, runtot; + double probs[KWMAX]; + int ind, temp; + + ztot[w[0]][0] = 1; + z[0] = 0; + typecount[w[0]] = 1; + usedcount[w[0]] = 1; + k = 1; + typetot = 1; + + for (i = 1; i < N; i++) { + // printf("%5d\n", w[i]); + max = 0; tag = 0; totprob = 0; + for (j = 0; j < usedcount[w[i]]; j++) { + probs[j] = ztot[w[i]][j] - ALPHA; + totprob += probs[j]; + } + probs[usedcount[w[i]]] = (ALPHA*k+BETA)*((double) typecount[w[i]]+GAMMA)/((double) typetot+W*GAMMA); + totprob += probs[usedcount[w[i]]]; + // printf("%10.6lf\n",totprob); + r = myrand()*totprob; + max = probs[0]; + j = 0; + while (r>max) { + j++; + max += probs[j]; + } + // printf("%5d\n",j); + z[i] = j; + ztot[w[i]][j]++; + if (ztot[w[i]][j]==1) { + typecount[w[i]]++; + usedcount[w[i]]++; + if (usedcount[w[i]]==KWMAX) { + printf("Maximum number of tables exceeded!!!\n"); + } + typetot++; + k++; + } + } +} + +void fileread(void) +{ + int i,j, wt; + FILE *fileptr; + + fileptr = fopen("wsj.dat", "r"); + + for (i = 1; i < N; i++) { + fscanf(fileptr, "%d", &wt); + w[i] = wt-1; + z[i] = 0; + } + printf("Total cases: %10d\n", N); + fclose(fileptr); +} + +main(int argc, char* argv[]) +{ + int i,j,loop,run; + int 
temp,ind, tag; + double newprob, WBETA; + double probs[KWMAX]; + double max, totprob, r; + int sampcount; + FILE *fileptr; + char filename[30]; + double score; + + if (argc < 2) { + printf("Please provide a value of b\n"); + exit(0); + } + BETA = strtol(argv[1]); + printf("Basic initialising...\n"); + + // you can seed with any uint32, but the best are odds in 0..(2^32 - 1) + seedMT(4157U); + + sprintf(filename,"typecountrecordwsjpeak%0.1f.%0.1f.dat",ALPHA,BETA); + fileptr = fopen(filename, "w"); + + printf("Reading from file...\n"); + fileread(); + + printf("Initialising...\n"); + initialise(); + printf("k = %1.0f, typetot = %d\n",k,typetot); + + printf("Finding start state...\n"); + anderson(); + printf("Beginning burnin...\n"); + for (loop = 0; loop < NLOOPS; loop++) { + for (i = 0; i < N; i++) { + j = z[i]; + ztot[w[i]][j]--; + if (ztot[w[i]][j] == 0) { + if (j==usedcount[w[i]]) { + usedcount[w[i]]--; + } + typecount[w[i]]--; + typetot--; + k--; + } + max = 0; tag = 0; totprob = 0; + for (j = 0; j <= usedcount[w[i]]; j++) { + if (ztot[w[i]][j] > 0) { + probs[j] = ztot[w[i]][j] - ALPHA; + } else { + probs[j] = 0; + if (tag == 0) { + probs[j] = (ALPHA*k+BETA)*(((double) typecount[w[i]])+GAMMA)/(((double) typetot)+((double) W)*GAMMA); + tag = 1; + } + } + totprob += probs[j]; + } + r = myrand()*totprob; + max = probs[0]; + j = 0; + while (r>max) { + j++; + max += probs[j]; + } + z[i] = j; + ztot[w[i]][j]++; + if (ztot[w[i]][j]==1) { + if (j == usedcount[w[i]]) { + usedcount[w[i]]++; + if (usedcount[w[i]]==KWMAX) { + printf("Maximum number of tables exceeded!!!\n"); + } + } + typecount[w[i]]++; + typetot++; + k++; + } + } + printf("Completed sample # %5d\n", loop); + if (k != typetot) printf("k = %1.0f, typetot = %d\n",k,typetot); + if (loop >= BURNIN && loop % SAMPLEFREQ == 0) { + for (i = 0; i < W; i++) { + fprintf(fileptr," %d", typecount[i]); //print (table?) 
count for each word type + } + fprintf(fileptr,"\n"); + } + } + fclose(fileptr); +} + diff --git a/report/pyp_clustering/acl09-short/code/pygibbs_geom b/report/pyp_clustering/acl09-short/code/pygibbs_geom Binary files differnew file mode 100755 index 00000000..14ae82f1 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/pygibbs_geom diff --git a/report/pyp_clustering/acl09-short/code/pygibbs_geom.c b/report/pyp_clustering/acl09-short/code/pygibbs_geom.c new file mode 100644 index 00000000..bafa0416 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/pygibbs_geom.c @@ -0,0 +1,212 @@ +#include <stdio.h> +#include <math.h> + +#define myrand() (double) (((unsigned long) randomMT()) / 4294967296.) + +#define W 30114 +#define N 831190 +#define KWMAX 5000 + +#define NLOOPS 11000 +#define BURNIN 1000 +#define SAMPLEFREQ 10 + +#define ALPHA 0.0 // PYB a +//#define GAMMA 1000000000.0 +#define GAMMA .01 // Dirichlet over multinomial P0 + +double BETA; // CRP alpha (PYB b) +int w[N], z[N]; // words, table assignments +double base[N]; // base prob of word under geometric +int typecount[W], typetot; //# of tables of each type, total # tables +int usedcount[W]; +double ztot[W][KWMAX]; +double k; // total # tables +int nactive; + +void initialise(void); +void anderson(void); +void fileread(void); + +void initialise(void) +{ + int i,j; + + for (i = 1; i < W; i++) { + typecount[i] = 0; + usedcount[i] = 0; + for (j = 0; j < KWMAX; j++) { + ztot[i][j] = 0; + } + } + +} + +double base_p(int len) { + double p = 1.0/26; + return pow(p,len)*pow(.5,len); //assume p_# = .5 +} + +void anderson(void) //stochastic Anderson-style initialisation +{ + int i,j, tag; + double max, totprob, r, runtot; + double probs[KWMAX]; + int ind, temp; + + ztot[w[0]][0] = 1; + z[0] = 0; + typecount[w[0]] = 1; + usedcount[w[0]] = 1; + k = 1; + typetot = 1; + + for (i = 1; i < N; i++) { + // printf("%5d\n", w[i]); + max = 0; tag = 0; totprob = 0; + for (j = 0; j < usedcount[w[i]]; j++) { + probs[j] 
= ztot[w[i]][j] - ALPHA; + totprob += probs[j]; + } + probs[usedcount[w[i]]] = (ALPHA*k+BETA)*base[i]; + totprob += probs[usedcount[w[i]]]; + // printf("%10.6lf\n",totprob); + r = myrand()*totprob; + max = probs[0]; + j = 0; + while (r>max) { + j++; + max += probs[j]; + } + // printf("%5d\n",j); + z[i] = j; + ztot[w[i]][j]++; + if (ztot[w[i]][j]==1) { + typecount[w[i]]++; + usedcount[w[i]]++; + if (usedcount[w[i]]==KWMAX) { + printf("Maximum number of tables exceeded!!!\n"); + } + typetot++; + k++; + } + } +} + +void fileread(void) +{ + int i,j, wt, len; + FILE *fileptr; + + fileptr = fopen("wsj.dat", "r"); + + for (i = 1; i < N; i++) { + fscanf(fileptr, "%d", &wt); + w[i] = wt-1; + z[i] = 0; + } + printf("Total cases: %10d\n", N); + fclose(fileptr); + + fileptr = fopen("wsj_lengths.dat", "r"); + + for (i = 1; i < N; i++) { + fscanf(fileptr, "%d", &len); + base[i] = base_p(len); + } + fclose(fileptr); +} + +main(int argc, char* argv[]) +{ + int i,j,loop,run; + int temp,ind, tag; + double newprob, WBETA; + double probs[KWMAX]; + double max, totprob, r; + int sampcount; + FILE *fileptr; + char filename[30]; + double score; + + if (argc < 2) { + printf("Please provide a value of b\n"); + exit(0); + } + BETA = strtol(argv[1]); + printf("Basic initialising...\n"); + + // you can seed with any uint32, but the best are odds in 0..(2^32 - 1) + seedMT(4157U); + + sprintf(filename,"typecountrecordwsjgeom%0.1f.%0.1f.dat",ALPHA,BETA); + fileptr = fopen(filename, "w"); + + printf("Reading from file...\n"); + fileread(); + + printf("Initialising...\n"); + initialise(); + printf("k = %1.0f, typetot = %d\n",k,typetot); + + printf("Finding start state...\n"); + anderson(); + printf("Beginning burnin...\n"); + for (loop = 0; loop < NLOOPS; loop++) { + for (i = 0; i < N; i++) { + j = z[i]; + ztot[w[i]][j]--; + if (ztot[w[i]][j] == 0) { + if (j==usedcount[w[i]]) { + usedcount[w[i]]--; + } + typecount[w[i]]--; + typetot--; + k--; + } + max = 0; tag = 0; totprob = 0; + for (j = 0; j <= 
usedcount[w[i]]; j++) { + if (ztot[w[i]][j] > 0) { + probs[j] = ztot[w[i]][j] - ALPHA; + } else { + probs[j] = 0; + if (tag == 0) { + probs[j] = (ALPHA*k+BETA)*base[i]; + tag = 1; + } + } + totprob += probs[j]; + } + r = myrand()*totprob; + max = probs[0]; + j = 0; + while (r>max) { + j++; + max += probs[j]; + } + z[i] = j; + ztot[w[i]][j]++; + if (ztot[w[i]][j]==1) { + if (j == usedcount[w[i]]) { + usedcount[w[i]]++; + if (usedcount[w[i]]==KWMAX) { + printf("Maximum number of tables exceeded!!!\n"); + } + } + typecount[w[i]]++; + typetot++; + k++; + } + } + printf("Completed sample # %5d\n", loop); + if (k != typetot) printf("k = %1.0f, typetot = %d\n",k,typetot); + if (loop >= BURNIN && loop % SAMPLEFREQ == 0) { + for (i = 0; i < W; i++) { + fprintf(fileptr," %d", typecount[i]); //print (table?) count for each word type + } + fprintf(fileptr,"\n"); + } + } + fclose(fileptr); +} + diff --git a/report/pyp_clustering/acl09-short/code/run-peak.prl b/report/pyp_clustering/acl09-short/code/run-peak.prl new file mode 100755 index 00000000..fb1e798a --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/run-peak.prl @@ -0,0 +1,8 @@ +#!/usr/bin/perl + +for $i (0..5) { +$beta = 10**$i; +$cmd = "pygibbs_peak $beta\n"; +print $cmd; +`$cmd`; +} diff --git a/report/pyp_clustering/acl09-short/code/run.prl b/report/pyp_clustering/acl09-short/code/run.prl new file mode 100755 index 00000000..ac69559c --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/run.prl @@ -0,0 +1,8 @@ +#!/usr/bin/perl + +for $i (0..5) { +$beta = 10**$i; +$cmd = "pygibbs_geom $beta\n"; +print $cmd; +`$cmd`; +} diff --git a/report/pyp_clustering/acl09-short/code/word_lengths.prl b/report/pyp_clustering/acl09-short/code/word_lengths.prl new file mode 100755 index 00000000..4b4ed03b --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/word_lengths.prl @@ -0,0 +1,21 @@ +#!/usr/bin/perl -w +use Getopt::Std; +use File::Basename; +use List::Util qw(max maxstr min minstr reduce shuffle sum); +use 
lib "$ENV{HOME}/src/perl/"; +use sg_utils; +use strict; +use vars qw(); + +my $usage = "Usage: $0 \n"; + +getopts(''); + +die $usage unless (1); + +while (<>) { +chomp; +print length; +print "\n"; +} + diff --git a/report/pyp_clustering/acl09-short/code/wsjplots2.m b/report/pyp_clustering/acl09-short/code/wsjplots2.m new file mode 100644 index 00000000..eed41846 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/wsjplots2.m @@ -0,0 +1,99 @@ + +load wsj + +figure(1) +clf +subplot(1,2,2) +hold on + +for i = 1:9 + a = i/10; + [logbins predicted dummy] = logbinmean(counts,counts.^a,20,20); + ph = plot(log10(logbins),log10(predicted),'k'); + set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) +end + +for i = 1:9 + a = i/10; + disp(['Loading results for a = ' num2str(a) ]); + + typecountrecord= load([ 'typecountrecordwsjflat' num2str(a) '.1.0.dat']); + + typecountrecordmean = mean(typecountrecord(500:1000,:)); + + save([ 'typecountrecordmeanwsjflat' num2str(a) '.1.0.mat'],'typecountrecordmean'); + + [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20) + errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); + drawnow +end + + + + +[logbins meanval seval] = logbinmean(counts,counts,20,20) +[logbins predicted dummy] = logbinmean(counts,counts,20,20) +ph = plot(log10(logbins),log10(predicted),'r'); +hold on +errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); + +set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-0.1 3.5]) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+ ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... + ' ', ' ', ' ', ' '}); + +title('Pitman-Yor process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +box on + +subplot(1,2,1) + +for i = 1:5 + + b = 10^(i-1) + + disp(['Loading results for b = ' num2str(b) ]); + typecountrecord= load([ 'typecountrecordwsjflat0.0.' num2str(b) '.0.dat']); + + typecountrecordmean = mean(typecountrecord(500:1000,:)); + save([ 'typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); + + [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20) + [logbins predicted dummy] = logbinmean(counts,crppred(counts,b),20,20) +% errorbar(log10(logbins),meanval,seval,'k.'); + hold on + ph = plot(log10(logbins),log10(predicted),'r'); + % ph = plot(log10(logbins),predicted,'r'); + set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) + errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-0.1 1.5]) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... + ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+ ' ', ' ', ' ', ' '}); +title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +box on + + diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl.m new file mode 100644 index 00000000..50582e7f --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/wsjplots_acl.m @@ -0,0 +1,74 @@ +%wsj_lengths = load([ 'wsj_lengths.dat']); +%save([ 'wsj_lengths.mat'],'wsj_lengths'); +load wsj +load wsj_lengths + +figure(1) +clf + +hold on + +for i = 3:6 + + b = 10^(i-1) + + % plot lines for CRP exact prediction using summation + [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20); + ph = plot(log10(logbins),log10(predicted),'r'); + set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) + + % plot lines for CRP Antoniak prediction + [logbins predicted dummy] = logbinmean(counts, antoniakpred(counts,b),20,20); + ph = plot(log10(logbins),log10(predicted),'r'); + set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','--') + + % plot lines for CRP Cohn prediction + %[logbins predicted dummy] = logbinmean(counts, cohnpred(counts,b),20,20); + %ph = plot(log10(logbins),log10(predicted),'r'); + %set(ph,'color',[0.2 0.2 1],'linewidth',1.5,'linestyle','.') + + disp(['Loading results for b = ' num2str(b) ]); + %%% uncomment these lines if .mat file is not yet generated. %%% + %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']); + %typecountrecordmean = mean(typecountrecord(:,:)); + %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); + load([ 'outputs/typecountrecordmeanwsjflat0.0.' 
num2str(b) '.0.mat']); + + %plot emprical counts with error bars + [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); + plot(log10(logbins),log10(meanval),'k*'); + %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); + + disp(['Loading results for b = ' num2str(b) ]); + %%% uncomment these lines if .mat file is not yet generated. %%% + %typecountrecord= load([ 'outputs/typecountrecordwsjpeak0.0.' num2str(b) '.0.dat']); + %typecountrecordmean = mean(typecountrecord(:,:)); + %save([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); + load([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat']); + + %plot emprical counts with error bars + [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); + plot(log10(logbins),log10(meanval),'ko'); + %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'ko'); + +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-1.1 2.5]) +set(gca,'FontSize',14) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... + ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+ ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +legend('Expectation','Antoniak approx.','Empirical, fixed base','Empirical, inferred base','Location','NorthWest') +box on diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_monkeys.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_monkeys.m new file mode 100644 index 00000000..33419845 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/wsjplots_acl_monkeys.m @@ -0,0 +1,164 @@ +%wsj_lengths = load([ 'wsj_lengths.dat']); +%save([ 'wsj_lengths.mat'],'wsj_lengths'); +load wsj +load wsj_lengths + +figure(1) +clf + +subplot(1,3,1); +hold on + +for i = 2:6 + + b = 10^(i-1) + + disp(['Loading results for b = ' num2str(b) ]); +%%% uncomment these lines if .mat file is not yet generated. %%% + %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']); + %typecountrecordmean = mean(typecountrecord(500:999,:)); + %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); + load([ 'outputs/typecountrecordmeanwsjflat0.0.' 
num2str(b) '.0.mat']); + + % plot lines for CRP exact prediction using summation + [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20); + ph = plot(log10(logbins),log10(predicted),'r'); + set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) + + % plot lines for CRP Antoniak prediction + [logbins predicted dummy] = logbinmean(counts, antoniakpred(counts,b),20,20); + ph = plot(log10(logbins),log10(predicted),'r'); + set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','--') + + % plot lines for incorrect CRP Antoniak prediction (ACL07) + %[logbins predicted dummy] = logbinmean(counts, noP0pred(counts,b),20,20); + %ph = plot(log10(logbins),log10(predicted),'r'); + %set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','-.') + + % plot lines for CRP Cohn prediction + %[logbins predicted dummy] = logbinmean(counts, cohnpred(counts,b),20,20); + %ph = plot(log10(logbins),log10(predicted),'r'); + %set(ph,'color',[0.2 0.2 1],'linewidth',1.5,'linestyle','.') + + %plot emprical counts with error bars + [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); + errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-1.1 1.5]) +set(gca,'FontSize',14) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... + ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+ ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +legend('Expectation','Antoniak approx.','Empirical','Location','NorthWest') +box on + + +subplot(1,3,2); +hold on + +for i =2:6 + + b = 10^(i-1) + + disp(['Loading results for b = ' num2str(b) ]); +%%% uncomment these lines if .mat file is not yet generated. %%% + %typecountrecord= load([ 'outputs/typecountrecordwsjpeak0.0.' num2str(b) '.0.dat']); + %typecountrecordmean = mean(typecountrecord(500:999,:)); + %save([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); + load([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat']); + + % plot lines for CRP exact prediction using summation + [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20); + ph = plot(log10(logbins),log10(predicted),'r'); + set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) + + %plot emprical counts with error bars + [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); + errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-.1 2.5]) +set(gca,'FontSize',14) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... + ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {...%'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+ ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +legend('Expectation','Location','NorthWest') +box on +%axis square + + +subplot(1,3,3); +hold on + +for i =2:6 + + b = 10^(i-1) + + disp(['Loading results for b = ' num2str(b) ]); +%%% uncomment these lines if .mat file is not yet generated. %%% + %typecountrecord= load([ 'outputs/typecountrecordwsjgeom0.0.' num2str(b) '.0.dat']); + %typecountrecordmean = mean(typecountrecord(500:999,:)); + %save([ 'outputs/typecountrecordmeanwsjgeom0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); + load([ 'outputs/typecountrecordmeanwsjgeom0.0.' num2str(b) '.0.mat']); + + % plot lines for CRP exact prediction using summation +% [logbins meaneval seval] = logbinmean(counts, crppred_geom(counts,wsj_lengths,b),20,20) +[logbins meaneval seval] = logbinmean(counts, crppred(counts,b),20,20) + plot(log10(logbins),log10(meaneval),'r.'); +%errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'r.'); +% ph = plot(log10(logbins),log10(meaneval),'r'); +% set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) + + %plot emprical counts with error bars + [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); + errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-.1 2.5]) +set(gca,'FontSize',14) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... + ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {...%'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... 
+ '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... + ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +legend('Expectation','Location','NorthWest') +box on +hold off +%axis square + + diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_pair.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_pair.m new file mode 100644 index 00000000..1d07e54c --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/wsjplots_acl_pair.m @@ -0,0 +1,117 @@ +%wsj_lengths = load([ 'wsj_lengths.dat']); +%save([ 'wsj_lengths.mat'],'wsj_lengths'); +load wsj +load wsj_lengths + +figure(1) +clf + +subplot(1,2,1); +hold on + +for i = 3:6 + + b = 10^(i-1) + + disp(['Loading results for b = ' num2str(b) ]); +%%% uncomment these lines if .mat file is not yet generated. %%% + %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']); + %typecountrecordmean = mean(typecountrecord(:,:)); + %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); + load([ 'outputs/typecountrecordmeanwsjflat0.0.' 
num2str(b) '.0.mat']); + + % plot lines for CRP exact prediction using summation + [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20); + ph = plot(log10(logbins),log10(predicted),'r'); + set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) + + % plot lines for CRP Antoniak prediction + [logbins predicted dummy] = logbinmean(counts, antoniakpred(counts,b),20,20); + ph = plot(log10(logbins),log10(predicted),'r'); + set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','--') + + %plot lines for incorrect CRP Antoniak prediction (ACL07) + %[logbins predicted dummy] = logbinmean(counts, noP0pred(counts,b),20,20); + %ph = plot(log10(logbins),log10(predicted),'r'); + %set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle',':') + + % plot lines for CRP Cohn prediction + %[logbins predicted dummy] = logbinmean(counts, cohnpred(counts,b),20,20); + %ph = plot(log10(logbins),log10(predicted),'r'); + %set(ph,'color',[0.2 0.2 1],'linewidth',1.5,'linestyle','.') + + %plot emprical counts with error bars + [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); + errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-1.1 1.5]) +set(gca,'FontSize',14) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... + ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+ ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +legend('Expectation','Antoniak approx.','Empirical','Location','NorthWest') +box on + + +subplot(1,2,2); +hold on + +for i =3:6 + + b = 10^(i-1) + + disp(['Loading results for b = ' num2str(b) ]); +%%% uncomment these lines if .mat file is not yet generated. %%% + %typecountrecord= load([ 'outputs/typecountrecordwsjpeak0.0.' num2str(b) '.0.dat']); + %typecountrecordmean = mean(typecountrecord(:,:)); + %save([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); + load([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat']); + + % plot lines for CRP exact prediction using summation + [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20); + ph = plot(log10(logbins),log10(predicted),'r'); + set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5) + + %plot lines for incorrect CRP Antoniak prediction (ACL07) + [logbins predicted dummy] = logbinmean(counts, noP0pred(counts,b),20,20); + ph = plot(log10(logbins),log10(predicted),'r'); + set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5,'linestyle','-.') + + %plot emprical counts with error bars + [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); + errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-.1 2.5]) +set(gca,'FontSize',14) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... + ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {...%'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... 
+ '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... + ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries') +xlabel('Word frequency (n_w)') +legend('Expectation','GGJ07 approx.','Empirical','Location','NorthWest') +box on +%axis square
\ No newline at end of file diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk0.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk0.m new file mode 100644 index 00000000..dc54dea4 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk0.m @@ -0,0 +1,54 @@ +%wsj_lengths = load([ 'wsj_lengths.dat']); +%save([ 'wsj_lengths.mat'],'wsj_lengths'); +load wsj +load wsj_lengths + +figure(1) +clf + +hold on + +%colors = [0 0 0; 0 0 1; 1 0 0; 0 1 0]; %pure black, red, blue, green +colors = [0 0 0; 1 .4 .2; .4 .4 1; 0 .7 .5]; %same but less garish +%colors = [0 0 0; .6 .4 .4; .9 .6 .6; 1 .8 .8]; %shades of pink +%colors = [0 0 0; .3 .3 1; .4 .8 1; .5 1 .8]; %blue/green + +for i = 9-[3:6] + + b = 10^(i-1) + + disp(['Loading results for b = ' num2str(b) ]); + %%% uncomment these lines if .mat file is not yet generated. %%% + %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']); + %typecountrecordmean = mean(typecountrecord(:,:)); + %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); + load([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat']); + + %plot emprical counts with error bars + [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); + ph = plot(log10(logbins),log10(meanval)); + set(ph,'color',colors(i-2,:),'linestyle','o','linewidth',2,'markersize',10); + %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); + +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-.1 2]) +set(gca,'FontSize',16) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+ ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... + ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries (tables)') +xlabel('Word frequency (n_w)') +labs = {'\alpha = 100000','\alpha = 10000','\alpha = 1000','\alpha = 100'}; +legend(labs,'Location','NorthWest') +box on diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk1.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk1.m new file mode 100644 index 00000000..dd3615ac --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk1.m @@ -0,0 +1,59 @@ +%wsj_lengths = load([ 'wsj_lengths.dat']); +%save([ 'wsj_lengths.mat'],'wsj_lengths'); +load wsj +load wsj_lengths + +figure(1) +clf + +hold on + +%colors = [0 0 0; 0 0 1; 1 0 0; 0 1 0]; %pure black, red, blue, green +colors = [0 0 0; 1 .4 .2; .4 .4 1; 0 .7 .5]; %same but less garish +%colors = [0 0 0; .6 .4 .4; .9 .6 .6; 1 .8 .8]; %shades of pink +%colors = [0 0 0; .3 .3 1; .4 .8 1; .5 1 .8]; %blue/green + +for i = 3:6 + + b = 10^(i-1) + + disp(['Loading results for b = ' num2str(b) ]); + %%% uncomment these lines if .mat file is not yet generated. %%% + %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']); + %typecountrecordmean = mean(typecountrecord(:,:)); + %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); + load([ 'outputs/typecountrecordmeanwsjflat0.0.' 
num2str(b) '.0.mat']); + + %plot emprical counts with error bars + [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20); + ph = plot(log10(logbins),log10(meanval)); + set(ph,'color',colors(i-2,:),'linestyle','o','linewidth',2,'markersize',8); + %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.'); + + % plot lines for CRP exact prediction using summation + [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20); + ph = plot(log10(logbins),log10(predicted),'r'); + set(ph,'color',colors(i-2,:),'linewidth',2); + +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-.1 2]) +set(gca,'FontSize',16) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... + ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+ ' ', ' ', ' ', ' '}); +%title('Chinese restaurant process adaptor') +ylabel('Mean number of lexical entries (tables)') +xlabel('Word frequency (n_w)') +labs = {'Empirical','Expectation'}; +legend(labs,'Location','NorthWest') +box on diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk2.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk2.m new file mode 100644 index 00000000..dd039289 --- /dev/null +++ b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk2.m @@ -0,0 +1,58 @@ +%wsj_lengths = load([ 'wsj_lengths.dat']); +%save([ 'wsj_lengths.mat'],'wsj_lengths'); +load wsj +load wsj_lengths + +figure(1) +clf + +hold on + +%colors = [0 0 0; 0 0 1; 1 0 0; 0 1 0]; %pure black, red, blue, green +colors = [0 0 0; 1 .4 .2; .4 .4 1; 0 .7 .5]; %same but less garish +%colors = [0 0 0; .6 .4 .4; .9 .6 .6; 1 .8 .8]; %shades of pink +%colors = [0 0 0; .3 .3 1; .4 .8 1; .5 1 .8]; %blue/green + +for i = 3:6 + + b = 10^(i-1) + + disp(['Loading results for b = ' num2str(b) ]); + %%% uncomment these lines if .mat file is not yet generated. %%% + %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']); + %typecountrecordmean = mean(typecountrecord(:,:)); + %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean'); + load([ 'outputs/typecountrecordmeanwsjflat0.0.' 
num2str(b) '.0.mat']); + + % plot lines for CRP exact prediction using summation + [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20); + ph = plot(log10(logbins),log10(predicted),'r'); + set(ph,'color',colors(i-2,:),'linewidth',2); + + % plot lines for CRP Antoniak prediction + [logbins predicted dummy] = logbinmean(counts, antoniakpred(counts,b),20,20); + ph = plot(log10(logbins),log10(predicted),'r'); + set(ph,'color',colors(i-2,:),'linewidth',2,'linestyle','--') + +end + +set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000])) +set(gca,'xlim',[-0.1 3.5]) +set(gca,'ylim',[-1.1 2]) +set(gca,'FontSize',16) +set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... + ' ', ' ', ' ', ' '}); +set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ... + '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ... + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ... 
+ ' ', ' ', ' ', ' '});
+%title('Chinese restaurant process adaptor')
+ylabel('Mean number of lexical entries (tables)')
+xlabel('Word frequency (n_w)')
+labs = {'Expectation','Antoniak approximation'};
+legend(labs,'Location','NorthWest')
+box on
diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m
new file mode 100644
index 00000000..8d570b7a
--- /dev/null
+++ b/report/pyp_clustering/acl09-short/code/wsjplots_acl_talk3.m
@@ -0,0 +1,84 @@
+%WSJPLOTS_ACL_TALK3  Plot CRP expected vs. sampled table counts (figure 1).
+%   For b = 1e2, 1e3, 1e4 and 1e5 this script draws, on log10/log10 axes:
+%     - a solid line: log-binned mean of crppred(counts,b);
+%     - 'o' markers: log-binned typecountrecordmean from the "wsjflat" file;
+%     - '^' markers: log-binned typecountrecordmean from the "wsjpeak" file.
+%   logbinmean and crppred are defined outside this script.
+%   NOTE(review): 'counts' is presumably supplied by wsj.mat -- confirm.
+
+%wsj_lengths = load([ 'wsj_lengths.dat']);
+%save([ 'wsj_lengths.mat'],'wsj_lengths');
+load wsj
+load wsj_lengths
+
+figure(1)
+clf
+
+hold on
+
+%colors = [0 0 0; 0 0 1; 1 0 0; 0 1 0]; %pure black, red, blue, green
+colors = [0 0 0; 1 .4 .2; .4 .4 1; 0 .7 .5]; %similar but less garish
+%colors = [0 0 0; .6 .4 .4; .9 .6 .6; 1 .8 .8]; %shades of pink
+%colors = [0 0 0; .3 .3 1; .4 .8 1; .5 1 .8]; %blue/green
+
+for i = 3:6
+    col = colors(i-2,:);   % one colour per value of b
+    b = 10^(i-1);          % echo suppressed; b is reported by disp below
+
+    % plot lines for CRP exact prediction using summation
+    [logbins predicted dummy] = logbinmean(counts, crppred(counts,b),20,20);
+    ph = plot(log10(logbins),log10(predicted));
+    set(ph,'color',col,'linewidth',2);
+
+    disp(['Loading results for b = ' num2str(b) ]);
+    %%% uncomment these lines if .mat file is not yet generated. %%%
+    %typecountrecord= load([ 'outputs/typecountrecordwsjflat0.0.' num2str(b) '.0.dat']);
+    %typecountrecordmean = mean(typecountrecord(:,:));
+    %save([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
+    load([ 'outputs/typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat']);
+
+    % plot empirical counts as markers only (the errorbar call below is disabled)
+    [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
+    ph = plot(log10(logbins),log10(meanval));
+    %set(ph,'color',col,'linestyle','o','markerfacecolor',col,'markersize',8);
+    % 'o' is a Marker value, not a LineStyle value, so set both properties explicitly.
+    set(ph,'color',col,'linestyle','none','marker','o','linewidth',2,'markersize',8);
+    %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'k.');
+
+    disp(['Loading results for b = ' num2str(b) ]);
+    %%% uncomment these lines if .mat file is not yet generated. %%%
+    %typecountrecord= load([ 'outputs/typecountrecordwsjpeak0.0.' num2str(b) '.0.dat']);
+    %typecountrecordmean = mean(typecountrecord(:,:));
+    %save([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
+    load([ 'outputs/typecountrecordmeanwsjpeak0.0.' num2str(b) '.0.mat']);
+
+    % plot empirical counts as markers only (the errorbar call below is disabled)
+    [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
+    ph = plot(log10(logbins),log10(meanval));
+    %set(ph,'color',col,'linestyle','^','markerfacecolor',col,'markersize',8);
+    % '^' is likewise a Marker value, not a LineStyle value.
+    set(ph,'color',col,'linestyle','none','marker','^','linewidth',2,'markersize',8);
+    %errorbar(log10(logbins),log10(meanval),log10(meanval+seval)-log10(meanval),log10(meanval-seval)-log10(meanval),'ko');
+
+end
+
+set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
+set(gca,'ytick',log10([.1:.1:1 2:10 20:10:100 200:100:1000 2000:1000:5000]))
+set(gca,'xlim',[-0.1 3.5])
+set(gca,'ylim',[-.1 2.5])
+set(gca,'FontSize',16)
+set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
+    ' ', ' ', ' ', ' '});
+set(gca,'yticklabel', {'0.1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+    '1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
+ ' ', ' ', ' ', ' '});
+%title('Chinese restaurant process adaptor')
+ylabel('Mean number of lexical entries (tables)')
+xlabel('Word frequency (n_w)')
+labs = {'Expectation','Empirical, fixed base','Empirical, inferred base'};
+legend(labs,'Location','NorthWest')
+box on
diff --git a/report/pyp_clustering/acl09-short/code/wsjplots_cl.m b/report/pyp_clustering/acl09-short/code/wsjplots_cl.m
new file mode 100644
index 00000000..eed41846
--- /dev/null
+++ b/report/pyp_clustering/acl09-short/code/wsjplots_cl.m
@@ -0,0 +1,114 @@
+%WSJPLOTS_CL  Two-panel plot of table counts against word frequency (figure 1).
+%   Right panel, subplot(1,2,2): for a = 0.1, ..., 0.9, grey curves of the
+%   log-binned mean of counts.^a, with error bars for the log-binned
+%   typecountrecordmean of each "wsjflat<a>.1.0" run, followed by the curve
+%   and error bars obtained from logbinmean(counts,counts).
+%   Left panel, subplot(1,2,1): for b = 1, 10, 100, 1e3 and 1e4, grey curves
+%   of the log-binned mean of crppred(counts,b), with error bars for each run.
+%   Side effect: the mean over rows 500:1000 of every loaded .dat file is
+%   saved to a correspondingly named .mat file.
+%   logbinmean and crppred are defined outside this script.
+%   NOTE(review): 'counts' is presumably supplied by wsj.mat -- confirm.
+
+load wsj
+
+figure(1)
+clf
+subplot(1,2,2)
+hold on
+
+% grey reference curves: log-binned mean of counts.^a for a = 0.1, ..., 0.9
+for i = 1:9
+    a = i/10;
+    [logbins predicted dummy] = logbinmean(counts,counts.^a,20,20);
+    ph = plot(log10(logbins),log10(predicted),'k');
+    set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
+end
+
+for i = 1:9
+    a = i/10;
+    disp(['Loading results for a = ' num2str(a) ]);
+
+    typecountrecord= load([ 'typecountrecordwsjflat' num2str(a) '.1.0.dat']);
+
+    typecountrecordmean = mean(typecountrecord(500:1000,:));
+
+    save([ 'typecountrecordmeanwsjflat' num2str(a) '.1.0.mat'],'typecountrecordmean');
+
+    [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
+    % errorbar(x,y,L,U): L is the length below y and U the length above it,
+    % so the (meanval-seval) distance goes first and the (meanval+seval) one second.
+    errorbar(log10(logbins),log10(meanval),log10(meanval)-log10(meanval-seval),log10(meanval+seval)-log10(meanval),'k.');
+    drawnow
+end
+
+
+
+
+[logbins meanval seval] = logbinmean(counts,counts,20,20);
+[logbins predicted dummy] = logbinmean(counts,counts,20,20);
+ph = plot(log10(logbins),log10(predicted),'r');
+hold on
+% lower length first, then upper length, as errorbar(x,y,L,U) expects
+errorbar(log10(logbins),log10(meanval),log10(meanval)-log10(meanval-seval),log10(meanval+seval)-log10(meanval),'k.');
+
+set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
+
+set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
+set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
+set(gca,'xlim',[-0.1 3.5])
+set(gca,'ylim',[-0.1 3.5])
+set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
+    ' ', ' ', ' ', ' '});
+set(gca,'yticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
+    ' ', ' ', ' ', ' '});
+
+title('Pitman-Yor process adaptor')
+ylabel('Mean number of lexical entries')
+xlabel('Word frequency (n_w)')
+box on
+
+subplot(1,2,1)
+
+for i = 1:5
+
+    b = 10^(i-1);
+
+    disp(['Loading results for b = ' num2str(b) ]);
+    typecountrecord= load([ 'typecountrecordwsjflat0.0.' num2str(b) '.0.dat']);
+
+    typecountrecordmean = mean(typecountrecord(500:1000,:));
+    save([ 'typecountrecordmeanwsjflat0.0.' num2str(b) '.0.mat'],'typecountrecordmean');
+
+    [logbins meanval seval] = logbinmean(counts,typecountrecordmean,20,20);
+    [logbins predicted dummy] = logbinmean(counts,crppred(counts,b),20,20);
+%    errorbar(log10(logbins),meanval,seval,'k.');
+    hold on
+    ph = plot(log10(logbins),log10(predicted),'r');
+    % ph = plot(log10(logbins),predicted,'r');
+    set(ph,'color',[0.7 0.7 0.7],'linewidth',1.5)
+    % lower length first, then upper length, as errorbar(x,y,L,U) expects
+    errorbar(log10(logbins),log10(meanval),log10(meanval)-log10(meanval-seval),log10(meanval+seval)-log10(meanval),'k.');
+end
+
+set(gca,'xtick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
+set(gca,'ytick',log10([1:10 20:10:100 200:100:1000 2000:1000:5000]))
+set(gca,'xlim',[-0.1 3.5])
+set(gca,'ylim',[-0.1 1.5])
+set(gca,'xticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
+    ' ', ' ', ' ', ' '});
+set(gca,'yticklabel', {'1',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ...
+    '10',' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '100', ...
+    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '1000', ...
+    ' ', ' ', ' ', ' '});
+title('Chinese restaurant process adaptor')
+ylabel('Mean number of lexical entries')
+xlabel('Word frequency (n_w)')
+box on
+
+
|