summaryrefslogtreecommitdiff
path: root/report/viewer
diff options
context:
space:
mode:
Diffstat (limited to 'report/viewer')
-rw-r--r--report/viewer/PMingLiU.ttfbin19389344 -> 0 bytes
-rw-r--r--report/viewer/alignment.py162
-rwxr-xr-xreport/viewer/display.py22
-rw-r--r--report/viewer/render.py128
4 files changed, 0 insertions, 312 deletions
diff --git a/report/viewer/PMingLiU.ttf b/report/viewer/PMingLiU.ttf
deleted file mode 100644
index 03e923bf..00000000
--- a/report/viewer/PMingLiU.ttf
+++ /dev/null
Binary files differ
diff --git a/report/viewer/alignment.py b/report/viewer/alignment.py
deleted file mode 100644
index 5fe03734..00000000
--- a/report/viewer/alignment.py
+++ /dev/null
@@ -1,162 +0,0 @@
class Alignment(object):
    """Word alignment between a source and a target word sequence.

    Attributes:
        swords: the source-side words.
        twords: the target-side words.
        align: dict mapping ``(source_index, target_index)`` to a
            confidence label (see SURE / POSSIBLE below).
    """

    # Confidence labels attached to individual alignment points.
    SURE, POSSIBLE = 'S', 'P'

    def __init__(self, swords, twords, align):
        self.swords = swords
        self.twords = twords
        self.align = align

    def reverse(self):
        """Return a new Alignment with the source and target sides swapped.

        Every point ``(frm, to)`` becomes ``(to, frm)`` and keeps its label.
        """
        flipped = {}
        for (frm, to), conf in self.align.items():
            flipped[to, frm] = conf
        return Alignment(self.twords, self.swords, flipped)

    def merge(self, other):
        """Combine this alignment with *other* over the same sentence pair.

        Points found in both alignments are labelled SURE; points found in
        only one of them are labelled POSSIBLE.  The labels the points had
        before merging are not consulted.

        Raises:
            AssertionError: if the two alignments do not share the same
                source and target words.
        """
        assert self.swords == other.swords
        assert self.twords == other.twords

        merged = {}
        for point in self.align:
            merged[point] = Alignment.POSSIBLE

        for point in other.align:
            if point in self.align:
                merged[point] = Alignment.SURE
            else:
                merged[point] = Alignment.POSSIBLE

        return Alignment(self.swords, self.twords, merged)

    def __repr__(self):
        return 'Alignment(swords=%s, twords=%s, align=%s)' % (
            self.swords, self.twords, self.align)
-
def read_pharaoh_text(infile):
    """Consume one line from *infile* and return its whitespace-separated words.

    At end of file ``readline`` yields an empty string, so the result is an
    empty list.
    """
    line = infile.readline()
    # split() with no separator already ignores leading/trailing whitespace.
    words = line.split()
    return words
-
def parse_pharaoh_align(text):
    """Parse a string of ``FROM-TO`` alignment points such as ``"0-0 1-2"``.

    Returns a dict mapping each ``(from, to)`` integer pair to
    ``Alignment.SURE``.  A token that does not consist of exactly two
    integers joined by ``-`` raises ValueError.
    """
    points = {}
    for link in text.split():
        frm, to = [int(field) for field in link.split('-')]
        points[frm, to] = Alignment.SURE
    return points
-
def read_pharaoh_align(infile):
    """Read one line of ``FROM-TO`` alignment points from *infile*.

    The line is handed to parse_pharaoh_align, so the result is a dict
    mapping ``(from, to)`` integer pairs to ``Alignment.SURE``.  At end of
    file the result is an empty dict.
    """
    # Delegate instead of duplicating the parsing loop.
    return parse_pharaoh_align(infile.readline())
-
def read_pharaoh_alignment(swfile, twfile, afile):
    """Build an Alignment from the next line of three parallel files.

    One line is consumed from each of *swfile* (source words), *twfile*
    (target words) and *afile* (alignment points), in that order.
    """
    return Alignment(
        read_pharaoh_text(swfile),
        read_pharaoh_text(twfile),
        read_pharaoh_align(afile),
    )
-
def read_giza_alignment(infile):
    """Read one three-line alignment record from *infile*.

    The first line is skipped, the second supplies ``swords`` and the third
    supplies ``twords`` interleaved with ``({ i j ... })`` groups.  Each
    number in a group is a 1-based index into ``swords`` linked to the word
    that precedes the group.  The ``NULL`` token is not stored as a word and
    the links in a group seen before any word are dropped.
    NOTE(review): this looks like the GIZA++ A3 output format -- confirm.
    """
    infile.readline()  # first line of the record is not used
    swords = infile.readline().split()
    twords = []
    als = {}
    inside_group = False
    for token in infile.readline().split():
        if inside_group:
            if token == '})':
                inside_group = False
            elif twords:
                # Links attach to the most recently read target word.
                als[int(token) - 1, len(twords) - 1] = Alignment.SURE
            continue
        if token == '({':
            inside_group = True
        elif token != 'NULL':
            twords.append(token)
    return Alignment(swords, twords, als)
-
def read_naacl_aligns(infile):
    """Read alignment points grouped by sentence index.

    Each non-blank line holds ``INDEX FROM TO [CONF [...]]``.  ``FROM`` and
    ``TO`` are 1-based positions and are stored 0-based.  A new dict is
    started whenever ``INDEX`` differs from the previous line's, so lines
    for one sentence are expected to be contiguous.

    Blank lines are skipped.  When the confidence column is missing the
    point is labelled ``'S'``; any columns after the fourth are ignored.
    NOTE(review): defaulting to sure follows the NAACL/WPT shared-task
    format description as I recall it -- confirm against the data in use.

    Returns:
        A list of dicts mapping ``(from, to)`` to the confidence string.

    Raises:
        ValueError: if a non-blank line has fewer than three fields or a
            non-integer index/position.
    """
    aligns = []
    last = None
    for line in infile:
        fields = line.split()
        if not fields:
            continue  # tolerate blank lines, e.g. at end of file
        if len(fields) < 3:
            raise ValueError('malformed alignment line: %r' % line)
        index, frm, to = [int(field) for field in fields[:3]]
        conf = fields[3] if len(fields) > 3 else 'S'
        if index != last:
            aligns.append({})
            last = index
        aligns[-1][frm - 1, to - 1] = conf
    return aligns
-
-#
-# This phrase-extraction function largely mimics Pharaoh's phrase-extract
-# code. It also supports the option to not advance over NULL alignments.
-#
-
def xextract_phrases(alignment, maxPhraseLength=None, advance=True):
    """Yield phrase pairs that are consistent with the SURE alignment points.

    Only points labelled ``Alignment.SURE`` are considered.  For every
    target span ``st..et`` the source words linked to it are projected to
    the span ``minS..maxS``; the pair is kept when no source word inside
    that span is also linked to a target word outside ``st..et``.

    Args:
        alignment: object with ``swords``, ``twords`` and ``align``
            (dict of ``(s, t) -> label``).
        maxPhraseLength: maximum number of words on either side; when
            falsy, the longer of the two sentences is used.
        advance: when true, also yield variants whose source span is
            extended over neighbouring unaligned source words.

    Yields:
        Tuples ``(ss, es, st, et)``: inclusive source span, then inclusive
        target span.
    """
    T = len(alignment.twords)
    S = len(alignment.swords)
    if not maxPhraseLength:
        maxPhraseLength = max(T, S)

    # alignedCountS[s]: number of SURE links of source word s.
    # alignedToT[t]: source words SURE-linked to target word t.
    alignedCountS = [0] * S
    alignedToT = [[] for _ in range(T)]
    for (s, t), conf in alignment.align.items():
        if conf == Alignment.SURE:
            alignedCountS[s] += 1
            alignedToT[t].append(s)

    # check alignments for target phrase st...et
    for st in range(T):
        for et in range(st, min(T, st + maxPhraseLength)):
            minS = S  # larger than any valid source index
            maxS = -1
            # After the loop usedS[s] counts the links of s that fall
            # outside the target span st..et.
            usedS = alignedCountS[:]
            for ti in range(st, et + 1):
                for si in alignedToT[ti]:
                    minS = min(minS, si)
                    maxS = max(maxS, si)
                    usedS[si] -= 1

            if maxS < 0:
                continue  # not aligned to any source word at all
            if maxS - minS >= maxPhraseLength:
                continue  # source phrase exceeds the length limit

            # Reject the span if any source word in minS..maxS (inclusive)
            # is linked to a target word outside st..et.  The earlier code
            # stopped this scan one short of maxS and compensated with a
            # second scan ("Pharaoh doesn't use this check"); with the
            # inclusive bound that second scan is redundant.
            if any(usedS[si] > 0 for si in range(minS, maxS + 1)):
                continue

            if not advance:
                yield (minS, maxS, st, et)
                continue

            # start point of source phrase may advance over unaligned words
            ss = minS
            while (ss >= 0 and
                   ss > maxS - maxPhraseLength and  # within length limit
                   (ss == minS or alignedCountS[ss] == 0)):  # unaligned
                # end point of source phrase may advance over unaligned words
                es = maxS
                while (es < S and
                       es < ss + maxPhraseLength and  # within length limit
                       (es == maxS or alignedCountS[es] == 0)):  # unaligned
                    yield (ss, es, st, et)
                    es += 1
                ss -= 1
-
diff --git a/report/viewer/display.py b/report/viewer/display.py
deleted file mode 100755
index bf9d2a1f..00000000
--- a/report/viewer/display.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python
-
-import sys, os, gzip, re
-import render, alignment
-
-import reportlab.rl_config
-reportlab.rl_config.warnOnMissingFontGlyphs = 0
-from reportlab.pdfbase.ttfonts import TTFont
-from reportlab.pdfbase import pdfmetrics
-
# Prefer the font file sitting next to this script so the viewer can be run
# from any working directory; otherwise keep the original relative name.
_font_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                          'PMingLiU.ttf')
if not os.path.exists(_font_file):
    _font_file = 'PMingLiU.ttf'
pdfmetrics.registerFont(TTFont('PMingLiU', _font_file))

# The single command-line argument is the filename handed to render.Document.
if len(sys.argv) < 2:
    sys.exit('usage: %s OUTPUT < alignments' % sys.argv[0])

doc = render.Document(sys.argv[1])

# Each stdin line is "source words ||| target words ||| alignment points".
for line in sys.stdin:
    if not line.strip():
        continue  # skip blank lines instead of failing on the unpack below
    src, tgt, align = line.split(' ||| ')
    src = src.split()
    tgt = tgt.split()
    align = alignment.parse_pharaoh_align(align)
    doc.append(render.Alignment(src, tgt, align, 'PMingLiU', 'Helvetica', 8, 0.4))

doc.render()
diff --git a/report/viewer/render.py b/report/viewer/render.py
deleted file mode 100644
index 0934c39c..00000000
--- a/report/viewer/render.py
+++ /dev/null
@@ -1,128 +0,0 @@
-from reportlab.pdfgen import canvas
-from reportlab.lib.colors import black, gray, white, magenta, Color
-from reportlab.lib.pagesizes import A4
-from reportlab.lib.styles import getSampleStyleSheet
-from reportlab.lib.units import cm, inch
-from reportlab.platypus import SimpleDocTemplate, Spacer, Paragraph
-from reportlab.platypus.flowables import Flowable
-import re
-
class Alignment(Flowable):
    """Flowable that draws a word-alignment grid.

    ``x_words`` label the columns (drawn rotated above the grid) and
    ``y_words`` label the rows (drawn to the right of the grid, first word
    at the top).  ``alignment`` maps ``(x, y)`` cell indices to a key of
    ``colours``; a cell is filled with that colour, or, when the entry is a
    pair rather than a ``Color``, with two triangles in the pair's colours.

    Args:
        x_words, y_words: column and row labels.
        alignment: dict of ``(x, y) -> colour key``.
        x_font, y_font: font names for the column and row labels.
        ptsize: label font size in points.
        unit: cell edge length in centimetres.
        scale: when true, wrap() scales the drawing to the available width
            (enlarging by at most a factor of 1.5).
        colours: mapping from colour key to a Color or a pair of colours;
            defaults to black for 'S', gray for 'P' and magenta for 'N'.
    """

    def __init__(self, x_words, y_words, alignment, x_font, y_font, ptsize, unit, scale=True, colours=None):
        # Let the base class set up its own bookkeeping attributes.
        Flowable.__init__(self)
        self._x_words = x_words
        self._y_words = y_words
        self._alignment = alignment
        self._unit = unit*cm
        self._x_font = x_font
        self._y_font = y_font
        self._ptsize = ptsize
        self._scale = 1
        self._do_scale = scale
        self._colours = colours
        if not colours:
            self._colours = {'S': black, 'P': gray, 'N': magenta}

    def wrap(self, rw, rh):
        """Return the (width, height) the drawing needs.

        The size is the grid plus the widest row label horizontally and the
        widest column label vertically.  When scaling is enabled the scale
        factor is chosen from the available width *rw* only; *rh* is unused.
        """
        xws = [self.canv.stringWidth(w, self._x_font, self._ptsize)
               for w in self._x_words]
        yws = [self.canv.stringWidth(w, self._y_font, self._ptsize)
               for w in self._y_words]
        # "or [0]" keeps max() from raising when a side has no words.
        width = (len(self._x_words) + 0.22) * self._unit + max(yws or [0])
        height = (len(self._y_words) + 0.22) * self._unit + max(xws or [0])

        if self._do_scale:
            self._scale = min(rw / width, 1.5)
            width *= self._scale
            height *= self._scale

        return (width, height)

    def draw(self):
        """Paint the filled cells, the grid lines and both sets of labels."""
        c = self.canv

        X = len(self._x_words)
        Y = len(self._y_words)

        c.saveState()
        c.scale(self._scale, self._scale)

        for (x, y), conf in self._alignment.items():
            col = self._colours[conf]
            if isinstance(col, Color):
                # Row 0 is drawn at the top, hence the Y-y-1 flip.
                c.setFillColor(col)
                c.rect((0.02 + x)*self._unit, (0.02+Y-y-1)*self._unit,
                       self._unit, self._unit, 0, 1)
            else:
                # A pair of colours: split the cell along its diagonal.
                bl = (x*self._unit, (Y-y-1)*self._unit)
                tl = (x*self._unit, (Y-y)*self._unit)
                tr = ((x+1)*self._unit, (Y-y)*self._unit)
                br = ((x+1)*self._unit, (Y-y-1)*self._unit)

                p = c.beginPath()
                p.moveTo(*br)
                p.lineTo(*tr)
                p.lineTo(*tl)
                c.setFillColor(col[0])
                c.drawPath(p, fill=1)
                p = c.beginPath()
                p.moveTo(*br)
                p.lineTo(*bl)
                p.lineTo(*tl)
                c.setFillColor(col[1])
                c.drawPath(p, fill=1)

        c.setStrokeColor(black)
        if X and Y:
            # Real lists (not map objects) so the call also works where
            # map() is lazy; an empty side has no grid to draw.
            c.grid([(0.02+i)*self._unit for i in range(X+1)],
                   [(0.02+j)*self._unit for j in range(Y+1)])

        c.setFont(self._x_font, self._ptsize)
        c.setFillColor(black)
        for x, word in enumerate(self._x_words):
            c.saveState()
            c.translate((x+0.52)*self._unit, (Y+0.22)*self._unit)
            c.rotate(60)
            c.drawString(0, 0, word)
            c.restoreState()

        c.setFont(self._y_font, self._ptsize)
        for y, word in enumerate(self._y_words):
            c.drawString((X+0.22)*self._unit, (Y-y+0.42-1)*self._unit, word)

        c.restoreState()
-
class Document:
    """Accumulates flowables and builds them with a SimpleDocTemplate."""

    def __init__(self, filename):
        self._styles = getSampleStyleSheet()
        self._doc = SimpleDocTemplate(filename)
        self._story = []

    def append(self, flowable):
        """Queue *flowable* followed by a ``Spacer(1, 1*cm)``."""
        self._story += [flowable, Spacer(1, 1*cm)]

    def render(self):
        """Build the document from everything queued so far."""
        # Leave out the last entry, i.e. the spacer after the final flowable.
        story = self._story[:-1]
        self._doc.build(story)
-
class Canvas:
    """Draws a single flowable onto a page sized to fit that flowable."""

    def __init__(self, filename):
        self._filename = filename
        # Scratch canvas used by render() so the flowable can measure
        # itself; render() swaps in the real canvas before drawing.
        self._canvas = canvas.Canvas('.' + filename, A4)
        self._size = A4
        self._body = None

    def append(self, flowable):
        """Set the flowable to draw, warning on stderr if one is replaced."""
        if self._body:
            # Imported here because this module has no top-level sys import.
            import sys
            sys.stderr.write('WARNING: replacing existing flowable\n')
        self._body = flowable

    def render(self):
        """Measure the flowable, then draw it on a matching page and save."""
        self._body.canv = self._canvas
        width, height = self._body.wrap(*self._size)
        # Leave a 2% margin around the drawing.
        width *= 1.02
        height *= 1.02

        self._canvas = canvas.Canvas(self._filename, (width, height))
        self._body.canv = self._canvas
        self._body.draw()
        self._canvas.save()