diff options
Diffstat (limited to 'report/viewer')
-rw-r--r-- | report/viewer/PMingLiU.ttf | bin | 19389344 -> 0 bytes | |||
-rw-r--r-- | report/viewer/alignment.py | 162 | ||||
-rwxr-xr-x | report/viewer/display.py | 22 | ||||
-rw-r--r-- | report/viewer/render.py | 128 |
4 files changed, 0 insertions, 312 deletions
diff --git a/report/viewer/PMingLiU.ttf b/report/viewer/PMingLiU.ttf Binary files differdeleted file mode 100644 index 03e923bf..00000000 --- a/report/viewer/PMingLiU.ttf +++ /dev/null diff --git a/report/viewer/alignment.py b/report/viewer/alignment.py deleted file mode 100644 index 5fe03734..00000000 --- a/report/viewer/alignment.py +++ /dev/null @@ -1,162 +0,0 @@ -class Alignment: - SURE, POSSIBLE = 'S', 'P' - - def __init__(self, swords, twords, align): - self.swords = swords - self.twords = twords - self.align = align - - def reverse(self): - als = {} - for (frm, to), conf in self.align.items(): - als[to, frm] = conf - return Alignment(self.twords, self.swords, als) - - def merge(self, other): - assert self.swords == other.swords - assert self.twords == other.twords - - als = {} - for frm, to in self.align.keys(): - als[frm, to] = Alignment.POSSIBLE - - for frm, to in other.align.keys(): - if (frm, to) in als: - als[frm, to] = Alignment.SURE - else: - als[frm, to] = Alignment.POSSIBLE - - return Alignment(self.swords, self.twords, als) - - def __repr__(self): - return 'Alignment(swords=%s, twords=%s, align=%s)' % (self.swords, self.twords, self.align) - -def read_pharaoh_text(infile): - return infile.readline().strip().split() - -def parse_pharaoh_align(text): - als = {} - for part in text.strip().split(): - frm, to = map(int, part.split('-')) - als[frm, to] = Alignment.SURE - return als - -def read_pharaoh_align(infile): - als = {} - for part in infile.readline().strip().split(): - frm, to = map(int, part.split('-')) - als[frm, to] = Alignment.SURE - return als - -def read_pharaoh_alignment(swfile, twfile, afile): - sw = read_pharaoh_text(swfile) - tw = read_pharaoh_text(twfile) - als = read_pharaoh_align(afile) - return Alignment(sw, tw, als) - -def read_giza_alignment(infile): - infile.readline() # ignore - swords = infile.readline().strip().split() - twords = [] - als = {} - state = 0 - for token in infile.readline().strip().split(): - if state == 0: - if token != 'NULL': - if token != '({': - twords.append(token) - else: - state = 1 - elif state == 1: - if token != '})': - if twords: - als[int(token)-1, len(twords)-1] = Alignment.SURE - else: - state = 0 - return Alignment(swords, twords, als) - -def read_naacl_aligns(infile): - aligns = [] - last = None - for line in infile: - index, frm, to, conf = line.rstrip().split() - if int(index) != last: - aligns.append({}) - aligns[-1][int(frm)-1, int(to)-1] = conf - last = int(index) - return aligns - -# -# This phrase-extraction function largely mimics Pharaoh's phrase-extract -# code. It also supports the option to not advance over NULL alignments. -# - -def xextract_phrases(alignment, maxPhraseLength=None, advance=True): - T = len(alignment.twords) - S = len(alignment.swords) - if not maxPhraseLength: - maxPhraseLength = max(T, S) - - alignedCountS = [0 for s in alignment.swords] - alignedToT = [[] for t in alignment.twords] - alignedToS = [[] for s in alignment.swords] - for (s, t), conf in alignment.align.items(): - if conf == Alignment.SURE: - alignedCountS[s] += 1 - alignedToT[t].append(s) - alignedToS[s].append(t) - - # check alignments for english phrase startT...endT - for st in range(T): - for et in range(st, min(T, st + maxPhraseLength)): - minS = 9999 - maxS = -1 - usedS = alignedCountS[:] - for ti in range(st, et+1): - for si in alignedToT[ti]: - #print 'point (%d, %d)' % (si, ti) - if si<minS: minS = si - if si>maxS: maxS = si - usedS[si] -= 1 - - #print 's projected (%d-%d, %d, %d)' % (minS, maxS, st, et) - if (maxS >= 0 and # aligned to any foreign words at all - maxS-minS < maxPhraseLength): # foreign phrase within limits - # check if foreign words are aligned to out of bound english words - out_of_bounds = False - for si in range(minS, maxS): - if usedS[si] > 0: - #print 'out of bounds:', si - out_of_bounds = True - break - - # Pharoah doesn't use this check, but I think it's required - if not out_of_bounds: - for s in range(minS, maxS+1): - for t in alignedToS[s]: - if not (st <= t <= et): - #print 'out of bounds2:', t,s - out_of_bounds = True - break - - #print 'doing it for (%d-%d, %d, %d)' % (minS, maxS, st, et) - if not out_of_bounds: - if advance: - #print 'attempting to advance' - # start point of foreign phrase may advance over unaligned - ss = minS - while (ss>=0 and - ss>maxS-maxPhraseLength and # within length limit - (ss==minS or alignedCountS[ss]==0)): # unaligned - # end point of foreign phrase may advance over unaligned - es = maxS - while (es<S and - es<ss+maxPhraseLength and # within length limit - (es==maxS or alignedCountS[es]==0)): #unaligned - yield (ss, es, st, et) - es += 1 - ss -= 1 - else: - ss, es = minS, maxS - yield (minS, maxS, st, et) - diff --git a/report/viewer/display.py b/report/viewer/display.py deleted file mode 100755 index bf9d2a1f..00000000 --- a/report/viewer/display.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python - -import sys, os, gzip, re -import render, alignment - -import reportlab.rl_config -reportlab.rl_config.warnOnMissingFontGlyphs = 0 -from reportlab.pdfbase.ttfonts import TTFont -from reportlab.pdfbase import pdfmetrics - -pdfmetrics.registerFont(TTFont('PMingLiU', 'PMingLiU.ttf')) - -doc = render.Document(sys.argv[1]) - -for line in sys.stdin: - src, tgt, align = line.split(' ||| ') - src = src.split() - tgt = tgt.split() - align = alignment.parse_pharaoh_align(align) - doc.append(render.Alignment(src, tgt, align, 'PMingLiU', 'Helvetica', 8, 0.4)) - -doc.render() diff --git a/report/viewer/render.py b/report/viewer/render.py deleted file mode 100644 index 0934c39c..00000000 --- a/report/viewer/render.py +++ /dev/null @@ -1,128 +0,0 @@ -from reportlab.pdfgen import canvas -from reportlab.lib.colors import black, gray, white, magenta, Color -from reportlab.lib.pagesizes import A4 -from reportlab.lib.styles import getSampleStyleSheet -from reportlab.lib.units import cm, inch -from reportlab.platypus import SimpleDocTemplate, Spacer, Paragraph -from reportlab.platypus.flowables import Flowable -import re - -class Alignment(Flowable): - def __init__(self, x_words, y_words, alignment, x_font, y_font, ptsize, unit, scale=True, colours=None): - self._x_words = x_words - self._y_words = y_words - self._alignment = alignment - self._unit = unit*cm - self._x_font = x_font - self._y_font = y_font - self._ptsize = ptsize - self._scale = 1 - self._do_scale = scale - self._colours = colours - if not colours: - self._colours = {'S':black, 'P':gray, 'N':magenta} - - def wrap(self, rw, rh): - xws = [self.canv.stringWidth(w, self._x_font, self._ptsize) - for w in self._x_words] - yws = [self.canv.stringWidth(w, self._y_font, self._ptsize) - for w in self._y_words] - width = (len(self._x_words) + 0.22)* self._unit + max(yws) - height = (len(self._y_words) + 0.22)* self._unit + max(xws) - - if self._do_scale: - self._scale = min(rw / width, 1.5) - width *= self._scale - height *= self._scale - - return (width, height) - - def draw(self): - c = self.canv - print c.getAvailableFonts() - - X=len(self._x_words) - Y=len(self._y_words) - - c.saveState() - c.scale(self._scale, self._scale) - - for (x, y), conf in self._alignment.items(): - col = self._colours[conf] - if isinstance(col, Color): - c.setFillColor(col) - c.rect((0.02 + x)*self._unit, (0.02+Y-y-1)*self._unit, - self._unit, self._unit, 0, 1) - else: - bl = (x*self._unit, (Y-y-1)*self._unit) - tl = (x*self._unit, (Y-y)*self._unit) - tr = ((x+1)*self._unit, (Y-y)*self._unit) - br = ((x+1)*self._unit, (Y-y-1)*self._unit) - - p = c.beginPath() - p.moveTo(*br) - p.lineTo(*tr) - p.lineTo(*tl) - c.setFillColor(col[0]) - c.drawPath(p, fill=1) - p = c.beginPath() - p.moveTo(*br) - p.lineTo(*bl) - p.lineTo(*tl) - c.setFillColor(col[1]) - c.drawPath(p, fill=1) - - c.setStrokeColor(black) - c.grid(map(lambda x: (0.02+x)*self._unit, range(X+1)), - map(lambda y: (0.02+y)*self._unit, range(Y+1))) - - c.setFont(self._x_font, self._ptsize) - c.setFillColor(black) - for x, word in enumerate(self._x_words): - c.saveState() - c.translate((x+0.52)*self._unit, (Y+0.22)*self._unit) - c.rotate(60) - c.drawString(0, 0, word) - c.restoreState() - - c.setFont(self._y_font, self._ptsize) - for y, word in enumerate(self._y_words): - c.drawString((X+0.22)*self._unit, (Y-y+0.42-1)*self._unit, word) - - c.restoreState() - -class Document: - def __init__(self, filename): - self._styles = getSampleStyleSheet() - self._doc = SimpleDocTemplate(filename) - self._story = [] - - def append(self, flowable): - self._story.append(flowable) - self._story.append(Spacer(1, 1*cm)) - - def render(self): - self._doc.build(self._story[:-1]) - -class Canvas: - def __init__(self, filename): - self._filename = filename - self._canvas = canvas.Canvas('.' + filename, A4) - self._size = A4 - self._body = None - - def append(self, flowable): - if self._body: - print >>sys.stderr, 'WARNING: replacing existing flowable' - self._body = flowable - - def render(self): - self._body.canv = self._canvas - width, height = self._body.wrap(*self._size) - width *= 1.02 - height *= 1.02 - - self._canvas = canvas.Canvas(self._filename, (width, height)) - self._body.canv = self._canvas - self._body.draw() - self._canvas.save() |