class Alignment:
    SURE, POSSIBLE = 'S', 'P'
    
    def __init__(self, swords, twords, align):
        self.swords = swords
        self.twords = twords
        self.align = align

    def reverse(self):
        als = {}
        for (frm, to), conf in self.align.items():
            als[to, frm] = conf
        return Alignment(self.twords, self.swords, als)

    def merge(self, other):
        assert self.swords == other.swords
        assert self.twords == other.twords

        als = {}
        for frm, to in self.align.keys():
            als[frm, to] = Alignment.POSSIBLE

        for frm, to in other.align.keys():
            if (frm, to) in als:
                als[frm, to] = Alignment.SURE
            else:
                als[frm, to] = Alignment.POSSIBLE

        return Alignment(self.swords, self.twords, als)

    def __repr__(self):
        return 'Alignment(swords=%s, twords=%s, align=%s)' % (self.swords, self.twords, self.align)

def read_pharaoh_text(infile):
    return infile.readline().strip().split()

def parse_pharaoh_align(text):
    als = {}
    for part in text.strip().split():
        frm, to = map(int, part.split('-'))
        als[frm, to] = Alignment.SURE
    return als

def read_pharaoh_align(infile):
    als = {}
    for part in infile.readline().strip().split():
        frm, to = map(int, part.split('-'))
        als[frm, to] = Alignment.SURE
    return als

def read_pharaoh_alignment(swfile, twfile, afile):
    sw = read_pharaoh_text(swfile)
    tw = read_pharaoh_text(twfile)
    als = read_pharaoh_align(afile)
    return Alignment(sw, tw, als)
    
def read_giza_alignment(infile):
    infile.readline() # ignore
    swords = infile.readline().strip().split()
    twords = []
    als = {}
    state = 0
    for token in infile.readline().strip().split():
        if state == 0:
            if token != 'NULL':
                if token != '({':
                    twords.append(token)
                else:
                    state = 1
        elif state == 1:
            if token != '})':
                if twords:
                    als[int(token)-1, len(twords)-1] = Alignment.SURE
            else:
                state = 0
    return Alignment(swords, twords, als)

def read_naacl_aligns(infile):
    aligns = []
    last = None
    for line in infile:
        index, frm, to, conf = line.rstrip().split()
        if int(index) != last:
            aligns.append({})
        aligns[-1][int(frm)-1, int(to)-1] = conf
        last = int(index)
    return aligns

#
# This phrase-extraction function largely mimics Pharaoh's phrase-extract
# code. It also supports the option to not advance over NULL alignments.
#

def xextract_phrases(alignment, maxPhraseLength=None, advance=True):
    T = len(alignment.twords)
    S = len(alignment.swords)
    if not maxPhraseLength:
        maxPhraseLength = max(T, S)

    alignedCountS = [0 for s in alignment.swords]
    alignedToT = [[] for t in alignment.twords]
    alignedToS = [[] for s in alignment.swords]
    for (s, t), conf in alignment.align.items():
        if conf == Alignment.SURE:
            alignedCountS[s] += 1
            alignedToT[t].append(s)
            alignedToS[s].append(t)

    # check alignments for english phrase startT...endT
    for st in range(T):
        for et in range(st, min(T, st + maxPhraseLength)):
            minS = 9999
            maxS = -1
            usedS = alignedCountS[:]
            for ti in range(st, et+1):
                for si in alignedToT[ti]:
                    #print 'point (%d, %d)' % (si, ti)
                    if si<minS: minS = si
                    if si>maxS: maxS = si
                    usedS[si] -= 1
                    
            #print 's projected (%d-%d, %d, %d)' % (minS, maxS, st, et)
            if (maxS >= 0 and  # aligned to any foreign words at all
                    maxS-minS < maxPhraseLength): # foreign phrase within limits
                # check if foreign words are aligned to out of bound english words
                out_of_bounds = False
                for si in range(minS, maxS):
                    if usedS[si] > 0:
                        #print 'out of bounds:', si
                        out_of_bounds = True
                        break

                # Pharoah doesn't use this check, but I think it's required
                if not out_of_bounds:
                    for s in range(minS, maxS+1):
                        for t in alignedToS[s]:
                            if not (st <= t <= et):
                                #print 'out of bounds2:', t,s
                                out_of_bounds = True
                                break

                #print 'doing it for (%d-%d, %d, %d)' % (minS, maxS, st, et)
                if not out_of_bounds:
                    if advance:
                        #print 'attempting to advance'
                        # start point of foreign phrase may advance over unaligned
                        ss = minS
                        while (ss>=0 and
                                 ss>maxS-maxPhraseLength and # within length limit
                                 (ss==minS or alignedCountS[ss]==0)): # unaligned
                            # end point of foreign phrase may advance over unaligned
                            es = maxS
                            while (es<S and 
                                     es<ss+maxPhraseLength and # within length limit
                                     (es==maxS or alignedCountS[es]==0)): #unaligned
                                yield (ss, es, st, et)
                                es += 1
                            ss -= 1
                    else:
                        ss, es = minS, maxS
                        yield (minS, maxS, st, et)