1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
|
import csuf
import cdat
import cintlist
class Sampler(object):
    '''Base class for the logic that chooses samples
    from a population range.

    The implementation here is a placeholder: sample()
    ignores its argument and hands back a newly built
    cintlist.CIntList.'''

    def __init__(self):
        '''Nothing to set up; the context manager is
        attached later through registerContext().'''

    def registerContext(self, context_manager):
        '''Remember the context manager this sampler
        belongs to (stored as self.context_manager).'''
        self.context_manager = context_manager

    def sample(self, phrase_location):
        '''Return the samples chosen for phrase_location.

        The base class does not inspect phrase_location;
        it always returns a fresh cintlist.CIntList built
        with no arguments (presumably empty).'''
        chosen = cintlist.CIntList()
        return chosen
class Extractor(object):
    '''Base class for rule extraction from a context.

    When a sentence id/location has been found for the
    source fwords, the extractor is the component that
    produces the matching rule(s). An empty list is a
    valid answer, and it is the only answer this base
    class ever gives.'''

    def __init__(self):
        '''Nothing to set up; the context manager is
        attached later through registerContext().'''

    def registerContext(self, context_manager):
        '''Remember the context manager this extractor
        belongs to (stored as self.context_manager).'''
        self.context_manager = context_manager

    def extract(self, fwords, loc):
        '''Return the rules matching fwords at loc.

        The base class ignores both arguments and returns
        a new empty list on every call.'''
        no_rules = list()
        return no_rules
class RuleFactory(object):
    '''RuleFactory is a class that manages the
    generation of translation rules, using the Context
    and (optionally) any of its contained classes or
    data. The RuleFactory is responsible for handling
    any caching (i.e. when presented with an input
    sentence, it may look up a rule from its cache
    rather than extracting a new rule).

    This base class produces no rules; input() always
    returns an empty list.'''

    def __init__(self):
        '''Initialise the bookkeeping fields to zero.'''
        # Statistics fields; nothing in this base class updates them.
        self.num_lookups = 0
        self.num_extractions = 0
        self.num_rules = 0
        self.time = 0.0

    def registerContext(self, context_manager):
        '''Remember the context manager this factory
        belongs to (stored as self.context_manager).'''
        self.context_manager = context_manager

    def input(self, fwords, meta=None):
        '''Manages the process of enumerating
        rules for a given input sentence, and
        looking them up with calls to Context,
        Sampler, and Extractor.

        fwords -- the words of the input sentence.
        meta   -- optional extra data for the sentence.
                  ContextManager.input() passes it as a
                  second positional argument, so it must
                  be accepted here; the base class ignores
                  it. Defaults to None so existing
                  one-argument callers keep working.

        Returns a new empty list in this base class.'''
        return []
class ContextManager(object):
    '''Ties together the source suffix array, the target
    data array, a rule factory and a sampler, and forwards
    input sentences from the owning model to the rule
    factory.'''

    def __init__(self, ffile, efile, extractor=None, sampler=None, rulefactory=None, from_binary=False):
        '''Load the source/target data and register this
        manager with its collaborators.

        ffile       -- passed to csuf.SuffixArray.
        efile       -- passed to cdat.DataArray.
        extractor   -- accepted but not used by this class.
        sampler     -- object with registerContext(); may be None.
        rulefactory -- object with registerContext() and
                       input(fwords, meta); may be None.
        from_binary -- forwarded to both array constructors.'''
        # NOTE: Extractor does not have a default value because
        # the only nontrivial extractor right now depends on an
        # alignment file
        # NOTE(review): the extractor argument is neither stored nor
        # registered here -- confirm whether that is intentional.
        self.fsarray = csuf.SuffixArray(ffile, from_binary)
        self.edarray = cdat.DataArray(efile, from_binary)

        # Both collaborators default to None in the signature, so only
        # register the ones that were actually supplied.
        self.factory = rulefactory
        if self.factory is not None:
            self.factory.registerContext(self)

        self.sampler = sampler
        if self.sampler is not None:
            self.sampler.registerContext(self)

        self.models = []
        self.owner = None

    def add_model(self, model):
        '''Append model to self.models and return its index.

        The first model ever added becomes self.owner, the
        only model whose input() calls are acted upon.'''
        if self.owner is None:
            self.owner = model
        model_id = len(self.models)
        self.models.append(model)
        return model_id

    def input(self, model, fwords, meta):
        '''Forward an input sentence to the rule factory.

        Calls from any model other than the owner are
        ignored. For the owner, fwords is stored on
        self.fwords and, when a rule factory is present,
        passed on together with meta. Always returns None.'''
        if model != self.owner:
            return
        self.fwords = fwords
        if self.factory is None:
            # No rule factory was configured; nothing to forward to.
            return
        self.factory.input(self.fwords, meta)
|