diff options
author | Patrick Simianer <p@simianer.de> | 2012-03-13 09:24:47 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2012-03-13 09:24:47 +0100 |
commit | ef6085e558e26c8819f1735425761103021b6470 (patch) | |
tree | 5cf70e4c48c64d838e1326b5a505c8c4061bff4a /sa-extract/manager.py | |
parent | 10a232656a0c882b3b955d2bcfac138ce11e8a2e (diff) | |
parent | dfbc278c1057555fda9312291c8024049e00b7d8 (diff) |
merge with upstream
Diffstat (limited to 'sa-extract/manager.py')
-rw-r--r-- | sa-extract/manager.py | 100 |
1 files changed, 100 insertions, 0 deletions
# Recovered from: diff --git a/sa-extract/manager.py b/sa-extract/manager.py
# new file mode 100644, index 00000000..767192c1
# --- /dev/null  +++ b/sa-extract/manager.py  @@ -0,0 +1,100 @@
import csuf
import cdat


class Sampler(object):
    '''A Sampler implements a logic for choosing
    samples from a population range'''

    def __init__(self):
        pass

    def registerContext(self, context_manager):
        '''Remember the ContextManager this sampler is attached to.'''
        self.context_manager = context_manager

    def sample(self, phrase_location):
        '''Return the samples chosen for phrase_location.

        This base implementation ignores phrase_location and always
        returns an empty cintlist.CIntList.'''
        # cintlist was used here without ever being imported, so every
        # call raised NameError.  It is imported locally so that merely
        # importing this module does not gain a new hard dependency.
        import cintlist
        return cintlist.CIntList()


class Extractor(object):
    '''Extractor is responsible for extracting rules
    from a given context; once a sentence id/location
    is found for the source fwords, extractor is
    responsible for producing any matching rule(s).
    Optionally, extractor may return an empty list'''

    def __init__(self):
        pass

    def registerContext(self, context_manager):
        '''Remember the ContextManager this extractor is attached to.'''
        self.context_manager = context_manager

    def extract(self, fwords, loc):
        '''Return the rules extracted for fwords at loc.

        This base implementation ignores both arguments and returns
        an empty list.'''
        return []


class RuleFactory(object):
    '''RuleFactory is a class that manages the
    generation of translation rules, using the Context
    and (optionally) any of its contained classes or
    data. The RuleFactory is responsible for handling
    any caching (i.e. when presented with an input
    sentence, it may lookup a rule from its cache
    rather than extracting a new rule)'''

    def __init__(self):
        # Counters; this base class only initialises them.
        self.num_lookups = 0
        self.num_extractions = 0
        self.num_rules = 0
        self.time = 0.0

    def registerContext(self, context_manager):
        '''Remember the ContextManager this factory is attached to.'''
        self.context_manager = context_manager

    def input(self, fwords, meta=None):
        '''Manages the process of enumerating
        rules for a given input sentence, and
        looking them up with calls to Context,
        Sampler, and Extractor

        meta is accepted (and ignored by this base implementation)
        because ContextManager.input always passes it as a second
        argument; without it that call raised TypeError.  It defaults
        to None so existing one-argument callers keep working.

        This base implementation returns an empty list.'''
        return []


class ContextManager(object):
    '''Holds the source suffix array and target data array and
    forwards input sentences to the registered rule factory.'''

    def __init__(self, ffile, efile, extractor=None, sampler=None, rulefactory=None, from_binary=False):
        '''Load the data files and register this context with its
        collaborators.

        ffile and efile are handed to csuf.SuffixArray and
        cdat.DataArray respectively, together with from_binary.
        sampler and rulefactory may be None, in which case nothing
        is registered for them.'''
        # NOTE: Extractor does not have a default value because
        # the only nontrivial extractor right now depends on an
        # alignment file

        self.fsarray = csuf.SuffixArray(ffile, from_binary)
        self.edarray = cdat.DataArray(efile, from_binary)

        # Previously the extractor argument was silently discarded.
        # It is now kept for reference only; it is deliberately not
        # registered, since the original code never registered it.
        self.extractor = extractor

        # Both collaborators default to None, so guard the
        # registration instead of failing with AttributeError.
        self.factory = rulefactory
        if self.factory is not None:
            self.factory.registerContext(self)

        self.sampler = sampler
        if self.sampler is not None:
            self.sampler.registerContext(self)

        self.models = []
        self.owner = None

    def add_model(self, model):
        '''Append model to self.models and return its index.

        The first model ever added becomes the owner, i.e. the only
        model whose input() calls are acted upon.'''
        if self.owner is None:
            self.owner = model
        model_id = len(self.models)
        self.models.append(model)
        return model_id

    def input(self, model, fwords, meta):
        '''Record fwords and pass them, with meta, to the rule factory.

        Calls from any model other than the owner are ignored.
        Always returns None; the factory's result is not propagated.'''
        if model != self.owner:
            return
        self.fwords = fwords
        # No factory registered: there is nothing to forward to.
        if self.factory is not None:
            self.factory.input(self.fwords, meta)