diff options
author | Chris Dyer <prguest11@taipan.cs> | 2012-02-02 06:29:50 +0000 |
---|---|---|
committer | Chris Dyer <prguest11@taipan.cs> | 2012-02-02 06:29:50 +0000 |
commit | 7d37102fde8d8b46ed5218245a11496e870fd09f (patch) | |
tree | 97bb3de03d634ffb754a6381c961b6d5cf8d3a7d /sa-extract/manager.py | |
parent | a95fe83969d15b074892c8c06fc2a948b75910ff (diff) |
lopez suffix array extractor with copyrighted david chiang code excised
Diffstat (limited to 'sa-extract/manager.py')
-rw-r--r-- | sa-extract/manager.py | 100 |
1 files changed, 100 insertions, 0 deletions
diff --git a/sa-extract/manager.py b/sa-extract/manager.py new file mode 100644 index 00000000..767192c1 --- /dev/null +++ b/sa-extract/manager.py @@ -0,0 +1,100 @@ +import csuf +import cdat + +class Sampler(object): + '''A Sampler implements a logic for choosing + samples from a population range''' + + def __init__(self): + pass + + def registerContext(self, context_manager): + self.context_manager = context_manager + + def sample(self, phrase_location): + return cintlist.CIntList() + + + +class Extractor(object): + '''Extractor is responsible for extracting rules + from a given context; once a sentence id/location + is found for the source fwords, extractor is + responsible for producing any matching rule(s). + Optionally, extractor may return an empty list''' + + def __init__(self): + pass + + def registerContext(self, context_manager): + self.context_manager = context_manager + + def extract(self, fwords, loc): + return [] + + + +class RuleFactory(object): + '''RuleFactory is a class that manages the + generation of translation rules, using the Context + and (optionally) any of its contained classes or + data. The RuleFactory is responsible for handling + any caching (i.e. when presented with an input + sentence, it may lookup a rule from its cache + rather than extracting a new rule)''' + + def __init__(self): + self.num_lookups = 0 + self.num_extractions = 0 + self.num_rules = 0 + self.time = 0.0 + + + def registerContext(self, context_manager): + self.context_manager = context_manager + + + def input(self, fwords): + '''Manages the process of enumerating + rules for a given input sentence, and + looking them with calls to Context, + Sampler, and Extractor''' + return [] + + +class ContextManager(object): + + def __init__(self, ffile, efile, extractor=None, sampler=None, rulefactory=None, from_binary=False): + # NOTE: Extractor does not have a default value because + # the only nontrivial extractor right now depends on an + # alignment file + + self.fsarray = csuf.SuffixArray(ffile, from_binary) + self.edarray = cdat.DataArray(efile, from_binary) + + self.factory = rulefactory + self.factory.registerContext(self) + + self.sampler = sampler + self.sampler.registerContext(self) + + self.models = [] + self.owner = None + + + def add_model(self, model): + if self.owner is None: + self.owner = model + model_id = len(self.models) + self.models.append(model) + return model_id + + + def input(self, model, fwords, meta): + if model != self.owner: + return + self.fwords = fwords + self.factory.input(self.fwords, meta) + + + |