diff options
author | Jacob <andqso@gmail.com> | 2013-07-28 10:29:17 +0100 |
---|---|---|
committer | Jacob <andqso@gmail.com> | 2013-07-28 10:29:17 +0100 |
commit | 4f818256a13a61cbedee919a07637b8ed225783e (patch) | |
tree | 21f35b7d44556969c541fdc98cc6cb73df8160cf /nlp_tools/vocabulary.py | |
parent | a0c270b926b7fb1f981281c0ad8ae085272364fb (diff) |
remove nlp_tools
Diffstat (limited to 'nlp_tools/vocabulary.py')
-rw-r--r-- | nlp_tools/vocabulary.py | 49 |
1 files changed, 0 insertions, 49 deletions
# nlp_tools/vocabulary.py (blob ed200f5) -- contents of the module removed by
# this commit, reconstructed from the collapsed diff text.
try:
    import cPickle
except ImportError:
    # Python 3 has no cPickle module; keep the name the rest of the code uses.
    import pickle as cPickle


class Vocabulary:
    """Bidirectional mapping between strings and dense integer tokens.

    Tokens are handed out in insertion order starting at 0, so the token of
    a newly added string is the vocabulary size at the time it was added.
    """

    # Token returned by gett() for strings that are not in the vocabulary.
    OOV_VAL = -1

    def __init__(self):
        self.str_to_tok = {}
        self.tok_to_str = {}

    def put(self, string):
        """Add a new string and return its token.

        Raises:
            ValueError: if ``string`` is already in the vocabulary.
        """
        if string in self.str_to_tok:
            raise ValueError("%s is already in this vocabulary (token %d)" %
                             (string, self.str_to_tok[string]))
        return self.ensure(string)

    def ensure(self, string):
        """Return the token for ``string``, adding it first if necessary."""
        try:
            # Already known: hand back the existing token instead of None so
            # callers can use ensure() as a plain get-or-add.
            return self.str_to_tok[string]
        except KeyError:
            pass
        tok = len(self)
        self.str_to_tok[string] = tok
        self.tok_to_str[tok] = string
        return tok

    def gett(self, string):
        """Return the token for ``string``, or ``OOV_VAL`` if it is unknown."""
        return self.str_to_tok.get(string, self.OOV_VAL)

    def gets(self, tok):
        """Return the string for ``tok``.

        Raises:
            KeyError: if ``tok`` is not a token of this vocabulary.
        """
        return self.tok_to_str[tok]

    def strs(self):
        """Return a list of all strings in the vocabulary."""
        # list(...) gives a snapshot on Python 3 as well, where keys() is a view.
        return list(self.str_to_tok)

    def toks(self):
        """Return a list of all tokens in the vocabulary."""
        return list(self.tok_to_str)

    def __len__(self):
        return len(self.str_to_tok)

    def save(self, path):
        """Pickle this vocabulary to the file at ``path``."""
        # Pickle data is a byte stream; text mode breaks on Python 3 and can
        # corrupt the data on platforms that translate newlines.
        with open(path, 'wb') as f:
            cPickle.dump(self, f)

    @classmethod
    def load(cls, path):
        """Unpickle and return the object stored at ``path``.

        NOTE: unpickling can execute arbitrary code; only load files that
        come from a trusted source.
        """
        with open(path, 'rb') as f:
            return cPickle.load(f)