summaryrefslogtreecommitdiff
path: root/nlp_tools/vocabulary.py
diff options
context:
space:
mode:
author Jacob <andqso@gmail.com> 2013-07-28 10:29:17 +0100
committer Jacob <andqso@gmail.com> 2013-07-28 10:29:17 +0100
commit 4f818256a13a61cbedee919a07637b8ed225783e (patch)
tree 21f35b7d44556969c541fdc98cc6cb73df8160cf /nlp_tools/vocabulary.py
parent a0c270b926b7fb1f981281c0ad8ae085272364fb (diff)
remove nlp_tools
Diffstat (limited to 'nlp_tools/vocabulary.py')
-rw-r--r-- nlp_tools/vocabulary.py 49
1 files changed, 0 insertions, 49 deletions
diff --git a/nlp_tools/vocabulary.py b/nlp_tools/vocabulary.py
deleted file mode 100644
index ed200f5..0000000
--- a/nlp_tools/vocabulary.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import cPickle
-
class Vocabulary:
    """Two-way mapping between strings and integer tokens.

    Tokens are handed out as ``len(self)`` at insertion time, so a vocabulary
    that is only ever grown through ``put``/``ensure`` numbers its strings
    0, 1, 2, ... in insertion order.
    """

    # Returned by gett() for a string that is not in the vocabulary.
    OOV_VAL = -1

    def __init__(self):
        """Create an empty vocabulary."""
        self.str_to_tok = {}  # string -> token
        self.tok_to_str = {}  # token -> string

    def put(self, string):
        """Add a new string and return its token.

        Raises:
            ValueError: if ``string`` is already in the vocabulary.
        """
        if string in self.str_to_tok:
            raise ValueError("%s is already in this vocabulary (token %d)" %
                             (string, self.str_to_tok[string]))
        return self.ensure(string)

    def ensure(self, string):
        """Return the token for ``string``, adding the string if it is missing.

        The token is returned in both cases, so callers can rely on the
        result whether or not the string was already known.
        """
        if string in self.str_to_tok:
            return self.str_to_tok[string]
        tok = len(self)
        self.str_to_tok[string] = tok
        self.tok_to_str[tok] = string
        return tok

    def gett(self, string):
        """Return the token for ``string``, or ``OOV_VAL`` if it is unknown."""
        return self.str_to_tok.get(string, self.OOV_VAL)

    def gets(self, tok):
        """Return the string for ``tok``.

        Raises:
            KeyError: if ``tok`` is not a token of this vocabulary.
        """
        return self.tok_to_str[tok]

    def strs(self):
        """Return the strings in the vocabulary (the keys of ``str_to_tok``)."""
        return self.str_to_tok.keys()

    def toks(self):
        """Return the tokens in the vocabulary (the keys of ``tok_to_str``)."""
        return self.tok_to_str.keys()

    def __len__(self):
        """Return the number of strings in the vocabulary."""
        return len(self.str_to_tok)

    def save(self, path):
        """Pickle this vocabulary to the file at ``path``."""
        # Pickle data is a byte stream: binary mode keeps the file intact on
        # platforms that translate newlines in text mode.
        with open(path, 'wb') as f:
            cPickle.dump(self, f)

    @classmethod
    def load(cls, path):
        """Unpickle and return the object stored in the file at ``path``.

        The unpickled object is returned as-is; it is not checked to be an
        instance of ``cls``.
        """
        # SECURITY: unpickling can execute arbitrary code. Only load files
        # that come from a trusted source.
        with open(path, 'rb') as f:
            return cPickle.load(f)