diff options
author | Paul Baltescu <pauldb89@gmail.com> | 2013-03-10 01:01:01 +0000 |
---|---|---|
committer | Paul Baltescu <pauldb89@gmail.com> | 2013-03-10 01:01:01 +0000 |
commit | e6181c89ab8f29d8bd0fc6a3a8a359cb50c2304c (patch) | |
tree | c05eaae595c711605e6ccb596b8b634756a95c5b /extractor/vocabulary.h | |
parent | 65a67c6921ee6da0477531224effe38559739455 (diff) |
Added comments. Hooray!
Diffstat (limited to 'extractor/vocabulary.h')
-rw-r--r-- | extractor/vocabulary.h | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/extractor/vocabulary.h b/extractor/vocabulary.h index 03c7dc66..c8fd9411 100644 --- a/extractor/vocabulary.h +++ b/extractor/vocabulary.h @@ -9,16 +9,33 @@ using namespace std; namespace extractor { +/** + * Data structure for mapping words to word ids. + * + * This structure contains words located in the frequent collocations and words + * encountered during the grammar extraction time. This dictionary is + * considerably smaller than the dictionaries in the data arrays (and so is the + * query time). Note that this is the single data structure that changes state + * and needs to have thread-safe read/write operations. + * + * Note: For an experiment using different vocabulary instances for each thread, + * the running time did not improve, implying that the critical regions do not + * cause bottlenecks. + */ class Vocabulary { public: virtual ~Vocabulary(); + // Returns the word id for the given word. virtual int GetTerminalIndex(const string& word); + // Returns the id for a nonterminal located at the given position in a phrase. int GetNonterminalIndex(int position); + // Checks if a symbol is a terminal. bool IsTerminal(int symbol); + // Returns the word corresponding to the given word id. virtual string GetTerminalValue(int symbol); private: |