blob: c8fd9411588986bca2bbe7eb4194161285b5d6f4 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
#ifndef _VOCABULARY_H_
#define _VOCABULARY_H_
#include <string>
#include <unordered_map>
#include <vector>
using namespace std;
namespace extractor {
/**
* Data structure for mapping words to word ids.
*
* This structure contains words located in the frequent collocations and words
* encountered during the grammar extraction time. This dictionary is
* considerably smaller than the dictionaries in the data arrays (and so is the
* query time). Note that this is the single data structure that changes state
* and needs to have thread safe read/write operations.
*
* Note: For an experiment using different vocabulary instances for each thread,
* the running time did not improve implying that the critical regions do not
* cause bottlenecks.
*/
class Vocabulary {
  // Standard-library types are spelled with an explicit std:: prefix so that
  // this declaration does not depend on a using-directive being in effect at
  // the point of inclusion.
 public:
  virtual ~Vocabulary();

  // Returns the word id for the given word.
  virtual int GetTerminalIndex(const std::string& word);

  // Returns the id for a nonterminal located at the given position in a phrase.
  int GetNonterminalIndex(int position);

  // Checks if a symbol is a terminal (i.e. a word rather than a nonterminal).
  bool IsTerminal(int symbol);

  // Returns the word corresponding to the given word id.
  virtual std::string GetTerminalValue(int symbol);

 private:
  // Lookup table keyed by word. NOTE(review): the mapped int is presumably the
  // word id handed out by GetTerminalIndex -- confirm against the .cpp.
  std::unordered_map<std::string, int> dictionary;
  // Stored words. NOTE(review): presumably indexed by word id so that
  // GetTerminalValue can map an id back to its word -- confirm against the
  // .cpp.
  std::vector<std::string> words;
};
} // namespace extractor
#endif
|