diff options
author | Wu, Ke <wuke@cs.umd.edu> | 2014-12-17 16:15:13 -0500 |
---|---|---|
committer | Wu, Ke <wuke@cs.umd.edu> | 2014-12-17 16:15:13 -0500 |
commit | 6829a0bc624b02ebefc79f8cf9ec89d7d64a7c30 (patch) | |
tree | 125dfb20f73342873476c793995397b26fd202dd /klm/lm/neural/wordvecs.hh | |
parent | b455a108a21f4ba5a58ab1bc53a8d2bf4d829067 (diff) | |
parent | 7468e8d85e99b4619442c7afaf4a0d92870111bb (diff) |
Merge branch 'const_reorder_2' into softsyn_2
Diffstat (limited to 'klm/lm/neural/wordvecs.hh')
-rw-r--r-- | klm/lm/neural/wordvecs.hh | 38 |
1 files changed, 38 insertions, 0 deletions
diff --git a/klm/lm/neural/wordvecs.hh b/klm/lm/neural/wordvecs.hh
new file mode 100644
index 00000000..921a2b22
--- /dev/null
+++ b/klm/lm/neural/wordvecs.hh
@@ -0,0 +1,38 @@
+#ifndef LM_NEURAL_WORDVECS_H
+#define LM_NEURAL_WORDVECS_H
+
+#include "util/scoped.hh"
+#include "lm/vocab.hh"
+
+#include <Eigen/Dense>
+
+namespace util { class FilePiece; }
+
+namespace lm {
+namespace neural {
+// Pairs a matrix of word vectors with the vocabulary used to look words up.
+class WordVecs {
+ public:
+    // Columns of the matrix are word vectors.  The column index is the word.
+    typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor> Storage;
+
+    /* The file should begin with a line stating the number of word vectors and
+     * the length of the vectors.  That header is followed by lines that each
+     * contain a word followed by floating-point values.
+     */
+    explicit WordVecs(util::FilePiece &in);
+    // Read-only access to the stored word-vector matrix.
+    const Storage &Vectors() const { return vecs_; }
+    // Forwards the lookup of str to vocab_; presumably the result is a column of Vectors() (confirm in the .cc).
+    WordIndex Index(StringPiece str) const { return vocab_.Index(str); }
+
+ private:
+    util::scoped_malloc vocab_backing_;  // NOTE(review): presumably the memory vocab_ is built on; confirm in the constructor.
+    ngram::ProbingVocabulary vocab_;  // Answers Index() lookups.
+
+    Storage vecs_;  // Returned by Vectors().
+};
+
+}} // namespaces
+
+#endif // LM_NEURAL_WORDVECS_H |