diff options
author | Chris Dyer <redpony@gmail.com> | 2009-12-14 20:35:11 -0500 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2009-12-14 20:35:11 -0500 |
commit | 851e389dffdd6996ea32d70defb8906de80b9edc (patch) | |
tree | 8c68ee77205badc056b8ab5b332e67e3e98017df /src/sparse_vector.cc | |
parent | dc6930c00b4b276883280cff1ed6dcd9ddef03c7 (diff) |
few small fixes of alignment tools, add new orthographic similarity feature for word aligner, final naming of directories, libraries in cdec
Diffstat (limited to 'src/sparse_vector.cc')
-rw-r--r-- | src/sparse_vector.cc | 98 |
1 files changed, 0 insertions, 98 deletions
diff --git a/src/sparse_vector.cc b/src/sparse_vector.cc deleted file mode 100644 index 4035b9ef..00000000 --- a/src/sparse_vector.cc +++ /dev/null @@ -1,98 +0,0 @@ -#include "sparse_vector.h" - -#include <iostream> -#include <cstring> - -#include "hg_io.h" - -using namespace std; - -namespace B64 { - -void Encode(double objective, const SparseVector<double>& v, ostream* out) { - const int num_feats = v.num_active(); - size_t tot_size = 0; - const size_t off_objective = tot_size; - tot_size += sizeof(double); // objective - const size_t off_num_feats = tot_size; - tot_size += sizeof(int); // num_feats - const size_t off_data = tot_size; - tot_size += sizeof(unsigned char) * num_feats; // lengths of feature names; - typedef SparseVector<double>::const_iterator const_iterator; - for (const_iterator it = v.begin(); it != v.end(); ++it) - tot_size += FD::Convert(it->first).size(); // feature names; - tot_size += sizeof(double) * num_feats; // gradient - const size_t off_magic = tot_size; - tot_size += 4; // magic - - // size_t b64_size = tot_size * 4 / 3; - // cerr << "Sparse vector binary size: " << tot_size << " (b64 size=" << b64_size << ")\n"; - char* data = new char[tot_size]; - *reinterpret_cast<double*>(&data[off_objective]) = objective; - *reinterpret_cast<int*>(&data[off_num_feats]) = num_feats; - char* cur = &data[off_data]; - assert(cur - data == off_data); - for (const_iterator it = v.begin(); it != v.end(); ++it) { - const string& fname = FD::Convert(it->first); - *cur++ = static_cast<char>(fname.size()); // name len - memcpy(cur, &fname[0], fname.size()); - cur += fname.size(); - *reinterpret_cast<double*>(cur) = it->second; - cur += sizeof(double); - } - assert(cur - data == off_magic); - *reinterpret_cast<unsigned int*>(cur) = 0xBAABABBAu; - cur += sizeof(unsigned int); - assert(cur - data == tot_size); - b64encode(data, tot_size, out); - delete[] data; -} - -bool Decode(double* objective, SparseVector<double>* v, const char* in, size_t size) { - v->clear(); - if (size % 4 != 0) { - cerr << "B64 error - line % 4 != 0\n"; - return false; - } - const size_t decoded_size = size * 3 / 4 - sizeof(unsigned int); - const size_t buf_size = decoded_size + sizeof(unsigned int); - if (decoded_size < 6) { cerr << "SparseVector decoding error: too short!\n"; return false; } - char* data = new char[buf_size]; - if (!b64decode(reinterpret_cast<const unsigned char*>(in), size, data, buf_size)) { - delete[] data; - return false; - } - size_t cur = 0; - *objective = *reinterpret_cast<double*>(data); - cur += sizeof(double); - const int num_feats = *reinterpret_cast<int*>(&data[cur]); - cur += sizeof(int); - int fc = 0; - while(fc < num_feats && cur < decoded_size) { - ++fc; - const int fname_len = data[cur++]; - assert(fname_len > 0); - assert(fname_len < 256); - string fname(fname_len, '\0'); - memcpy(&fname[0], &data[cur], fname_len); - cur += fname_len; - const double val = *reinterpret_cast<double*>(&data[cur]); - cur += sizeof(double); - int fid = FD::Convert(fname); - v->set_value(fid, val); - } - if(num_feats != fc) { - cerr << "Expected " << num_feats << " but only decoded " << fc << "!\n"; - delete[] data; - return false; - } - if (*reinterpret_cast<unsigned int*>(&data[cur]) != 0xBAABABBAu) { - cerr << "SparseVector decodeding error : magic does not match!\n"; - delete[] data; - return false; - } - delete[] data; - return true; -} - -} |