summaryrefslogtreecommitdiff
path: root/src/sparse_vector.cc
diff options
context:
space:
mode:
authorChris Dyer <redpony@gmail.com>2009-12-03 16:33:55 -0500
committerChris Dyer <redpony@gmail.com>2009-12-03 16:33:55 -0500
commit671c21451542e2dd20e45b4033d44d8e8735f87b (patch)
treeb1773b077dd65b826f067a423d26f7942ce4e043 /src/sparse_vector.cc
initial check in
Diffstat (limited to 'src/sparse_vector.cc')
-rw-r--r--src/sparse_vector.cc98
1 files changed, 98 insertions, 0 deletions
diff --git a/src/sparse_vector.cc b/src/sparse_vector.cc
new file mode 100644
index 00000000..4035b9ef
--- /dev/null
+++ b/src/sparse_vector.cc
@@ -0,0 +1,98 @@
+#include "sparse_vector.h"
+
+#include <iostream>
+#include <cstring>
+
+#include "hg_io.h"
+
+using namespace std;
+
+namespace B64 {
+
+void Encode(double objective, const SparseVector<double>& v, ostream* out) {
+ const int num_feats = v.num_active();
+ size_t tot_size = 0;
+ const size_t off_objective = tot_size;
+ tot_size += sizeof(double); // objective
+ const size_t off_num_feats = tot_size;
+ tot_size += sizeof(int); // num_feats
+ const size_t off_data = tot_size;
+ tot_size += sizeof(unsigned char) * num_feats; // lengths of feature names;
+ typedef SparseVector<double>::const_iterator const_iterator;
+ for (const_iterator it = v.begin(); it != v.end(); ++it)
+ tot_size += FD::Convert(it->first).size(); // feature names;
+ tot_size += sizeof(double) * num_feats; // gradient
+ const size_t off_magic = tot_size;
+ tot_size += 4; // magic
+
+ // size_t b64_size = tot_size * 4 / 3;
+ // cerr << "Sparse vector binary size: " << tot_size << " (b64 size=" << b64_size << ")\n";
+ char* data = new char[tot_size];
+ *reinterpret_cast<double*>(&data[off_objective]) = objective;
+ *reinterpret_cast<int*>(&data[off_num_feats]) = num_feats;
+ char* cur = &data[off_data];
+ assert(cur - data == off_data);
+ for (const_iterator it = v.begin(); it != v.end(); ++it) {
+ const string& fname = FD::Convert(it->first);
+ *cur++ = static_cast<char>(fname.size()); // name len
+ memcpy(cur, &fname[0], fname.size());
+ cur += fname.size();
+ *reinterpret_cast<double*>(cur) = it->second;
+ cur += sizeof(double);
+ }
+ assert(cur - data == off_magic);
+ *reinterpret_cast<unsigned int*>(cur) = 0xBAABABBAu;
+ cur += sizeof(unsigned int);
+ assert(cur - data == tot_size);
+ b64encode(data, tot_size, out);
+ delete[] data;
+}
+
+bool Decode(double* objective, SparseVector<double>* v, const char* in, size_t size) {
+ v->clear();
+ if (size % 4 != 0) {
+ cerr << "B64 error - line % 4 != 0\n";
+ return false;
+ }
+ const size_t decoded_size = size * 3 / 4 - sizeof(unsigned int);
+ const size_t buf_size = decoded_size + sizeof(unsigned int);
+ if (decoded_size < 6) { cerr << "SparseVector decoding error: too short!\n"; return false; }
+ char* data = new char[buf_size];
+ if (!b64decode(reinterpret_cast<const unsigned char*>(in), size, data, buf_size)) {
+ delete[] data;
+ return false;
+ }
+ size_t cur = 0;
+ *objective = *reinterpret_cast<double*>(data);
+ cur += sizeof(double);
+ const int num_feats = *reinterpret_cast<int*>(&data[cur]);
+ cur += sizeof(int);
+ int fc = 0;
+ while(fc < num_feats && cur < decoded_size) {
+ ++fc;
+ const int fname_len = data[cur++];
+ assert(fname_len > 0);
+ assert(fname_len < 256);
+ string fname(fname_len, '\0');
+ memcpy(&fname[0], &data[cur], fname_len);
+ cur += fname_len;
+ const double val = *reinterpret_cast<double*>(&data[cur]);
+ cur += sizeof(double);
+ int fid = FD::Convert(fname);
+ v->set_value(fid, val);
+ }
+ if(num_feats != fc) {
+ cerr << "Expected " << num_feats << " but only decoded " << fc << "!\n";
+ delete[] data;
+ return false;
+ }
+ if (*reinterpret_cast<unsigned int*>(&data[cur]) != 0xBAABABBAu) {
+ cerr << "SparseVector decodeding error : magic does not match!\n";
+ delete[] data;
+ return false;
+ }
+ delete[] data;
+ return true;
+}
+
+}