diff options
author | Wu, Ke <wuke@cs.umd.edu> | 2014-12-17 16:11:38 -0500 |
---|---|---|
committer | Wu, Ke <wuke@cs.umd.edu> | 2014-12-17 16:11:38 -0500 |
commit | 7468e8d85e99b4619442c7afaf4a0d92870111bb (patch) | |
tree | a6f17da7c69048c8900260b5490bb9d8611be3bb /utils/b64featvector.cc | |
parent | b6dd5a683db9dda2d634dd2fdb76606819594901 (diff) | |
parent | 1a79175f9a101d46cf27ca921213d5dd9300518f (diff) |
Merge with upstream
Diffstat (limited to 'utils/b64featvector.cc')
-rw-r--r-- | utils/b64featvector.cc | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/utils/b64featvector.cc b/utils/b64featvector.cc new file mode 100644 index 00000000..c7d08b29 --- /dev/null +++ b/utils/b64featvector.cc @@ -0,0 +1,55 @@ +#include "b64featvector.h" + +#include <sstream> +#include <boost/scoped_array.hpp> +#include "b64tools.h" +#include "fdict.h" + +using namespace std; + +static inline void EncodeFeatureWeight(const string &featname, weight_t weight, + ostream *output) { + output->write(featname.data(), featname.size() + 1); + output->write(reinterpret_cast<char *>(&weight), sizeof(weight_t)); +} + +string EncodeFeatureVector(const SparseVector<weight_t> &vec) { + string b64; + { + ostringstream base64_strm; + { + ostringstream strm; + for (SparseVector<weight_t>::const_iterator it = vec.begin(); + it != vec.end(); ++it) + if (it->second != 0) + EncodeFeatureWeight(FD::Convert(it->first), it->second, &strm); + string data(strm.str()); + B64::b64encode(data.data(), data.size(), &base64_strm); + } + b64 = base64_strm.str(); + } + return b64; +} + +void DecodeFeatureVector(const string &data, SparseVector<weight_t> *vec) { + vec->clear(); + if (data.empty()) return; + // Decode data + size_t b64_len = data.size(), len = b64_len / 4 * 3; + boost::scoped_array<char> buf(new char[len]); + bool res = + B64::b64decode(reinterpret_cast<const unsigned char *>(data.data()), + b64_len, buf.get(), len); + assert(res); + // Apply updates + size_t cur = 0; + while (cur < len) { + string feat_name(buf.get() + cur); + if (feat_name.empty()) break; // Encountered trailing \0 + int feat_id = FD::Convert(feat_name); + weight_t feat_delta = + *reinterpret_cast<weight_t *>(buf.get() + cur + feat_name.size() + 1); + (*vec)[feat_id] = feat_delta; + cur += feat_name.size() + 1 + sizeof(weight_t); + } +} |