From 83eb31deb8a2056c098715c8cb29f2498fc213c3 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Thu, 8 Sep 2011 00:06:52 +0200 Subject: a lot of stuff, fast_sparse_vector, perceptron, removed sofia, sample [...] --- dtrain/sample.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 dtrain/sample.h (limited to 'dtrain/sample.h') diff --git a/dtrain/sample.h b/dtrain/sample.h new file mode 100644 index 00000000..b9bc4461 --- /dev/null +++ b/dtrain/sample.h @@ -0,0 +1,52 @@ +#include "kbestget.h" + + +namespace dtrain +{ + + +struct TPair +{ + double type; + SparseVector first; + SparseVector second; +}; + +typedef vector TrainingInstances; + + +void +sample_all( KBestList* kb, TrainingInstances &training ) +{ + double type; + for ( size_t i = 0; i < kb->GetSize()-1; i++ ) { + for ( size_t j = i+1; j < kb->GetSize(); j++ ) { + if ( kb->scores[i] - kb->scores[j] < 0 ) { + type = -1; + } else { + type = 1; + } + TPair p; + p.type = type; + p.first = kb->feats[i]; + p.second = kb->feats[j]; + training.push_back( p ); + } + } +} + +/*void +sample_all_only_neg(, vector > pairs) +{ + +} + +void +sample_random_pos() +{ + if ( rand() % 2 ) { // sample it? +}*/ + + +} // namespace + -- cgit v1.2.3