summary refs log tree commit diff
path: root/dtrain/pairsampling.h
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2011-09-23 20:53:15 +0200
committer Patrick Simianer <p@simianer.de> 2011-09-23 20:53:15 +0200
commit dc9fd7a3adc863510d79a718e919b6833a86729c (patch)
tree 4baf0c6cadff000a20039994237ccaf468daee40 /dtrain/pairsampling.h
parent 1ad0eb820ee946e5a142567380fc0488c9a5d6de (diff)
begin refactoring
Diffstat (limited to 'dtrain/pairsampling.h')
-rw-r--r-- dtrain/pairsampling.h 64
1 files changed, 64 insertions, 0 deletions
diff --git a/dtrain/pairsampling.h b/dtrain/pairsampling.h
new file mode 100644
index 00000000..502901af
--- /dev/null
+++ b/dtrain/pairsampling.h
@@ -0,0 +1,64 @@
+#ifndef _DTRAIN_SAMPLE_H_
+#define _DTRAIN_SAMPLE_H_
+
+
+#include "kbestget.h"
+
+
+namespace dtrain
+{
+
+
+// One pair of k-best entries as produced by the samplers in this header.
+// "first" is always the entry at the lower list index, "second" the entry at
+// the higher one. The scalar members have no initialiser of their own; the
+// samplers assign every field before storing the pair.
+struct TPair
+{
+  // copies of the KBestList feats[] entries of the two candidates
+  SparseVector<double> first;
+  SparseVector<double> second;
+
+  // positions of the two candidates inside the k-best list
+  size_t first_rank;
+  size_t second_rank;
+
+  // copies of the KBestList scores[] entries of the two candidates
+  double first_score;
+  double second_score;
+};
+
+// Container of sampled pairs; the samplers in this header append to it with
+// push_back().
+typedef vector<TPair> TrainingInstances;
+
+
+// Appends every pair (i, j) with i < j of the entries in *kb to training,
+// ordered by i and then by j. For each pair the feats entries, the list
+// indices and the scores entries of both candidates are copied into a TPair.
+//
+// kb        k-best list to read; it is dereferenced unconditionally, so it
+//           must not be NULL.
+// training  output vector; existing contents are kept, new pairs are
+//           appended at the end.
+//
+// Declared inline because the definition lives in a header: without it every
+// translation unit that includes this file emits its own external definition
+// and linking two such units fails.
+inline void
+sample_all( KBestList* kb, TrainingInstances &training )
+{
+  // Read the size once and bound the outer loop by it directly. The former
+  // bound "GetSize()-1" is compared against a size_t index, so for an empty
+  // list it wrapped around to a huge value and the loop indexed far past the
+  // end of feats/scores. With "i < n" the inner loop is simply empty for the
+  // last i, which yields the same pairs for every non-empty list.
+  const size_t n = kb->GetSize();
+  for ( size_t i = 0; i < n; i++ ) {
+    for ( size_t j = i+1; j < n; j++ ) {
+      TPair p;
+      p.first = kb->feats[i];
+      p.second = kb->feats[j];
+      p.first_rank = i;
+      p.second_rank = j;
+      p.first_score = kb->scores[i];
+      p.second_score = kb->scores[j];
+      training.push_back( p );
+    }
+  }
+}
+
+// Visits every pair (i, j) with i < j of the entries in *kb, ordered by i and
+// then by j, and appends a pair to training whenever rand() % 2 is non-zero.
+// A kept pair carries the feats entries, the list indices and the scores
+// entries of both candidates.
+//
+// kb        k-best list to read; it is dereferenced unconditionally, so it
+//           must not be NULL.
+// training  output vector; existing contents are kept, selected pairs are
+//           appended at the end.
+//
+// Declared inline because the definition lives in a header: without it every
+// translation unit that includes this file emits its own external definition
+// and linking two such units fails.
+inline void
+sample_rand( KBestList* kb, TrainingInstances &training )
+{
+  // Seed rand() on the first call only. Re-seeding with time(NULL) on every
+  // call restarts the generator, so all calls made within the same second
+  // replayed the identical keep/skip sequence.
+  // NOTE(review): the flag is not synchronised (neither is rand()); confirm
+  // this is only used from a single thread.
+  static bool seeded = false;
+  if ( !seeded ) {
+    srand( time(NULL) );
+    seeded = true;
+  }
+
+  // Read the size once and bound the outer loop by it directly. The former
+  // bound "GetSize()-1" is compared against a size_t index, so for an empty
+  // list it wrapped around to a huge value and the loop indexed far past the
+  // end of feats/scores. With "i < n" the inner loop is simply empty for the
+  // last i, so the set of visited pairs is unchanged for non-empty lists.
+  const size_t n = kb->GetSize();
+  for ( size_t i = 0; i < n; i++ ) {
+    for ( size_t j = i+1; j < n; j++ ) {
+      if ( rand() % 2 ) {
+        TPair p;
+        p.first = kb->feats[i];
+        p.second = kb->feats[j];
+        p.first_rank = i;
+        p.second_rank = j;
+        p.first_score = kb->scores[i];
+        p.second_score = kb->scores[j];
+        training.push_back( p );
+      }
+    }
+  }
+}
+
+
+} // namespace
+
+
+#endif
+