summaryrefslogtreecommitdiff
path: root/dtrain/scfg/features/count/featurecount.cc
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2011-12-03 21:38:59 +0100
committer Patrick Simianer <p@simianer.de> 2011-12-03 21:38:59 +0100
commit 578ed26bf818c1f18d5a319cde9b13ef9ff59f46 (patch)
tree b315bbaa2d44e8f54b1d70b1b1bb21704d89a266 /dtrain/scfg/features/count/featurecount.cc
parent 4d1679b517ff46a1d6a152796f1033a4c380492c (diff)
new rule count
Diffstat (limited to 'dtrain/scfg/features/count/featurecount.cc')
-rw-r--r-- dtrain/scfg/features/count/featurecount.cc 49
1 files changed, 0 insertions, 49 deletions
diff --git a/dtrain/scfg/features/count/featurecount.cc b/dtrain/scfg/features/count/featurecount.cc
deleted file mode 100644
index db31885c..00000000
--- a/dtrain/scfg/features/count/featurecount.cc
+++ /dev/null
@@ -1,49 +0,0 @@
-#include "featurecount.hh"
-
-
-/**
- * Map step: emits (token, "1") for every space-separated token of the
- * feature field that contains the substring "=1".
- *
- * The feature field is located purely by counting '|' characters in the
- * input value: it starts two characters after the 9th '|' and stops three
- * characters before the 12th '|'.
- * NOTE(review): these offsets presumably assume fields delimited by
- * " ||| " with the features in the 4th field -- confirm against the
- * grammar format actually fed to this job.
- *
- * Lines that contain fewer than 12 '|' characters, or whose feature field
- * is empty, emit nothing. (Previously such lines walked the iterator past
- * the end of the string, or underflowed the substring length.)
- *
- * @param context  Hadoop Pipes map context; supplies the input value and
- *                 receives the emitted pairs.
- */
-void
-FeatureCountMapper::map( HadoopPipes::MapContext &context )
-{
-  // Bind instead of copy; valid whether the getter returns a reference
-  // or a temporary.
-  const string &line = context.getInputValue();
-
-  // Find the half-open character range [beg, stop) of the feature field.
-  size_t n_pipes = 0, beg = 0, stop = 0;
-  bool complete = false;
-  for ( size_t i = 0; i < line.size() && !complete; ++i ) {
-    if ( line[i] != '|' ) continue;
-    ++n_pipes;
-    if ( n_pipes == 9 ) {
-      beg = i + 2;          // skip the 9th '|' and the character after it
-    } else if ( n_pipes == 12 ) {
-      stop = i - 3;         // i >= 11 here, so this cannot underflow
-      complete = true;
-    }
-  }
-
-  // Malformed line (too few delimiters) or empty/negative-width field.
-  if ( !complete || stop <= beg ) return;
-
-  const string sub = line.substr( beg, stop - beg );
-
-  // Emit token:1 for each matching token.
-  // NOTE(review): the "=1" test is a substring match, so tokens such as
-  // "x=10" also match -- confirm this is intended.
-  vector<string> f_tok;
-  boost::split( f_tok, sub, boost::is_any_of(" ") );
-  for ( vector<string>::const_iterator f = f_tok.begin(); f != f_tok.end(); ++f ) {
-    if ( f->find("=1") != string::npos ) context.emit( *f, "1" );
-  }
-}
-
-/**
- * Reduce step: adds up all values received for the current key (each
- * converted with HadoopUtils::toInt) and emits the key together with the
- * total rendered by HadoopUtils::toString.
- *
- * @param context  Hadoop Pipes reduce context; iterated for the values
- *                 and used to emit the (key, total) pair.
- */
-void
-FeatureCountReducer::reduce( HadoopPipes::ReduceContext &context )
-{
-  size_t total = 0;
-
-  // Consume every value queued for this key.
-  for ( ; context.nextValue(); ) {
-    total += HadoopUtils::toInt( context.getInputValue() );
-  }
-
-  context.emit( context.getInputKey(), HadoopUtils::toString(total) );
-}
-
-
-/**
- * Program entry point: builds a Hadoop Pipes factory for the
- * FeatureCountMapper / FeatureCountReducer pair and hands it to
- * HadoopPipes::runTask, whose result becomes the return value.
- *
- * NOTE(review): runTask's return type is not visible here; if it is a
- * boolean success flag, the process would exit with status 1 on success
- * -- confirm.
- *
- * The command-line arguments are not used.
- */
-int
-main( int argc, char * argv[] )
-{
-  typedef HadoopPipes::TemplateFactory2<FeatureCountMapper,
-                                        FeatureCountReducer> TaskFactory;
-
-  (void) argc;
-  (void) argv;
-
-  TaskFactory factory;
-  return HadoopPipes::runTask(factory);
-}
-