summary | refs | log | tree | commit | diff
path: root/extools/featurize_grammar.cc
diff options
context:
space:
mode:
Diffstat (limited to 'extools/featurize_grammar.cc')
-rw-r--r-- extools/featurize_grammar.cc 40
1 files changed, 20 insertions, 20 deletions
diff --git a/extools/featurize_grammar.cc b/extools/featurize_grammar.cc
index 1ca20a4b..4c9821ec 100644
--- a/extools/featurize_grammar.cc
+++ b/extools/featurize_grammar.cc
@@ -41,7 +41,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
po::options_description clo("Command line options");
po::options_description dcmdline_options;
dcmdline_options.add(opts);
-
+
po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
po::notify(*conf);
@@ -139,9 +139,9 @@ void ParseLine(const char* buf, vector<WordID>* cur_key, ID2RuleStatistics* coun
void LexTranslationTable::createTTable(const char* buf){
AnnotatedParallelSentence sent;
sent.ParseInputLine(buf);
-
+
//iterate over the alignment to compute aligned words
-
+
for(int i =0;i<sent.aligned.width();i++)
{
for (int j=0;j<sent.aligned.height();j++)
@@ -158,7 +158,7 @@ void LexTranslationTable::createTTable(const char* buf){
if (DEBUG) cerr << endl;
}
if (DEBUG) cerr << endl;
-
+
const WordID NULL_ = TD::Convert("NULL");
//handle unaligned words - align them to null
for (int j =0; j < sent.e_len; j++) {
@@ -167,7 +167,7 @@ void LexTranslationTable::createTTable(const char* buf){
++total_foreign[NULL_];
++total_english[sent.e[j]];
}
-
+
for (int i =0; i < sent.f_len; i++) {
if (sent.f_aligned[i]) continue;
++word_translation[pair<WordID,WordID> (sent.f[i], NULL_)];
@@ -187,16 +187,16 @@ static bool IsZero(float f) { return (f > 0.999 && f < 1.001); }
struct FeatureExtractor {
// create any keys necessary
- virtual void ObserveFilteredRule(const WordID lhs,
- const vector<WordID>& src,
- const vector<WordID>& trg) {}
+ virtual void ObserveFilteredRule(const WordID /* lhs */,
+ const vector<WordID>& /* src */,
+ const vector<WordID>& /* trg */) {}
// compute statistics over keys, the same lhs-src-trg tuple may be seen
// more than once
- virtual void ObserveUnfilteredRule(const WordID lhs,
- const vector<WordID>& src,
- const vector<WordID>& trg,
- const RuleStatistics& info) {}
+ virtual void ObserveUnfilteredRule(const WordID /* lhs */,
+ const vector<WordID>& /* src */,
+ const vector<WordID>& /* trg */,
+ const RuleStatistics& /* info */) {}
// compute features, a unique lhs-src-trg tuple will be seen exactly once
virtual void ExtractFeatures(const WordID lhs,
@@ -241,7 +241,7 @@ struct LexProbExtractor : public FeatureExtractor {
while(alignment) {
alignment.getline(buf, MAX_LINE_LENGTH);
if (buf[0] == 0) continue;
- table.createTTable(buf);
+ table.createTTable(buf);
}
delete[] buf;
}
@@ -271,7 +271,7 @@ struct LexProbExtractor : public FeatureExtractor {
if ( table.total_english[trg[ita->second]] !=0 )
e2f = (float) temp / table.total_english[trg[ita->second]];
if (DEBUG) printf (" %d %E %E\n", temp, f2e, e2f);
-
+
//local counts to keep track of which things haven't been aligned, to later compute their null alignment
if (foreign_aligned.count(src[ita->first])) {
foreign_aligned[ src[ita->first] ].first++;
@@ -279,7 +279,7 @@ struct LexProbExtractor : public FeatureExtractor {
} else {
foreign_aligned[ src[ita->first] ] = pair<int,float> (1,e2f);
}
-
+
if (english_aligned.count( trg[ ita->second] )) {
english_aligned[ trg[ ita->second] ].first++;
english_aligned[ trg[ ita->second] ].second += f2e;
@@ -294,8 +294,8 @@ struct LexProbExtractor : public FeatureExtractor {
//compute lexical weight P(F|E) and include unaligned foreign words
for(int i=0;i<src.size(); i++) {
if (!table.total_foreign.count(src[i])) continue; //if we dont have it in the translation table, we won't know its lexical weight
-
- if (foreign_aligned.count(src[i]))
+
+ if (foreign_aligned.count(src[i]))
{
pair<int, float> temp_lex_prob = foreign_aligned[src[i]];
final_lex_e2f *= temp_lex_prob.second / temp_lex_prob.first;
@@ -305,14 +305,14 @@ struct LexProbExtractor : public FeatureExtractor {
int temp_count = table.word_translation[pair<WordID,WordID> (src[i],NULL_)];
float temp_e2f = (float) temp_count / table.total_english[NULL_];
final_lex_e2f *= temp_e2f;
- }
+ }
}
//compute P(E|F) unaligned english words
for(int j=0; j< trg.size(); j++) {
if (!table.total_english.count(trg[j])) continue;
-
+
if (english_aligned.count(trg[j]))
{
pair<int, float> temp_lex_prob = english_aligned[trg[j]];
@@ -338,7 +338,6 @@ int main(int argc, char** argv){
ifstream alignment (conf["aligned_corpus"].as<string>().c_str());
ReadFile fg1(conf["filtered_grammar"].as<string>());
- istream& fs1 = *fg1.stream();
// TODO make this list configurable
vector<boost::shared_ptr<FeatureExtractor> > extractors;
@@ -355,6 +354,7 @@ int main(int argc, char** argv){
vector<WordID> src;
#if 0
+ istream& fs1 = *fg1.stream();
int line = 0;
while(fs1) {
fs1.getline(buf, MAX_LINE_LENGTH);