From 3e1f5d7ad9aff3e572e25e0afc2ae10b438e8962 Mon Sep 17 00:00:00 2001 From: graehl Date: Fri, 23 Jul 2010 22:12:06 +0000 Subject: pretty git-svn-id: https://ws10smt.googlecode.com/svn/trunk@390 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/ff_fsa.h | 4 ++-- decoder/ff_sample_fsa.h | 28 +++++++++++++++++----------- decoder/filelib.h | 8 ++++++++ decoder/sentences.h | 8 ++++++++ 4 files changed, 35 insertions(+), 13 deletions(-) (limited to 'decoder') diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h index 8ca1951f..41343f6c 100755 --- a/decoder/ff_fsa.h +++ b/decoder/ff_fsa.h @@ -62,8 +62,8 @@ protected: std::memcpy(state,begin,n); } template - inline void static to_state(void *state,T const* begin,int n) { - to_state(state,(char const*)begin,n); + inline void static to_state(void *state,T const* begin,int n=1) { + to_state(state,(char const*)begin,n*sizeof(T)); } template inline void static to_state(void *state,T const* begin,T const* end) { diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h index 8befc0bb..d8aa7830 100755 --- a/decoder/ff_sample_fsa.h +++ b/decoder/ff_sample_fsa.h @@ -31,11 +31,12 @@ typedef FeatureFunctionFromFsa WordPenaltyFromFsa; // struct LongerThanPrev : public FsaFeatureFunctionBase { + typedef FsaFeatureFunctionBase Base; static std::string usage(bool param,bool verbose) { return FeatureFunction::usage_helper( "LongerThanPrev", "", - "stupid example stateful (bigram) feature: -1 per target word that's longer than the previous word (always fires for first word of sentence)", + "stupid example stateful (bigram) feature: -1 per target word that's longer than the previous word ( sentence begin considered 3 chars long, is sentence end.)", param,verbose); } @@ -49,16 +50,21 @@ struct LongerThanPrev : public FsaFeatureFunctionBase { return std::strlen(TD::Convert(w)); } int markov_order() const { return 1; } - LongerThanPrev(std::string const& param) { + LongerThanPrev(std::string const& param) : Base(sizeof(int),singleton_sentence(TD::se)) { Init(); - set_state_bytes(sizeof(int)); -// start.resize(state_bytes()); // this is done by set_state_bytes already. -// h_start.resize(state_bytes()); -// int ss=-1; -// wordcpy((WordID*)start.begin(),&ss,&ss+1); - //to_state(start.begin(),&ss,1); - wordlen(start.begin())=-1; + if (0) { // all this is done in constructor already + set_state_bytes(sizeof(int)); + start.resize(state_bytes()); // this is done by set_state_bytes already. + h_start.resize(state_bytes()); + int ss=3; + to_state(start.begin(),&ss,1); + ss=4; + to_state(h_start.begin(),&ss,1); + } + + wordlen(start.begin())=3; wordlen(h_start.begin())=4; // estimate: anything >4 chars is usually longer than previous + } static const float val_per_target_word=-1; @@ -79,7 +85,7 @@ struct ShorterThanPrev : FsaTypedBase { return FeatureFunction::usage_helper( "ShorterThanPrev", "", - "stupid example stateful (bigram) feature: -1 per target word that's shorter than the previous word (always fires for end of sentence)", + "stupid example stateful (bigram) feature: -1 per target word that's shorter than the previous word (end of sentence considered '')", param,verbose); } @@ -87,7 +93,7 @@ struct ShorterThanPrev : FsaTypedBase { return std::strlen(TD::Convert(w)); } ShorterThanPrev(std::string const& param) - : Base(-1,4,Sentence(1,TD::Convert(""))) + : Base(3,4,singleton_sentence(TD::se)) // start, h_start, end_phrase // estimate: anything <4 chars is usually shorter than previous { diff --git a/decoder/filelib.h b/decoder/filelib.h index 2d347c00..af66dd05 100644 --- a/decoder/filelib.h +++ b/decoder/filelib.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "gzstream.h" bool FileExists(const std::string& file_name); @@ -37,6 +38,10 @@ struct BaseFile { } std::string filename_; protected: + void error(std::string const& reason,std::string const& filename) { + throw std::runtime_error("File "+filename+" - "+reason); + } + PS ps_; static bool EndsWith(const std::string& f, const std::string& suf) { return (f.size() > suf.size()) && (f.rfind(suf) == f.size() - suf.size()); @@ -56,6 +61,7 @@ class ReadFile : public BaseFile { } else { if (!FileExists(filename)) { std::cerr << "File does not exist: " << filename << std::endl; + error(filename," couldn't read nonexistant file."); abort(); } char const* file=filename_.c_str(); // just in case the gzstream keeps using the filename for longer than the constructor, e.g. inflateReset2. warning in valgrind that I'm hoping will disappear - it makes no sense. @@ -64,6 +70,7 @@ class ReadFile : public BaseFile { static_cast(new std::ifstream(file))); if (!*ps_) { std::cerr << "Failed to open " << filename << std::endl; + error(filename," open for reading failed."); abort(); } } @@ -86,6 +93,7 @@ class WriteFile : public BaseFile { static_cast(new std::ofstream(file))); if (!*ps_) { std::cerr << "Failed to open " << filename << std::endl; + error(filename," open for writing failed."); abort(); } } diff --git a/decoder/sentences.h b/decoder/sentences.h index 482d3be9..452fb21e 100755 --- a/decoder/sentences.h +++ b/decoder/sentences.h @@ -30,6 +30,14 @@ inline void wordcpy(WordID *dest,WordID const* src,WordID const* src_end) { wordcpy(dest,src,src_end-src); } +inline Sentence singleton_sentence(WordID t) { + return Sentence(1,t); +} + +inline Sentence singleton_sentence(std::string const& s) { + return singleton_sentence(TD::Convert(s)); +} + inline std::ostream & operator<<(std::ostream &out,Sentence const& s) { return out<