diff options
author | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-23 22:12:06 +0000 |
---|---|---|
committer | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-23 22:12:06 +0000 |
commit | 3e1f5d7ad9aff3e572e25e0afc2ae10b438e8962 (patch) | |
tree | 77c8c7b55de7d3e458cd6fa079f4ee12f35480e0 | |
parent | b9574d2c459b60037bf1e97e64e0c4aed524a71c (diff) |
pretty
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@390 ec762483-ff6d-05da-a07a-a48fb63a330f
-rwxr-xr-x | decoder/ff_fsa.h | 4 | ||||
-rwxr-xr-x | decoder/ff_sample_fsa.h | 28 | ||||
-rw-r--r-- | decoder/filelib.h | 8 | ||||
-rwxr-xr-x | decoder/sentences.h | 8 |
4 files changed, 35 insertions, 13 deletions
diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h index 8ca1951f..41343f6c 100755 --- a/decoder/ff_fsa.h +++ b/decoder/ff_fsa.h @@ -62,8 +62,8 @@ protected: std::memcpy(state,begin,n); } template <class T> - inline void static to_state(void *state,T const* begin,int n) { - to_state(state,(char const*)begin,n); + inline void static to_state(void *state,T const* begin,int n=1) { + to_state(state,(char const*)begin,n*sizeof(T)); } template <class T> inline void static to_state(void *state,T const* begin,T const* end) { diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h index 8befc0bb..d8aa7830 100755 --- a/decoder/ff_sample_fsa.h +++ b/decoder/ff_sample_fsa.h @@ -31,11 +31,12 @@ typedef FeatureFunctionFromFsa<WordPenaltyFsa> WordPenaltyFromFsa; // struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> { + typedef FsaFeatureFunctionBase<LongerThanPrev> Base; static std::string usage(bool param,bool verbose) { return FeatureFunction::usage_helper( "LongerThanPrev", "", - "stupid example stateful (bigram) feature: -1 per target word that's longer than the previous word (always fires for first word of sentence)", + "stupid example stateful (bigram) feature: -1 per target word that's longer than the previous word (<s> sentence begin considered 3 chars long, </s> is sentence end.)", param,verbose); } @@ -49,16 +50,21 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> { return std::strlen(TD::Convert(w)); } int markov_order() const { return 1; } - LongerThanPrev(std::string const& param) { + LongerThanPrev(std::string const& param) : Base(sizeof(int),singleton_sentence(TD::se)) { Init(); - set_state_bytes(sizeof(int)); -// start.resize(state_bytes()); // this is done by set_state_bytes already. -// h_start.resize(state_bytes()); -// int ss=-1; -// wordcpy((WordID*)start.begin(),&ss,&ss+1); - //to_state(start.begin(),&ss,1); - wordlen(start.begin())=-1; + if (0) { // all this is done in constructor already + set_state_bytes(sizeof(int)); + start.resize(state_bytes()); // this is done by set_state_bytes already. + h_start.resize(state_bytes()); + int ss=3; + to_state(start.begin(),&ss,1); + ss=4; + to_state(h_start.begin(),&ss,1); + } + + wordlen(start.begin())=3; wordlen(h_start.begin())=4; // estimate: anything >4 chars is usually longer than previous + } static const float val_per_target_word=-1; @@ -79,7 +85,7 @@ struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> { return FeatureFunction::usage_helper( "ShorterThanPrev", "", - "stupid example stateful (bigram) feature: -1 per target word that's shorter than the previous word (always fires for end of sentence)", + "stupid example stateful (bigram) feature: -1 per target word that's shorter than the previous word (end of sentence considered '</s>')", param,verbose); } @@ -87,7 +93,7 @@ struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> { return std::strlen(TD::Convert(w)); } ShorterThanPrev(std::string const& param) - : Base(-1,4,Sentence(1,TD::Convert(""))) + : Base(3,4,singleton_sentence(TD::se)) // start, h_start, end_phrase // estimate: anything <4 chars is usually shorter than previous { diff --git a/decoder/filelib.h b/decoder/filelib.h index 2d347c00..af66dd05 100644 --- a/decoder/filelib.h +++ b/decoder/filelib.h @@ -6,6 +6,7 @@ #include <iostream> #include <cstdlib> #include <boost/shared_ptr.hpp> +#include <stdexcept> #include "gzstream.h" bool FileExists(const std::string& file_name); @@ -37,6 +38,10 @@ struct BaseFile { } std::string filename_; protected: + void error(std::string const& reason,std::string const& filename) { + throw std::runtime_error("File "+filename+" - "+reason); + } + PS ps_; static bool EndsWith(const std::string& f, const std::string& suf) { return (f.size() > suf.size()) && (f.rfind(suf) == f.size() - suf.size()); @@ -56,6 +61,7 @@ class ReadFile : public BaseFile<std::istream> { } else { if (!FileExists(filename)) { std::cerr << "File does not exist: " << filename << std::endl; + error(filename," couldn't read nonexistant file."); abort(); } char const* file=filename_.c_str(); // just in case the gzstream keeps using the filename for longer than the constructor, e.g. inflateReset2. warning in valgrind that I'm hoping will disappear - it makes no sense. @@ -64,6 +70,7 @@ class ReadFile : public BaseFile<std::istream> { static_cast<std::istream*>(new std::ifstream(file))); if (!*ps_) { std::cerr << "Failed to open " << filename << std::endl; + error(filename," open for reading failed."); abort(); } } @@ -86,6 +93,7 @@ class WriteFile : public BaseFile<std::ostream> { static_cast<std::ostream*>(new std::ofstream(file))); if (!*ps_) { std::cerr << "Failed to open " << filename << std::endl; + error(filename," open for writing failed."); abort(); } } diff --git a/decoder/sentences.h b/decoder/sentences.h index 482d3be9..452fb21e 100755 --- a/decoder/sentences.h +++ b/decoder/sentences.h @@ -30,6 +30,14 @@ inline void wordcpy(WordID *dest,WordID const* src,WordID const* src_end) { wordcpy(dest,src,src_end-src); } +inline Sentence singleton_sentence(WordID t) { + return Sentence(1,t); +} + +inline Sentence singleton_sentence(std::string const& s) { + return singleton_sentence(TD::Convert(s)); +} + inline std::ostream & operator<<(std::ostream &out,Sentence const& s) { return out<<TD::GetString(s); |