summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x decoder/ff_fsa.h 4
-rwxr-xr-x decoder/ff_sample_fsa.h 28
-rw-r--r-- decoder/filelib.h 8
-rwxr-xr-x decoder/sentences.h 8
4 files changed, 35 insertions, 13 deletions
diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h
index 8ca1951f..41343f6c 100755
--- a/decoder/ff_fsa.h
+++ b/decoder/ff_fsa.h
@@ -62,8 +62,8 @@ protected:
std::memcpy(state,begin,n);
}
template <class T>
- inline void static to_state(void *state,T const* begin,int n) {
- to_state(state,(char const*)begin,n);
+ inline void static to_state(void *state,T const* begin,int n=1) {
+ to_state(state,(char const*)begin,n*sizeof(T));
}
template <class T>
inline void static to_state(void *state,T const* begin,T const* end) {
diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h
index 8befc0bb..d8aa7830 100755
--- a/decoder/ff_sample_fsa.h
+++ b/decoder/ff_sample_fsa.h
@@ -31,11 +31,12 @@ typedef FeatureFunctionFromFsa<WordPenaltyFsa> WordPenaltyFromFsa;
//
struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
+ typedef FsaFeatureFunctionBase<LongerThanPrev> Base;
static std::string usage(bool param,bool verbose) {
return FeatureFunction::usage_helper(
"LongerThanPrev",
"",
- "stupid example stateful (bigram) feature: -1 per target word that's longer than the previous word (always fires for first word of sentence)",
+ "stupid example stateful (bigram) feature: -1 per target word that's longer than the previous word (<s> sentence begin considered 3 chars long, </s> is sentence end.)",
param,verbose);
}
@@ -49,16 +50,21 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
return std::strlen(TD::Convert(w));
}
int markov_order() const { return 1; }
- LongerThanPrev(std::string const& param) {
+ LongerThanPrev(std::string const& param) : Base(sizeof(int),singleton_sentence(TD::se)) {
Init();
- set_state_bytes(sizeof(int));
-// start.resize(state_bytes()); // this is done by set_state_bytes already.
-// h_start.resize(state_bytes());
-// int ss=-1;
-// wordcpy((WordID*)start.begin(),&ss,&ss+1);
- //to_state(start.begin(),&ss,1);
- wordlen(start.begin())=-1;
+ if (0) { // all this is done in constructor already
+ set_state_bytes(sizeof(int));
+ start.resize(state_bytes()); // this is done by set_state_bytes already.
+ h_start.resize(state_bytes());
+ int ss=3;
+ to_state(start.begin(),&ss,1);
+ ss=4;
+ to_state(h_start.begin(),&ss,1);
+ }
+
+ wordlen(start.begin())=3;
wordlen(h_start.begin())=4; // estimate: anything >4 chars is usually longer than previous
+
}
static const float val_per_target_word=-1;
@@ -79,7 +85,7 @@ struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> {
return FeatureFunction::usage_helper(
"ShorterThanPrev",
"",
- "stupid example stateful (bigram) feature: -1 per target word that's shorter than the previous word (always fires for end of sentence)",
+ "stupid example stateful (bigram) feature: -1 per target word that's shorter than the previous word (end of sentence considered '</s>')",
param,verbose);
}
@@ -87,7 +93,7 @@ struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> {
return std::strlen(TD::Convert(w));
}
ShorterThanPrev(std::string const& param)
- : Base(-1,4,Sentence(1,TD::Convert("")))
+ : Base(3,4,singleton_sentence(TD::se))
// start, h_start, end_phrase
// estimate: anything <4 chars is usually shorter than previous
{
diff --git a/decoder/filelib.h b/decoder/filelib.h
index 2d347c00..af66dd05 100644
--- a/decoder/filelib.h
+++ b/decoder/filelib.h
@@ -6,6 +6,7 @@
#include <iostream>
#include <cstdlib>
#include <boost/shared_ptr.hpp>
+#include <stdexcept>
#include "gzstream.h"
bool FileExists(const std::string& file_name);
@@ -37,6 +38,10 @@ struct BaseFile {
}
std::string filename_;
protected:
+ void error(std::string const& filename,std::string const& reason) { // always throws; parameter order matches the call sites, which pass (filename, reason)
+ throw std::runtime_error("File "+filename+" - "+reason); // message reads "File <filename> - <reason>"
+ }
+
PS ps_;
static bool EndsWith(const std::string& f, const std::string& suf) {
return (f.size() > suf.size()) && (f.rfind(suf) == f.size() - suf.size());
@@ -56,6 +61,7 @@ class ReadFile : public BaseFile<std::istream> {
} else {
if (!FileExists(filename)) {
std::cerr << "File does not exist: " << filename << std::endl;
+ error(filename," couldn't read nonexistant file.");
abort();
}
char const* file=filename_.c_str(); // just in case the gzstream keeps using the filename for longer than the constructor, e.g. inflateReset2. warning in valgrind that I'm hoping will disappear - it makes no sense.
@@ -64,6 +70,7 @@ class ReadFile : public BaseFile<std::istream> {
static_cast<std::istream*>(new std::ifstream(file)));
if (!*ps_) {
std::cerr << "Failed to open " << filename << std::endl;
+ error(filename," open for reading failed.");
abort();
}
}
@@ -86,6 +93,7 @@ class WriteFile : public BaseFile<std::ostream> {
static_cast<std::ostream*>(new std::ofstream(file)));
if (!*ps_) {
std::cerr << "Failed to open " << filename << std::endl;
+ error(filename," open for writing failed.");
abort();
}
}
diff --git a/decoder/sentences.h b/decoder/sentences.h
index 482d3be9..452fb21e 100755
--- a/decoder/sentences.h
+++ b/decoder/sentences.h
@@ -30,6 +30,14 @@ inline void wordcpy(WordID *dest,WordID const* src,WordID const* src_end) {
wordcpy(dest,src,src_end-src);
}
+inline Sentence singleton_sentence(WordID t) { // builds a Sentence holding exactly one element, t
+ Sentence one_word(1,t); return one_word;
+}
+
+inline Sentence singleton_sentence(std::string const& s) { // one-element Sentence holding TD::Convert(s)
+ WordID const id=TD::Convert(s); return singleton_sentence(id);
+}
+
inline std::ostream & operator<<(std::ostream &out,Sentence const& s) {
return out<<TD::GetString(s);