From 606e3e38b8a830dbbe65963ebf6c5ce7866b7800 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 18 Mar 2014 01:41:17 -0400 Subject: star function --- utils/exp_semiring.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 utils/exp_semiring.h (limited to 'utils/exp_semiring.h') diff --git a/utils/exp_semiring.h b/utils/exp_semiring.h new file mode 100644 index 00000000..7572ccf5 --- /dev/null +++ b/utils/exp_semiring.h @@ -0,0 +1,64 @@ +#ifndef _EXP_SEMIRING_H_ +#define _EXP_SEMIRING_H_ + +#include +#include "star.h" + +// this file implements the first-order expectation semiring described +// in Li & Eisner (EMNLP 2009) + +// requirements: +// RType * RType ==> RType +// PType * PType ==> PType +// RType * PType ==> RType +// good examples: +// PType scalar, RType vector +// BAD examples: +// PType vector, RType scalar +template +struct PRPair { + PRPair() : p(), r() {} + // Inside algorithm requires that T(0) and T(1) + // return the 0 and 1 values of the semiring + explicit PRPair(double x) : p(x), r() {} + PRPair(const PType& p, const RType& r) : p(p), r(r) {} + PRPair& operator+=(const PRPair& o) { + p += o.p; + r += o.r; + return *this; + } + PRPair& operator*=(const PRPair& o) { + r = (o.r * p) + (o.p * r); + p *= o.p; + return *this; + } + PType p; + RType r; +}; + +template +std::ostream& operator<<(std::ostream& o, const PRPair& x) { + return o << '<' << x.p << ", " << x.r << '>'; +} + +template +const PRPair operator+(const PRPair& a, const PRPair& b) { + PRPair result = a; + result += b; + return result; +} + +template +const PRPair operator*(const PRPair& a, const PRPair& b) { + PRPair result = a; + result *= b; + return result; +} + +template +const PRPair star(const PRPair& x) { + const P pstar = star(x.p); + return PRPair(pstar, pstar * x.r * pstar); +} + +#endif -- cgit v1.2.3 From 2a9ee1febae6a63173f74ae24e2bfe439e409525 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 18 Mar 2014 02:05:25 -0400 Subject: chris edits --- corpus/support/tokenizer.pl | 4 ++++ utils/exp_semiring.h | 2 +- utils/logval.h | 2 +- utils/star.h | 4 ++-- 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'utils/exp_semiring.h') diff --git a/corpus/support/tokenizer.pl b/corpus/support/tokenizer.pl index 7771201f..f57bc87a 100755 --- a/corpus/support/tokenizer.pl +++ b/corpus/support/tokenizer.pl @@ -240,6 +240,10 @@ sub proc_token { return $token; } + if($token =~ /^\d+(.\d+)+(亿|百万|万|千)?$/){ + return $token; + } + ## 1,234,345.34 if($token =~ /^\d+(\.\d{3})*,\d+$/){ ## number diff --git a/utils/exp_semiring.h b/utils/exp_semiring.h index 7572ccf5..26a22071 100644 --- a/utils/exp_semiring.h +++ b/utils/exp_semiring.h @@ -56,7 +56,7 @@ const PRPair operator*(const PRPair& a, const PRPair& b) { } template -const PRPair star(const PRPair& x) { +inline const PRPair star(const PRPair& x) { const P pstar = star(x.p); return PRPair(pstar, pstar * x.r * pstar); } diff --git a/utils/logval.h b/utils/logval.h index 7f1e1024..0c9ee982 100644 --- a/utils/logval.h +++ b/utils/logval.h @@ -244,7 +244,7 @@ template std::size_t hash_value(const LogVal& x) { return x.hash_impl(); } template -LogVal star(LogVal x) { +inline LogVal star(LogVal x) { if (x.is_0()) return x; if (x.v_ >= 0) { x.v_ = std::numeric_limits::infinity(); diff --git a/utils/star.h b/utils/star.h index e7358ffa..21977dc9 100644 --- a/utils/star.h +++ b/utils/star.h @@ -4,14 +4,14 @@ // star(x) computes the infinite sum x^0 + x^1 + x^2 + ... template -T star(const T& x) { +inline T star(const T& x) { if (!x) return T(); if (x > T(1)) return std::numeric_limits::infinity(); if (x < -T(1)) return -std::numeric_limits::infinity(); return T(1) / (T(1) - x); } -bool star(bool x) { +inline bool star(bool x) { return x; } -- cgit v1.2.3