summaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
authorPatrick Simianer <simianer@cl.uni-heidelberg.de>2012-04-07 16:58:55 +0200
committerPatrick Simianer <simianer@cl.uni-heidelberg.de>2012-04-07 16:58:55 +0200
commit715245dc7042ac0dca4fea94031d7c6de8058033 (patch)
tree3a7ff0b88f2e113a08aef663d2487edec0b5f67f /utils
parent89211ab30937672d84a54fac8fa435805499e38d (diff)
parent6001b81eba37985d2e7dea6e6ebb488b787789a6 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'utils')
-rw-r--r--utils/Makefile.am2
-rw-r--r--utils/alignment_io.cc (renamed from utils/alignment_pharaoh.cc)36
-rw-r--r--utils/alignment_io.h42
-rw-r--r--utils/alignment_pharaoh.h14
-rw-r--r--utils/atools.cc11
-rw-r--r--utils/ccrp.h10
-rw-r--r--utils/mfcr.h4
7 files changed, 89 insertions, 30 deletions
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 3ea21835..2fc6ae21 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -23,7 +23,7 @@ atools_SOURCES = atools.cc
noinst_LIBRARIES = libutils.a
libutils_a_SOURCES = \
- alignment_pharaoh.cc \
+ alignment_io.cc \
b64tools.cc \
corpus_tools.cc \
dict.cc \
diff --git a/utils/alignment_pharaoh.cc b/utils/alignment_io.cc
index 890ff565..1d923f7f 100644
--- a/utils/alignment_pharaoh.cc
+++ b/utils/alignment_io.cc
@@ -1,12 +1,10 @@
-#include "utils/alignment_pharaoh.h"
-
-#include <set>
+#include "utils/alignment_io.h"
using namespace std;
static bool is_digit(char x) { return x >= '0' && x <= '9'; }
-boost::shared_ptr<Array2D<bool> > AlignmentPharaoh::ReadPharaohAlignmentGrid(const string& al) {
+boost::shared_ptr<Array2D<bool> > AlignmentIO::ReadPharaohAlignmentGrid(const string& al) {
int max_x = 0;
int max_y = 0;
int i = 0;
@@ -64,14 +62,36 @@ boost::shared_ptr<Array2D<bool> > AlignmentPharaoh::ReadPharaohAlignmentGrid(con
return grid;
}
-void AlignmentPharaoh::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* out) {
+void AlignmentIO::SerializePharaohFormat(const Array2D<bool>& alignment, ostream* o) {
+ ostream& out = *o;
bool need_space = false;
for (int i = 0; i < alignment.width(); ++i)
for (int j = 0; j < alignment.height(); ++j)
if (alignment(i,j)) {
- if (need_space) (*out) << ' '; else need_space = true;
- (*out) << i << '-' << j;
+ if (need_space) out << ' '; else need_space = true;
+ out << i << '-' << j;
}
- (*out) << endl;
+ out << endl;
+}
+
+void AlignmentIO::SerializeTypedAlignment(const Array2D<AlignmentType>& alignment, ostream* o) {
+ ostream& out = *o;
+ bool need_space = false;
+ for (int i = 0; i < alignment.width(); ++i)
+ for (int j = 0; j < alignment.height(); ++j) {
+ const AlignmentType& aij = alignment(i,j);
+ if (aij != kNONE) {
+ if (need_space) out << ' '; else need_space = true;
+ if (aij == kTRANSLATION) {}
+ else if (aij == kTRANSLITERATION) {
+ out << 'T' << ':';
+ } else {
+ cerr << "\nUnexpected alignment point type: " << static_cast<int>(aij) << endl;
+ abort();
+ }
+ out << i << '-' << j;
+ }
+ }
+ out << endl;
}
diff --git a/utils/alignment_io.h b/utils/alignment_io.h
new file mode 100644
index 00000000..36bcecd7
--- /dev/null
+++ b/utils/alignment_io.h
@@ -0,0 +1,42 @@
+#ifndef _ALIGNMENT_IO_H_
+#define _ALIGNMENT_IO_H_
+
+#include <string>
+#include <iostream>
+#include <boost/shared_ptr.hpp>
+#include "array2d.h"
+
+struct AlignmentIO {
+ enum AlignmentType { kNONE = 0, kTRANSLATION = 1, kTRANSLITERATION = 2 };
+
+ static boost::shared_ptr<Array2D<bool> > ReadPharaohAlignmentGrid(const std::string& al);
+ static void SerializePharaohFormat(const Array2D<bool>& alignment, std::ostream* out);
+ static void SerializeTypedAlignment(const Array2D<AlignmentType>& alignment, std::ostream* out);
+};
+
+inline std::ostream& operator<<(std::ostream& os, const Array2D<AlignmentIO::AlignmentType>& m) {
+ os << ' ';
+ for (int j=0; j<m.height(); ++j)
+ os << (j%10);
+ os << "\n";
+ for (int i=0; i<m.width(); ++i) {
+ os << (i%10);
+ for (int j=0; j<m.height(); ++j) {
+ switch (m(i,j)) {
+ case AlignmentIO::kNONE: os << '.'; break;
+ case AlignmentIO::kTRANSLATION: os << '*'; break;
+ case AlignmentIO::kTRANSLITERATION: os << '#'; break;
+ default: os << '?'; break;
+ }
+ }
+ os << (i%10) << "\n";
+ }
+ os << ' ';
+ for (int j=0; j<m.height(); ++j)
+ os << (j%10);
+ os << "\n";
+ return os;
+}
+
+
+#endif
diff --git a/utils/alignment_pharaoh.h b/utils/alignment_pharaoh.h
deleted file mode 100644
index d111c8bf..00000000
--- a/utils/alignment_pharaoh.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _PHARAOH_ALIGNMENT_H_
-#define _PHARAOH_ALIGNMENT_H_
-
-#include <string>
-#include <iostream>
-#include <boost/shared_ptr.hpp>
-#include "array2d.h"
-
-struct AlignmentPharaoh {
- static boost::shared_ptr<Array2D<bool> > ReadPharaohAlignmentGrid(const std::string& al);
- static void SerializePharaohFormat(const Array2D<bool>& alignment, std::ostream* out);
-};
-
-#endif
diff --git a/utils/atools.cc b/utils/atools.cc
index c0a91731..bce7822e 100644
--- a/utils/atools.cc
+++ b/utils/atools.cc
@@ -8,11 +8,10 @@
#include <boost/shared_ptr.hpp>
#include "filelib.h"
-#include "alignment_pharaoh.h"
+#include "alignment_io.h"
namespace po = boost::program_options;
using namespace std;
-using boost::shared_ptr;
struct Command {
virtual ~Command() {}
@@ -348,10 +347,10 @@ int main(int argc, char **argv) {
}
}
if (line1.empty() && !*in1) break;
- shared_ptr<Array2D<bool> > out(new Array2D<bool>);
- shared_ptr<Array2D<bool> > a1 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line1);
+ boost::shared_ptr<Array2D<bool> > out(new Array2D<bool>);
+ boost::shared_ptr<Array2D<bool> > a1 = AlignmentIO::ReadPharaohAlignmentGrid(line1);
if (in2) {
- shared_ptr<Array2D<bool> > a2 = AlignmentPharaoh::ReadPharaohAlignmentGrid(line2);
+ boost::shared_ptr<Array2D<bool> > a2 = AlignmentIO::ReadPharaohAlignmentGrid(line2);
cmd.Apply(*a1, *a2, out.get());
} else {
Array2D<bool> dummy;
@@ -359,7 +358,7 @@ int main(int argc, char **argv) {
}
if (cmd.Result() == 1) {
- AlignmentPharaoh::SerializePharaohFormat(*out, &cout);
+ AlignmentIO::SerializePharaohFormat(*out, &cout);
}
}
if (cmd.Result() == 2)
diff --git a/utils/ccrp.h b/utils/ccrp.h
index 4a8b80e7..8635b422 100644
--- a/utils/ccrp.h
+++ b/utils/ccrp.h
@@ -55,6 +55,10 @@ class CCRP {
double discount() const { return discount_; }
double strength() const { return strength_; }
+ void set_hyperparameters(double d, double s) {
+ discount_ = d; strength_ = s;
+ check_hyperparameters();
+ }
void set_discount(double d) { discount_ = d; check_hyperparameters(); }
void set_strength(double a) { strength_ = a; check_hyperparameters(); }
@@ -93,8 +97,10 @@ class CCRP {
}
// returns +1 or 0 indicating whether a new table was opened
+ // p = probability with which the particular table was selected
+ // excluding p0
template <typename T>
- int increment(const Dish& dish, const T& p0, MT19937* rng) {
+ int increment(const Dish& dish, const T& p0, MT19937* rng, T* p = NULL) {
DishLocations& loc = dish_locs_[dish];
bool share_table = false;
if (loc.total_dish_count_) {
@@ -108,6 +114,7 @@ class CCRP {
ti != loc.table_counts_.end(); ++ti) {
r -= (*ti - discount_);
if (r <= 0.0) {
+ if (p) { *p = T(*ti - discount_) / T(strength_ + num_customers_); }
++(*ti);
break;
}
@@ -119,6 +126,7 @@ class CCRP {
}
} else {
loc.table_counts_.push_back(1u);
+ if (p) { *p = T(strength_ + discount_ * num_tables_) / T(strength_ + num_customers_); }
++num_tables_;
}
++loc.total_dish_count_;
diff --git a/utils/mfcr.h b/utils/mfcr.h
index 886f01ef..4aacb567 100644
--- a/utils/mfcr.h
+++ b/utils/mfcr.h
@@ -73,6 +73,10 @@ class MFCR {
double discount() const { return discount_; }
double strength() const { return strength_; }
+ void set_hyperparameters(double d, double s) {
+ discount_ = d; strength_ = s;
+ check_hyperparameters();
+ }
void set_discount(double d) { discount_ = d; check_hyperparameters(); }
void set_strength(double a) { strength_ = a; check_hyperparameters(); }