summaryrefslogtreecommitdiff
path: root/klm/util/stream/line_input.cc
diff options
context:
space:
mode:
authorPaul Baltescu <pauldb89@gmail.com>2013-02-21 14:13:55 +0000
committerPaul Baltescu <pauldb89@gmail.com>2013-02-21 14:13:55 +0000
commitbca26d953a774b8efca12f30407390b3f5eef9d0 (patch)
treefe922de5c89b1844f677d550dcc24e87edd67a55 /klm/util/stream/line_input.cc
parent54a1c0e2bde259e3acc9c0a8ec8da3c7704e80ca (diff)
parent95c364f2cb002241c4a62bedb1c5ef6f1e9a7f22 (diff)
Merge branch 'master' of https://github.com/pauldb89/cdec
Diffstat (limited to 'klm/util/stream/line_input.cc')
-rw-r--r--klm/util/stream/line_input.cc52
1 files changed, 52 insertions, 0 deletions
diff --git a/klm/util/stream/line_input.cc b/klm/util/stream/line_input.cc
new file mode 100644
index 00000000..dafa5020
--- /dev/null
+++ b/klm/util/stream/line_input.cc
@@ -0,0 +1,52 @@
+#include "util/stream/line_input.hh"
+
+#include "util/exception.hh"
+#include "util/file.hh"
+#include "util/read_compressed.hh"
+#include "util/stream/chain.hh"
+
+#include <algorithm>
+#include <vector>
+
+namespace util { namespace stream {
+
+void LineInput::Run(const ChainPosition &position) {
+ ReadCompressed reader(fd_);
+ // Holding area for beginning of line to be placed in next block.
+ std::vector<char> carry;
+
+ for (Link block(position); ; ++block) {
+ char *to = static_cast<char*>(block->Get());
+ char *begin = to;
+ char *end = to + position.GetChain().BlockSize();
+ std::copy(carry.begin(), carry.end(), to);
+ to += carry.size();
+ while (to != end) {
+ std::size_t got = reader.Read(to, end - to);
+ if (!got) {
+ // EOF
+ block->SetValidSize(to - begin);
+ ++block;
+ block.Poison();
+ return;
+ }
+ to += got;
+ }
+
+ // Find the last newline.
+ char *newline;
+ for (newline = to - 1; ; --newline) {
+ UTIL_THROW_IF(newline < begin, Exception, "Did not find a newline in " << position.GetChain().BlockSize() << " bytes of input of " << NameFromFD(fd_) << ". Is this a text file?");
+ if (*newline == '\n') break;
+ }
+
+ // Copy everything after the last newline to the carry.
+ carry.clear();
+ carry.resize(to - (newline + 1));
+ std::copy(newline + 1, to, &*carry.begin());
+
+ block->SetValidSize(newline + 1 - begin);
+ }
+}
+
+}} // namespaces