summaryrefslogtreecommitdiff
path: root/decoder/hg_io.h
blob: 7162106ea8b43ece27ec148659293240e25ae826 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#ifndef _HG_IO_H_
#define _HG_IO_H_

#include <cstddef>
#include <iostream>
#include <string>

#include "lattice.h"
class Hypergraph;

struct HypergraphIO {

  // ---------------------------------------------------------------------
  // JSON
  // ---------------------------------------------------------------------

  // The JSON format is essentially a list of nodes and edges given in
  // topological order:
  //   * before an edge is read, its tail nodes must already have been read;
  //   * before a node is read, its incoming edges must already have been read.
  // Writing can be somewhat harder when the forest is not topologically
  // sorted (which probably does not happen very often), but this ordering
  // makes reading much more memory efficient.
  // See test_data/small.json.gz for an example encoding.
  // NOTE(review): the bool result presumably signals success -- confirm
  // against the implementation.
  static bool ReadFromJSON(
      std::istream* in,
      Hypergraph* out);

  // When remove_rules is set, the hypergraph is serialized without rule
  // information, so the output carries only structure and feature
  // information.
  // NOTE(review): the bool result presumably signals success -- confirm
  // against the implementation.
  static bool WriteToJSON(
      const Hypergraph& hg,
      bool remove_rules,
      std::ostream* out);

  // NOTE(review): no output stream is taken here; where the CFG ends up is
  // not visible from this header -- check the implementation.
  static void WriteAsCFG(
      const Hypergraph& hg);

  // ---------------------------------------------------------------------
  // PLF serialization utilities
  // ---------------------------------------------------------------------

  // NOTE(review): `line` defaults to 0; presumably a source line number used
  // for diagnostics -- confirm against the implementation.
  static void ReadFromPLF(
      const std::string& in,
      Hypergraph* out,
      int line = 0);

  // Returns the PLF string representation of hg.
  // Behavior is undefined when hg is not a lattice.
  static std::string AsPLF(
      const Hypergraph& hg,
      bool include_global_parentheses = true);

  static void PLFtoLattice(
      const std::string& plf,
      Lattice* pl);

  // PLF helper.
  static std::string Escape(
      const std::string& s);
};

// Encode/decode helpers.
// NOTE(review): the names suggest Base64; the exact alphabet and padding
// rules are not visible from this header -- confirm against the
// implementation.
//
// Top-level `const` on by-value parameters is omitted from these declarations
// on purpose: it is not part of the function type, so definitions that keep
// `const size_t` on their parameters still match.
namespace B64 {
  // Reads `insize` bytes starting at `data` and writes the result to `out`.
  // NOTE(review): `outsize` is presumably the capacity of `out`, and the bool
  // result presumably reports success -- confirm against the implementation.
  bool b64decode(const unsigned char* data, std::size_t insize, char* out, std::size_t outsize);

  // Reads `size` bytes starting at `data` and writes the encoded form to
  // `*out`.
  void b64encode(const char* data, std::size_t size, std::ostream* out);
}

#endif