summaryrefslogtreecommitdiff
path: root/decoder
diff options
context:
space:
mode:
Diffstat (limited to 'decoder')
-rw-r--r--decoder/JSON_parser.c192
-rw-r--r--decoder/JSON_parser.h42
-rw-r--r--decoder/fdict.cc9
-rw-r--r--decoder/hg.h1
-rw-r--r--decoder/logval.h29
-rw-r--r--decoder/timing_stats.cc2
6 files changed, 137 insertions, 138 deletions
diff --git a/decoder/JSON_parser.c b/decoder/JSON_parser.c
index 175b7cc9..5e392bc6 100644
--- a/decoder/JSON_parser.c
+++ b/decoder/JSON_parser.c
@@ -28,31 +28,31 @@ SOFTWARE.
/*
Callbacks, comments, Unicode handling by Jean Gressmann (jean@0x42.de), 2007-2009.
-
+
For the added features the license above applies also.
-
+
Changelog:
- 2009-05-17
+ 2009-05-17
Incorporated benrudiak@googlemail.com fix for UTF16 decoding.
-
- 2009-05-14
+
+ 2009-05-14
Fixed float parsing bug related to a locale being set that didn't
use '.' as decimal point character (charles@transmissionbt.com).
-
- 2008-10-14
+
+ 2008-10-14
Renamed states.IN to states.IT to avoid name clash which IN macro
defined in windef.h (alexey.pelykh@gmail.com)
-
- 2008-07-19
+
+ 2008-07-19
Removed some duplicate code & debugging variable (charles@transmissionbt.com)
-
- 2008-05-28
- Made JSON_value structure ansi C compliant. This bug was report by
+
+ 2008-05-28
+ Made JSON_value structure ansi C compliant. This bug was report by
trisk@acm.jhu.edu
-
- 2008-05-20
- Fixed bug reported by charles@transmissionbt.com where the switching
- from static to dynamic parse buffer did not copy the static parse
+
+ 2008-05-20
+ Fixed bug reported by charles@transmissionbt.com where the switching
+ from static to dynamic parse buffer did not copy the static parse
buffer's content.
*/
@@ -109,7 +109,7 @@ struct JSON_parser_struct {
char static_parse_buffer[JSON_PARSER_PARSE_BUFFER_SIZE];
};
-#define COUNTOF(x) (sizeof(x)/sizeof(x[0]))
+#define COUNTOF(x) (sizeof(x)/sizeof(x[0]))
/*
Characters are mapped into these character classes. This allows for
@@ -150,7 +150,7 @@ enum classes {
C_ABCDF, /* ABCDF */
C_E, /* E */
C_ETC, /* everything else */
- C_STAR, /* * */
+ C_STAR, /* * */
NR_CLASSES
};
@@ -295,9 +295,9 @@ static int state_transition_table[NR_STATES][NR_CLASSES] = {
These modes can be pushed on the stack.
*/
enum modes {
- MODE_ARRAY = 1,
- MODE_DONE = 2,
- MODE_KEY = 3,
+ MODE_ARRAY = 1,
+ MODE_DONE = 2,
+ MODE_KEY = 3,
MODE_OBJECT = 4
};
@@ -325,7 +325,7 @@ push(JSON_parser jc, int mode)
return false;
}
}
-
+
jc->stack[jc->top] = mode;
return true;
}
@@ -351,14 +351,14 @@ pop(JSON_parser jc, int mode)
jc->parse_buffer_count = 0;\
jc->parse_buffer[0] = 0;\
} while (0)
-
+
#define parse_buffer_pop_back_char(jc)\
do {\
assert(jc->parse_buffer_count >= 1);\
--jc->parse_buffer_count;\
jc->parse_buffer[jc->parse_buffer_count] = 0;\
- } while (0)
-
+ } while (0)
+
void delete_JSON_parser(JSON_parser jc)
{
if (jc) {
@@ -369,7 +369,7 @@ void delete_JSON_parser(JSON_parser jc)
free((void*)jc->parse_buffer);
}
free((void*)jc);
- }
+ }
}
@@ -388,30 +388,30 @@ new_JSON_parser(JSON_config* config)
int depth = 0;
JSON_config default_config;
-
+
JSON_parser jc = (JSON_parser)malloc(sizeof(struct JSON_parser_struct));
-
+
memset(jc, 0, sizeof(*jc));
-
-
+
+
/* initialize configuration */
init_JSON_config(&default_config);
-
+
/* set to default configuration if none was provided */
if (config == NULL) {
config = &default_config;
}
depth = config->depth;
-
+
/* We need to be able to push at least one object */
if (depth == 0) {
depth = 1;
}
-
+
jc->state = GO;
jc->top = -1;
-
+
/* Do we want non-bound stack? */
if (depth > 0) {
jc->stack_capacity = depth;
@@ -426,24 +426,24 @@ new_JSON_parser(JSON_config* config)
jc->depth = -1;
jc->stack = &jc->static_stack[0];
}
-
+
/* set parser to start */
push(jc, MODE_DONE);
-
+
/* set up the parse buffer */
jc->parse_buffer = &jc->static_parse_buffer[0];
jc->parse_buffer_capacity = COUNTOF(jc->static_parse_buffer);
parse_buffer_clear(jc);
-
+
/* set up callback, comment & float handling */
jc->callback = config->callback;
jc->ctx = config->callback_ctx;
jc->allow_comments = config->allow_comments != 0;
jc->handle_floats_manually = config->handle_floats_manually != 0;
-
+
/* set up decimal point */
jc->decimal_point = *localeconv()->decimal_point;
-
+
return jc;
}
@@ -475,27 +475,27 @@ static void grow_parse_buffer(JSON_parser jc)
jc->type == JSON_T_FLOAT || \
jc->type == JSON_T_INTEGER || \
jc->type == JSON_T_STRING)
-
+
static int parse_parse_buffer(JSON_parser jc)
{
if (jc->callback) {
JSON_value value, *arg = NULL;
-
+
if (jc->type != JSON_T_NONE) {
assert_is_non_container_type(jc);
-
+
switch(jc->type) {
case JSON_T_FLOAT:
arg = &value;
if (jc->handle_floats_manually) {
value.vu.str.value = jc->parse_buffer;
value.vu.str.length = jc->parse_buffer_count;
- } else {
+ } else {
/*sscanf(jc->parse_buffer, "%Lf", &value.vu.float_value);*/
-
+
/* not checking with end pointer b/c there may be trailing ws */
- value.vu.float_value = strtold(jc->parse_buffer, NULL);
+ value.vu.float_value = strtod(jc->parse_buffer, NULL);
}
break;
case JSON_T_INTEGER:
@@ -508,15 +508,15 @@ static int parse_parse_buffer(JSON_parser jc)
value.vu.str.length = jc->parse_buffer_count;
break;
}
-
+
if (!(*jc->callback)(jc->ctx, jc->type, arg)) {
return false;
}
}
}
-
+
parse_buffer_clear(jc);
-
+
return true;
}
@@ -531,14 +531,14 @@ static int decode_unicode_char(JSON_parser jc)
unsigned uc = 0;
char* p;
int trail_bytes;
-
+
assert(jc->parse_buffer_count >= 6);
-
+
p = &jc->parse_buffer[jc->parse_buffer_count - 4];
-
+
for (i = 12; i >= 0; i -= 4, ++p) {
unsigned x = *p;
-
+
if (x >= 'a') {
x -= ('a' - 10);
} else if (x >= 'A') {
@@ -546,16 +546,16 @@ static int decode_unicode_char(JSON_parser jc)
} else {
x &= ~0x30u;
}
-
+
assert(x < 16);
-
+
uc |= x << i;
}
-
+
/* clear UTF-16 char from buffer */
jc->parse_buffer_count -= 6;
jc->parse_buffer[jc->parse_buffer_count] = 0;
-
+
/* attempt decoding ... */
if (jc->utf16_high_surrogate) {
if (IS_LOW_SURROGATE(uc)) {
@@ -582,15 +582,15 @@ static int decode_unicode_char(JSON_parser jc)
trail_bytes = 2;
}
}
-
+
jc->parse_buffer[jc->parse_buffer_count++] = (char) ((uc >> (trail_bytes * 6)) | utf8_lead_bits[trail_bytes]);
-
+
for (i = trail_bytes * 6 - 6; i >= 0; i -= 6) {
jc->parse_buffer[jc->parse_buffer_count++] = (char) (((uc >> i) & 0x3F) | 0x80);
}
jc->parse_buffer[jc->parse_buffer_count] = 0;
-
+
return true;
}
@@ -646,7 +646,7 @@ static int add_escaped_char_to_parse_buffer(JSON_parser jc, int next_char)
} \
} \
} while (0)
-
+
#define assert_type_isnt_string_null_or_bool(jc) \
assert(jc->type != JSON_T_FALSE); \
@@ -665,7 +665,7 @@ JSON_parser_char(JSON_parser jc, int next_char)
text, it returns false.
*/
int next_class, next_state;
-
+
/*
Determine the character's class.
*/
@@ -680,9 +680,9 @@ JSON_parser_char(JSON_parser jc, int next_char)
return false;
}
}
-
+
add_char_to_parse_buffer(jc, next_char, next_class);
-
+
/*
Get the next state from the state transition table.
*/
@@ -697,7 +697,7 @@ JSON_parser_char(JSON_parser jc, int next_char)
Or perform one of the actions.
*/
switch (next_state) {
-/* Unicode character */
+/* Unicode character */
case UC:
if(!decode_unicode_char(jc)) {
return false;
@@ -718,74 +718,74 @@ JSON_parser_char(JSON_parser jc, int next_char)
case MX:
jc->type = JSON_T_INTEGER;
jc->state = MI;
- break;
-/* integer detected by zero */
+ break;
+/* integer detected by zero */
case ZX:
jc->type = JSON_T_INTEGER;
jc->state = ZE;
- break;
-/* integer detected by 1-9 */
+ break;
+/* integer detected by 1-9 */
case IX:
jc->type = JSON_T_INTEGER;
jc->state = IT;
- break;
-
+ break;
+
/* floating point number detected by exponent*/
case DE:
assert_type_isnt_string_null_or_bool(jc);
jc->type = JSON_T_FLOAT;
jc->state = E1;
- break;
-
+ break;
+
/* floating point number detected by fraction */
case DF:
assert_type_isnt_string_null_or_bool(jc);
if (!jc->handle_floats_manually) {
/*
- Some versions of strtod (which underlies sscanf) don't support converting
+ Some versions of strtod (which underlies sscanf) don't support converting
C-locale formated floating point values.
-*/
+*/
assert(jc->parse_buffer[jc->parse_buffer_count-1] == '.');
jc->parse_buffer[jc->parse_buffer_count-1] = jc->decimal_point;
- }
+ }
jc->type = JSON_T_FLOAT;
jc->state = FX;
- break;
+ break;
/* string begin " */
case SB:
parse_buffer_clear(jc);
assert(jc->type == JSON_T_NONE);
jc->type = JSON_T_STRING;
jc->state = ST;
- break;
-
+ break;
+
/* n */
case NU:
assert(jc->type == JSON_T_NONE);
jc->type = JSON_T_NULL;
jc->state = N1;
- break;
+ break;
/* f */
case FA:
assert(jc->type == JSON_T_NONE);
jc->type = JSON_T_FALSE;
jc->state = F1;
- break;
+ break;
/* t */
case TR:
assert(jc->type == JSON_T_NONE);
jc->type = JSON_T_TRUE;
jc->state = T1;
- break;
-
+ break;
+
/* closing comment */
case CE:
jc->comment = 0;
assert(jc->parse_buffer_count == 0);
assert(jc->type == JSON_T_NONE);
jc->state = jc->before_comment_state;
- break;
-
+ break;
+
/* opening comment */
case CB:
if (!jc->allow_comments) {
@@ -799,7 +799,7 @@ JSON_parser_char(JSON_parser jc, int next_char)
assert(jc->type != JSON_T_STRING);
switch (jc->stack[jc->top]) {
case MODE_ARRAY:
- case MODE_OBJECT:
+ case MODE_OBJECT:
switch(jc->state) {
case VA:
case AR:
@@ -819,7 +819,7 @@ JSON_parser_char(JSON_parser jc, int next_char)
jc->comment = 1;
break;
/* empty } */
- case -9:
+ case -9:
parse_buffer_clear(jc);
if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_END, NULL)) {
return false;
@@ -856,7 +856,7 @@ JSON_parser_char(JSON_parser jc, int next_char)
if (!pop(jc, MODE_ARRAY)) {
return false;
}
-
+
jc->type = JSON_T_NONE;
jc->state = OK;
break;
@@ -892,7 +892,7 @@ JSON_parser_char(JSON_parser jc, int next_char)
assert(jc->type == JSON_T_STRING);
jc->type = JSON_T_NONE;
jc->state = CO;
-
+
if (jc->callback) {
JSON_value value;
value.vu.str.value = jc->parse_buffer;
@@ -978,25 +978,25 @@ JSON_parser_done(JSON_parser jc)
int JSON_parser_is_legal_white_space_string(const char* s)
{
int c, char_class;
-
+
if (s == NULL) {
return false;
}
-
- for (; *s; ++s) {
+
+ for (; *s; ++s) {
c = *s;
-
+
if (c < 0 || c >= 128) {
return false;
}
-
+
char_class = ascii_class[c];
-
+
if (char_class != C_SPACE && char_class != C_WHITE) {
return false;
}
}
-
+
return true;
}
@@ -1006,7 +1006,7 @@ void init_JSON_config(JSON_config* config)
{
if (config) {
memset(config, 0, sizeof(*config));
-
+
config->depth = JSON_PARSER_STACK_SIZE - 1;
}
}
diff --git a/decoder/JSON_parser.h b/decoder/JSON_parser.h
index ceb5b24b..de980072 100644
--- a/decoder/JSON_parser.h
+++ b/decoder/JSON_parser.h
@@ -14,7 +14,7 @@
# define JSON_PARSER_DLL_API __declspec(dllimport)
# endif
#else
-# define JSON_PARSER_DLL_API
+# define JSON_PARSER_DLL_API
#endif
/* Determine the integer type use to parse non-floating point numbers */
@@ -22,7 +22,7 @@
typedef long long JSON_int_t;
#define JSON_PARSER_INTEGER_SSCANF_TOKEN "%lld"
#define JSON_PARSER_INTEGER_SPRINTF_TOKEN "%lld"
-#else
+#else
typedef long JSON_int_t;
#define JSON_PARSER_INTEGER_SSCANF_TOKEN "%ld"
#define JSON_PARSER_INTEGER_SPRINTF_TOKEN "%ld"
@@ -31,9 +31,9 @@ typedef long JSON_int_t;
#ifdef __cplusplus
extern "C" {
-#endif
+#endif
-typedef enum
+typedef enum
{
JSON_T_NONE = 0,
JSON_T_ARRAY_BEGIN, // 1
@@ -53,9 +53,9 @@ typedef enum
typedef struct JSON_value_struct {
union {
JSON_int_t integer_value;
-
- long double float_value;
-
+
+ double float_value;
+
struct {
const char* value;
size_t length;
@@ -65,22 +65,22 @@ typedef struct JSON_value_struct {
typedef struct JSON_parser_struct* JSON_parser;
-/*! \brief JSON parser callback
+/*! \brief JSON parser callback
\param ctx The pointer passed to new_JSON_parser.
- \param type An element of JSON_type but not JSON_T_NONE.
+ \param type An element of JSON_type but not JSON_T_NONE.
\param value A representation of the parsed value. This parameter is NULL for
JSON_T_ARRAY_BEGIN, JSON_T_ARRAY_END, JSON_T_OBJECT_BEGIN, JSON_T_OBJECT_END,
JSON_T_NULL, JSON_T_TRUE, and SON_T_FALSE. String values are always returned
as zero-terminated C strings.
\return Non-zero if parsing should continue, else zero.
-*/
+*/
typedef int (*JSON_parser_callback)(void* ctx, int type, const struct JSON_value_struct* value);
-/*! \brief The structure used to configure a JSON parser object
-
+/*! \brief The structure used to configure a JSON parser object
+
\param depth If negative, the parser can parse arbitrary levels of JSON, otherwise
the depth is the limit
\param Pointer to a callback. This parameter may be NULL. In this case the input is merely checked for validity.
@@ -88,7 +88,7 @@ typedef int (*JSON_parser_callback)(void* ctx, int type, const struct JSON_value
\param depth. Specifies the levels of nested JSON to allow. Negative numbers yield unlimited nesting.
\param allowComments. To allow C style comments in JSON, set to non-zero.
\param handleFloatsManually. To decode floating point numbers manually set this parameter to non-zero.
-
+
\return The parser object.
*/
typedef struct {
@@ -111,11 +111,11 @@ typedef struct {
*/
JSON_PARSER_DLL_API void init_JSON_config(JSON_config* config);
-/*! \brief Create a JSON parser object
-
- \param config. Used to configure the parser. Set to NULL to use the default configuration.
+/*! \brief Create a JSON parser object
+
+ \param config. Used to configure the parser. Set to NULL to use the default configuration.
See init_JSON_config
-
+
\return The parser object.
*/
JSON_PARSER_DLL_API extern JSON_parser new_JSON_parser(JSON_config* config);
@@ -132,12 +132,12 @@ JSON_PARSER_DLL_API extern int JSON_parser_char(JSON_parser jc, int next_char);
/*! \brief Finalize parsing.
Call this method once after all input characters have been consumed.
-
+
\return Non-zero, if all parsed characters are valid JSON, zero otherwise.
*/
JSON_PARSER_DLL_API extern int JSON_parser_done(JSON_parser jc);
-/*! \brief Determine if a given string is valid JSON white space
+/*! \brief Determine if a given string is valid JSON white space
\return Non-zero if the string is valid, zero otherwise.
*/
@@ -146,7 +146,7 @@ JSON_PARSER_DLL_API extern int JSON_parser_is_legal_white_space_string(const cha
#ifdef __cplusplus
}
-#endif
-
+#endif
+
#endif /* JSON_PARSER_H */
diff --git a/decoder/fdict.cc b/decoder/fdict.cc
index 7e1b0e1f..da80c260 100644
--- a/decoder/fdict.cc
+++ b/decoder/fdict.cc
@@ -1,5 +1,6 @@
#include "fdict.h"
-
+#include "stdlib.h"
+// for malloc (needed on cygwin); todo: use <cstdlib> and std::malloc
#include <string>
using namespace std;
@@ -59,10 +60,10 @@ int UrlDecode(const char *source, char *dest)
}
source++;
}
-
+
*dest = 0;
return dest - start;
-}
+}
int UrlEncode(const char *source, char *dest, unsigned max) {
static const char *digits = "0123456789ABCDEF";
@@ -83,7 +84,7 @@ int UrlEncode(const char *source, char *dest, unsigned max) {
}
else {
*dest++ = *source;
- }
+ }
source++;
}
*dest = 0;
diff --git a/decoder/hg.h b/decoder/hg.h
index 8d056358..50c9048a 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -168,6 +168,7 @@ class Hypergraph {
// case. To investigate, change false to true and see where ftrans crashes
void PruneEdges(const std::vector<bool>& prune_edge, bool run_inside_algorithm = false);
+ // for density>=1.0, keep this many times the edges needed for the 1best derivation
// if you don't know, use_sum_prod_semiring should be false
void DensityPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double density,
const std::vector<bool>* preserve_mask = NULL);
diff --git a/decoder/logval.h b/decoder/logval.h
index 7099b9be..622b308e 100644
--- a/decoder/logval.h
+++ b/decoder/logval.h
@@ -79,32 +79,29 @@ class LogVal {
T v_;
};
+// copy elision - as opposed to an explicit copy of LogVal<T> const& o1, taking o1 by value should let us construct LogVal r=a+(b+c) as a single result in place in r. todo: return std::move(o1) - C++0x
template<typename T>
-LogVal<T> operator+(const LogVal<T>& o1, const LogVal<T>& o2) {
- LogVal<T> res(o1);
- res += o2;
- return res;
+LogVal<T> operator+(LogVal<T> o1, const LogVal<T>& o2) {
+ o1 += o2;
+ return o1;
}
template<typename T>
-LogVal<T> operator*(const LogVal<T>& o1, const LogVal<T>& o2) {
- LogVal<T> res(o1);
- res *= o2;
- return res;
+LogVal<T> operator*(LogVal<T> o1, const LogVal<T>& o2) {
+ o1 *= o2;
+ return o1;
}
template<typename T>
-LogVal<T> operator/(const LogVal<T>& o1, const LogVal<T>& o2) {
- LogVal<T> res(o1);
- res /= o2;
- return res;
+LogVal<T> operator/(LogVal<T> o1, const LogVal<T>& o2) {
+ o1 /= o2;
+ return o1;
}
template<typename T>
-LogVal<T> operator-(const LogVal<T>& o1, const LogVal<T>& o2) {
- LogVal<T> res(o1);
- res -= o2;
- return res;
+LogVal<T> operator-(LogVal<T> o1, const LogVal<T>& o2) {
+ o1 -= o2;
+ return o1;
}
template<typename T>
diff --git a/decoder/timing_stats.cc b/decoder/timing_stats.cc
index 85b95de5..fc8e9df1 100644
--- a/decoder/timing_stats.cc
+++ b/decoder/timing_stats.cc
@@ -1,7 +1,7 @@
#include "timing_stats.h"
#include <iostream>
-
+#include "time.h" //cygwin needs
using namespace std;
map<string, TimerInfo> Timer::stats;