summaryrefslogtreecommitdiff
path: root/algorithms/transducer
diff options
context:
space:
mode:
Diffstat (limited to 'algorithms/transducer')
-rw-r--r--algorithms/transducer/Makefile24
-rw-r--r--algorithms/transducer/test.input1
-rw-r--r--algorithms/transducer/test.transducer11
-rw-r--r--algorithms/transducer/test1.input1
-rw-r--r--algorithms/transducer/test1.transducer4
-rw-r--r--algorithms/transducer/test2.input1
-rw-r--r--algorithms/transducer/test2.transducer11
-rw-r--r--algorithms/transducer/test3.input1
-rw-r--r--algorithms/transducer/test3.transducer6
-rw-r--r--algorithms/transducer/transducer.c707
-rw-r--r--algorithms/transducer/transducer.h90
11 files changed, 857 insertions, 0 deletions
diff --git a/algorithms/transducer/Makefile b/algorithms/transducer/Makefile
new file mode 100644
index 0000000..d747166
--- /dev/null
+++ b/algorithms/transducer/Makefile
@@ -0,0 +1,24 @@
+CFLAGS += -Wall -std=gnu99 -pedantic -g
+
+.PHONY: all depend clean check install
+
+SRCS = transducer.c
+OBJS = transducer.o
+
+all: transducer
+
+main: $(OBJS)
+ $(CC) -o $@ $(LDFLAGS) $(CFLAGS) $^
+
+depend:
+ makedepend -s "# makedepend depends on me"\
+ -- $(CFLAGS) -- $(SRCS)
+
+clean:
+ rm -f $(OBJS) transducer
+
+check:
+ @echo "No checks defined, sorry"
+
+install:
+ @echo "Nothing to install yet"
diff --git a/algorithms/transducer/test.input b/algorithms/transducer/test.input
new file mode 100644
index 0000000..be44a36
--- /dev/null
+++ b/algorithms/transducer/test.input
@@ -0,0 +1 @@
+0110110 \ No newline at end of file
diff --git a/algorithms/transducer/test.transducer b/algorithms/transducer/test.transducer
new file mode 100644
index 0000000..635fb17
--- /dev/null
+++ b/algorithms/transducer/test.transducer
@@ -0,0 +1,11 @@
+"S"
+"S" "0" "0" "S"
+"S" "1" "0" "E"
+"E" "1" "0" "E"
+"E" "0" "0" "S"
+"S" "1" "1" "L"
+"L" "1" "1" "L"
+"L" "0" "0" "F"
+"F" "0" "0" "F"
+"F" "\n" "\n" "F"
+
diff --git a/algorithms/transducer/test1.input b/algorithms/transducer/test1.input
new file mode 100644
index 0000000..533805d
--- /dev/null
+++ b/algorithms/transducer/test1.input
@@ -0,0 +1 @@
+lala \ No newline at end of file
diff --git a/algorithms/transducer/test1.transducer b/algorithms/transducer/test1.transducer
new file mode 100644
index 0000000..05e921f
--- /dev/null
+++ b/algorithms/transducer/test1.transducer
@@ -0,0 +1,4 @@
+"F"
+"X" "0" "1" "X"
+"Q" "abcdefghijklmnopqrstuvwxyz" "" "X"
+
diff --git a/algorithms/transducer/test2.input b/algorithms/transducer/test2.input
new file mode 100644
index 0000000..be44a36
--- /dev/null
+++ b/algorithms/transducer/test2.input
@@ -0,0 +1 @@
+0110110 \ No newline at end of file
diff --git a/algorithms/transducer/test2.transducer b/algorithms/transducer/test2.transducer
new file mode 100644
index 0000000..bfb0d77
--- /dev/null
+++ b/algorithms/transducer/test2.transducer
@@ -0,0 +1,11 @@
+"S"
+"S" "0" "0" "S"
+"S" "1" "0" "E"
+"S" "1" "1" "L"
+"E" "1" "0" "E"
+"E" "0" "0" "S"
+"L" "1" "1" "L"
+"L" "0" "0" "F"
+"F" "0" "0" "F"
+"F" "\n" "\n" "F"
+
diff --git a/algorithms/transducer/test3.input b/algorithms/transducer/test3.input
new file mode 100644
index 0000000..acfdf57
--- /dev/null
+++ b/algorithms/transducer/test3.input
@@ -0,0 +1 @@
+kram \ No newline at end of file
diff --git a/algorithms/transducer/test3.transducer b/algorithms/transducer/test3.transducer
new file mode 100644
index 0000000..40223b1
--- /dev/null
+++ b/algorithms/transducer/test3.transducer
@@ -0,0 +1,6 @@
+"S1"
+"S6" "m" "k" "F"
+"S4" "a" "r" "S5"
+"S2" "r" "a" "S3"
+"S1" "k" "m" "S2"
+
diff --git a/algorithms/transducer/transducer.c b/algorithms/transducer/transducer.c
new file mode 100644
index 0000000..c9818e6
--- /dev/null
+++ b/algorithms/transducer/transducer.c
@@ -0,0 +1,707 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <string.h>
+#include "transducer.h"
+
+#define START 0
+#define TOKEN 1
+#define ESCAPE 2
+#define ERROR 3
+#define END 4
+
+#define MIN_TRANSITIONS 100 /* Minimum count of transitions. */
+#define MIN_INPUT_LENGTH 20 /* Minimum length of input file. */
+
+
+/*
+ * Is called when an error occurs and exits the program
+ * immediately with status 1.
+ */
+void
+die(char *msg, ...)
+{
+ va_list args;
+
+ va_start(args, msg);
+ vfprintf(stderr, msg, args);
+ fprintf(stderr, "\n");
+ va_end(args);
+
+ exit(1);
+}
+
+/*
+ * Like strcmp(), just for 2 chars.
+ *
+ */
+int
+chrcmp(char a, char b)
+{
+ if (a < b) {
+ return -1;
+ }
+ if (a == b) {
+ return 0;
+ }
+
+ return 1;
+}
+
+
+/*
+ * Prints out status of the parser.
+ *
+ */
+int
+printParserStatus(ParserState *p, int c, char *from, char *to)
+{
+ printf("Line: %d, State: %s, current char: '%c' - to %s\n",
+ p->cur_line+1, from, c, to);
+
+ return 0;
+}
+
+/*
+ * Status START for parse automata.
+ *
+ */
+int
+state_START(int c, ParserState* p)
+{
+ if (isspace(c)) {
+ printParserStatus(p, c, "START", "START");
+ return START;
+ }
+
+ switch(c) {
+ case '"':
+ printParserStatus(p, c, "START", "TOKEN");
+ return TOKEN;
+ case EOF:
+ printParserStatus(p, c, "START", "END");
+ return END;
+ default:
+ printParserStatus(p, c, "START", "ERROR");
+ return ERROR;
+ }
+}
+
+/*
+ * Status TOKEN for parse automata.
+ *
+ */
+int
+state_TOKEN(int c, ParserState* p)
+{
+ switch(c) {
+ case '"':
+ printParserStatus(p, c, "TOKEN", "END");
+ return END;
+ case '\\':
+ printParserStatus(p, c, "TOKEN", "ESCAPE");
+ return ESCAPE;
+ default:
+ if (p->buf->next < MAX_TOK_LEN) {
+ printParserStatus(p, c, "TOKEN", "TOKEN");
+ p->buf->tok[p->buf->next] = c;
+ p->buf->next++;
+
+ return TOKEN;
+ }
+ else {
+ printParserStatus(p, c, "TOKEN", "ERROR");
+ return ERROR;
+ }
+ }
+}
+
+/*
+ * Status ESCAPE for parse automata.
+ *
+ */
+int
+state_ESCAPE(int c, ParserState* p)
+{
+ if (c == 'n') {
+ printParserStatus(p, c, "ESCAPE", "TOKEN");
+ p->buf->tok[p->buf->next] = '\n';
+ }
+ else if (c == 't') {
+ printParserStatus(p, c, "ESCAPE", "TOKEN");
+ p->buf->tok[p->buf->next] = '\t';
+ }
+ else if (c == 'r') {
+ printParserStatus(p, c, "ESCAPE", "TOKEN");
+ p->buf->tok[p->buf->next] = '\r';
+ }
+ else if (c == EOF) {
+ printParserStatus(p, c, "ESCAPE", "ERROR");
+ return ERROR;
+ }
+ else {
+ printParserStatus(p, c, "ESCAPE", "TOKEN");
+ p->buf->tok[p->buf->next] = c;
+ }
+
+ p->buf->next++;
+
+ return TOKEN;
+}
+
+/*
+ * Status END for parse automata.
+ *
+ */
+int
+state_END(int c, ParserState* p)
+{
+ printParserStatus(p, c, "END", "START");
+ p->buf->tok[p->buf->next] = '\0';
+ p->buf->next = 0;
+ return START;
+}
+
+/*
+ * Runs the parse automata.
+ *
+ */
+int
+run_automata(ParserState* p)
+{
+ int c, s = START;
+
+ while ((c = fgetc(p->in)) != EOF) {
+ if (c == '\n') {
+ p->cur_line++;
+ }
+
+ switch(s) {
+ case START:
+ s = state_START(c, p);
+ break;
+ case TOKEN:
+ s = state_TOKEN(c, p);
+ break;
+ case ESCAPE:
+ s = state_ESCAPE(c, p);
+ break;
+ case END:
+ s = state_END(c, p);
+ p->buf->next = 0;
+ return *(p->buf->tok);
+ break;
+ case ERROR:
+ fprintf(stderr, "\nSyntax Error in line %d of file '%s'!\n",
+ p->cur_line+1, p->file_name); /* +1 for readability */
+ fclose(p->in);
+ free(p->buf);
+ free(p);
+ exit(1);
+ break;
+ }
+ }
+
+ return EOF;
+}
+
+/*
+ * Returns next token.
+ *
+ */
+char*
+get_next_token(ParserState* p)
+{
+ char* res;
+
+ while (run_automata(p) != EOF) {
+ res = strdup(p->buf->tok);
+ return res;
+ }
+
+ return NULL;
+}
+
+/*
+ * Returns a Quadruple consisting of src_state, input, output and to_state.
+ * Like a Transition, but input is here a char* - that means that the
+ * output can be longer than 1 char.
+ */
+Quadruple*
+getQuadruples(Transducer* t, Quadruple* buf)
+{
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (!(buf->src_state = get_next_token(t->p))) {
+ return NULL;
+ }
+
+ if (!(buf->input = get_next_token(t->p))) {
+ fprintf(stderr, "\nSyntax Error in line %d of file '%s'!\n",
+ t->p->cur_line+1, t->p->file_name); /* +1 for readability */
+ exit(1);
+ }
+
+ buf->output = get_next_token(t->p);
+
+ if (!(buf->to_state = get_next_token(t->p))) {
+ fprintf(stderr, "\nSyntax Error in line %d of file '%s'!\n",
+ t->p->cur_line+1, t->p->file_name); /* +1 for readability */
+ fprintf(stderr, "Probable reason: Your transducer file does not \
+end with a newline.\n");
+ exit(1);
+ }
+
+ return buf;
+ }
+
+ return NULL;
+}
+
+/*
+ * Compare function for sort_transitions.
+ *
+ */
+int
+compare_transitions(const void* a, const void* b)
+{
+ Transition* t1 = (Transition*)a, *t2 = (Transition*)b;
+ int res;
+
+ res = strcmp(t1->src_state, t2->src_state);
+ if (!res) {
+ res = chrcmp(t1->input, t2->input);
+ }
+
+ return res;
+}
+
+/*
+ * Sorts the transitions array in Transducer with qsort.
+ *
+ */
+void sort_transitions(Transducer* t)
+{
+ qsort(t->transitions, t->num_transitions, sizeof(Transition),
+ compare_transitions);
+}
+
+/*
+ * Checks if memory for transitions array of Transducer is sufficent
+ * for transitions that will be added (int need) - if not it calls realloc
+ * for more memory.
+ */
+void
+check_transitions_array_size(Transducer* t, int need)
+{
+ if (t->num_transitions == t->max_transitions-need) {
+ t->max_transitions = t->max_transitions*2;
+ t->transitions = (Transition*)realloc(t->transitions,
+ sizeof(Transition)*t->max_transitions);
+ }
+}
+
+/*
+ * Adds Transition to transitions array of transducer.
+ *
+ */
+void
+add_transition(Transducer* t, char* src_state, int input,
+ char* output, char* to_state)
+{
+ t->transitions[t->num_transitions].src_state = src_state;
+
+ if (strlen(output) == 0) {
+ fprintf(stderr, "\nSyntax Error in line %d of file '%s'!\n",
+ t->p->cur_line+1, t->p->file_name); /* +1 for readability */
+ fprintf(stderr, "Reason: Output is empty.\n");
+ exit(1);
+ }
+ else {
+ t->transitions[t->num_transitions].input = input;
+ }
+
+ t->transitions[t->num_transitions].output = output;
+ t->transitions[t->num_transitions].to_state = to_state;
+ t->num_transitions++;
+}
+
+/*
+ * Prints out a list of the transitions in transducer t.
+ *
+ */
+int
+print_transitions(Transducer* t)
+{
+ for (unsigned int i = 0; i < t->num_transitions; i++)
+ {
+ printf("\nTransition No. %d:\n", i+1);
+ printf("src_state: %s\n", t->transitions[i].src_state);
+
+ if (t->transitions[i].input == '\n') {
+ printf("input: \\n\n");
+ }
+ else if (t->transitions[i].input == '\t') {
+ printf("input: \\t\n");
+ }
+ else if (t->transitions[i].input == '\r') {
+ printf("input: \\r\n");
+ }
+ else {
+ printf("input: %c\n", t->transitions[i].input);
+ }
+
+ if (t->transitions[i].output[0] == '\n') {
+ printf("output: \\n\n");
+ }
+ else if (t->transitions[i].output[0] == '\t') {
+ printf("output: \\t\n");
+ }
+ else if (t->transitions[i].output[0] == '\r') {
+ printf("output: \\r\n");
+ }
+ else {
+ printf("output: %s\n", t->transitions[i].output);
+ }
+
+ printf("to_state: %s\n", t->transitions[i].to_state);
+ }
+
+ return 0;
+}
+
+/*
+ * Reads transducer file and returns a finished Transducer.
+ *
+ */
+Transducer*
+readTransducerFromFile(char* file_name)
+{
+ Transducer* t;
+ Quadruple* buf;
+ char output[2];
+ int need;
+
+ /* allocating some memory... */
+ if (!(t = (Transducer*)malloc(sizeof(Transducer)))) {
+ die("ERROR: Could not allocate memory for Transducer!");
+ }
+ if (!(t->transitions = (Transition*)malloc(sizeof(Transition)
+ *MIN_TRANSITIONS))) {
+ die("ERROR: Could not allocate memory for transitions!");
+ }
+ if (!(t->p = (ParserState*)malloc(sizeof(ParserState)))) {
+ die("ERROR: Could not allocate memory for ParserState!");
+ }
+ if (!(t->p->buf = (TokenBuffer*)malloc(sizeof(TokenBuffer)))) {
+ die("ERROR: Could not allocate memory for TokenBuffer!");
+ }
+ if (!(buf = (Quadruple*)malloc(sizeof(Quadruple)))) {
+ die("ERROR: Could not allocate memory for transition buffer!");
+ }
+
+ t->p->file_name = file_name;
+ t->p->cur_line = 0;
+ t->p->buf->next = 0;
+ t->max_transitions = MIN_TRANSITIONS;
+
+ if (!(t->p->in = fopen(t->p->file_name, "r"))) {
+ die("ERROR: Could not open stated transducer file '%s'!",
+ t->p->file_name);
+ }
+ else {
+ t->start_state = get_next_token(t->p);
+ t->num_transitions = 0;
+ while ((buf = getQuadruples(t, buf)) != NULL) {
+ if (strlen(buf->input) > 1) {
+ int need = strlen(buf->input);
+ check_transitions_array_size(t, need);
+ for (int i = 0; i < need; i++) {
+ output[0] = buf->input[i];
+ output[1] = '\0';
+ add_transition(t, buf->src_state, buf->input[i], output,
+ buf->to_state);
+ }
+ }
+ else {
+ need = 1;
+ check_transitions_array_size(t, need);
+ add_transition(t, buf->src_state, buf->input[0], buf->output,
+ buf->to_state);
+ }
+ }
+ printf("...done!\n");
+
+ sort_transitions(t);
+ print_transitions(t);
+ }
+
+ return t;
+}
+
+
+/*
+ * Returns char at read_pos of StringFile. Also increases read_pos by 1.
+ *
+ */
+int
+string_file_getc(StringFile* f)
+{
+ if (!(f->buf[f->read_pos])) {
+ return EOF;
+ }
+
+ return f->buf[f->read_pos++];
+}
+
+/*
+ * Decreases read_pos in StringFile by 1.
+ *
+ */
+void
+string_file_ungetc(StringFile* f)
+{
+ if (f->read_pos > 0) {
+ f->read_pos--;
+ }
+}
+
+/*
+ * Generates the StringFile from an input file.
+ *
+ */
+StringFile*
+create_string_file_from_file(FILE* inputFile)
+{
+ int c, i=0;
+ StringFile* f;
+
+ if (!(f = (StringFile*)malloc(sizeof(StringFile)))) {
+ die("ERROR: Could not allocate memory for StringFile!");
+ }
+
+ if (!(f->buf = (char*)malloc(sizeof(char)*MIN_INPUT_LENGTH))) {
+ die("ERROR: Could not allocate memory for buffer in StringFile!");
+ }
+
+ f->max_chars = MIN_INPUT_LENGTH;
+ f->read_pos = 0;
+
+ while ((c = fgetc(inputFile)) != EOF) {
+ if (i == f->max_chars-1) {
+ f->max_chars = f->max_chars*2;
+ f->buf = (char*)realloc(f->buf, sizeof(char)*f->max_chars);
+ }
+ f->buf[i] = c;
+ f->num_chars = i;
+ i++;
+ }
+
+ f->num_chars++;
+
+ return f;
+}
+
+
+/*
+ * Adds a node to a OutputList (to front!).
+ *
+ */
+OutputList*
+cons(char* output, OutputList* next)
+{
+ OutputList* out;
+
+ if (!(out = (OutputList*)malloc(sizeof(OutputList)))) {
+ die("ERROR: Could not allocate memory for OutputList!");
+ }
+
+ out->next = next;
+ out->output = output;
+
+ return out;
+}
+
+/*
+ * Finally prints output from OutputList.
+ *
+ */
+void
+print_output_list(OutputList* output_list)
+{
+ if (!output_list) {
+ printf("\n\nSorry, your input could not be processed.\n");
+ }
+ else {
+ printf("\nMy Output:\n");
+
+ while (output_list) {
+ printf("%s", output_list->output);
+ output_list = output_list->next;
+ }
+ printf("\n");
+ }
+}
+
+/*
+ * Compare function for bsearch.
+ *
+ */
+int
+compare_trans_iter_with_transition(const void* key, const void* value)
+{
+ TransIter* ti = (TransIter*)key;
+ Transition* trans = (Transition*)value;
+ int res;
+
+ res = strcmp(ti->src_state, trans->src_state);
+ if (!res) {
+ res = chrcmp(ti->input, trans->input);
+ }
+
+ return res;
+}
+
+/*
+ * Returns next fitting Transition of transition array for TransIter.
+ *
+ */
+Transition*
+trans_iter_next(TransIter* ti)
+{
+ ti->found++;
+
+ if (!(ti->found->src_state)) {
+ printf("ti->found == NULL\n");
+ return NULL;
+ }
+
+ if ((strcmp(ti->found->src_state, ti->src_state)) ||
+ (chrcmp(ti->found->input, ti->input))) {
+ return NULL;
+ }
+
+ return ti->found;
+}
+
+/*
+ * 'Init-method' of a TransIter.
+ *
+ */
+void
+trans_iter_init(TransIter* ti, Transducer* t, char* cur_state, int input)
+{
+ ti->t = t;
+ ti->src_state = cur_state;
+ ti->input = input;
+ ti->found = bsearch(ti, ti->t->transitions, ti->t->num_transitions,
+ sizeof(Transition), compare_trans_iter_with_transition);
+
+ while (1) {
+ if (ti->found == t->transitions) {
+ break;
+ }
+ if (ti->found == NULL) {
+ break;
+ }
+
+ ti->found--;
+
+ if ((!(strcmp(ti->found->src_state, cur_state))) &&
+ (!(chrcmp(ti->found->input, input)))) {
+ continue;
+ } else {
+ ti->found++;
+ break;
+ }
+ }
+}
+
+
+/*
+ * Returns an OutputList, generated by recursion.
+ *
+ */
+OutputList*
+propagate(Transducer* t, char* cur_state, StringFile* inputFile)
+{
+ TransIter* ti;
+ OutputList* out;
+ int cur_char;
+
+ if (!(ti = (TransIter*)malloc(sizeof(TransIter)))) {
+ die("ERROR: Could not allocate memory for TransIter!");
+ }
+
+ if ((cur_char = string_file_getc(inputFile)) == EOF) {
+ if (cur_state[0] == 'F') {
+ return cons("", NULL);
+ }
+ else {
+ return NULL;
+ }
+ }
+
+ trans_iter_init(ti, t, cur_state, cur_char);
+
+ if (!ti->found) {
+ string_file_ungetc(inputFile);
+ return NULL;
+ }
+
+ while (1) {
+ if (!(out = propagate(t, ti->found->to_state, inputFile))) {
+ if (!trans_iter_next(ti)) {
+ string_file_ungetc(inputFile);
+ return NULL;
+ }
+ }
+ else {
+ return cons(ti->found->output, out);
+ }
+ }
+}
+
+
+/*
+ * main
+ *
+ */
+int
+main(int argc, char** argv)
+{
+ char* file_name;
+ FILE* inputFile;
+ Transducer* t;
+ StringFile* f;
+ OutputList* out;
+
+ if ((!argv[1]) || (!argv[2])) {
+ die("USAGE: transducer <transducer file> <input file>");
+ }
+ else {
+ file_name = argv[1];
+ printf("Please wait while I process the input file...\n");
+ t = readTransducerFromFile(file_name);
+ if (!(inputFile = fopen(argv[2], "r"))) {
+ die("ERROR: Could not open stated input file '%s'!", argv[2]);
+ }
+ f = create_string_file_from_file(inputFile);
+ }
+
+ out = propagate(t, t->start_state, f);
+
+ /* final output */
+ f->read_pos = 0;
+ printf("\nYour Input:\n");
+ int c;
+ while ((c = string_file_getc(f)) != EOF) {
+ printf("%c", c);
+ }
+ print_output_list(out);
+
+ return 0;
+}
+
diff --git a/algorithms/transducer/transducer.h b/algorithms/transducer/transducer.h
new file mode 100644
index 0000000..7493317
--- /dev/null
+++ b/algorithms/transducer/transducer.h
@@ -0,0 +1,90 @@
+#define MAX_TOK_LEN 26 /* Hard coded max length of a token */
+
+
+/*
+ * Temporary buffer for tokens.
+ *
+ */
+typedef struct TokenBuffer_s {
+ char tok[MAX_TOK_LEN];
+ int next;
+} TokenBuffer;
+
+/*
+ * Struct that collects parser related data.
+ *
+ */
+typedef struct ParserState_s {
+ char* file_name;
+ FILE* in;
+ int cur_line;
+ TokenBuffer* buf;
+} ParserState;
+
+/*
+ * Used for temporary storage of transitions.
+ *
+ */
+typedef struct Quadruple_s {
+ char* src_state;
+ char* input;
+ char* output;
+ char* to_state;
+} Quadruple;
+
+/*
+ * Stores a transition.
+ *
+ */
+typedef struct Transition_s {
+ char* src_state;
+ int input;
+ char* output;
+ char* to_state;
+} Transition;
+
+/*
+ * The transducer struct.
+ * num_transitions and max_transitions are used for memory management.
+ *
+ */
+typedef struct Transducer_s {
+ char* start_state;
+ Transition* transitions;
+ int num_transitions;
+ int max_transitions;
+ ParserState* p;
+} Transducer;
+
+/*
+ * Struct that stores the string file.
+ * max_chars and num_chars are used for memory management.
+ *
+ */
+typedef struct StringFile_s {
+ char* buf;
+ int read_pos;
+ int max_chars;
+ int num_chars;
+} StringFile;
+
+/*
+ * Struct for output.
+ *
+ */
+typedef struct OutputList_s {
+ struct OutputList_s* next;
+ char* output;
+} OutputList;
+
+/*
+ * Struct for iterator pattern.
+ *
+ */
+typedef struct TransIter_s {
+ char* src_state;
+ int input;
+ Transducer* t;
+ Transition* found;
+} TransIter;
+