From cf3a29feb5887344b6633ead1b4b6d5657a15a4b Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 15 Jun 2014 03:24:33 +0200 Subject: old stuff: algorithms --- algorithms/transducer/Makefile | 24 ++ algorithms/transducer/test.input | 1 + algorithms/transducer/test.transducer | 11 + algorithms/transducer/test1.input | 1 + algorithms/transducer/test1.transducer | 4 + algorithms/transducer/test2.input | 1 + algorithms/transducer/test2.transducer | 11 + algorithms/transducer/test3.input | 1 + algorithms/transducer/test3.transducer | 6 + algorithms/transducer/transducer.c | 707 +++++++++++++++++++++++++++++++++ algorithms/transducer/transducer.h | 90 +++++ 11 files changed, 857 insertions(+) create mode 100644 algorithms/transducer/Makefile create mode 100644 algorithms/transducer/test.input create mode 100644 algorithms/transducer/test.transducer create mode 100644 algorithms/transducer/test1.input create mode 100644 algorithms/transducer/test1.transducer create mode 100644 algorithms/transducer/test2.input create mode 100644 algorithms/transducer/test2.transducer create mode 100644 algorithms/transducer/test3.input create mode 100644 algorithms/transducer/test3.transducer create mode 100644 algorithms/transducer/transducer.c create mode 100644 algorithms/transducer/transducer.h (limited to 'algorithms/transducer') diff --git a/algorithms/transducer/Makefile b/algorithms/transducer/Makefile new file mode 100644 index 0000000..d747166 --- /dev/null +++ b/algorithms/transducer/Makefile @@ -0,0 +1,24 @@ +CFLAGS += -Wall -std=gnu99 -pedantic -g + +.PHONY: all depend clean check install + +SRCS = transducer.c +OBJS = transducer.o + +all: transducer + +main: $(OBJS) + $(CC) -o $@ $(LDFLAGS) $(CFLAGS) $^ + +depend: + makedepend -s "# makedepend depends on me"\ + -- $(CFLAGS) -- $(SRCS) + +clean: + rm -f $(OBJS) transducer + +check: + @echo "No checks defined, sorry" + +install: + @echo "Nothing to install yet" diff --git a/algorithms/transducer/test.input b/algorithms/transducer/test.input new file mode 100644 index 0000000..be44a36 --- /dev/null +++ b/algorithms/transducer/test.input @@ -0,0 +1 @@ +0110110 \ No newline at end of file diff --git a/algorithms/transducer/test.transducer b/algorithms/transducer/test.transducer new file mode 100644 index 0000000..635fb17 --- /dev/null +++ b/algorithms/transducer/test.transducer @@ -0,0 +1,11 @@ +"S" +"S" "0" "0" "S" +"S" "1" "0" "E" +"E" "1" "0" "E" +"E" "0" "0" "S" +"S" "1" "1" "L" +"L" "1" "1" "L" +"L" "0" "0" "F" +"F" "0" "0" "F" +"F" "\n" "\n" "F" + diff --git a/algorithms/transducer/test1.input b/algorithms/transducer/test1.input new file mode 100644 index 0000000..533805d --- /dev/null +++ b/algorithms/transducer/test1.input @@ -0,0 +1 @@ +lala \ No newline at end of file diff --git a/algorithms/transducer/test1.transducer b/algorithms/transducer/test1.transducer new file mode 100644 index 0000000..05e921f --- /dev/null +++ b/algorithms/transducer/test1.transducer @@ -0,0 +1,4 @@ +"F" +"X" "0" "1" "X" +"Q" "abcdefghijklmnopqrstuvwxyz" "" "X" + diff --git a/algorithms/transducer/test2.input b/algorithms/transducer/test2.input new file mode 100644 index 0000000..be44a36 --- /dev/null +++ b/algorithms/transducer/test2.input @@ -0,0 +1 @@ +0110110 \ No newline at end of file diff --git a/algorithms/transducer/test2.transducer b/algorithms/transducer/test2.transducer new file mode 100644 index 0000000..bfb0d77 --- /dev/null +++ b/algorithms/transducer/test2.transducer @@ -0,0 +1,11 @@ +"S" +"S" "0" "0" "S" +"S" "1" "0" "E" +"S" "1" "1" "L" +"E" "1" "0" "E" +"E" "0" "0" "S" +"L" "1" "1" "L" +"L" "0" "0" "F" +"F" "0" "0" "F" +"F" "\n" "\n" "F" + diff --git a/algorithms/transducer/test3.input b/algorithms/transducer/test3.input new file mode 100644 index 0000000..acfdf57 --- /dev/null +++ b/algorithms/transducer/test3.input @@ -0,0 +1 @@ +kram \ No newline at end of file diff --git a/algorithms/transducer/test3.transducer b/algorithms/transducer/test3.transducer new file mode 100644 index 0000000..40223b1 --- /dev/null +++ b/algorithms/transducer/test3.transducer @@ -0,0 +1,6 @@ +"S1" +"S6" "m" "k" "F" +"S4" "a" "r" "S5" +"S2" "r" "a" "S3" +"S1" "k" "m" "S2" + diff --git a/algorithms/transducer/transducer.c b/algorithms/transducer/transducer.c new file mode 100644 index 0000000..c9818e6 --- /dev/null +++ b/algorithms/transducer/transducer.c @@ -0,0 +1,707 @@ +#include +#include +#include +#include +#include +#include "transducer.h" + +#define START 0 +#define TOKEN 1 +#define ESCAPE 2 +#define ERROR 3 +#define END 4 + +#define MIN_TRANSITIONS 100 /* Minimum count of transitions. */ +#define MIN_INPUT_LENGTH 20 /* Minimum length of input file. */ + + +/* + * Is called when an error occurs and exits the program + * immediately with status 1. + */ +void +die(char *msg, ...) +{ + va_list args; + + va_start(args, msg); + vfprintf(stderr, msg, args); + fprintf(stderr, "\n"); + va_end(args); + + exit(1); +} + +/* + * Like strcmp(), just for 2 chars. + * + */ +int +chrcmp(char a, char b) +{ + if (a < b) { + return -1; + } + if (a == b) { + return 0; + } + + return 1; +} + + +/* + * Prints out status of the parser. + * + */ +int +printParserStatus(ParserState *p, int c, char *from, char *to) +{ + printf("Line: %d, State: %s, current char: '%c' - to %s\n", + p->cur_line+1, from, c, to); + + return 0; +} + +/* + * Status START for parse automata. + * + */ +int +state_START(int c, ParserState* p) +{ + if (isspace(c)) { + printParserStatus(p, c, "START", "START"); + return START; + } + + switch(c) { + case '"': + printParserStatus(p, c, "START", "TOKEN"); + return TOKEN; + case EOF: + printParserStatus(p, c, "START", "END"); + return END; + default: + printParserStatus(p, c, "START", "ERROR"); + return ERROR; + } +} + +/* + * Status TOKEN for parse automata. + * + */ +int +state_TOKEN(int c, ParserState* p) +{ + switch(c) { + case '"': + printParserStatus(p, c, "TOKEN", "END"); + return END; + case '\\': + printParserStatus(p, c, "TOKEN", "ESCAPE"); + return ESCAPE; + default: + if (p->buf->next < MAX_TOK_LEN) { + printParserStatus(p, c, "TOKEN", "TOKEN"); + p->buf->tok[p->buf->next] = c; + p->buf->next++; + + return TOKEN; + } + else { + printParserStatus(p, c, "TOKEN", "ERROR"); + return ERROR; + } + } +} + +/* + * Status ESCAPE for parse automata. + * + */ +int +state_ESCAPE(int c, ParserState* p) +{ + if (c == 'n') { + printParserStatus(p, c, "ESCAPE", "TOKEN"); + p->buf->tok[p->buf->next] = '\n'; + } + else if (c == 't') { + printParserStatus(p, c, "ESCAPE", "TOKEN"); + p->buf->tok[p->buf->next] = '\t'; + } + else if (c == 'r') { + printParserStatus(p, c, "ESCAPE", "TOKEN"); + p->buf->tok[p->buf->next] = '\r'; + } + else if (c == EOF) { + printParserStatus(p, c, "ESCAPE", "ERROR"); + return ERROR; + } + else { + printParserStatus(p, c, "ESCAPE", "TOKEN"); + p->buf->tok[p->buf->next] = c; + } + + p->buf->next++; + + return TOKEN; +} + +/* + * Status END for parse automata. + * + */ +int +state_END(int c, ParserState* p) +{ + printParserStatus(p, c, "END", "START"); + p->buf->tok[p->buf->next] = '\0'; + p->buf->next = 0; + return START; +} + +/* + * Runs the parse automata. + * + */ +int +run_automata(ParserState* p) +{ + int c, s = START; + + while ((c = fgetc(p->in)) != EOF) { + if (c == '\n') { + p->cur_line++; + } + + switch(s) { + case START: + s = state_START(c, p); + break; + case TOKEN: + s = state_TOKEN(c, p); + break; + case ESCAPE: + s = state_ESCAPE(c, p); + break; + case END: + s = state_END(c, p); + p->buf->next = 0; + return *(p->buf->tok); + break; + case ERROR: + fprintf(stderr, "\nSyntax Error in line %d of file '%s'!\n", + p->cur_line+1, p->file_name); /* +1 for readability */ + fclose(p->in); + free(p->buf); + free(p); + exit(1); + break; + } + } + + return EOF; +} + +/* + * Returns next token. + * + */ +char* +get_next_token(ParserState* p) +{ + char* res; + + while (run_automata(p) != EOF) { + res = strdup(p->buf->tok); + return res; + } + + return NULL; +} + +/* + * Returns a Quadruple consisting of src_state, input, output and to_state. + * Like a Transition, but input is here a char* - that means that the + * output can be longer than 1 char. + */ +Quadruple* +getQuadruples(Transducer* t, Quadruple* buf) +{ + int i; + + for (i = 0; i < 4; i++) { + if (!(buf->src_state = get_next_token(t->p))) { + return NULL; + } + + if (!(buf->input = get_next_token(t->p))) { + fprintf(stderr, "\nSyntax Error in line %d of file '%s'!\n", + t->p->cur_line+1, t->p->file_name); /* +1 for readability */ + exit(1); + } + + buf->output = get_next_token(t->p); + + if (!(buf->to_state = get_next_token(t->p))) { + fprintf(stderr, "\nSyntax Error in line %d of file '%s'!\n", + t->p->cur_line+1, t->p->file_name); /* +1 for readability */ + fprintf(stderr, "Probable reason: Your transducer file does not \ +end with a newline.\n"); + exit(1); + } + + return buf; + } + + return NULL; +} + +/* + * Compare function for sort_transitions. + * + */ +int +compare_transitions(const void* a, const void* b) +{ + Transition* t1 = (Transition*)a, *t2 = (Transition*)b; + int res; + + res = strcmp(t1->src_state, t2->src_state); + if (!res) { + res = chrcmp(t1->input, t2->input); + } + + return res; +} + +/* + * Sorts the transitions array in Transducer with qsort. + * + */ +void sort_transitions(Transducer* t) +{ + qsort(t->transitions, t->num_transitions, sizeof(Transition), + compare_transitions); +} + +/* + * Checks if memory for transitions array of Transducer is sufficent + * for transitions that will be added (int need) - if not it calls realloc + * for more memory. + */ +void +check_transitions_array_size(Transducer* t, int need) +{ + if (t->num_transitions == t->max_transitions-need) { + t->max_transitions = t->max_transitions*2; + t->transitions = (Transition*)realloc(t->transitions, + sizeof(Transition)*t->max_transitions); + } +} + +/* + * Adds Transition to transitions array of transducer. + * + */ +void +add_transition(Transducer* t, char* src_state, int input, + char* output, char* to_state) +{ + t->transitions[t->num_transitions].src_state = src_state; + + if (strlen(output) == 0) { + fprintf(stderr, "\nSyntax Error in line %d of file '%s'!\n", + t->p->cur_line+1, t->p->file_name); /* +1 for readability */ + fprintf(stderr, "Reason: Output is empty.\n"); + exit(1); + } + else { + t->transitions[t->num_transitions].input = input; + } + + t->transitions[t->num_transitions].output = output; + t->transitions[t->num_transitions].to_state = to_state; + t->num_transitions++; +} + +/* + * Prints out a list of the transitions in transducer t. + * + */ +int +print_transitions(Transducer* t) +{ + for (unsigned int i = 0; i < t->num_transitions; i++) + { + printf("\nTransition No. %d:\n", i+1); + printf("src_state: %s\n", t->transitions[i].src_state); + + if (t->transitions[i].input == '\n') { + printf("input: \\n\n"); + } + else if (t->transitions[i].input == '\t') { + printf("input: \\t\n"); + } + else if (t->transitions[i].input == '\r') { + printf("input: \\r\n"); + } + else { + printf("input: %c\n", t->transitions[i].input); + } + + if (t->transitions[i].output[0] == '\n') { + printf("output: \\n\n"); + } + else if (t->transitions[i].output[0] == '\t') { + printf("output: \\t\n"); + } + else if (t->transitions[i].output[0] == '\r') { + printf("output: \\r\n"); + } + else { + printf("output: %s\n", t->transitions[i].output); + } + + printf("to_state: %s\n", t->transitions[i].to_state); + } + + return 0; +} + +/* + * Reads transducer file and returns a finished Transducer. + * + */ +Transducer* +readTransducerFromFile(char* file_name) +{ + Transducer* t; + Quadruple* buf; + char output[2]; + int need; + + /* allocating some memory... */ + if (!(t = (Transducer*)malloc(sizeof(Transducer)))) { + die("ERROR: Could not allocate memory for Transducer!"); + } + if (!(t->transitions = (Transition*)malloc(sizeof(Transition) + *MIN_TRANSITIONS))) { + die("ERROR: Could not allocate memory for transitions!"); + } + if (!(t->p = (ParserState*)malloc(sizeof(ParserState)))) { + die("ERROR: Could not allocate memory for ParserState!"); + } + if (!(t->p->buf = (TokenBuffer*)malloc(sizeof(TokenBuffer)))) { + die("ERROR: Could not allocate memory for TokenBuffer!"); + } + if (!(buf = (Quadruple*)malloc(sizeof(Quadruple)))) { + die("ERROR: Could not allocate memory for transition buffer!"); + } + + t->p->file_name = file_name; + t->p->cur_line = 0; + t->p->buf->next = 0; + t->max_transitions = MIN_TRANSITIONS; + + if (!(t->p->in = fopen(t->p->file_name, "r"))) { + die("ERROR: Could not open stated transducer file '%s'!", + t->p->file_name); + } + else { + t->start_state = get_next_token(t->p); + t->num_transitions = 0; + while ((buf = getQuadruples(t, buf)) != NULL) { + if (strlen(buf->input) > 1) { + int need = strlen(buf->input); + check_transitions_array_size(t, need); + for (int i = 0; i < need; i++) { + output[0] = buf->input[i]; + output[1] = '\0'; + add_transition(t, buf->src_state, buf->input[i], output, + buf->to_state); + } + } + else { + need = 1; + check_transitions_array_size(t, need); + add_transition(t, buf->src_state, buf->input[0], buf->output, + buf->to_state); + } + } + printf("...done!\n"); + + sort_transitions(t); + print_transitions(t); + } + + return t; +} + + +/* + * Returns char at read_pos of StringFile. Also increases read_pos by 1. + * + */ +int +string_file_getc(StringFile* f) +{ + if (!(f->buf[f->read_pos])) { + return EOF; + } + + return f->buf[f->read_pos++]; +} + +/* + * Decreases read_pos in StringFile by 1. + * + */ +void +string_file_ungetc(StringFile* f) +{ + if (f->read_pos > 0) { + f->read_pos--; + } +} + +/* + * Generates the StringFile from an input file. + * + */ +StringFile* +create_string_file_from_file(FILE* inputFile) +{ + int c, i=0; + StringFile* f; + + if (!(f = (StringFile*)malloc(sizeof(StringFile)))) { + die("ERROR: Could not allocate memory for StringFile!"); + } + + if (!(f->buf = (char*)malloc(sizeof(char)*MIN_INPUT_LENGTH))) { + die("ERROR: Could not allocate memory for buffer in StringFile!"); + } + + f->max_chars = MIN_INPUT_LENGTH; + f->read_pos = 0; + + while ((c = fgetc(inputFile)) != EOF) { + if (i == f->max_chars-1) { + f->max_chars = f->max_chars*2; + f->buf = (char*)realloc(f->buf, sizeof(char)*f->max_chars); + } + f->buf[i] = c; + f->num_chars = i; + i++; + } + + f->num_chars++; + + return f; +} + + +/* + * Adds a node to a OutputList (to front!). + * + */ +OutputList* +cons(char* output, OutputList* next) +{ + OutputList* out; + + if (!(out = (OutputList*)malloc(sizeof(OutputList)))) { + die("ERROR: Could not allocate memory for OutputList!"); + } + + out->next = next; + out->output = output; + + return out; +} + +/* + * Finally prints output from OutputList. + * + */ +void +print_output_list(OutputList* output_list) +{ + if (!output_list) { + printf("\n\nSorry, your input could not be processed.\n"); + } + else { + printf("\nMy Output:\n"); + + while (output_list) { + printf("%s", output_list->output); + output_list = output_list->next; + } + printf("\n"); + } +} + +/* + * Compare function for bsearch. + * + */ +int +compare_trans_iter_with_transition(const void* key, const void* value) +{ + TransIter* ti = (TransIter*)key; + Transition* trans = (Transition*)value; + int res; + + res = strcmp(ti->src_state, trans->src_state); + if (!res) { + res = chrcmp(ti->input, trans->input); + } + + return res; +} + +/* + * Returns next fitting Transition of transition array for TransIter. + * + */ +Transition* +trans_iter_next(TransIter* ti) +{ + ti->found++; + + if (!(ti->found->src_state)) { + printf("ti->found == NULL\n"); + return NULL; + } + + if ((strcmp(ti->found->src_state, ti->src_state)) || + (chrcmp(ti->found->input, ti->input))) { + return NULL; + } + + return ti->found; +} + +/* + * 'Init-method' of a TransIter. + * + */ +void +trans_iter_init(TransIter* ti, Transducer* t, char* cur_state, int input) +{ + ti->t = t; + ti->src_state = cur_state; + ti->input = input; + ti->found = bsearch(ti, ti->t->transitions, ti->t->num_transitions, + sizeof(Transition), compare_trans_iter_with_transition); + + while (1) { + if (ti->found == t->transitions) { + break; + } + if (ti->found == NULL) { + break; + } + + ti->found--; + + if ((!(strcmp(ti->found->src_state, cur_state))) && + (!(chrcmp(ti->found->input, input)))) { + continue; + } else { + ti->found++; + break; + } + } +} + + +/* + * Returns an OutputList, generated by recursion. + * + */ +OutputList* +propagate(Transducer* t, char* cur_state, StringFile* inputFile) +{ + TransIter* ti; + OutputList* out; + int cur_char; + + if (!(ti = (TransIter*)malloc(sizeof(TransIter)))) { + die("ERROR: Could not allocate memory for TransIter!"); + } + + if ((cur_char = string_file_getc(inputFile)) == EOF) { + if (cur_state[0] == 'F') { + return cons("", NULL); + } + else { + return NULL; + } + } + + trans_iter_init(ti, t, cur_state, cur_char); + + if (!ti->found) { + string_file_ungetc(inputFile); + return NULL; + } + + while (1) { + if (!(out = propagate(t, ti->found->to_state, inputFile))) { + if (!trans_iter_next(ti)) { + string_file_ungetc(inputFile); + return NULL; + } + } + else { + return cons(ti->found->output, out); + } + } +} + + +/* + * main + * + */ +int +main(int argc, char** argv) +{ + char* file_name; + FILE* inputFile; + Transducer* t; + StringFile* f; + OutputList* out; + + if ((!argv[1]) || (!argv[2])) { + die("USAGE: transducer "); + } + else { + file_name = argv[1]; + printf("Please wait while I process the input file...\n"); + t = readTransducerFromFile(file_name); + if (!(inputFile = fopen(argv[2], "r"))) { + die("ERROR: Could not open stated input file '%s'!", argv[2]); + } + f = create_string_file_from_file(inputFile); + } + + out = propagate(t, t->start_state, f); + + /* final output */ + f->read_pos = 0; + printf("\nYour Input:\n"); + int c; + while ((c = string_file_getc(f)) != EOF) { + printf("%c", c); + } + print_output_list(out); + + return 0; +} + diff --git a/algorithms/transducer/transducer.h b/algorithms/transducer/transducer.h new file mode 100644 index 0000000..7493317 --- /dev/null +++ b/algorithms/transducer/transducer.h @@ -0,0 +1,90 @@ +#define MAX_TOK_LEN 26 /* Hard coded max length of a token */ + + +/* + * Temporary buffer for tokens. + * + */ +typedef struct TokenBuffer_s { + char tok[MAX_TOK_LEN]; + int next; +} TokenBuffer; + +/* + * Struct that collects parser related data. + * + */ +typedef struct ParserState_s { + char* file_name; + FILE* in; + int cur_line; + TokenBuffer* buf; +} ParserState; + +/* + * Used for temporary storage of transitions. + * + */ +typedef struct Quadruple_s { + char* src_state; + char* input; + char* output; + char* to_state; +} Quadruple; + +/* + * Stores a transition. + * + */ +typedef struct Transition_s { + char* src_state; + int input; + char* output; + char* to_state; +} Transition; + +/* + * The transducer struct. + * num_transitions and max_transitions are used for memory management. + * + */ +typedef struct Transducer_s { + char* start_state; + Transition* transitions; + int num_transitions; + int max_transitions; + ParserState* p; +} Transducer; + +/* + * Struct that stores the string file. + * max_chars and num_chars are used for memory management. + * + */ +typedef struct StringFile_s { + char* buf; + int read_pos; + int max_chars; + int num_chars; +} StringFile; + +/* + * Struct for output. + * + */ +typedef struct OutputList_s { + struct OutputList_s* next; + char* output; +} OutputList; + +/* + * Struct for iterator pattern. + * + */ +typedef struct TransIter_s { + char* src_state; + int input; + Transducer* t; + Transition* found; +} TransIter; + -- cgit v1.2.3