#include #include #include #include #include #include "transducer.h" #define START 0 #define TOKEN 1 #define ESCAPE 2 #define ERROR 3 #define END 4 #define MIN_TRANSITIONS 100 /* Minimum count of transitions. */ #define MIN_INPUT_LENGTH 20 /* Minimum length of input file. */ /* * Is called when an error occurs and exits the program * immediately with status 1. */ void die(char *msg, ...) { va_list args; va_start(args, msg); vfprintf(stderr, msg, args); fprintf(stderr, "\n"); va_end(args); exit(1); } /* * Like strcmp(), just for 2 chars. * */ int chrcmp(char a, char b) { if (a < b) { return -1; } if (a == b) { return 0; } return 1; } /* * Prints out status of the parser. * */ int printParserStatus(ParserState *p, int c, char *from, char *to) { printf("Line: %d, State: %s, current char: '%c' - to %s\n", p->cur_line+1, from, c, to); return 0; } /* * Status START for parse automata. * */ int state_START(int c, ParserState* p) { if (isspace(c)) { printParserStatus(p, c, "START", "START"); return START; } switch(c) { case '"': printParserStatus(p, c, "START", "TOKEN"); return TOKEN; case EOF: printParserStatus(p, c, "START", "END"); return END; default: printParserStatus(p, c, "START", "ERROR"); return ERROR; } } /* * Status TOKEN for parse automata. * */ int state_TOKEN(int c, ParserState* p) { switch(c) { case '"': printParserStatus(p, c, "TOKEN", "END"); return END; case '\\': printParserStatus(p, c, "TOKEN", "ESCAPE"); return ESCAPE; default: if (p->buf->next < MAX_TOK_LEN) { printParserStatus(p, c, "TOKEN", "TOKEN"); p->buf->tok[p->buf->next] = c; p->buf->next++; return TOKEN; } else { printParserStatus(p, c, "TOKEN", "ERROR"); return ERROR; } } } /* * Status ESCAPE for parse automata. * */ int state_ESCAPE(int c, ParserState* p) { if (c == 'n') { printParserStatus(p, c, "ESCAPE", "TOKEN"); p->buf->tok[p->buf->next] = '\n'; } else if (c == 't') { printParserStatus(p, c, "ESCAPE", "TOKEN"); p->buf->tok[p->buf->next] = '\t'; } else if (c == 'r') { printParserStatus(p, c, "ESCAPE", "TOKEN"); p->buf->tok[p->buf->next] = '\r'; } else if (c == EOF) { printParserStatus(p, c, "ESCAPE", "ERROR"); return ERROR; } else { printParserStatus(p, c, "ESCAPE", "TOKEN"); p->buf->tok[p->buf->next] = c; } p->buf->next++; return TOKEN; } /* * Status END for parse automata. * */ int state_END(int c, ParserState* p) { printParserStatus(p, c, "END", "START"); p->buf->tok[p->buf->next] = '\0'; p->buf->next = 0; return START; } /* * Runs the parse automata. * */ int run_automata(ParserState* p) { int c, s = START; while ((c = fgetc(p->in)) != EOF) { if (c == '\n') { p->cur_line++; } switch(s) { case START: s = state_START(c, p); break; case TOKEN: s = state_TOKEN(c, p); break; case ESCAPE: s = state_ESCAPE(c, p); break; case END: s = state_END(c, p); p->buf->next = 0; return *(p->buf->tok); break; case ERROR: fprintf(stderr, "\nSyntax Error in line %d of file '%s'!\n", p->cur_line+1, p->file_name); /* +1 for readability */ fclose(p->in); free(p->buf); free(p); exit(1); break; } } return EOF; } /* * Returns next token. * */ char* get_next_token(ParserState* p) { char* res; while (run_automata(p) != EOF) { res = strdup(p->buf->tok); return res; } return NULL; } /* * Returns a Quadruple consisting of src_state, input, output and to_state. * Like a Transition, but input is here a char* - that means that the * output can be longer than 1 char. */ Quadruple* getQuadruples(Transducer* t, Quadruple* buf) { int i; for (i = 0; i < 4; i++) { if (!(buf->src_state = get_next_token(t->p))) { return NULL; } if (!(buf->input = get_next_token(t->p))) { fprintf(stderr, "\nSyntax Error in line %d of file '%s'!\n", t->p->cur_line+1, t->p->file_name); /* +1 for readability */ exit(1); } buf->output = get_next_token(t->p); if (!(buf->to_state = get_next_token(t->p))) { fprintf(stderr, "\nSyntax Error in line %d of file '%s'!\n", t->p->cur_line+1, t->p->file_name); /* +1 for readability */ fprintf(stderr, "Probable reason: Your transducer file does not \ end with a newline.\n"); exit(1); } return buf; } return NULL; } /* * Compare function for sort_transitions. * */ int compare_transitions(const void* a, const void* b) { Transition* t1 = (Transition*)a, *t2 = (Transition*)b; int res; res = strcmp(t1->src_state, t2->src_state); if (!res) { res = chrcmp(t1->input, t2->input); } return res; } /* * Sorts the transitions array in Transducer with qsort. * */ void sort_transitions(Transducer* t) { qsort(t->transitions, t->num_transitions, sizeof(Transition), compare_transitions); } /* * Checks if memory for transitions array of Transducer is sufficent * for transitions that will be added (int need) - if not it calls realloc * for more memory. */ void check_transitions_array_size(Transducer* t, int need) { if (t->num_transitions == t->max_transitions-need) { t->max_transitions = t->max_transitions*2; t->transitions = (Transition*)realloc(t->transitions, sizeof(Transition)*t->max_transitions); } } /* * Adds Transition to transitions array of transducer. * */ void add_transition(Transducer* t, char* src_state, int input, char* output, char* to_state) { t->transitions[t->num_transitions].src_state = src_state; if (strlen(output) == 0) { fprintf(stderr, "\nSyntax Error in line %d of file '%s'!\n", t->p->cur_line+1, t->p->file_name); /* +1 for readability */ fprintf(stderr, "Reason: Output is empty.\n"); exit(1); } else { t->transitions[t->num_transitions].input = input; } t->transitions[t->num_transitions].output = output; t->transitions[t->num_transitions].to_state = to_state; t->num_transitions++; } /* * Prints out a list of the transitions in transducer t. * */ int print_transitions(Transducer* t) { for (unsigned int i = 0; i < t->num_transitions; i++) { printf("\nTransition No. %d:\n", i+1); printf("src_state: %s\n", t->transitions[i].src_state); if (t->transitions[i].input == '\n') { printf("input: \\n\n"); } else if (t->transitions[i].input == '\t') { printf("input: \\t\n"); } else if (t->transitions[i].input == '\r') { printf("input: \\r\n"); } else { printf("input: %c\n", t->transitions[i].input); } if (t->transitions[i].output[0] == '\n') { printf("output: \\n\n"); } else if (t->transitions[i].output[0] == '\t') { printf("output: \\t\n"); } else if (t->transitions[i].output[0] == '\r') { printf("output: \\r\n"); } else { printf("output: %s\n", t->transitions[i].output); } printf("to_state: %s\n", t->transitions[i].to_state); } return 0; } /* * Reads transducer file and returns a finished Transducer. * */ Transducer* readTransducerFromFile(char* file_name) { Transducer* t; Quadruple* buf; char output[2]; int need; /* allocating some memory... */ if (!(t = (Transducer*)malloc(sizeof(Transducer)))) { die("ERROR: Could not allocate memory for Transducer!"); } if (!(t->transitions = (Transition*)malloc(sizeof(Transition) *MIN_TRANSITIONS))) { die("ERROR: Could not allocate memory for transitions!"); } if (!(t->p = (ParserState*)malloc(sizeof(ParserState)))) { die("ERROR: Could not allocate memory for ParserState!"); } if (!(t->p->buf = (TokenBuffer*)malloc(sizeof(TokenBuffer)))) { die("ERROR: Could not allocate memory for TokenBuffer!"); } if (!(buf = (Quadruple*)malloc(sizeof(Quadruple)))) { die("ERROR: Could not allocate memory for transition buffer!"); } t->p->file_name = file_name; t->p->cur_line = 0; t->p->buf->next = 0; t->max_transitions = MIN_TRANSITIONS; if (!(t->p->in = fopen(t->p->file_name, "r"))) { die("ERROR: Could not open stated transducer file '%s'!", t->p->file_name); } else { t->start_state = get_next_token(t->p); t->num_transitions = 0; while ((buf = getQuadruples(t, buf)) != NULL) { if (strlen(buf->input) > 1) { int need = strlen(buf->input); check_transitions_array_size(t, need); for (int i = 0; i < need; i++) { output[0] = buf->input[i]; output[1] = '\0'; add_transition(t, buf->src_state, buf->input[i], output, buf->to_state); } } else { need = 1; check_transitions_array_size(t, need); add_transition(t, buf->src_state, buf->input[0], buf->output, buf->to_state); } } printf("...done!\n"); sort_transitions(t); print_transitions(t); } return t; } /* * Returns char at read_pos of StringFile. Also increases read_pos by 1. * */ int string_file_getc(StringFile* f) { if (!(f->buf[f->read_pos])) { return EOF; } return f->buf[f->read_pos++]; } /* * Decreases read_pos in StringFile by 1. * */ void string_file_ungetc(StringFile* f) { if (f->read_pos > 0) { f->read_pos--; } } /* * Generates the StringFile from an input file. * */ StringFile* create_string_file_from_file(FILE* inputFile) { int c, i=0; StringFile* f; if (!(f = (StringFile*)malloc(sizeof(StringFile)))) { die("ERROR: Could not allocate memory for StringFile!"); } if (!(f->buf = (char*)malloc(sizeof(char)*MIN_INPUT_LENGTH))) { die("ERROR: Could not allocate memory for buffer in StringFile!"); } f->max_chars = MIN_INPUT_LENGTH; f->read_pos = 0; while ((c = fgetc(inputFile)) != EOF) { if (i == f->max_chars-1) { f->max_chars = f->max_chars*2; f->buf = (char*)realloc(f->buf, sizeof(char)*f->max_chars); } f->buf[i] = c; f->num_chars = i; i++; } f->num_chars++; return f; } /* * Adds a node to a OutputList (to front!). * */ OutputList* cons(char* output, OutputList* next) { OutputList* out; if (!(out = (OutputList*)malloc(sizeof(OutputList)))) { die("ERROR: Could not allocate memory for OutputList!"); } out->next = next; out->output = output; return out; } /* * Finally prints output from OutputList. * */ void print_output_list(OutputList* output_list) { if (!output_list) { printf("\n\nSorry, your input could not be processed.\n"); } else { printf("\nMy Output:\n"); while (output_list) { printf("%s", output_list->output); output_list = output_list->next; } printf("\n"); } } /* * Compare function for bsearch. * */ int compare_trans_iter_with_transition(const void* key, const void* value) { TransIter* ti = (TransIter*)key; Transition* trans = (Transition*)value; int res; res = strcmp(ti->src_state, trans->src_state); if (!res) { res = chrcmp(ti->input, trans->input); } return res; } /* * Returns next fitting Transition of transition array for TransIter. * */ Transition* trans_iter_next(TransIter* ti) { ti->found++; if (!(ti->found->src_state)) { printf("ti->found == NULL\n"); return NULL; } if ((strcmp(ti->found->src_state, ti->src_state)) || (chrcmp(ti->found->input, ti->input))) { return NULL; } return ti->found; } /* * 'Init-method' of a TransIter. * */ void trans_iter_init(TransIter* ti, Transducer* t, char* cur_state, int input) { ti->t = t; ti->src_state = cur_state; ti->input = input; ti->found = bsearch(ti, ti->t->transitions, ti->t->num_transitions, sizeof(Transition), compare_trans_iter_with_transition); while (1) { if (ti->found == t->transitions) { break; } if (ti->found == NULL) { break; } ti->found--; if ((!(strcmp(ti->found->src_state, cur_state))) && (!(chrcmp(ti->found->input, input)))) { continue; } else { ti->found++; break; } } } /* * Returns an OutputList, generated by recursion. * */ OutputList* propagate(Transducer* t, char* cur_state, StringFile* inputFile) { TransIter* ti; OutputList* out; int cur_char; if (!(ti = (TransIter*)malloc(sizeof(TransIter)))) { die("ERROR: Could not allocate memory for TransIter!"); } if ((cur_char = string_file_getc(inputFile)) == EOF) { if (cur_state[0] == 'F') { return cons("", NULL); } else { return NULL; } } trans_iter_init(ti, t, cur_state, cur_char); if (!ti->found) { string_file_ungetc(inputFile); return NULL; } while (1) { if (!(out = propagate(t, ti->found->to_state, inputFile))) { if (!trans_iter_next(ti)) { string_file_ungetc(inputFile); return NULL; } } else { return cons(ti->found->output, out); } } } /* * main * */ int main(int argc, char** argv) { char* file_name; FILE* inputFile; Transducer* t; StringFile* f; OutputList* out; if ((!argv[1]) || (!argv[2])) { die("USAGE: transducer "); } else { file_name = argv[1]; printf("Please wait while I process the input file...\n"); t = readTransducerFromFile(file_name); if (!(inputFile = fopen(argv[2], "r"))) { die("ERROR: Could not open stated input file '%s'!", argv[2]); } f = create_string_file_from_file(inputFile); } out = propagate(t, t->start_state, f); /* final output */ f->read_pos = 0; printf("\nYour Input:\n"); int c; while ((c = string_file_getc(f)) != EOF) { printf("%c", c); } print_output_list(out); return 0; }