From cde3febf76396e200e1cf8d922329af5cb5f0b32 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Thu, 10 Jun 2010 02:05:49 +0200 Subject: presentation --- resources/literature/04-regulaere-ausdruecke.pdf | Bin 0 -> 150900 bytes resources/literature/GNfaToDfa.cs | 629 +++++++++++++++++++++++ resources/literature/J00-1005.pdf | Bin 0 -> 925842 bytes 3 files changed, 629 insertions(+) create mode 100644 resources/literature/04-regulaere-ausdruecke.pdf create mode 100644 resources/literature/GNfaToDfa.cs create mode 100644 resources/literature/J00-1005.pdf (limited to 'resources') diff --git a/resources/literature/04-regulaere-ausdruecke.pdf b/resources/literature/04-regulaere-ausdruecke.pdf new file mode 100644 index 0000000..fe4e985 Binary files /dev/null and b/resources/literature/04-regulaere-ausdruecke.pdf differ diff --git a/resources/literature/GNfaToDfa.cs b/resources/literature/GNfaToDfa.cs new file mode 100644 index 0000000..df88b1d --- /dev/null +++ b/resources/literature/GNfaToDfa.cs @@ -0,0 +1,629 @@ +// RegExp -> NFA -> DFA -> Graph in Generic C# +// This program requires .Net version 2.0. +// Peter Sestoft (sestoft@dina.kvl.dk) +// Java 2000-10-07, GC# 2001-10-23, 2003-09-03, 2004-07-26, 2006-03-05 + +// This file contains, in order: +// * A class Nfa for representing an NFA (a nondeterministic finite +// automaton), and for converting it to a DFA (a deterministic +// finite automaton). Most complexity is in this class. +// * A class Dfa for representing a DFA, a deterministic finite +// automaton, and for writing a dot input file representing the DFA. +// * Classes for representing regular expressions, and for building an +// NFA from a regular expression +// * A test class that creates an NFA, a DFA, and a dot input file +// for a number of small regular expressions. The DFAs are +// not minimized. + +using System; +using System.Text; +using System.Collections; +using System.Collections.Generic; +using System.IO; + +// A set represented as the collection of keys of a Dictionary + +class Set : ICollection { + // Only the keys matter; the type bool used for the value is arbitrary + private Dictionary dict; + public Set() { + dict = new Dictionary(); + } + + public Set(T x) : this() { + Add(x); + } + + public Set(IEnumerable coll) : this() { + foreach (T x in coll) + Add(x); + } + + public Set(T[] arr) : this() { + foreach (T x in arr) + Add(x); + } + + public bool Contains(T x) { + return dict.ContainsKey(x); + } + + public void Add(T x) { + if (!Contains(x)) + dict.Add(x, false); + } + + public bool Remove(T x) { + return dict.Remove(x); + } + + public void Clear() { + dict.Clear(); + } + + public bool IsReadOnly { + get { return false; } + } + + public IEnumerator GetEnumerator() { + return dict.Keys.GetEnumerator(); + } + + IEnumerator IEnumerable.GetEnumerator() { + return GetEnumerator(); + } + + public int Count { + get { return dict.Count; } + } + + public void CopyTo(T[] arr, int i) { + dict.Keys.CopyTo(arr, i); + } + + // Is this set a subset of that? + public bool Subset(Set that) { + foreach (T x in this) + if (!that.Contains(x)) + return false; + return true; + } + + // Create new set as intersection of this and that + public Set Intersection(Set that) { + Set res = new Set(); + foreach (T x in this) + if (that.Contains(x)) + res.Add(x); + return res; + } + + // Create new set as union of this and that + public Set Union(Set that) { + Set res = new Set(this); + foreach (T x in that) + res.Add(x); + return res; + } + + // Compute hash code -- should be cached for efficiency + public override int GetHashCode() { + int res = 0; + foreach (T x in this) + res ^= x.GetHashCode(); + return res; + } + + public override bool Equals(Object that) { + if (that is Set) { + Set thatSet = (Set)that; + return thatSet.Count == this.Count + && thatSet.Subset(this) && this.Subset(thatSet); + } else + return false; + } + + public override String ToString() { + StringBuilder res = new StringBuilder(); + res.Append("{ "); + bool first = true; + foreach (T x in this) { + if (!first) + res.Append(", "); + res.Append(x); + first = false; + } + res.Append(" }"); + return res.ToString(); + } +} + +// ---------------------------------------------------------------------- + +// Regular expressions, NFAs, DFAs, and dot graphs +// sestoft@dina.kvl.dk * +// Java 2001-07-10 * C# 2001-10-22 * Gen C# 2001-10-23, 2003-09-03 + +// In the Generic C# 2.0 version we +// use Queue and Queue> for worklists +// use Set for pre-DFA states +// use List for NFA transition relations +// use Dictionary, Dictionary>> +// and Dictionary> for DFA transition relations + +/* Class Nfa and conversion from NFA to DFA --------------------------- + + A nondeterministic finite automaton (NFA) is represented as a + Map from state number (int) to a List of Transitions, a + Transition being a pair of a label lab (a String, null meaning + epsilon) and a target state (an int). + + A DFA is created from an NFA in two steps: + + (1) Construct a DFA whose each of whose states is composite, + namely a set of NFA states (Set of int). This is done by + methods CompositeDfaTrans and EpsilonClose. + + (2) Replace composite states (Set of int) by simple states + (int). This is done by methods Rename and MkRenamer. + + Method CompositeDfaTrans works as follows: + + Create the epsilon-closure S0 (a Set of ints) of the start + state s0, and put it in a worklist (a Queue). Create an + empty DFA transition relation, which is a Map from a + composite state (an epsilon-closed Set of ints) to a Map + from a label (a non-null String) to a composite state. + + Repeatedly choose a composite state S from the worklist. If it is + not already in the keyset of the DFA transition relation, compute + for every non-epsilon label lab the set T of states reachable by + that label from some state s in S. Compute the epsilon-closure + Tclose of every such state T and put it on the worklist. Then add + the transition S -lab-> Tclose to the DFA transition relation, for + every lab. + + Method EpsilonClose works as follows: + + Given a set S of states. Put the states of S in a worklist. + Repeatedly choose a state s from the worklist, and consider all + epsilon-transitions s -eps-> s' from s. If s' is in S already, + then do nothing; otherwise add s' to S and the worklist. When the + worklist is empty, S is epsilon-closed; return S. + + Method MkRenamer works as follows: + + Given a Map from Set of int to something, create an + injective Map from Set of int to int, by choosing a fresh + int for every value of the map. + + Method Rename works as follows: + + Given a Map from Set of int to Map from String to Set of + int, use the result of MkRenamer to replace all Sets of ints + by ints. + +*/ + + +class Nfa { + private readonly int startState; + private readonly int exitState; // This is the unique accept state + private readonly IDictionary> trans; + + public Nfa(int startState, int exitState) { + this.startState = startState; this.exitState = exitState; + trans = new Dictionary>(); + if (!startState.Equals(exitState)) + trans.Add(exitState, new List()); + } + + public int Start { get { return startState; } } + + public int Exit { get { return exitState; } } + + public IDictionary> Trans { + get { return trans; } + } + + public void AddTrans(int s1, String lab, int s2) { + List s1Trans; + if (trans.ContainsKey(s1)) + s1Trans = trans[s1]; + else { + s1Trans = new List(); + trans.Add(s1, s1Trans); + } + s1Trans.Add(new Transition(lab, s2)); + } + + public void AddTrans(KeyValuePair> tr) { + // Assumption: if tr is in trans, it maps to an empty list (end state) + trans.Remove(tr.Key); + trans.Add(tr.Key, tr.Value); + } + + public override String ToString() { + return "NFA start=" + startState + " exit=" + exitState; + } + + // Construct the transition relation of a composite-state DFA + // from an NFA with start state s0 and transition relation + // trans (a Map from int to List of Transition). The start + // state of the constructed DFA is the epsilon closure of s0, + // and its transition relation is a Map from a composite state + // (a Set of ints) to a Map from label (a String) to a + // composite state (a Set of ints). + + static IDictionary, IDictionary>> + CompositeDfaTrans(int s0, IDictionary> trans) { + Set S0 = EpsilonClose(new Set(s0), trans); + Queue> worklist = new Queue>(); + worklist.Enqueue(S0); + // The transition relation of the DFA + IDictionary, IDictionary>> res = + new Dictionary, IDictionary>>(); + while (worklist.Count != 0) { + Set S = worklist.Dequeue(); + if (!res.ContainsKey(S)) { + // The S -lab-> T transition relation being constructed for a given S + IDictionary> STrans = + new Dictionary>(); + // For all s in S, consider all transitions s -lab-> t + foreach (int s in S) { + // For all non-epsilon transitions s -lab-> t, add t to T + foreach (Transition tr in trans[s]) { + if (tr.lab != null) { // Already a transition on lab + Set toState; + if (STrans.ContainsKey(tr.lab)) + toState = STrans[tr.lab]; + else { // No transitions on lab yet + toState = new Set(); + STrans.Add(tr.lab, toState); + } + toState.Add(tr.target); + } + } + } + // Epsilon-close all T such that S -lab-> T, and put on worklist + Dictionary> STransClosed = + new Dictionary >(); + foreach (KeyValuePair> entry in STrans) { + Set Tclose = EpsilonClose(entry.Value, trans); + STransClosed.Add(entry.Key, Tclose); + worklist.Enqueue(Tclose); + } + res.Add(S, STransClosed); + } + } + return res; + } + + // Compute epsilon-closure of state set S in transition relation trans. + + static Set + EpsilonClose(Set S, IDictionary> trans) { + // The worklist initially contains all S members + Queue worklist = new Queue(S); + Set res = new Set(S); + while (worklist.Count != 0) { + int s = worklist.Dequeue(); + foreach (Transition tr in trans[s]) { + if (tr.lab == null && !res.Contains(tr.target)) { + res.Add(tr.target); + worklist.Enqueue(tr.target); + } + } + } + return res; + } + + // Compute a renamer, which is a Map from Set of int to int + + static IDictionary, int> MkRenamer(ICollection> states) { + IDictionary, int> renamer = new Dictionary, int>(); + int count = 0; + foreach (Set k in states) + renamer.Add(k, count++); + return renamer; + } + + // Using a renamer (a Map from Set of int to int), replace + // composite (Set of int) states with simple (int) states in + // the transition relation trans, which is assumed to be a Map + // from Set of int to Map from String to Set of int. The + // result is a Map from int to Map from String to int. + + static IDictionary> + Rename(IDictionary, int> renamer, + IDictionary, IDictionary>> trans) { + IDictionary> newtrans = + new Dictionary>(); + foreach (KeyValuePair, IDictionary>> entry + in trans) { + Set k = entry.Key; + IDictionary newktrans = new Dictionary(); + foreach (KeyValuePair> tr in entry.Value) + newktrans.Add(tr.Key, renamer[tr.Value]); + newtrans.Add(renamer[k], newktrans); + } + return newtrans; + } + + static Set AcceptStates(ICollection> states, + IDictionary, int> renamer, + int exit) { + Set acceptStates = new Set(); + foreach (Set state in states) + if (state.Contains(exit)) + acceptStates.Add(renamer[state]); + return acceptStates; + } + + public Dfa ToDfa() { + IDictionary, IDictionary>> + cDfaTrans = CompositeDfaTrans(startState, trans); + Set cDfaStart = EpsilonClose(new Set(startState), trans); + ICollection> cDfaStates = cDfaTrans.Keys; + IDictionary, int> renamer = MkRenamer(cDfaStates); + IDictionary> simpleDfaTrans = + Rename(renamer, cDfaTrans); + int simpleDfaStart = renamer[cDfaStart]; + Set simpleDfaAccept = AcceptStates(cDfaStates, renamer, exitState); + return new Dfa(simpleDfaStart, simpleDfaAccept, simpleDfaTrans); + } + + // Nested class for creating distinctly named states when constructing NFAs + + public class NameSource { + private static int nextName = 0; + + public int next() { + return nextName++; + } + } +} + +// Class Transition, a transition from one state to another ---------- + + public class Transition { + public String lab; + public int target; + + public Transition(String lab, int target) { + this.lab = lab; this.target = target; + } + + public override String ToString() { + return "-" + lab + "-> " + target; + } + } + +// Class Dfa, deterministic finite automata -------------------------- + +/* + A deterministic finite automaton (DFA) is represented as a Map + from state number (int) to a Map from label (a String, + non-null) to a target state (an int). +*/ + +class Dfa { + private readonly int startState; + private readonly Set acceptStates; + private readonly IDictionary> trans; + + public Dfa(int startState, Set acceptStates, + IDictionary> trans) { + this.startState = startState; + this.acceptStates = acceptStates; + this.trans = trans; + } + + public int Start { get { return startState; } } + + public Set Accept { get { return acceptStates; } } + + public IDictionary> Trans { + get { return trans; } + } + + public override String ToString() { + return "DFA start=" + startState + "\naccept=" + acceptStates; + } + + // Write an input file for the dot program. You can find dot at + // http://www.research.att.com/sw/tools/graphviz/ + + public void WriteDot(String filename) { + TextWriter wr = + new StreamWriter(new FileStream(filename, FileMode.Create, + FileAccess.Write)); + wr.WriteLine("// Format this file as a Postscript file with "); + wr.WriteLine("// dot " + filename + " -Tps -o out.ps\n"); + wr.WriteLine("digraph dfa {"); + wr.WriteLine("size=\"11,8.25\";"); + wr.WriteLine("rotate=90;"); + wr.WriteLine("rankdir=LR;"); + wr.WriteLine("n999999 [style=invis];"); // Invisible start node + wr.WriteLine("n999999 -> n" + startState); // Edge into start state + + // Accept states are double circles + foreach (int state in trans.Keys) + if (acceptStates.Contains(state)) + wr.WriteLine("n" + state + " [peripheries=2];"); + + // The transitions + foreach (KeyValuePair> entry in trans) { + int s1 = entry.Key; + foreach (KeyValuePair s1Trans in entry.Value) { + String lab = s1Trans.Key; + int s2 = s1Trans.Value; + wr.WriteLine("n" + s1 + " -> n" + s2 + " [label=\"" + lab + "\"];"); + } + } + wr.WriteLine("}"); + wr.Close(); + } +} + +// Regular expressions ---------------------------------------------- +// +// Abstract syntax of regular expressions +// r ::= A | r1 r2 | (r1|r2) | r* +// + +abstract class Regex { + abstract public Nfa MkNfa(Nfa.NameSource names); +} + +class Eps : Regex { + // The resulting nfa0 has form s0s -eps-> s0e + + public override Nfa MkNfa(Nfa.NameSource names) { + int s0s = names.next(); + int s0e = names.next(); + Nfa nfa0 = new Nfa(s0s, s0e); + nfa0.AddTrans(s0s, null, s0e); + return nfa0; + } +} + +class Sym : Regex { + String sym; + + public Sym(String sym) { + this.sym = sym; + } + + // The resulting nfa0 has form s0s -sym-> s0e + + public override Nfa MkNfa(Nfa.NameSource names) { + int s0s = names.next(); + int s0e = names.next(); + Nfa nfa0 = new Nfa(s0s, s0e); + nfa0.AddTrans(s0s, sym, s0e); + return nfa0; + } +} + +class Seq : Regex { + Regex r1, r2; + + public Seq(Regex r1, Regex r2) { + this.r1 = r1; this.r2 = r2; + } + + // If nfa1 has form s1s ----> s1e + // and nfa2 has form s2s ----> s2e + // then nfa0 has form s1s ----> s1e -eps-> s2s ----> s2e + + public override Nfa MkNfa(Nfa.NameSource names) { + Nfa nfa1 = r1.MkNfa(names); + Nfa nfa2 = r2.MkNfa(names); + Nfa nfa0 = new Nfa(nfa1.Start, nfa2.Exit); + foreach (KeyValuePair> entry in nfa1.Trans) + nfa0.AddTrans(entry); + foreach (KeyValuePair> entry in nfa2.Trans) + nfa0.AddTrans(entry); + nfa0.AddTrans(nfa1.Exit, null, nfa2.Start); + return nfa0; + } +} + +class Alt : Regex { + Regex r1, r2; + + public Alt(Regex r1, Regex r2) { + this.r1 = r1; this.r2 = r2; + } + + // If nfa1 has form s1s ----> s1e + // and nfa2 has form s2s ----> s2e + // then nfa0 has form s0s -eps-> s1s ----> s1e -eps-> s0e + // s0s -eps-> s2s ----> s2e -eps-> s0e + + public override Nfa MkNfa(Nfa.NameSource names) { + Nfa nfa1 = r1.MkNfa(names); + Nfa nfa2 = r2.MkNfa(names); + int s0s = names.next(); + int s0e = names.next(); + Nfa nfa0 = new Nfa(s0s, s0e); + foreach (KeyValuePair> entry in nfa1.Trans) + nfa0.AddTrans(entry); + foreach (KeyValuePair> entry in nfa2.Trans) + nfa0.AddTrans(entry); + nfa0.AddTrans(s0s, null, nfa1.Start); + nfa0.AddTrans(s0s, null, nfa2.Start); + nfa0.AddTrans(nfa1.Exit, null, s0e); + nfa0.AddTrans(nfa2.Exit, null, s0e); + return nfa0; + } +} + +class Star : Regex { + Regex r; + + public Star(Regex r) { + this.r = r; + } + + // If nfa1 has form s1s ----> s1e + // then nfa0 has form s0s ----> s0s + // s0s -eps-> s1s + // s1e -eps-> s0s + + public override Nfa MkNfa(Nfa.NameSource names) { + Nfa nfa1 = r.MkNfa(names); + int s0s = names.next(); + Nfa nfa0 = new Nfa(s0s, s0s); + foreach (KeyValuePair> entry in nfa1.Trans) + nfa0.AddTrans(entry); + nfa0.AddTrans(s0s, null, nfa1.Start); + nfa0.AddTrans(nfa1.Exit, null, s0s); + return nfa0; + } +} + +// Trying the RE->NFA->DFA translation on three regular expressions + +class TestNFA { + public static void Main(String[] args) { + Regex a = new Sym("A"); + Regex b = new Sym("B"); + Regex c = new Sym("C"); + Regex abStar = new Star(new Alt(a, b)); + Regex bb = new Seq(b, b); + Regex r = new Seq(abStar, new Seq(a, b)); + // The regular expression (a|b)*ab + BuildAndShow("dfa1.dot", r); + // The regular expression ((a|b)*ab)* + BuildAndShow("dfa2.dot", new Star(r)); + // The regular expression ((a|b)*ab)((a|b)*ab) + BuildAndShow("dfa3.dot", new Seq(r, r)); + // The regular expression (a|b)*abb, from ASU 1986 p 136 + BuildAndShow("dfa4.dot", new Seq(abStar, new Seq(a, bb))); + // SML reals: sign?((digit+(\.digit+)?))([eE]sign?digit+)? + Regex d = new Sym("digit"); + Regex dPlus = new Seq(d, new Star(d)); + Regex s = new Sym("sign"); + Regex sOpt = new Alt(s, new Eps()); + Regex dot = new Sym("."); + Regex dotDigOpt = new Alt(new Eps(), new Seq(dot, dPlus)); + Regex mant = new Seq(sOpt, new Seq(dPlus, dotDigOpt)); + Regex e = new Sym("e"); + Regex exp = new Alt(new Eps(), new Seq(e, new Seq(sOpt, dPlus))); + Regex smlReal = new Seq(mant, exp); + BuildAndShow("dfa5.dot", smlReal); + } + + public static void BuildAndShow(String filename, Regex r) { + Nfa nfa = r.MkNfa(new Nfa.NameSource()); + Console.WriteLine(nfa); + Console.WriteLine("---"); + Dfa dfa = nfa.ToDfa(); + Console.WriteLine(dfa); + Console.WriteLine("Writing DFA graph to file " + filename); + dfa.WriteDot(filename); + Console.WriteLine(); + } +} diff --git a/resources/literature/J00-1005.pdf b/resources/literature/J00-1005.pdf new file mode 100644 index 0000000..90f4704 Binary files /dev/null and b/resources/literature/J00-1005.pdf differ -- cgit v1.2.3