% simianer-regexvis.tex
% Patrick Simianer
% YYYY-MM-DD
\documentclass[ignorenonframetext]{beamer}
\mode
{
\usetheme{Singapore}
\usecolortheme{dolphin}
\usefonttheme{professionalfonts}
\useinnertheme{circles}
\useoutertheme{miniframes}
\setbeamertemplate{navigation symbols}{}
\beamersetuncovermixins{\opaqueness<1>{5}}{\opaqueness<2->{15}}
\setbeamertemplate{footline}[frame number]
}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[ngerman]{babel}
\usepackage{lmodern}
\usepackage{framed}
\usepackage{color}
\usepackage{multirow}
\usepackage{listings}
\lstset{basicstyle=\scriptsize\ttfamily, commentstyle=\color{gray}, emphstyle=\sffamily\bfseries}
\lstset{keywordstyle=\sffamily\bfseries, commentstyle=\color{gray}, stringstyle=\color{black}}
\lstset{numbers=none, numberstyle=\tiny}
\usepackage{tikz}
\usetikzlibrary{arrows,automata}
\title[regexvis]{Patrick Simianer\\ Visualisierung regulärer Ausdrücke}
\author{Patrick~Simianer \tiny 2508483\\\normalsize 2010-06-28}
\date{Endliche Automaten HS bei Dr. Karin Haenelt\\ Universitiät Heidelberg im Sommersemester 2010}
\AtBeginSection[]{%
\begin{frame}
\tableofcontents[currentsection]
\end{frame}
}% AtBeginSection
\begin{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame[plain]{\titlepage}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[plain]
\frametitle{Gliederung}
\tableofcontents
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Einleitung}
\subsection{"Uberlegungen}
\begin{frame}
\frametitle{Visualisierung regul"arer Ausdr"ucke}
\begin{itemize}
\item[] Wie soll die Visualisierung aussehen?
\begin{enumerate}
\item Hervorheben von \textit{\textbf{Matches}} oder \textbf{Gruppen} in einem String oder Text
\item Darstellung und Simulation anhand eines \textbf{Automaten}
\end{enumerate}
\item[]
\end{itemize}
\begin{enumerate}
\item Es existieren bereits viele Implementierungen, \\ basierend auf RE-Implementierung der jeweiligen Sprache.\\ $\rightarrow$ Keine "`\textit{step by step}"'-Visualisierung m"oglich
\item Grafische Umsetzung schwierig,\\ eigene RE-Implementierung n"otig\\ $\rightarrow$ Jeder Schritt der Erkennung nachvollziehbar
\end{enumerate}
\end{frame}
\begin{frame}
\frametitle{Visualisierung regul"arer Ausdr"ucke /2}
\begin{enumerate}
\item Wie k"onnen regul"are Ausdr"ucke m"oglichst einfach und effizient implementiert werden?
\begin{itemize}
\item "`Herk"ommliche"' \textit{backtracking}-Methode (\textit{Perl}, \textit{PCRE})
\item[$\Rightarrow$] Direkte Konstruktion eines \textbf{endlichen Automaten}
\end{itemize}
\item[]
\item Soll der Automat dargestellt werden -- und wenn ja, wie?
\begin{itemize}
\item[$\Rightarrow$] \textbf{Ja}, im besten Fall mit Animationen...
\end{itemize}
\item[]
\item In welcher Umgebung k"onnen alle Teile (1. Parser, 2. GUI, 3. Visualisierung) gut implementiert werden?
\begin{itemize}
\item[$\Rightarrow$] Im \textbf{Browser} (1. \textit{JavaScript}, 2. \textit{HTML}, 3. \textit{SVG})
\end{itemize}
\end{enumerate}
\end{frame}
\subsection{Protoypisches Vorgehen}
\begin{frame}
\frametitle{Protoypisches Vorgehen}
\begin{enumerate}
\item \textbf{Parsen} des Ausdrucks
\item Umsetzung in einen \textbf{nichtdeterministischen endlichen Automaten}
\item Übersetzung des NDEA in einen \textbf{deterministischen} endlichen Automaten
\item Grafische \textbf{Darstellung} des Automaten und dessen \textbf{Simulation} durch Animation
\item[]
\item[] Umsetzung im Browser: \textit{JavaScript} (\textit{Rapha\"el} f"ur \textit{SVG}, \textit{jQuery}), \textit{HTML}+\textit{CSS}
\end{enumerate}
\end{frame}
\subsection{Konkreter Aufbau}
\begin{frame}[plain]
%\frametitle{Konkreter Aufbau}
\begin{center}
\begin{figure}
\includegraphics[scale=0.65]{a1.pdf}
\caption{Konkreter System-Aufbau}
\end{figure}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Einfaches Parsing regul"arer Ausdr"ucke}
\subsection{Recursive Descent-Methode}
\begin{frame}[fragile]
\frametitle{\textit{Recursive Descent}-Methode}
\begin{columns}
\column{4cm}
\textbf{Grammatik:}
\hrule
\column{7.5cm}
\textbf{Code:}
\hrule
\end{columns}
\begin{columns}
\column{4cm}
{\scriptsize\begin{tabular}{ll}
\textbf{expr} & $\rightarrow$ term | term \underline{|} expr \\
term & $\rightarrow$ factor | term \\
factor & $\rightarrow$ atom kleene \\
atom & $\rightarrow$ literal | \underline{(} expr \underline{)}\\
kleene & $\rightarrow$ \underline{$*$} kleene | $\epsilon$\\
literal & $\rightarrow$ \underline{a} | \underline{b} | \underline{c} | \underline{\%}
\end{tabular}}
\column{7.5cm}
\lstset{language=C++, emph={RegexParser,expr}}
\begin{lstlisting}
RegexParser.prototype.expr = function() {
var nfa = this.term();
if (this.trymatch('|')) {
return nfa.union(this.expr());
};
return nfa;
};
\end{lstlisting}
\end{columns}
\begin{itemize}
\item Nahezu direktes "Ubersetzen einer Grammatik\footnote{keine Links-Rekursionen, sonst: Endlosschleife} in den Quelltext des Parsers, \textit{top-down} (LL(1))
\item $\forall$ Nichtterminale $\exists$ Funktion, welche die rechte Seite der jeweiligen Regel behandelt
\item Direkte Erzeugung des NDEA, mittels Konstruktion nach Thompson in $O(|r|)$ ($|r|$ L"ange des regul"aren Ausdrucks)
\item Ergebnis: Automat mit max. $2|r|$ Zust"anden, $4|r|$ Transitionen
%\item Schnell, einfach und effizient
\end{itemize}
\end{frame}
\subsection{Konstruktion nach Thompson}
\begin{frame}
\frametitle{Konstruktion nach Thompson}
\begin{itemize}
\item[{\color{red}Konkatenation: \texttt{ab}}]
\begin{center}\includegraphics[scale=0.22]{ab.pdf}\end{center}
\item[]
\item[{\color{green}H"ulle: \texttt{a*}}]
\begin{center}\includegraphics[scale=0.22]{astar.pdf}\end{center}
\item[]
\item[{\color{blue}Vereinigung: \texttt{(a|b)}}]
\begin{center}\includegraphics[scale=0.22]{aorb.pdf}\end{center}
\end{itemize}
\end{frame}
\subsection{Beispiel}
\begin{frame}
\frametitle{Thompsons Algorithmus: Beispiel}
Regulärer Ausdruck: \texttt{a(a|b)c*}
\vspace{0.5cm}
\begin{centering}
\includegraphics[scale=0.23]{aabc.pdf}
\end{centering}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{"Uberf"uhrung NDEA zu einem DEA}
\subsection*{Einleitung}
\begin{frame}
\frametitle{Einleitung}
\begin{itemize}
\item[] Warum den erzeugten NDEA in einen DEA "uberf"uhren?
\item[]
\item[] \textit{trade-off}:
\begin{tabular}{rll}
& NDEA & DEA\\
Platzbedarf & $2|r| = m$, $4|r| = n$ & $2^{m}$\\
Erstellungszeit & $O(|r|)$ & $O(|r|^2 2^{|r|})$, $O(|r|^3)$\\
Simulation & $O(|x|(m+n))$ & $O(|x|)$ \\
& $\approx O(|x|*|r|)$ & \\
\end{tabular}
\item[]\footnotesize $|x|$ L"ange des Eingabe-Strings; bei Erstellungszeit DEA: links \textit{worst-case}, rechts der Durchschnitsfall\normalsize
\item[]
\item Die hier erzeugten NDEA umfassen f"ur gew"ohnlich sehr viele Zust"ande, die Darstellung eines DEA ist praktikabler
\end{itemize}
\end{frame}
\subsection{$\epsilon$-Abschluss}
\begin{frame}[fragile]
\frametitle{$\epsilon$-Abschluss}
\textbf{Pseudo-Code}
\hrule
\begin{columns}
\column{4.5cm}
\lstset{language=C++, emph={epsclosure,add,move,pop,push,stack,foreach}}
\begin{lstlisting}
epsclosure(dState): stack s
foreach nState in dState {
s.push(nState)
}
while s not empty {
nState1 = s.pop()
foreach nState1 e> nState2 {
if nState2 not in dState {
dState.add(nState2)
s.push(nState2)
}
}
}
return dState
\end{lstlisting}
\column{4.3cm}
\lstset{language=C++, emph={nfa2dfa,add,DFA,NFA,stack,epsclosure,move,pop,push,foreach}}
\begin{lstlisting}
nfa2dfa(NFA): stack s, DFA d
s.push(epsclosure(nfa.start))
d.add(epsclosure(nfa.start))
while s not empty {
dState1 = s.pop()
foreach ch in ALPHA {
dState2 = move(dState1, ch)
next = epsclosure(dState2)
if next not in DFA {
d.add(dState ch> next)
}
}
}
return d
\end{lstlisting}
\end{columns}
\begin{itemize}
\item[] \footnotesize \texttt{a e> b}: $\epsilon$-"Ubergang von Z. \texttt{a} nach \texttt{b}; \texttt{a ch> b}: "Ubergang mit Zeichen \texttt{ch}
\item[$\bullet$] \textbf{Formal:} $\forall p \in Q : E(\{p\}) = \{ q \in Q : p \rightarrow_\epsilon q \}$ ($\epsilon$-Abschl. v. a. $p$ aus $Q$)
\item[$\bullet$] \textbf{Simulation} des NDEA, oder vollst"andige Erzeugung eines \textbf{DEA}
%\item \textbf{Laufzeit:} $O(k(n +m))$, $k$ L"ange der Eingabe, $n$ NFA Zust"ande, $m$ NFA "Uberg"ange
\end{itemize}
\end{frame}
\subsection{Beispiel}
\begin{frame}[plain]
\frametitle{NDEA $\rightarrow$ DEA Beispiel}
Regul"arer Ausdruck: \texttt{(a|b)*}
\begin{center}
\includegraphics[scale=0.27]{aorbstar0.pdf}
\end{center}
\end{frame}
\begin{frame}[plain]
\begin{center}
\frametitle{NDEA $\rightarrow$ DEA Beispiel /2}
\begin{tabular}{c|c|c}
Dfa ID & Symbol & $\rightarrow$Dfa ID\\
\hline
\multirow{2}{*}{\{6, 7, 4, 0, 2\}} & $a$ & \{1, 5, 7, 4, 0, 2\} \\
& $b$ & \{3, 5, 7, 4, 0, 2\} \\
\hline
\multirow{2}{*}{\{1, 5, 7, 4, 0, 2\}} & $a$ & \{1, 5, 7, 4, 0, 2\}\\
& $b$ & \{3, 5, 7, 4, 0, 2\}\\
\hline
\multirow{2}{*}{\{3, 5, 7, 4, 0, 2\}}& $a$ & \{1, 5, 7, 4, 0, 2\}\\
& $b$ & \{3, 5, 7, 4, 0, 2\}\\
\end{tabular}
\end{center}
\begin{center}
\includegraphics[scale=.175]{aorbstar.pdf}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section*{Fragen}
\begin{frame}[plain]
\begin{center}
\Huge{Fragen?}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[allowframebreaks]
\frametitle{Literatur}
\bibliographystyle{alpha}
\bibliography{beamer}
\nocite{*}
\end{frame}
\begin{frame}
\frametitle{Ressourcen}
\begin{itemize}
\item \textit{Rapha\"el} JavaScript SVG Library (\url{http://raphaeljs.com/})
\item \textit{jQuery} JavaScript Library (\url{http://jquery.com/})
\item Scalable Vector Graphics (\textit{SVG}) 1.1 Specification (\url{http://www.w3.org/TR/SVG/})
\item Writing your own regular expression parser (\url{http://www.codeproject.com/KB/recipes/OwnRegExpressionsParser.aspx})
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Demo}
\begin{frame}[plain]
\begin{center}
\Huge{Demo}
\end{center}
\end{frame}
\section{Weiterentwicklung}
\begin{frame}
\frametitle{Weiterentwicklung}
\begin{itemize}
\item Vorhanden: $*$, \texttt{|}, \texttt{()}
\item[]
\item Zeichenklassen: \texttt{.}, \texttt{\textbackslash w}, \texttt{\textbackslash d}, \lbrack\ \rbrack, $\hdots$ $\rightarrow$ Einfach implementierbar, Vorverarbeitung der Eingabe
\item Operatoren: $+$, ?, \{$m$, $n$\}, $\hdots$ $\rightarrow$ Ebenfalls durch Vorverarbeitung l"osbar, beziehungsweise durch Anpassung des Automaten
\item[]
\item \textit{Lookahead} oder \textit{lookbehind} sind leider nicht ohne Weiteres mit endlichen Automaten zu implementieren, da die zugrunde liegenden Grammatiken nicht mehr regul"ar w"aren.
\end{itemize}
\end{frame}
\end{document}