stapregex-dfa.cxx - systemtap
Functions defined
Source code
#include <string>
#include <iostream>
#include <sstream>
#include <set>
#include <list>
#include <map>
#include <vector>
#include <queue>
#include <utility>
#include "translator-output.h"
#include "stapregex-parse.h"
#include "stapregex-tree.h"
#include "stapregex-dfa.h"
using namespace std;
namespace stapregex {
regexp *pad_re = NULL;
regexp *fail_re = NULL;
dfa *
stapregex_compile (regexp *re, const std::string& match_snippet,
const std::string& fail_snippet)
{
if (pad_re == NULL) {
pad_re = make_dot ();
pad_re = new close_op (pad_re, true); pad_re = new alt_op (pad_re, new null_op, true); }
if (fail_re == NULL) {
fail_re = make_dot (true); fail_re = new close_op (fail_re, true); fail_re = new alt_op (fail_re, new null_op, true); fail_re = new cat_op (fail_re, new anchor_op('$'));
fail_re = new rule_op(fail_re, 0);
XXX }
vector<string> outcomes(2);
outcomes[0] = fail_snippet;
outcomes[1] = match_snippet;
int num_tags = re->num_tags;
bool anchored = re->anchored ();
if (!anchored) re = new cat_op(pad_re, re); re = new rule_op(re, 1);
re = new alt_op(re, fail_re);
#ifdef STAPREGEX_DEBUG_INS
cerr << "RESULTING INS FROM REGEX " << re << ":" << endl;
#endif
ins *i = re->compile();
#ifdef STAPREGEX_DEBUG_INS
for (const ins *j = i; (j - i) < re->ins_size() + 1; )
{
j = show_ins(cerr, j, i); cerr << endl;
}
cerr << endl;
#endif
dfa *d = new dfa(i, num_tags, outcomes);
if (!anchored) delete ((rule_op*) ((alt_op*) re)->a)->re; delete ((alt_op*) re)->a; delete re;
return d;
}
arc_priority
refine_higher(const arc_priority& a)
{
return make_pair(2 * a.first + 1, a.second + 1);
}
arc_priority
refine_lower (const arc_priority& a)
{
return make_pair(2 * a.first, a.second + 1);
}
int
arc_compare (const arc_priority& a, const arc_priority& b)
{
unsigned long x = a.first;
unsigned long y = b.first;
if (a.second > b.second)
x = x << (a.second - b.second);
else if (a.second < b.second)
y = y << (b.second - a.second);
return ( x == y ? 0 : x < y ? -1 : 1 );
}
state::state (state_kernel *kernel)
: label(~0), next(NULL), kernel(kernel), accepts(false), accept_outcome(0) {}
state *
dfa::add_state (state *s)
{
s->label = nstates++;
if (last == NULL)
{
last = s;
first = last;
}
else
{
last->next = s;
last = last->next;
}
return last;
}
void
add_kernel (state_kernel *kernel, ins *i)
{
kernel_point point;
point.i = i;
point.priority = make_pair(0,0);
kernel->push_back(point);
}
state_kernel *
make_kernel (ins *i)
{
state_kernel *kernel = new state_kernel;
add_kernel (kernel, i);
return kernel;
}
state_kernel *
te_closure (state_kernel *start, int ntags, bool is_initial = false)
{
state_kernel *closure = new state_kernel(*start);
queue<kernel_point> worklist;
vector<unsigned> max_tags (ntags, 0);
map<ins *, list<kernel_point> > closure_map;
for (state_kernel::iterator it = closure->begin();
it != closure->end(); it++)
{
it->priority = make_pair(0,0);
worklist.push(*it);
for (list<map_item>::const_iterator jt = it->map_items.begin();
jt != it->map_items.end(); jt++)
max_tags[jt->first] = max(jt->second, max_tags[jt->first]);
closure_map[it->i].push_back(*it);
}
while (!worklist.empty())
{
kernel_point point = worklist.front(); worklist.pop();
ins *target = NULL; int tag = -1;
ins *other_target = NULL; int other_tag = -1;
bool do_split = false;
if (point.i->i.tag == TAG)
{
target = &point.i[1];
tag = (int) point.i->i.param;
}
else if (point.i->i.tag == FORK && point.i == (ins *) point.i->i.link)
{
target = &point.i[1];
}
else if (point.i->i.tag == FORK)
{
do_split = true;
if (point.i->i.param)
{
target = &point.i[1];
other_target = (ins *) point.i->i.link;
}
else
{
target = (ins *) point.i->i.link;
other_target = &point.i[1];
}
}
else if (point.i->i.tag == GOTO)
{
target = (ins *) point.i->i.link;
}
else if (point.i->i.tag == INIT && is_initial)
{
target = &point.i[1];
}
bool already_found;
kernel_point next;
next.i = target;
next.priority = do_split ? refine_lower(point.priority) : point.priority;
next.map_items = point.map_items;
kernel_point other_next;
other_next.i = other_target;
other_next.priority = do_split ? refine_higher(point.priority) : point.priority;
other_next.map_items = point.map_items;
other_next.parents = point.parents;
if (point.parents.find(other_next.i) != point.parents.end())
{
other_target = NULL;
other_tag = -1;
}
other_next.parents.insert(other_next.i);
next.parents = point.parents;
if (point.parents.find(next.i) != point.parents.end())
{
target = NULL;
tag = -1;
goto next_target;
}
next.parents.insert(next.i);
another_transition:
if (target == NULL)
continue;
if (tag >= 0)
{
for (list<map_item>::iterator it = next.map_items.begin();
it != next.map_items.end(); )
if (it->first == (unsigned) tag)
{
list<map_item>::iterator next_it = it;
next_it++;
next.map_items.erase (it);
it = next_it;
}
else it++;
unsigned x = max_tags[tag];
next.map_items.push_back(make_pair(tag, ++x));
max_tags[tag] = x;
}
already_found = false;
for (list<kernel_point>::iterator it = closure_map[next.i].begin();
it != closure_map[next.i].end(); )
{
int result = arc_compare(it->priority, next.priority);
if (result > 0) {
list<kernel_point>::iterator old_it = it;
it++;
closure_map[next.i].erase(old_it);
continue;
} else if (result == 0) {
already_found = true;
}
it++;
}
if (!already_found) {
closure_map[next.i].push_back(next);
for (list<map_item>::iterator jt = next.map_items.begin();
jt != next.map_items.end(); jt++)
max_tags[jt->first] = max(jt->second, max_tags[jt->first]);
closure->push_back(next);
worklist.push(next);
}
next_target:
target = other_target; other_target = NULL;
tag = other_tag; other_tag = -1;
next = other_next;
goto another_transition;
}
return closure;
}
state *
dfa::find_equivalent (state *s, tdfa_action &r)
{
state *answer = NULL;
for (state_kernel::iterator it = s->kernel->begin();
it != s->kernel->end(); it++)
mark(it->i);
unsigned n = s->kernel->size();
for (state *t = first; t != NULL; t = t->next)
{
if (t->kernel->size() == n)
{
for (state_kernel::iterator it = t->kernel->begin();
it != t->kernel->end(); it++)
if (!marked(it->i))
goto next_state;
answer = t;
goto cleanup;
}
next_state:
;
}
cleanup:
for (state_kernel::iterator it = s->kernel->begin();
it != s->kernel->end(); it++)
unmark(it->i);
return answer;
}
dfa::dfa (ins *i, int ntags, vector<string>& outcome_snippets)
: orig_nfa(i), nstates(0), ntags(ntags), outcome_snippets(outcome_snippets)
{
first = last = NULL;
ins *start = &i[0];
state_kernel *seed_kernel = make_kernel(start);
state_kernel *initial_kernel = te_closure(seed_kernel, ntags, true);
delete seed_kernel;
state *initial = add_state(new state(initial_kernel));
queue<state *> worklist; worklist.push(initial);
while (!worklist.empty())
{
state *curr = worklist.front(); worklist.pop();
vector<list<ins *> > edges(NUM_REAL_CHARS);
for (list<kernel_point>::iterator it = curr->kernel->begin();
it != curr->kernel->end(); it++)
{
if (it->i->i.tag == CHAR)
{
for (ins *j = &it->i[1]; j < (ins *) it->i->i.link; j++)
edges[j->c.value].push_back((ins *) it->i->i.link);
}
else if (it->i->i.tag == ACCEPT)
{
curr->accepts = true;
curr->accept_outcome = max(it->i->i.param, curr->accept_outcome);
}
}
for (unsigned c = 0; c < NUM_REAL_CHARS; )
{
list <ins *> e = edges[c];
assert (!e.empty()); XXX
span s;
s.lb = c;
while (++c < NUM_REAL_CHARS && edges[c] == e) ;
s.ub = c - 1;
s.reach_pairs = new state_kernel;
for (list<ins *>::iterator it = e.begin();
it != e.end(); it++)
add_kernel (s.reach_pairs, *it);
curr->spans.push_back(s);
}
for (list<span>::iterator it = curr->spans.begin();
it != curr->spans.end(); it++)
{
state_kernel *reach_pairs = it->reach_pairs;
state_kernel *u_pairs = te_closure(reach_pairs, ntags);
state *target = new state(u_pairs);
tdfa_action c;
set<map_item> all_items;
for (state_kernel::const_iterator jt = curr->kernel->begin();
jt != curr->kernel->end(); jt++)
for (list<map_item>::const_iterator kt = jt->map_items.begin();
kt != jt->map_items.end(); jt++)
all_items.insert(*kt);
list<map_item> store_items;
for (state_kernel::const_iterator jt = u_pairs->begin();
jt != u_pairs->end(); jt++)
for (list<map_item>::const_iterator kt = jt->map_items.begin();
kt != jt->map_items.end(); kt++)
if (all_items.find(*kt) == all_items.end())
store_items.push_back(*kt);
for (list<map_item>::iterator jt = store_items.begin();
jt != store_items.end(); jt++)
{
tdfa_insn insn;
insn.to = *jt;
insn.save_pos = true;
c.push_back(insn);
}
state *t_prime = find_equivalent(target, c);
if (t_prime != NULL)
{
delete target;
}
else
{
t_prime = target;
add_state(t_prime);
worklist.push(t_prime);
if (t_prime->accepts)
{
}
}
it->to = t_prime;
it->action = c;
}
}
}
dfa::~dfa ()
{
state * s;
while ((s = first))
{
first = s->next;
delete s;
}
delete orig_nfa;
}
void
span::emit_jump (translator_output *o, const dfa *d) const
{
#ifdef STAPREGEX_DEBUG_MATCH
o->newline () << "printf(\" --> GOTO yystate%d\\n\", " << to->label << ");";
#endif
if (to->accepts)
{
emit_final(o, d);
}
else
{
o->newline () << "YYCURSOR++;";
o->newline () << "goto yystate" << to->label << ";";
}
}
void
span::emit_final (translator_output *o, const dfa *d) const
{
assert (to->accepts); XXX o->newline() << d->outcome_snippets[to->accept_outcome];
o->newline() << "goto yyfinish;";
}
string c_char(char c)
{
stringstream o;
o << "'";
print_escaped(o, c);
o << "'";
return o.str();
}
void
state::emit (translator_output *o, const dfa *d) const
{
o->newline() << "yystate" << label << ": ";
#ifdef STAPREGEX_DEBUG_MATCH
o->newline () << "printf(\"READ '%s' %c\", cur, *YYCURSOR);";
#endif
o->newline() << "switch (*YYCURSOR) {";
o->indent(1);
for (list<span>::const_iterator it = spans.begin();
it != spans.end(); it++)
{
if (it->lb == '\0')
{
o->newline() << "case " << c_char('\0') << ":";
it->emit_final(o, d); }
for (unsigned c = max('\1', it->lb); c <= (unsigned) it->ub; c++) {
o->newline() << "case " << c_char((char) c) << ":";
}
it->emit_jump(o, d);
}
o->newline(-1) << "}";
}
void
dfa::emit (translator_output *o) const
{
#ifdef STAPREGEX_DEBUG_DFA
print(o);
#else
o->newline() << "{";
o->newline(1);
XXX if (first->accepts)
{
o->newline() << outcome_snippets[first->accept_outcome];
o->newline() << "goto yyfinish;";
}
for (state *s = first; s; s = s->next)
s->emit(o, this);
o->newline() << "yyfinish: ;";
o->newline(-1) << "}";
#endif
}
void
dfa::emit_tagsave (translator_output *o, std::string tag_states,
std::string tag_vals, std::string tag_count) const
{
}
std::ostream&
operator << (std::ostream &o, const tdfa_action& a)
{
for (list<tdfa_insn>::const_iterator it = a.begin();
it != a.end(); it++)
{
if (it != a.begin()) o << "; ";
o << "m[" << it->to.first << "," << it->to.second << "] <- ";
if (it->save_pos)
o << "p";
else
o << "m[" << it->from.first << "," << it->from.second << "]";
}
return o;
}
std::ostream&
operator << (std::ostream &o, const arc_priority& p)
{
o << p.first << "/" << (1 << p.second);
return o;
}
void
state::print (translator_output *o) const
{
o->line() << "state " << label;
if (accepts)
o->line() << " accepts " << accept_outcome;
if (!finalizer.empty())
o->line() << " [" << finalizer << "]";
o->indent(1);
for (list<span>::const_iterator it = spans.begin();
it != spans.end(); it++)
{
o->newline() << "'";
if (it->lb == it->ub)
{
print_escaped (o->line(), it->lb);
o->line() << " ";
}
else
{
print_escaped (o->line(), it->lb);
o->line() << "-";
print_escaped (o->line(), it->ub);
}
o->line() << "' -> " << it->to->label;
if (!it->action.empty())
o->line() << " [" << it->action << "]";
}
o->newline(-1);
}
void
dfa::print (std::ostream& o) const
{
translator_output to(o); print(&to);
}
void
dfa::print (translator_output *o) const
{
o->newline();
for (state *s = first; s; s = s->next)
{
s->print(o);
o->newline();
}
o->newline();
}
std::ostream&
operator << (std::ostream& o, const dfa& d)
{
d.print(o);
return o;
}
std::ostream&
operator << (std::ostream &o, const dfa *d)
{
o << *d;
return o;
}
};