slr parse table

This commit is contained in:
jusax23 2024-11-05 17:56:28 +01:00
parent 982d0767e4
commit 8d52c340bd
Signed by: jusax23
GPG key ID: 499E2AA870C1CD41
4 changed files with 139 additions and 41 deletions

View file

@ -78,4 +78,7 @@ fn main() {
println!("follow: {:?}", grammar.follow);
grammar.gen_lr0_automaton();
println!("automaton: {:?}", grammar.lr0_automaton);
grammar.gen_slr_parse_table();
println!("parse_table: {:?}", grammar.slr_parse_table);
}

View file

@ -4,6 +4,9 @@ use super::{Grammar, Sentential};
impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar<N, T> {
pub fn gen_ll_parse_table(&mut self) -> bool {
if self.first.is_none() {
self.gen_first();
}
if self.follow.is_none() {
self.gen_follow();
}
@ -11,6 +14,9 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar
return false;
}
let mut conflict = false;
// left derivation
// when hiding N and T is next (None = e)
// then the n'th (usize) variant of N can be used.
let mut parse_table: HashMap<(N, Option<T>), usize> = HashMap::new();
for (from, to) in self.rules.iter() {
for (id, to) in to.iter().enumerate() {

View file

@ -4,64 +4,80 @@ use std::{
rc::{Rc, Weak},
};
use super::{Grammar, Sentential};
use super::{Grammar, RuleIndex, Sentential};
pub type RL0Automaton<N, T> =
HashMap<Rc<LR0State<N, T>>, Vec<(Sentential<N, T>, Weak<LR0State<N, T>>)>>;
pub type RL0Rule<N, T> = (N, Vec<Sentential<N, T>>, usize);
HashMap<Rc<LR0State<N>>, HashMap<Sentential<N, T>, Weak<LR0State<N>>>>;
#[derive(Debug, Eq, PartialEq)]
pub struct LR0State<N: Hash + Eq, T: Hash + Eq>(HashSet<RL0Rule<N, T>>);
pub struct LR0State<N: Hash + Eq>(HashSet<(RuleIndex<N>, usize)>);
impl<N: Hash + Eq + Clone, T: Hash + Eq + Clone> LR0State<N, T> {
pub fn next_kernel(&self, read: &Sentential<N, T>) -> Self {
let mut next_state: LR0State<N, T> = LR0State(HashSet::new());
for (from, to, dot) in self.0.iter() {
if to.get(*dot).map(|elem| *elem == *read).unwrap_or(false) {
next_state.0.insert((from.clone(), to.clone(), dot + 1));
}
}
next_state
}
pub fn readable(&self) -> HashSet<Sentential<N, T>> {
let mut readbles = HashSet::new();
for (_, to, dot) in self.0.iter() {
if let Some(l) = to.get(*dot) {
readbles.insert(l.clone());
}
}
readbles
}
}
impl<N: Hash + Eq + Ord, T: Hash + Eq + Ord> Hash for LR0State<N, T> {
impl<N: Hash + Eq + Ord> Hash for LR0State<N> {
fn hash<H: Hasher>(&self, state: &mut H) {
let mut a: Vec<&RL0Rule<N, T>> = self.0.iter().collect();
let mut a: Vec<&(RuleIndex<N>, usize)> = self.0.iter().collect();
a.sort();
for s in a.iter() {
s.hash(state);
}
}
}
#[derive(Debug)]
pub enum SlrAction<Shift, Reduce> {
Shift(Shift),
Reduce(Reduce),
}
pub type SlrActionTable<N, T> = HashMap<(usize, Option<T>), SlrAction<usize, RuleIndex<N>>>;
pub type SlrGotoTable<N> = HashMap<(usize, N), usize>;
pub type SlrParseTable<N, T> = (SlrActionTable<N, T>, SlrGotoTable<N>);
impl<N, T> Grammar<N, T>
where
N: PartialEq + Eq + Hash + Clone + Ord,
T: PartialEq + Eq + Hash + Clone + Ord,
{
pub fn lr0_clozure(&self, mut state: LR0State<N, T>) -> LR0State<N, T> {
/// Computes the kernel of the successor state reached from `state` by
/// reading the symbol `read`: every LR(0) item whose dot stands directly
/// in front of `read` is carried over with the dot advanced by one.
/// Items whose rule cannot be resolved in `self.rules` are skipped.
pub fn next_kernel(&self, state: &LR0State<N>, read: &Sentential<N, T>) -> LR0State<N> {
    let advanced = state
        .0
        .iter()
        .filter(|((from, rule_id), dot)| {
            // keep only items where the symbol behind the dot equals `read`
            self.rules
                .get(from)
                .and_then(|rhs_list| rhs_list.get(*rule_id))
                .and_then(|rhs| rhs.get(*dot))
                .map_or(false, |symbol| symbol == read)
        })
        .map(|((from, rule_id), dot)| ((from.clone(), *rule_id), dot + 1))
        .collect();
    LR0State(advanced)
}
/// Collects every grammar symbol that stands directly behind a dot in
/// some item of `state` — i.e. all symbols on which a transition out of
/// this state exists. Items whose rule cannot be resolved are ignored.
pub fn readable(&self, state: &LR0State<N>) -> HashSet<Sentential<N, T>> {
    state
        .0
        .iter()
        .filter_map(|((from, rule_id), dot)| {
            // resolve the item's right-hand side, then clone the symbol
            // at the dot position (None when the dot is at the end)
            self.rules
                .get(from)
                .and_then(|rhs_list| rhs_list.get(*rule_id))
                .and_then(|rhs| rhs.get(*dot))
                .cloned()
        })
        .collect()
}
pub fn lr0_clozure(&self, mut state: LR0State<N>) -> LR0State<N> {
loop {
let mut change = false;
let nt = state
.0
.iter()
.filter_map(|(_, to, dot)| to.get(*dot).cloned())
.filter_map(|((from, rule_id), dot)| {
self.rules
.get(from)
.and_then(|v| v.get(*rule_id))
.and_then(|to| to.get(*dot).cloned())
})
.collect::<Vec<_>>();
for n in nt {
if let Sentential::NoneTerminal(n) = n {
if let Some(rule) = self.rules.get(&n) {
for to in rule {
change |= state.0.insert((n.clone(), to.clone(), 0));
for to in 0..rule.len() {
change |= state.0.insert(((n.clone(), to), 0));
}
}
}
@ -75,34 +91,101 @@ where
pub fn gen_lr0_automaton(&mut self) {
let mut out: RL0Automaton<N, T> = HashMap::new();
// add state zero
let mut start_state = LR0State(HashSet::new());
if let Some(rule) = self.rules.get(&self.start) {
for to in rule {
start_state.0.insert((self.start.clone(), to.clone(), 0));
for to in 0..rule.len() {
start_state.0.insert(((self.start.clone(), to), 0));
}
}
// add state to graph and mark for todo
let rc = Rc::new(self.lr0_clozure(start_state));
let mut todo = vec![Rc::downgrade(&rc)];
out.insert(rc, Vec::new());
while let Some(elem) = todo.pop() {
if let Some(elem) = elem.upgrade() {
out.insert(rc, HashMap::new());
// add states while marked states exists
while let Some(state) = todo.pop() {
if let Some(state) = state.upgrade() {
// new adjacent list
let mut vec = Vec::new();
for none_terminal in elem.readable() {
let next_state = self.lr0_clozure(elem.next_kernel(&none_terminal));
// add clozures from the kernels from all readable symbols
for none_terminal in self.readable(&state) {
let next_state = self.lr0_clozure(self.next_kernel(&state, &none_terminal));
let rc = Rc::new(next_state);
if let Some((k, _)) = out.get_key_value(&rc) {
vec.push((none_terminal, Rc::downgrade(k)));
} else {
todo.push(Rc::downgrade(&rc));
vec.push((none_terminal, Rc::downgrade(&rc)));
out.insert(rc, Vec::new());
out.insert(rc, HashMap::new());
}
}
out.entry(elem).and_modify(|elem| {
// write adjacent list to state
// does not check duplicates. Is not needed, because `readable` returns a set
out.entry(state).and_modify(|elem| {
elem.extend(vec);
});
}
}
self.lr0_automaton = Some(out);
}
/// Builds the SLR(1) parse table from the LR(0) automaton and the FOLLOW
/// sets and stores it in `self.slr_parse_table` as `(action, goto)`.
/// Prerequisites (`follow`, `lr0_automaton`) are generated lazily.
pub fn gen_slr_parse_table(&mut self) {
if self.follow.is_none() {
self.gen_follow();
}
if self.lr0_automaton.is_none() {
self.gen_lr0_automaton();
}
let lr0_automaton = self.lr0_automaton.as_ref().unwrap();
// Assign a numeric id to every automaton state.
// NOTE(review): ids come from HashMap iteration order, so state numbering
// (including which state gets id 0) is not deterministic across runs —
// confirm that no caller relies on a stable start-state id.
let ids: HashMap<Rc<LR0State<N>>, usize> = HashMap::from_iter(
lr0_automaton
.iter()
.enumerate()
.map(|(id, (a, _))| (a.clone(), id)),
);
// none is $
let mut action: SlrActionTable<N, T> = HashMap::new();
let mut goto: SlrGotoTable<N> = HashMap::new();
for (state, to) in lr0_automaton {
let id = ids
.get(state)
.expect("Found broken state in slr parse table gen.");
// Shift / goto entries: one per symbol readable from this state.
for go in self.readable(state) {
// follow the edge for this symbol; a dangling Weak means the
// target state was dropped — skip it
let Some(to) = to.get(&go).and_then(|to| to.upgrade()) else {
continue;
};
let to_id = ids
.get(&to)
.expect("Found broken state in slr parse table gen.");
match go {
Sentential::Terminal(t) => {
action.insert((*id, Some(t)), SlrAction::Shift(*to_id));
}
Sentential::NoneTerminal(nt) => {
goto.insert((*id, nt), *to_id);
}
};
}
// Reduce entries: for every completed item (dot at end of rhs),
// reduce by that rule on every terminal in FOLLOW(from).
for ((from, rule_id), dot) in state.0.iter() {
let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else {
continue;
};
if to.len() <= *dot {
// presumably `self.follow(from)` yields FOLLOW(from) with
// `None` standing for the end marker $ — confirm against the
// follow-set implementation.
// NOTE(review): `insert` silently overwrites — shift/reduce and
// reduce/reduce conflicts are not detected or reported here.
for follow in self.follow(from) {
action.insert((*id, follow), SlrAction::Reduce((from.clone(), *rule_id)));
}
}
}
}
self.slr_parse_table = Some((action, goto));
}
}

View file

@ -3,7 +3,7 @@ use std::{
hash::Hash,
};
use lr0::RL0Automaton;
use lr0::{RL0Automaton, SlrParseTable};
pub mod ll_grammar;
pub mod lr0;
@ -33,6 +33,7 @@ macro_rules! cfg_grammar {
follow: None,
ll_parse_table: None,
lr0_automaton: None,
slr_parse_table: None,
}
}
};
@ -66,6 +67,8 @@ impl<T: Ord, N: Ord> Ord for Sentential<N, T> {
}
}
pub type RuleIndex<N> = (N, usize);
pub struct Grammar<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> {
pub start: N,
pub rules: HashMap<N, Vec<Vec<Sentential<N, T>>>>,
@ -83,6 +86,9 @@ pub struct Grammar<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash +
/// - key: states
/// - value: list with read symbol and linked node.
pub lr0_automaton: Option<RL0Automaton<N, T>>,
///
pub slr_parse_table: Option<SlrParseTable<N, T>>,
}
impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar<N, T> {