From 8d52c340bde838ce9f919b22c366f3088f1d78d8 Mon Sep 17 00:00:00 2001
From: jusax23
Date: Tue, 5 Nov 2024 17:56:28 +0100
Subject: [PATCH] slr parse table

---
 src/bin/book.rs       |   3 +
 src/cfg/ll_grammar.rs |   6 ++
 src/cfg/lr0.rs        | 163 +++++++++++++++++++++++++++++++-----------
 src/cfg/mod.rs        |   8 ++-
 4 files changed, 139 insertions(+), 41 deletions(-)

diff --git a/src/bin/book.rs b/src/bin/book.rs
index 23f33b0..b553b26 100644
--- a/src/bin/book.rs
+++ b/src/bin/book.rs
@@ -78,4 +78,7 @@ fn main() {
     println!("follow: {:?}", grammar.follow);
     grammar.gen_lr0_automaton();
     println!("automaton: {:?}", grammar.lr0_automaton);
+    grammar.gen_slr_parse_table();
+    println!("parse_table: {:?}", grammar.slr_parse_table);
+
 }
diff --git a/src/cfg/ll_grammar.rs b/src/cfg/ll_grammar.rs
index 058f14a..12197e3 100644
--- a/src/cfg/ll_grammar.rs
+++ b/src/cfg/ll_grammar.rs
@@ -4,6 +4,9 @@ use super::{Grammar, Sentential};
 
 impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar<N, T> {
     pub fn gen_ll_parse_table(&mut self) -> bool {
+        if self.first.is_none() {
+            self.gen_first();
+        }
         if self.follow.is_none() {
             self.gen_follow();
         }
@@ -11,6 +14,9 @@ impl Grammar
             return false;
         }
         let mut conflict = false;
+        // leftmost derivation:
+        // when expanding N and the next input is T (None = end of input),
+        // then the n'th (usize) alternative of N can be used.
         let mut parse_table: HashMap<(N, Option<T>), usize> = HashMap::new();
         for (from, to) in self.rules.iter() {
             for (id, to) in to.iter().enumerate() {
diff --git a/src/cfg/lr0.rs b/src/cfg/lr0.rs
index cca8361..f02f7a6 100644
--- a/src/cfg/lr0.rs
+++ b/src/cfg/lr0.rs
@@ -4,64 +4,80 @@ use std::{
     rc::{Rc, Weak},
 };
 
-use super::{Grammar, Sentential};
+use super::{Grammar, RuleIndex, Sentential};
 
 pub type RL0Automaton<N, T> =
-    HashMap<Rc<LR0State<N, T>>, Vec<(Sentential<N, T>, Weak<LR0State<N, T>>)>>;
-
-pub type RL0Rule<N, T> = (N, Vec<Sentential<N, T>>, usize);
+    HashMap<Rc<LR0State<N>>, HashMap<Sentential<N, T>, Weak<LR0State<N>>>>;
 
 #[derive(Debug, Eq, PartialEq)]
-pub struct LR0State<N: Hash + Eq, T: Hash + Eq>(HashSet<RL0Rule<N, T>>);
+pub struct LR0State<N: Hash + Eq>(HashSet<(RuleIndex<N>, usize)>);
 
-impl<N: Hash + Eq + Clone, T: Hash + Eq + Clone> LR0State<N, T> {
-    pub fn next_kernel(&self, read: &Sentential<N, T>) -> Self {
-        let mut next_state: LR0State<N, T> = LR0State(HashSet::new());
-        for (from, to, dot) in self.0.iter() {
-            if to.get(*dot).map(|elem| *elem == *read).unwrap_or(false) {
-                next_state.0.insert((from.clone(), to.clone(), dot + 1));
-            }
-        }
-        next_state
-    }
-    pub fn readable(&self) -> HashSet<Sentential<N, T>> {
-        let mut readbles = HashSet::new();
-        for (_, to, dot) in self.0.iter() {
-            if let Some(l) = to.get(*dot) {
-                readbles.insert(l.clone());
-            }
-        }
-        readbles
-    }
-}
-impl<N: Hash + Eq + Ord, T: Hash + Eq + Ord> Hash for LR0State<N, T> {
+impl<N: Hash + Eq + Ord> Hash for LR0State<N> {
     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-        let mut a: Vec<&RL0Rule<N, T>> = self.0.iter().collect();
+        let mut a: Vec<&(RuleIndex<N>, usize)> = self.0.iter().collect();
         a.sort();
         for s in a.iter() {
             s.hash(state);
         }
     }
 }
 
+#[derive(Debug)]
+pub enum SlrAction<Shift, Reduce> {
+    Shift(Shift),
+    Reduce(Reduce),
+}
+
+pub type SlrActionTable<N, T> = HashMap<(usize, Option<T>), SlrAction<usize, RuleIndex<N>>>;
+pub type SlrGotoTable<N> = HashMap<(usize, N), usize>;
+pub type SlrParseTable<N, T> = (SlrActionTable<N, T>, SlrGotoTable<N>);
 impl<N, T> Grammar<N, T>
 where
     N: PartialEq + Eq + Hash + Clone + Ord,
     T: PartialEq + Eq + Hash + Clone + Ord,
 {
-    pub fn lr0_clozure(&self, mut state: LR0State<N, T>) -> LR0State<N, T> {
+    pub fn next_kernel(&self, state: &LR0State<N>, read: &Sentential<N, T>) -> LR0State<N> {
+        let mut next_state: LR0State<N> = LR0State(HashSet::new());
+        for ((from, rule_id), dot) in state.0.iter() {
+            let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else {
+                continue;
+            };
+            if to.get(*dot).map(|elem| *elem == *read).unwrap_or(false) {
+                next_state.0.insert(((from.clone(), *rule_id), dot + 1));
+            }
+        }
+        next_state
+    }
+    pub fn readable(&self, state: &LR0State<N>) -> HashSet<Sentential<N, T>> {
+        let mut readables = HashSet::new();
+        for ((from, rule_id), dot) in state.0.iter() {
+            let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else {
+                continue;
+            };
+            if let Some(l) = to.get(*dot) {
+                readables.insert(l.clone());
+            }
+        }
+        readables
+    }
+    pub fn lr0_clozure(&self, mut state: LR0State<N>) -> LR0State<N> {
         loop {
             let mut change = false;
             let nt = state
                 .0
                 .iter()
-                .filter_map(|(_, to, dot)| to.get(*dot).cloned())
+                .filter_map(|((from, rule_id), dot)| {
+                    self.rules
+                        .get(from)
+                        .and_then(|v| v.get(*rule_id))
+                        .and_then(|to| to.get(*dot).cloned())
+                })
                 .collect::<HashSet<_>>();
             for n in nt {
                 if let Sentential::NoneTerminal(n) = n {
                     if let Some(rule) = self.rules.get(&n) {
-                        for to in rule {
-                            change |= state.0.insert((n.clone(), to.clone(), 0));
+                        for to in 0..rule.len() {
+                            change |= state.0.insert(((n.clone(), to), 0));
                         }
                     }
                 }
@@ -75,34 +91,101 @@ where
 
     pub fn gen_lr0_automaton(&mut self) {
         let mut out: RL0Automaton<N, T> = HashMap::new();
+        // add state zero
         let mut start_state = LR0State(HashSet::new());
         if let Some(rule) = self.rules.get(&self.start) {
-            for to in rule {
-                start_state.0.insert((self.start.clone(), to.clone(), 0));
+            for to in 0..rule.len() {
+                start_state.0.insert(((self.start.clone(), to), 0));
             }
         }
+        // add the start state to the automaton and mark it as todo
        let rc = Rc::new(self.lr0_clozure(start_state));
         let mut todo = vec![Rc::downgrade(&rc)];
-        out.insert(rc, Vec::new());
-        while let Some(elem) = todo.pop() {
-            if let Some(elem) = elem.upgrade() {
+        out.insert(rc, HashMap::new());
+
+        // add states while marked states remain
+        while let Some(state) = todo.pop() {
+            if let Some(state) = state.upgrade() {
+                // new adjacency list
                 let mut vec = Vec::new();
-                for none_terminal in elem.readable() {
-                    let next_state = self.lr0_clozure(elem.next_kernel(&none_terminal));
+
+                // add the closures of the successor kernels for all readable symbols
+                for none_terminal in self.readable(&state) {
+                    let next_state = self.lr0_clozure(self.next_kernel(&state, &none_terminal));
                     let rc = Rc::new(next_state);
                     if let Some((k, _)) = out.get_key_value(&rc) {
                         vec.push((none_terminal, Rc::downgrade(k)));
                     } else {
                         todo.push(Rc::downgrade(&rc));
                         vec.push((none_terminal, Rc::downgrade(&rc)));
-                        out.insert(rc, Vec::new());
+                        out.insert(rc, HashMap::new());
                     }
                 }
-                out.entry(elem).and_modify(|elem| {
+                // write the adjacency list to the state;
+                // duplicates are not checked, because `readable` returns a set
+                out.entry(state).and_modify(|elem| {
                     elem.extend(vec);
                 });
             }
         }
         self.lr0_automaton = Some(out);
     }
+
+    pub fn gen_slr_parse_table(&mut self) {
+        if self.follow.is_none() {
+            self.gen_follow();
+        }
+        if self.lr0_automaton.is_none() {
+            self.gen_lr0_automaton();
+        }
+
+        let lr0_automaton = self.lr0_automaton.as_ref().unwrap();
+
+        let ids: HashMap<Rc<LR0State<N>>, usize> = HashMap::from_iter(
+            lr0_automaton
+                .iter()
+                .enumerate()
+                .map(|(id, (a, _))| (a.clone(), id)),
+        );
+
+        // None is the end-of-input marker $
+        let mut action: SlrActionTable<N, T> = HashMap::new();
+        let mut goto: SlrGotoTable<N> = HashMap::new();
+        for (state, to) in lr0_automaton {
+            let id = ids
+                .get(state)
+                .expect("Found broken state in slr parse table gen.");
+
+            for go in self.readable(state) {
+                let Some(to) = to.get(&go).and_then(|to| to.upgrade()) else {
+                    continue;
+                };
+
+                let to_id = ids
+                    .get(&to)
+                    .expect("Found broken state in slr parse table gen.");
+
+                match go {
+                    Sentential::Terminal(t) => {
+                        action.insert((*id, Some(t)), SlrAction::Shift(*to_id));
+                    }
+                    Sentential::NoneTerminal(nt) => {
+                        goto.insert((*id, nt), *to_id);
+                    }
+                };
+            }
+
+            for ((from, rule_id), dot) in state.0.iter() {
+                let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else {
+                    continue;
+                };
+                if to.len() <= *dot {
+                    for follow in self.follow(from) {
+                        action.insert((*id, follow), SlrAction::Reduce((from.clone(), *rule_id)));
+                    }
+                }
+            }
+        }
+        self.slr_parse_table = Some((action, goto));
+    }
 }
diff --git a/src/cfg/mod.rs b/src/cfg/mod.rs
index 20c7586..feb10d2 100644
--- a/src/cfg/mod.rs
+++ b/src/cfg/mod.rs
@@ -3,7 +3,7 @@ use std::{
     hash::Hash,
 };
 
-use lr0::RL0Automaton;
+use lr0::{RL0Automaton, SlrParseTable};
 
 pub mod ll_grammar;
 pub mod lr0;
@@ -33,6 +33,7 @@ macro_rules! cfg_grammar {
                 follow: None,
                 ll_parse_table: None,
                 lr0_automaton: None,
+                slr_parse_table: None,
             }
         }
     };
@@ -66,6 +67,8 @@ impl Ord for Sentential {
     }
 }
 
+pub type RuleIndex<N> = (N, usize);
+
 pub struct Grammar<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> {
     pub start: N,
     pub rules: HashMap<N, Vec<Vec<Sentential<N, T>>>>,
@@ -83,6 +86,9 @@ pub struct Grammar
 
     ///
     pub lr0_automaton: Option<RL0Automaton<N, T>>,
+
+    /// SLR(1) parse table: action and goto tables over LR(0) automaton state ids.
+    pub slr_parse_table: Option<SlrParseTable<N, T>>,
 }
 
 impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar<N, T> {
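
Note, not part of the patch: a minimal sketch of how a driver loop could consume tables shaped like SlrActionTable/SlrGotoTable above. All names below (slr_parse, Action, the N = &str / T = char instantiation) are illustrative stand-ins; unlike the patch's SlrAction, the reduce entry here also carries the length of the reduced right-hand side and an explicit Accept marker, because a standalone sketch cannot look either up in the grammar.

use std::collections::HashMap;

type State = usize;
type RuleIndex<N> = (N, usize);

enum Action<N> {
    Shift(State),
    // (rule, length of its right-hand side); the patch's Reduce carries only the RuleIndex
    Reduce(RuleIndex<N>, usize),
    Accept,
}

fn slr_parse(
    action: &HashMap<(State, Option<char>), Action<&'static str>>,
    goto: &HashMap<(State, &'static str), State>,
    input: &[char],
) -> bool {
    let mut stack: Vec<State> = vec![0]; // state 0 = closure of the start rules
    let mut pos = 0;
    loop {
        let state = *stack.last().unwrap();
        let lookahead = input.get(pos).copied(); // None = end of input ($)
        match action.get(&(state, lookahead)) {
            Some(Action::Shift(next)) => {
                // consume the terminal and push the successor state
                stack.push(*next);
                pos += 1;
            }
            Some(Action::Reduce((lhs, _rule), rhs_len)) => {
                // pop one state per symbol of the reduced right-hand side,
                // then continue with the goto entry for the produced non-terminal
                stack.truncate(stack.len() - rhs_len);
                match goto.get(&(*stack.last().unwrap(), *lhs)) {
                    Some(next) => stack.push(*next),
                    None => return false,
                }
            }
            Some(Action::Accept) => return true,
            None => return false, // no table entry: parse error
        }
    }
}

With the patch's own types, acceptance would instead be detected when the start rule is reduced with the end marker as lookahead, and the right-hand-side length would be read from Grammar::rules via the RuleIndex carried by SlrAction::Reduce.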