slr parse table

jusax23 2024-11-05 17:56:28 +01:00
parent 982d0767e4
commit 8d52c340bd
Signed by: jusax23
GPG key ID: 499E2AA870C1CD41
4 changed files with 139 additions and 41 deletions

View file

@@ -78,4 +78,7 @@ fn main() {
     println!("follow: {:?}", grammar.follow);
     grammar.gen_lr0_automaton();
     println!("automaton: {:?}", grammar.lr0_automaton);
+    grammar.gen_slr_parse_table();
+    println!("parse_table: {:?}", grammar.slr_parse_table);
 }
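
Aside (not part of the commit): slr_parse_table is an Option holding the (action, goto) pair defined in lr0.rs below, so once gen_slr_parse_table has run a caller could inspect it roughly as in this minimal sketch, reusing the grammar value from main above:

    if let Some((action, goto)) = &grammar.slr_parse_table {
        println!("{} action entries, {} goto entries", action.len(), goto.len());
    }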

View file

@@ -4,6 +4,9 @@ use super::{Grammar, Sentential};
 impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar<N, T> {
     pub fn gen_ll_parse_table(&mut self) -> bool {
+        if self.first.is_none() {
+            self.gen_first();
+        }
         if self.follow.is_none() {
             self.gen_follow();
         }
@@ -11,6 +14,9 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar
             return false;
         }
         let mut conflict = false;
+        // left derivation:
+        // when N is to be expanded and T is next (None = e),
+        // then the n'th (usize) variant of N can be used.
         let mut parse_table: HashMap<(N, Option<T>), usize> = HashMap::new();
         for (from, to) in self.rules.iter() {
             for (id, to) in to.iter().enumerate() {
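
Aside (not part of the commit): the comment added above describes how the LL parse table is read. As a toy illustration with made-up symbols (N = &str, T = char, nothing from the repository), a lookup works like this:

    use std::collections::HashMap;

    fn main() {
        // (nonterminal, lookahead) -> index of the alternative to expand;
        // a None lookahead stands for the end-of-input / epsilon case.
        let mut parse_table: HashMap<(&str, Option<char>), usize> = HashMap::new();
        parse_table.insert(("E", Some('(')), 0); // E -> ( E )  is alternative 0 of E
        parse_table.insert(("E", Some('x')), 1); // E -> x      is alternative 1 of E

        // With E to expand and '(' as the next symbol, use alternative 0.
        assert_eq!(parse_table.get(&("E", Some('('))), Some(&0));
    }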

View file

@@ -4,64 +4,80 @@ use std::{
     rc::{Rc, Weak},
 };
 
-use super::{Grammar, Sentential};
+use super::{Grammar, RuleIndex, Sentential};
 
 pub type RL0Automaton<N, T> =
-    HashMap<Rc<LR0State<N, T>>, Vec<(Sentential<N, T>, Weak<LR0State<N, T>>)>>;
+    HashMap<Rc<LR0State<N>>, HashMap<Sentential<N, T>, Weak<LR0State<N>>>>;
 
-pub type RL0Rule<N, T> = (N, Vec<Sentential<N, T>>, usize);
-
 #[derive(Debug, Eq, PartialEq)]
-pub struct LR0State<N: Hash + Eq, T: Hash + Eq>(HashSet<RL0Rule<N, T>>);
+pub struct LR0State<N: Hash + Eq>(HashSet<(RuleIndex<N>, usize)>);
 
-impl<N: Hash + Eq + Clone, T: Hash + Eq + Clone> LR0State<N, T> {
-    pub fn next_kernel(&self, read: &Sentential<N, T>) -> Self {
-        let mut next_state: LR0State<N, T> = LR0State(HashSet::new());
-        for (from, to, dot) in self.0.iter() {
-            if to.get(*dot).map(|elem| *elem == *read).unwrap_or(false) {
-                next_state.0.insert((from.clone(), to.clone(), dot + 1));
-            }
-        }
-        next_state
-    }
-    pub fn readable(&self) -> HashSet<Sentential<N, T>> {
-        let mut readbles = HashSet::new();
-        for (_, to, dot) in self.0.iter() {
-            if let Some(l) = to.get(*dot) {
-                readbles.insert(l.clone());
-            }
-        }
-        readbles
-    }
-}
-
-impl<N: Hash + Eq + Ord, T: Hash + Eq + Ord> Hash for LR0State<N, T> {
+impl<N: Hash + Eq + Ord> Hash for LR0State<N> {
     fn hash<H: Hasher>(&self, state: &mut H) {
-        let mut a: Vec<&RL0Rule<N, T>> = self.0.iter().collect();
+        let mut a: Vec<&(RuleIndex<N>, usize)> = self.0.iter().collect();
         a.sort();
         for s in a.iter() {
             s.hash(state);
         }
     }
 }
 
+#[derive(Debug)]
+pub enum SlrAction<Shift, Reduce> {
+    Shift(Shift),
+    Reduce(Reduce),
+}
+
+pub type SlrActionTable<N, T> = HashMap<(usize, Option<T>), SlrAction<usize, RuleIndex<N>>>;
+pub type SlrGotoTable<N> = HashMap<(usize, N), usize>;
+pub type SlrParseTable<N, T> = (SlrActionTable<N, T>, SlrGotoTable<N>);
+
 impl<N, T> Grammar<N, T>
 where
     N: PartialEq + Eq + Hash + Clone + Ord,
     T: PartialEq + Eq + Hash + Clone + Ord,
 {
-    pub fn lr0_clozure(&self, mut state: LR0State<N, T>) -> LR0State<N, T> {
+    pub fn next_kernel(&self, state: &LR0State<N>, read: &Sentential<N, T>) -> LR0State<N> {
+        let mut next_state: LR0State<N> = LR0State(HashSet::new());
+        for ((from, rule_id), dot) in state.0.iter() {
+            let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else {
+                continue;
+            };
+            if to.get(*dot).map(|elem| *elem == *read).unwrap_or(false) {
+                next_state.0.insert(((from.clone(), *rule_id), dot + 1));
+            }
+        }
+        next_state
+    }
+
+    pub fn readable(&self, state: &LR0State<N>) -> HashSet<Sentential<N, T>> {
+        let mut readables = HashSet::new();
+        for ((from, rule_id), dot) in state.0.iter() {
+            let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else {
+                continue;
+            };
+            if let Some(l) = to.get(*dot) {
+                readables.insert(l.clone());
+            }
+        }
+        readables
+    }
+
+    pub fn lr0_clozure(&self, mut state: LR0State<N>) -> LR0State<N> {
         loop {
             let mut change = false;
             let nt = state
                 .0
                 .iter()
-                .filter_map(|(_, to, dot)| to.get(*dot).cloned())
+                .filter_map(|((from, rule_id), dot)| {
+                    self.rules
+                        .get(from)
+                        .and_then(|v| v.get(*rule_id))
+                        .and_then(|to| to.get(*dot).cloned())
+                })
                 .collect::<Vec<_>>();
             for n in nt {
                 if let Sentential::NoneTerminal(n) = n {
                     if let Some(rule) = self.rules.get(&n) {
-                        for to in rule {
-                            change |= state.0.insert((n.clone(), to.clone(), 0));
+                        for to in 0..rule.len() {
+                            change |= state.0.insert(((n.clone(), to), 0));
                         }
                     }
                 }
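
Aside (not part of the commit): the refactor above replaces items of the form (N, right-hand side, dot) with ((N, rule index), dot), so an item now refers to a rule by position instead of carrying its body. A self-contained toy illustration of that representation (made-up grammar, plain tuples instead of the crate's generic types):

    use std::collections::HashSet;

    fn main() {
        // Item = ((nonterminal, rule index), dot). Suppose, hypothetically,
        // that rules["S"][0] is S -> a S b.
        let mut state: HashSet<((&str, usize), usize)> = HashSet::new();
        state.insert((("S", 0), 0)); // S -> . a S b

        // Reading the symbol after the dot advances the dot by one;
        // next_kernel does exactly this for every item of a state.
        let kernel: HashSet<((&str, usize), usize)> = state
            .iter()
            .map(|&((n, r), dot)| ((n, r), dot + 1))
            .collect();
        assert!(kernel.contains(&(("S", 0), 1))); // S -> a . S b
    }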
@@ -75,34 +91,101 @@ where
     pub fn gen_lr0_automaton(&mut self) {
         let mut out: RL0Automaton<N, T> = HashMap::new();
+        // add state zero
         let mut start_state = LR0State(HashSet::new());
         if let Some(rule) = self.rules.get(&self.start) {
-            for to in rule {
-                start_state.0.insert((self.start.clone(), to.clone(), 0));
+            for to in 0..rule.len() {
+                start_state.0.insert(((self.start.clone(), to), 0));
             }
         }
+        // add state to graph and mark it as todo
         let rc = Rc::new(self.lr0_clozure(start_state));
         let mut todo = vec![Rc::downgrade(&rc)];
-        out.insert(rc, Vec::new());
+        out.insert(rc, HashMap::new());
 
-        while let Some(elem) = todo.pop() {
-            if let Some(elem) = elem.upgrade() {
+        // add states while marked states exist
+        while let Some(state) = todo.pop() {
+            if let Some(state) = state.upgrade() {
+                // new adjacency list
                 let mut vec = Vec::new();
 
-                for none_terminal in elem.readable() {
-                    let next_state = self.lr0_clozure(elem.next_kernel(&none_terminal));
+                // add closures of the kernels of all readable symbols
+                for none_terminal in self.readable(&state) {
+                    let next_state = self.lr0_clozure(self.next_kernel(&state, &none_terminal));
                     let rc = Rc::new(next_state);
                     if let Some((k, _)) = out.get_key_value(&rc) {
                         vec.push((none_terminal, Rc::downgrade(k)));
                     } else {
                         todo.push(Rc::downgrade(&rc));
                         vec.push((none_terminal, Rc::downgrade(&rc)));
-                        out.insert(rc, Vec::new());
+                        out.insert(rc, HashMap::new());
                     }
                 }
-                out.entry(elem).and_modify(|elem| {
+                // write adjacency list to the state
+                // duplicates are not checked; not needed, because `readable` returns a set
+                out.entry(state).and_modify(|elem| {
                     elem.extend(vec);
                 });
             }
         }
         self.lr0_automaton = Some(out);
     }
+
+    pub fn gen_slr_parse_table(&mut self) {
+        if self.follow.is_none() {
+            self.gen_follow();
+        }
+        if self.lr0_automaton.is_none() {
+            self.gen_lr0_automaton();
+        }
+        let lr0_automaton = self.lr0_automaton.as_ref().unwrap();
+
+        let ids: HashMap<Rc<LR0State<N>>, usize> = HashMap::from_iter(
+            lr0_automaton
+                .iter()
+                .enumerate()
+                .map(|(id, (a, _))| (a.clone(), id)),
+        );
+
+        // None is $ (end of input)
+        let mut action: SlrActionTable<N, T> = HashMap::new();
+        let mut goto: SlrGotoTable<N> = HashMap::new();
+        for (state, to) in lr0_automaton {
+            let id = ids
+                .get(state)
+                .expect("Found broken state in slr parse table gen.");
+            for go in self.readable(state) {
+                let Some(to) = to.get(&go).and_then(|to| to.upgrade()) else {
+                    continue;
+                };
+                let to_id = ids
+                    .get(&to)
+                    .expect("Found broken state in slr parse table gen.");
+                match go {
+                    Sentential::Terminal(t) => {
+                        action.insert((*id, Some(t)), SlrAction::Shift(*to_id));
+                    }
+                    Sentential::NoneTerminal(nt) => {
+                        goto.insert((*id, nt), *to_id);
+                    }
+                };
+            }
+            for ((from, rule_id), dot) in state.0.iter() {
+                let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else {
+                    continue;
+                };
+                if to.len() <= *dot {
+                    for follow in self.follow(from) {
+                        action.insert((*id, follow), SlrAction::Reduce((from.clone(), *rule_id)));
+                    }
+                }
+            }
+        }
+        self.slr_parse_table = Some((action, goto));
+    }
 }
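
Aside (not part of the commit): tables shaped like SlrActionTable and SlrGotoTable are normally consumed by a shift/reduce driver. The following is a hypothetical, simplified sketch, specialised to &'static str nonterminals and char terminals; rule_lens (length of each rule's right-hand side), start, and start_state (the id of the automaton's initial state, which in this code depends on HashMap iteration order) are assumed inputs, and acceptance is approximated by a reduce of the start symbol at end of input:

    use std::collections::HashMap;

    // Local copy of the action shape from lr0.rs, specialised for this sketch.
    enum SlrAction<S, R> {
        Shift(S),
        Reduce(R),
    }
    type Rule = (&'static str, usize); // mirrors RuleIndex<N>

    fn slr_parse(
        action: &HashMap<(usize, Option<char>), SlrAction<usize, Rule>>,
        goto: &HashMap<(usize, &'static str), usize>,
        rule_lens: &HashMap<Rule, usize>, // assumed: |right-hand side| per rule
        start: &'static str,
        start_state: usize,
        input: &[char],
    ) -> bool {
        let mut states = vec![start_state];
        let mut pos = 0;
        loop {
            let look = input.get(pos).copied(); // None plays the role of $
            match action.get(&(*states.last().unwrap(), look)) {
                Some(SlrAction::Shift(next)) => {
                    states.push(*next);
                    pos += 1;
                }
                Some(SlrAction::Reduce(rule)) => {
                    // pop one state per symbol of the reduced right-hand side
                    for _ in 0..rule_lens[rule] {
                        states.pop();
                    }
                    // crude accept check: start symbol reduced with the input consumed
                    if rule.0 == start && states == [start_state] && look.is_none() {
                        return true;
                    }
                    match goto.get(&(*states.last().unwrap(), rule.0)) {
                        Some(next) => states.push(*next),
                        None => return false,
                    }
                }
                None => return false, // no table entry: syntax error
            }
        }
    }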

View file

@@ -3,7 +3,7 @@ use std::{
     hash::Hash,
 };
 
-use lr0::RL0Automaton;
+use lr0::{RL0Automaton, SlrParseTable};
 
 pub mod ll_grammar;
 pub mod lr0;
@@ -33,6 +33,7 @@ macro_rules! cfg_grammar {
                 follow: None,
                 ll_parse_table: None,
                 lr0_automaton: None,
+                slr_parse_table: None,
            }
        }
    };
@@ -66,6 +67,8 @@ impl<T: Ord, N: Ord> Ord for Sentential<N, T> {
     }
 }
 
+pub type RuleIndex<N> = (N, usize);
+
 pub struct Grammar<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> {
     pub start: N,
     pub rules: HashMap<N, Vec<Vec<Sentential<N, T>>>>,
@@ -83,6 +86,9 @@ pub struct Grammar<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash +
     /// - key: states
     /// - value: list with read symbol and linked node.
     pub lr0_automaton: Option<RL0Automaton<N, T>>,
+    /// SLR(1) parse table: (action table, goto table).
+    pub slr_parse_table: Option<SlrParseTable<N, T>>,
 }
 
 impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar<N, T> {
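
Aside (not part of the commit): the new RuleIndex<N> alias addresses one alternative inside rules, which is what both the LR(0) items and the Reduce actions store. A toy illustration with made-up symbols (plain strings standing in for Sentential):

    use std::collections::HashMap;

    fn main() {
        // rules[nonterminal] is the list of alternatives for that nonterminal.
        let mut rules: HashMap<&str, Vec<Vec<&str>>> = HashMap::new();
        rules.insert("E", vec![vec!["(", "E", ")"], vec!["x"]]);

        // A RuleIndex-like pair (nonterminal, alternative index) selects one of them.
        let rule_index: (&str, usize) = ("E", 1);
        assert_eq!(rules[rule_index.0][rule_index.1], vec!["x"]);
    }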