From a59ac9360cb44c7f8c9f5047b028df1cc9359da5 Mon Sep 17 00:00:00 2001 From: jusax23 Date: Sat, 9 Nov 2024 16:18:15 +0100 Subject: [PATCH] slr parser --- src/bin/book.rs | 11 +- src/cfg/ll_grammar.rs | 45 ++------ src/cfg/{lr0.rs => lr0_grammar.rs} | 172 +++++++++++++++++++++++++++-- src/cfg/mod.rs | 54 ++++++++- src/lib.rs | 2 +- 5 files changed, 230 insertions(+), 54 deletions(-) rename src/cfg/{lr0.rs => lr0_grammar.rs} (50%) diff --git a/src/bin/book.rs b/src/bin/book.rs index b553b26..547f9d5 100644 --- a/src/bin/book.rs +++ b/src/bin/book.rs @@ -69,8 +69,8 @@ fn grammer() -> Grammar { } fn main() { - //let code = String::from("a = b()+c+(d+e())"); - //let mut m = Scanner::::new(code).with_skipping(Tokens::WhiteSpace); + let code = String::from("a = b(f)+c+(d+e(f))"); + let mut m = Scanner::::new(code).with_skipping(Tokens::WhiteSpace); let mut grammar = grammer(); grammar.gen_follow(); @@ -78,7 +78,10 @@ fn main() { println!("follow: {:?}", grammar.follow); grammar.gen_lr0_automaton(); println!("automaton: {:?}", grammar.lr0_automaton); - grammar.gen_slr_parse_table(); + println!("conflict: {}", grammar.gen_slr_parse_table()); println!("parse_table: {:?}", grammar.slr_parse_table); - + println!( + "parsed: {:?}", + grammar.slr_parser(&mut m.iter_mut()).parse() + ) } diff --git a/src/cfg/ll_grammar.rs b/src/cfg/ll_grammar.rs index 12197e3..935423b 100644 --- a/src/cfg/ll_grammar.rs +++ b/src/cfg/ll_grammar.rs @@ -1,6 +1,6 @@ use std::{collections::HashMap, fmt::Debug, hash::Hash}; -use super::{Grammar, Sentential}; +use super::{Grammar, NodeChild, ParseTree, Sentential}; impl Grammar { pub fn gen_ll_parse_table(&mut self) -> bool { @@ -15,7 +15,7 @@ impl Grammar } let mut conflict = false; // left derivation - // when hidding N and T is next (None = e) + // when hiding N and T is next (None = e) // then the n'th (usize) variant of N can be used. let mut parse_table: HashMap<(N, Option), usize> = HashMap::new(); for (from, to) in self.rules.iter() { @@ -85,23 +85,21 @@ impl Grammar } } -/// Just checks a program. Does not generates output. -pub struct LLTabelParser< - 'a, +pub struct LLTabelParser<'a, N, T, S> +where N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone, S: Into + PartialEq + Clone, -> { +{ grammar: &'a Grammar, input: &'a mut dyn Iterator>, } -impl< - 'a, - N: PartialEq + Eq + Hash + Clone + Debug, - T: PartialEq + Eq + Hash + Clone + Debug, - S: Into + PartialEq + Clone + Debug, - > LLTabelParser<'a, N, T, S> +impl<'a, N, T, S> LLTabelParser<'a, N, T, S> +where + N: PartialEq + Eq + Hash + Clone + Debug, + T: PartialEq + Eq + Hash + Clone + Debug, + S: Into + PartialEq + Clone + Debug, { pub fn parse(&mut self) -> Result, String> { // stack of table driven parser @@ -205,29 +203,6 @@ pub trait Skippable { false } } - -#[derive(Debug, Clone)] -pub enum NodeChild { - Child(ParseTree), - Data(S), -} - -#[derive(Debug, Clone)] -#[allow(dead_code)] -pub struct ParseTree { - pub rule: Option<(N, usize)>, - pub childs: Vec>, -} - -impl ParseTree { - pub fn new(rule: Option<(N, usize)>) -> Self { - Self { - rule, - childs: Vec::new(), - } - } -} - impl ParseTree { /// cleanup the parse tree /// does not work on a subtree diff --git a/src/cfg/lr0.rs b/src/cfg/lr0_grammar.rs similarity index 50% rename from src/cfg/lr0.rs rename to src/cfg/lr0_grammar.rs index f02f7a6..b8a1b85 100644 --- a/src/cfg/lr0.rs +++ b/src/cfg/lr0_grammar.rs @@ -1,16 +1,18 @@ use std::{ collections::{HashMap, HashSet}, + fmt::Debug, hash::{Hash, Hasher}, rc::{Rc, Weak}, }; -use super::{Grammar, RuleIndex, Sentential}; +use super::{Grammar, NodeChild, ParseTree, RuleIndex, Sentential}; pub type RL0Automaton = HashMap>, HashMap, Weak>>>; #[derive(Debug, Eq, PartialEq)] pub struct LR0State(HashSet<(RuleIndex, usize)>); +pub type LR0Start = Weak>; impl Hash for LR0State { fn hash(&self, state: &mut H) { @@ -27,9 +29,10 @@ pub enum SlrAction { Reduce(Reduce), } +/// None is $ pub type SlrActionTable = HashMap<(usize, Option), SlrAction>>; pub type SlrGotoTable = HashMap<(usize, N), usize>; -pub type SlrParseTable = (SlrActionTable, SlrGotoTable); +pub type SlrParseTable = (SlrActionTable, SlrGotoTable, usize); impl Grammar where @@ -100,6 +103,7 @@ where } // add state to graph and mark for todo let rc = Rc::new(self.lr0_clozure(start_state)); + let start = Rc::downgrade(&rc); let mut todo = vec![Rc::downgrade(&rc)]; out.insert(rc, HashMap::new()); @@ -128,10 +132,10 @@ where }); } } - self.lr0_automaton = Some(out); + self.lr0_automaton = Some((out, start)); } - pub fn gen_slr_parse_table(&mut self) { + pub fn gen_slr_parse_table(&mut self) -> bool { if self.follow.is_none() { self.gen_follow(); } @@ -143,15 +147,21 @@ where let ids: HashMap>, usize> = HashMap::from_iter( lr0_automaton + .0 .iter() .enumerate() .map(|(id, (a, _))| (a.clone(), id)), ); - // none is $ + let start = *lr0_automaton + .1 + .upgrade() + .and_then(|rc| ids.get(&rc)) + .expect("Found broken state in slr parse table gen."); let mut action: SlrActionTable = HashMap::new(); let mut goto: SlrGotoTable = HashMap::new(); - for (state, to) in lr0_automaton { + let mut conflict = false; + for (state, to) in lr0_automaton.0.iter() { let id = ids .get(state) .expect("Found broken state in slr parse table gen."); @@ -167,10 +177,12 @@ where match go { Sentential::Terminal(t) => { - action.insert((*id, Some(t)), SlrAction::Shift(*to_id)); + conflict |= action + .insert((*id, Some(t)), SlrAction::Shift(*to_id)) + .is_some(); } Sentential::NoneTerminal(nt) => { - goto.insert((*id, nt), *to_id); + conflict |= goto.insert((*id, nt), *to_id).is_some(); } }; } @@ -181,11 +193,151 @@ where }; if to.len() <= *dot { for follow in self.follow(from) { - action.insert((*id, follow), SlrAction::Reduce((from.clone(), *rule_id))); + conflict |= action + .insert((*id, follow), SlrAction::Reduce((from.clone(), *rule_id))) + .is_some(); } } } } - self.slr_parse_table = Some((action, goto)); + self.slr_parse_table = Some((action, goto, start)); + conflict + } + + pub fn slr_parser<'a, S: Into + PartialEq + Clone>( + &'a self, + iter: &'a mut dyn Iterator>, + ) -> SLRTabelParser { + assert!( + self.slr_parse_table.is_some(), + "Please call gen_slr_parse_table before this!" + ); + SLRTabelParser { + input: iter, + grammar: self, + } + } + pub fn get_slr_action( + &self, + state: &usize, + next: &Option, + ) -> Option<&SlrAction> { + assert!( + self.slr_parse_table.is_some(), + "Please call gen_slr_parse_table before this!" + ); + self.slr_parse_table + .as_ref() + .unwrap() + .0 + .get(&(*state, next.clone())) + } + pub fn get_slr_goto(&self, state: &usize, next: &N) -> Option<&usize> { + assert!( + self.slr_parse_table.is_some(), + "Please call gen_slr_parse_table before this!" + ); + self.slr_parse_table + .as_ref() + .unwrap() + .1 + .get(&(*state, next.clone())) + } +} + +pub struct SLRTabelParser<'a, N, T, S> +where + N: PartialEq + Eq + Hash + Clone, + T: PartialEq + Eq + Hash + Clone, + S: Into + PartialEq + Clone, +{ + grammar: &'a Grammar, + input: &'a mut dyn Iterator>, +} + +impl<'a, N, T, S> SLRTabelParser<'a, N, T, S> +where + N: PartialEq + Eq + Hash + Clone + Debug + Ord, + T: PartialEq + Eq + Hash + Clone + Debug + Ord, + S: Into + PartialEq + Clone + Debug, +{ + pub fn parse(&mut self) -> Result, String> { + let mut stack: Vec<(NodeChild, usize)> = Vec::new(); + + let mut next = match self.input.next() { + Some(Ok(d)) => Some(d), + Some(Err(err)) => return Err(format!("Invalid token: {}", err)), + None => None, + }; + let mut first = true; + loop { + let state = if first { + self.grammar.slr_parse_table.as_ref().unwrap().2 + } else { + let Some(state) = stack.last() else { + return Err("Unexpected EOS".into()); + }; + state.1 + }; + first = false; + let Some(current_state) = self + .grammar + .get_slr_action(&state, &next.as_ref().map(|f| f.clone().into())) + else { + return Err(format!( + "Unexpected Token: {}", + next.map(|f| format!("{f:?}")) + .unwrap_or("end of file".to_string()) + )); + }; + + match current_state { + SlrAction::Shift(to) => { + stack.push((NodeChild::Data(next.expect("Can not shift on EOF.")), *to)); + next = match self.input.next() { + Some(Ok(d)) => Some(d), + Some(Err(err)) => return Err(format!("Invalid token: {}", err)), + None => None, + }; + } + SlrAction::Reduce((rule, ind)) => { + let Some(r) = self.grammar.rules.get(rule).and_then(|e| e.get(*ind)) else { + return Err(format!("Invalid rule: {:?}-{}", rule, ind)); + }; + let mut childs = Vec::new(); + for elem in r.iter().rev() { + let Some(last) = stack.pop() else { + return Err("Unexpected EOF".into()); + }; + if last.0 != *elem { + return Err(format!("Broken Stack: {:?} and {:?}", last.0, elem)); + } + childs.push(last); + } + if self.grammar.start == *rule { + return Ok(ParseTree { + rule: Some((rule.clone(), *ind)), + childs: childs.into_iter().rev().map(|(a, _)| a.clone()).collect(), + }); + } + let Some(state) = stack.last() else { + return Err("Unexpected EOS".into()); + }; + let Some(next) = self.grammar.get_slr_goto(&state.1, rule) else { + return Err(format!( + "Invalid reduction: state: {} rule: {:?}", + state.1, rule + )); + }; + stack.push(( + NodeChild::Child(ParseTree { + rule: Some((rule.clone(), *ind)), + childs: childs.into_iter().rev().map(|(d, _)| d).collect(), + }), + *next, + )); + } + } + } } } diff --git a/src/cfg/mod.rs b/src/cfg/mod.rs index feb10d2..32b9323 100644 --- a/src/cfg/mod.rs +++ b/src/cfg/mod.rs @@ -1,12 +1,13 @@ use std::{ collections::{HashMap, HashSet}, + fmt::Debug, hash::Hash, }; -use lr0::{RL0Automaton, SlrParseTable}; +use lr0_grammar::{LR0Start, RL0Automaton, SlrParseTable}; pub mod ll_grammar; -pub mod lr0; +pub mod lr0_grammar; #[macro_export] macro_rules! cfg_grammar { @@ -33,7 +34,7 @@ macro_rules! cfg_grammar { follow: None, ll_parse_table: None, lr0_automaton: None, - slr_parse_table: None, + slr_parse_table: None, } } }; @@ -85,7 +86,7 @@ pub struct Grammar>, + pub lr0_automaton: Option<(RL0Automaton, LR0Start)>, /// pub slr_parse_table: Option>, @@ -278,3 +279,48 @@ impl Grammar .unwrap_or(HashSet::new()) } } + +#[derive(Debug, Clone)] +pub enum NodeChild { + Child(ParseTree), + Data(S), +} + +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct ParseTree { + pub rule: Option>, + pub childs: Vec>, +} + +impl ParseTree { + pub fn new(rule: Option>) -> Self { + Self { + rule, + childs: Vec::new(), + } + } +} + +impl PartialEq> for NodeChild +where + N: PartialEq + Eq, + T: PartialEq + Eq, + S: PartialEq, +{ + fn eq(&self, other: &Sentential) -> bool { + use NodeChild::*; + use Sentential::*; + match (self, other) { + (Data(s), Terminal(t)) if *s == *t => true, + ( + Child(ParseTree { + rule: Some((rule, _)), + childs: _, + }), + NoneTerminal(nt), + ) if *rule == *nt => true, + _ => false, + } + } +} diff --git a/src/lib.rs b/src/lib.rs index aca18f7..f31c4a8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,7 @@ pub mod scanner; pub mod prelude { pub use crate::cfg::*; pub use crate::cfg::ll_grammar::*; - pub use crate::cfg::lr0::*; + pub use crate::cfg::lr0_grammar::*; pub use crate::cfg_grammar; pub use crate::double_enum; pub use crate::scanner::*;