From 3339bf8fb03ac7bec68058c327d008369f42ad4f Mon Sep 17 00:00:00 2001 From: jusax23 Date: Sat, 9 Nov 2024 20:52:24 +0100 Subject: [PATCH] lr1 parser, unstable --- Cargo.toml | 3 + src/bin/g10.rs | 87 ++++++++++++ src/cfg/ll_grammar.rs | 9 +- src/cfg/lr0_grammar.rs | 171 ++++------------------- src/cfg/lr1_grammar.rs | 299 +++++++++++++++++++++++++++++++++++++++++ src/cfg/lr_parser.rs | 133 ++++++++++++++++++ src/cfg/mod.rs | 96 ++++++++++--- src/main.rs | 12 +- 8 files changed, 632 insertions(+), 178 deletions(-) create mode 100644 src/bin/g10.rs create mode 100644 src/cfg/lr1_grammar.rs create mode 100644 src/cfg/lr_parser.rs diff --git a/Cargo.toml b/Cargo.toml index c4f6532..0a34098 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,3 +13,6 @@ path = "src/main.rs" [[bin]] name = "book" + +[[bin]] +name = "g10" diff --git a/src/bin/g10.rs b/src/bin/g10.rs new file mode 100644 index 0000000..5b247b1 --- /dev/null +++ b/src/bin/g10.rs @@ -0,0 +1,87 @@ +use rcompiler::prelude::*; +use regex::Match; +use std::collections::HashMap; + +double_enum!( + BareTokens, Tokens { + WhiteSpace, + Assign, + Add, + LBrace, + RBrace, + Ident(String), + } +); + +token_scanner!( + Tokens, + r"^\s|\t|\n|\r" : |_,_| { + Some(WhiteSpace) + } + r"^\+" : |_,_| { + Some(Add) + } + r"^=" : |_,_| { + Some(Assign) + } + r"^\(" : |_,_| { + Some(LBrace) + } + r"^\)" : |_,_| { + Some(RBrace) + } + r"^[a-zA-Z](\w)*" : |_, m: Match<'_>| { + Some(Ident(String::from(m.as_str()))) + } +); + +#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)] +enum NoneTerminals { + E, + P, + T, +} + +impl From for Sentential { + fn from(value: NoneTerminals) -> Self { + Sentential::NoneTerminal(value) + } +} + +impl From for Sentential { + fn from(value: BareTokens) -> Self { + Sentential::Terminal(value) + } +} + +fn grammer() -> Grammar { + use BareTokens::*; + use NoneTerminals::*; + cfg_grammar![ + start: P; + P -> E; + E -> E, Add, T; + E -> T; + T -> Ident, LBrace, E, RBrace; + T -> Ident; + ] +} + +fn main() { + let code = String::from("b(f)+c"); + let mut m = Scanner::::new(code).with_skipping(Tokens::WhiteSpace); + + let mut grammar = grammer(); + grammar.gen_follow(); + println!("first: {:?}", grammar.first); + println!("follow: {:?}", grammar.follow); + grammar.gen_lr1_automaton(); + println!("automaton: {:?}", grammar.lr1_automaton); + println!("conflict: {}", grammar.gen_slr_parse_table()); + println!("conflict: {}", grammar.gen_lr1_parse_table()); + println!("parse_table: {:?}", grammar.lr1_automaton); + println!( + "parsed: {:?}", + grammar.slr_parser(&mut m.iter_mut()).parse() + ) +} diff --git a/src/cfg/ll_grammar.rs b/src/cfg/ll_grammar.rs index 935423b..cd63a1e 100644 --- a/src/cfg/ll_grammar.rs +++ b/src/cfg/ll_grammar.rs @@ -2,7 +2,7 @@ use std::{collections::HashMap, fmt::Debug, hash::Hash}; use super::{Grammar, NodeChild, ParseTree, Sentential}; -impl Grammar { +impl Grammar { pub fn gen_ll_parse_table(&mut self) -> bool { if self.first.is_none() { self.gen_first(); @@ -10,9 +10,6 @@ impl Grammar if self.follow.is_none() { self.gen_follow(); } - if self.ll_parse_table.is_some() { - return false; - } let mut conflict = false; // left derivation // when hiding N and T is next (None = e) @@ -88,7 +85,7 @@ impl Grammar pub struct LLTabelParser<'a, N, T, S> where N: PartialEq + Eq + Hash + Clone, - T: PartialEq + Eq + Hash + Clone, + T: PartialEq + Eq + Hash + Clone + Ord, S: Into + PartialEq + Clone, { grammar: &'a Grammar, @@ -98,7 +95,7 @@ where impl<'a, N, T, S> LLTabelParser<'a, N, T, S> where N: PartialEq + Eq + Hash + Clone + Debug, - T: PartialEq + Eq + Hash + Clone + Debug, + T: PartialEq + Eq + Hash + Clone + Debug + Ord, S: Into + PartialEq + Clone + Debug, { pub fn parse(&mut self) -> Result, String> { diff --git a/src/cfg/lr0_grammar.rs b/src/cfg/lr0_grammar.rs index b8a1b85..17e47ec 100644 --- a/src/cfg/lr0_grammar.rs +++ b/src/cfg/lr0_grammar.rs @@ -5,13 +5,20 @@ use std::{ rc::{Rc, Weak}, }; -use super::{Grammar, NodeChild, ParseTree, RuleIndex, Sentential}; +use super::{ + lr_parser::{LRTabelParser, LrAction, LrActionTable, LrGotoTable}, + Grammar, RuleIndex, Sentential, +}; pub type RL0Automaton = HashMap>, HashMap, Weak>>>; #[derive(Debug, Eq, PartialEq)] +/// general state of lr0 automaton +/// rule and reading point pub struct LR0State(HashSet<(RuleIndex, usize)>); + +/// start state of lr0 automaton pub type LR0Start = Weak>; impl Hash for LR0State { @@ -23,23 +30,13 @@ impl Hash for LR0State { } } } -#[derive(Debug)] -pub enum SlrAction { - Shift(Shift), - Reduce(Reduce), -} - -/// None is $ -pub type SlrActionTable = HashMap<(usize, Option), SlrAction>>; -pub type SlrGotoTable = HashMap<(usize, N), usize>; -pub type SlrParseTable = (SlrActionTable, SlrGotoTable, usize); impl Grammar where N: PartialEq + Eq + Hash + Clone + Ord, T: PartialEq + Eq + Hash + Clone + Ord, { - pub fn next_kernel(&self, state: &LR0State, read: &Sentential) -> LR0State { + pub fn lr0_next_kernel(&self, state: &LR0State, read: &Sentential) -> LR0State { let mut next_state: LR0State = LR0State(HashSet::new()); for ((from, rule_id), dot) in state.0.iter() { let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else { @@ -51,7 +48,7 @@ where } next_state } - pub fn readable(&self, state: &LR0State) -> HashSet> { + pub fn lr0_readable(&self, state: &LR0State) -> HashSet> { let mut readables = HashSet::new(); for ((from, rule_id), dot) in state.0.iter() { let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else { @@ -114,8 +111,8 @@ where let mut vec = Vec::new(); // add clozures from the kernels from all readable symbols - for none_terminal in self.readable(&state) { - let next_state = self.lr0_clozure(self.next_kernel(&state, &none_terminal)); + for none_terminal in self.lr0_readable(&state) { + let next_state = self.lr0_clozure(self.lr0_next_kernel(&state, &none_terminal)); let rc = Rc::new(next_state); if let Some((k, _)) = out.get_key_value(&rc) { vec.push((none_terminal, Rc::downgrade(k))); @@ -136,6 +133,9 @@ where } pub fn gen_slr_parse_table(&mut self) -> bool { + if self.first.is_none() { + self.gen_first(); + } if self.follow.is_none() { self.gen_follow(); } @@ -158,15 +158,15 @@ where .upgrade() .and_then(|rc| ids.get(&rc)) .expect("Found broken state in slr parse table gen."); - let mut action: SlrActionTable = HashMap::new(); - let mut goto: SlrGotoTable = HashMap::new(); + let mut action: LrActionTable = HashMap::new(); + let mut goto: LrGotoTable = HashMap::new(); let mut conflict = false; for (state, to) in lr0_automaton.0.iter() { let id = ids .get(state) .expect("Found broken state in slr parse table gen."); - for go in self.readable(state) { + for go in self.lr0_readable(state) { let Some(to) = to.get(&go).and_then(|to| to.upgrade()) else { continue; }; @@ -178,7 +178,7 @@ where match go { Sentential::Terminal(t) => { conflict |= action - .insert((*id, Some(t)), SlrAction::Shift(*to_id)) + .insert((*id, Some(t)), LrAction::Shift(*to_id)) .is_some(); } Sentential::NoneTerminal(nt) => { @@ -194,7 +194,7 @@ where if to.len() <= *dot { for follow in self.follow(from) { conflict |= action - .insert((*id, follow), SlrAction::Reduce((from.clone(), *rule_id))) + .insert((*id, follow), LrAction::Reduce((from.clone(), *rule_id))) .is_some(); } } @@ -207,137 +207,16 @@ where pub fn slr_parser<'a, S: Into + PartialEq + Clone>( &'a self, iter: &'a mut dyn Iterator>, - ) -> SLRTabelParser { + ) -> LRTabelParser { assert!( self.slr_parse_table.is_some(), "Please call gen_slr_parse_table before this!" ); - SLRTabelParser { + LRTabelParser { input: iter, - grammar: self, - } - } - pub fn get_slr_action( - &self, - state: &usize, - next: &Option, - ) -> Option<&SlrAction> { - assert!( - self.slr_parse_table.is_some(), - "Please call gen_slr_parse_table before this!" - ); - self.slr_parse_table - .as_ref() - .unwrap() - .0 - .get(&(*state, next.clone())) - } - pub fn get_slr_goto(&self, state: &usize, next: &N) -> Option<&usize> { - assert!( - self.slr_parse_table.is_some(), - "Please call gen_slr_parse_table before this!" - ); - self.slr_parse_table - .as_ref() - .unwrap() - .1 - .get(&(*state, next.clone())) - } -} - -pub struct SLRTabelParser<'a, N, T, S> -where - N: PartialEq + Eq + Hash + Clone, - T: PartialEq + Eq + Hash + Clone, - S: Into + PartialEq + Clone, -{ - grammar: &'a Grammar, - input: &'a mut dyn Iterator>, -} - -impl<'a, N, T, S> SLRTabelParser<'a, N, T, S> -where - N: PartialEq + Eq + Hash + Clone + Debug + Ord, - T: PartialEq + Eq + Hash + Clone + Debug + Ord, - S: Into + PartialEq + Clone + Debug, -{ - pub fn parse(&mut self) -> Result, String> { - let mut stack: Vec<(NodeChild, usize)> = Vec::new(); - - let mut next = match self.input.next() { - Some(Ok(d)) => Some(d), - Some(Err(err)) => return Err(format!("Invalid token: {}", err)), - None => None, - }; - let mut first = true; - loop { - let state = if first { - self.grammar.slr_parse_table.as_ref().unwrap().2 - } else { - let Some(state) = stack.last() else { - return Err("Unexpected EOS".into()); - }; - state.1 - }; - first = false; - let Some(current_state) = self - .grammar - .get_slr_action(&state, &next.as_ref().map(|f| f.clone().into())) - else { - return Err(format!( - "Unexpected Token: {}", - next.map(|f| format!("{f:?}")) - .unwrap_or("end of file".to_string()) - )); - }; - - match current_state { - SlrAction::Shift(to) => { - stack.push((NodeChild::Data(next.expect("Can not shift on EOF.")), *to)); - next = match self.input.next() { - Some(Ok(d)) => Some(d), - Some(Err(err)) => return Err(format!("Invalid token: {}", err)), - None => None, - }; - } - SlrAction::Reduce((rule, ind)) => { - let Some(r) = self.grammar.rules.get(rule).and_then(|e| e.get(*ind)) else { - return Err(format!("Invalid rule: {:?}-{}", rule, ind)); - }; - let mut childs = Vec::new(); - for elem in r.iter().rev() { - let Some(last) = stack.pop() else { - return Err("Unexpected EOF".into()); - }; - if last.0 != *elem { - return Err(format!("Broken Stack: {:?} and {:?}", last.0, elem)); - } - childs.push(last); - } - if self.grammar.start == *rule { - return Ok(ParseTree { - rule: Some((rule.clone(), *ind)), - childs: childs.into_iter().rev().map(|(a, _)| a.clone()).collect(), - }); - } - let Some(state) = stack.last() else { - return Err("Unexpected EOS".into()); - }; - let Some(next) = self.grammar.get_slr_goto(&state.1, rule) else { - return Err(format!( - "Invalid reduction: state: {} rule: {:?}", - state.1, rule - )); - }; - stack.push(( - NodeChild::Child(ParseTree { - rule: Some((rule.clone(), *ind)), - childs: childs.into_iter().rev().map(|(d, _)| d).collect(), - }), - *next, - )); - } - } + start_rule: &self.start, + rules: &self.rules, + parse_table: self.slr_parse_table.as_ref().unwrap(), } } } diff --git a/src/cfg/lr1_grammar.rs b/src/cfg/lr1_grammar.rs new file mode 100644 index 0000000..c1ba1d6 --- /dev/null +++ b/src/cfg/lr1_grammar.rs @@ -0,0 +1,299 @@ +use super::{ + lr_parser::{LRTabelParser, LrAction, LrActionTable, LrGotoTable}, + Grammar, RuleIndex, Sentential, +}; +use std::{ + cmp::Ordering, + collections::{HashMap, HashSet}, + hash::{Hash, Hasher}, + rc::{Rc, Weak}, +}; + +/// lookahead table of lr1 state, None is $ +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct LR1Lookahead(HashSet>); + +impl Default for LR1Lookahead { + fn default() -> Self { + Self(HashSet::new()) + } +} + +impl Ord for LR1Lookahead { + fn cmp(&self, other: &Self) -> Ordering { + let mut a: Vec<&Option> = self.0.iter().collect(); + let mut b: Vec<&Option> = other.0.iter().collect(); + a.sort(); + b.sort(); + a.cmp(&b) + } +} + +impl PartialOrd for LR1Lookahead { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Hash for LR1Lookahead { + fn hash(&self, state: &mut H) { + let mut a: Vec<&Option> = self.0.iter().collect(); + a.sort(); + for s in a.iter() { + s.hash(state); + } + } +} + +#[derive(Debug, Eq, PartialEq)] +/// general state of lr1 automaton +pub struct LR1State( + HashSet<(RuleIndex, LR1Lookahead, usize)>, +); + +impl Hash for LR1State { + fn hash(&self, state: &mut H) { + let mut a: Vec<&(RuleIndex, LR1Lookahead, usize)> = self.0.iter().collect(); + a.sort(); + for s in a.iter() { + s.hash(state); + } + } +} + +/// start state of lr1 automaton +pub type LR1Start = Weak>; + +pub type RL1Automaton = + HashMap>, HashMap, Weak>>>; + +impl Grammar +where + N: PartialEq + Eq + Hash + Clone + Ord, + T: PartialEq + Eq + Hash + Clone + Ord, +{ + pub fn lr1_next_kernel( + &self, + state: &LR1State, + read: &Sentential, + ) -> LR1State { + let mut next_state: LR1State = LR1State(HashSet::new()); + for ((from, rule_id), lookahead, dot) in state.0.iter() { + let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else { + continue; + }; + if to.get(*dot).map(|elem| *elem == *read).unwrap_or(false) { + next_state + .0 + .insert(((from.clone(), *rule_id), lookahead.clone(), dot + 1)); + } + } + next_state + } + + pub fn lr1_readable(&self, state: &LR1State) -> HashSet> { + let mut readables = HashSet::new(); + for ((from, rule_id), _, dot) in state.0.iter() { + let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else { + continue; + }; + if let Some(l) = to.get(*dot) { + readables.insert(l.clone()); + } + } + readables + } + + pub fn lr1_clozure(&self, mut state: LR1State) -> LR1State { + assert!(self.first.is_some(), "Please call gen_first before this!"); + assert!( + self.produces_epsilon.is_some(), + "Please call gen_produces_epsilon before this!" + ); + loop { + let mut change = false; + + let relevant = state + .0 + .iter() + .filter_map(|((from, rule_id), lookahead, dot)| { + self.rules + .get(from) + .and_then(|v| v.get(*rule_id)) + .and_then(|to| match to.get(*dot) { + Some(Sentential::NoneTerminal(b)) => { + Some((b.clone(), to[dot + 1..].to_vec(), lookahead.clone())) + } + Some(Sentential::Terminal(_)) | None => None, + }) + }) + .collect::>(); + for (nt, rest, lookahead) in relevant { + if let Some(rule) = self.rules.get(&nt) { + let mut lookahead = if self.can_produce_epsilon_sen(&rest) { + lookahead + } else { + LR1Lookahead::default() + }; + lookahead.0.extend( + // extend with first set of rest, except e + self.first(&rest) + .iter() + .filter_map(|t| t.as_ref().map(|elem| Some(elem.clone()))), + ); + for to in 0..rule.len() { + change |= state.0.insert(((nt.clone(), to), lookahead.clone(), 0)); + } + } + } + + if !change { + return state; + } + } + } + + pub fn gen_lr1_automaton(&mut self) { + if self.first.is_none() { + self.gen_first(); + } + if self.produces_epsilon.is_none() { + self.gen_produces_epsilon(); + } + let mut out: RL1Automaton = HashMap::new(); + // add state zero + let mut start_state = LR1State(HashSet::new()); + if let Some(rule) = self.rules.get(&self.start) { + for to in 0..rule.len() { + start_state.0.insert(( + (self.start.clone(), to), + LR1Lookahead(HashSet::from([None])), + 0, + )); + } + } + // add state to graph and mark for todo + let rc = Rc::new(self.lr1_clozure(start_state)); + let start = Rc::downgrade(&rc); + let mut todo = vec![Rc::downgrade(&rc)]; + out.insert(rc, HashMap::new()); + + // add states while marked states exists + while let Some(state) = todo.pop() { + if let Some(state) = state.upgrade() { + // new adjacent list + let mut vec = Vec::new(); + + // add clozures from the kernels from all readable symbols + for none_terminal in self.lr1_readable(&state) { + let next_state = self.lr1_clozure(self.lr1_next_kernel(&state, &none_terminal)); + let rc = Rc::new(next_state); + if let Some((k, _)) = out.get_key_value(&rc) { + vec.push((none_terminal, Rc::downgrade(k))); + } else { + todo.push(Rc::downgrade(&rc)); + vec.push((none_terminal, Rc::downgrade(&rc))); + out.insert(rc, HashMap::new()); + } + } + // write adjacent list to state + // does not check duplicates. Is not needed, because `readable` returns a set + out.entry(state).and_modify(|elem| { + elem.extend(vec); + }); + } + } + self.lr1_automaton = Some((out, start)); + } + + pub fn gen_lr1_parse_table(&mut self) -> bool { + if self.first.is_none() { + self.gen_first(); + } + if self.follow.is_none() { + self.gen_follow(); + } + if self.lr1_automaton.is_none() { + self.gen_lr1_automaton(); + } + + let lr1_automaton = self.lr1_automaton.as_ref().unwrap(); + + let ids: HashMap>, usize> = HashMap::from_iter( + lr1_automaton + .0 + .iter() + .enumerate() + .map(|(id, (a, _))| (a.clone(), id)), + ); + + let start = *lr1_automaton + .1 + .upgrade() + .and_then(|rc| ids.get(&rc)) + .expect("Found broken state in slr parse table gen."); + let mut action: LrActionTable = HashMap::new(); + let mut goto: LrGotoTable = HashMap::new(); + let mut conflict = false; + for (state, to) in lr1_automaton.0.iter() { + let id = ids + .get(state) + .expect("Found broken state in slr parse table gen."); + + for go in self.lr1_readable(state) { + let Some(to) = to.get(&go).and_then(|to| to.upgrade()) else { + continue; + }; + + let to_id = ids + .get(&to) + .expect("Found broken state in slr parse table gen."); + + match go { + Sentential::Terminal(t) => { + conflict |= action + .insert((*id, Some(t)), LrAction::Shift(*to_id)) + .is_some(); + } + Sentential::NoneTerminal(nt) => { + conflict |= goto.insert((*id, nt), *to_id).is_some(); + } + }; + } + + for ((from, rule_id), lookahead, dot) in state.0.iter() { + let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else { + continue; + }; + if to.len() <= *dot { + for follow in lookahead.0.iter() { + conflict |= action + .insert( + (*id, follow.clone()), + LrAction::Reduce((from.clone(), *rule_id)), + ) + .is_some(); + } + } + } + } + self.lr1_parse_table = Some((action, goto, start)); + conflict + } + + pub fn lr1_parser<'a, S: Into + PartialEq + Clone>( + &'a self, + iter: &'a mut dyn Iterator>, + ) -> LRTabelParser { + assert!( + self.lr1_parse_table.is_some(), + "Please call gen_lr1_parse_table before this!" + ); + LRTabelParser { + input: iter, + start_rule: &self.start, + rules: &self.rules, + parse_table: self.lr1_parse_table.as_ref().unwrap(), + } + } +} diff --git a/src/cfg/lr_parser.rs b/src/cfg/lr_parser.rs new file mode 100644 index 0000000..c3b279b --- /dev/null +++ b/src/cfg/lr_parser.rs @@ -0,0 +1,133 @@ +use std::{collections::HashMap, fmt::Debug, hash::Hash}; + +use super::{NodeChild, ParseTree, RuleIndex, Sentential}; + +#[derive(Debug)] +pub enum LrAction { + Shift(Shift), + Reduce(Reduce), +} + +/// None is $ +pub type LrActionTable = HashMap<(usize, Option), LrAction>>; +pub type LrGotoTable = HashMap<(usize, N), usize>; +pub type LrParseTable = (LrActionTable, LrGotoTable, usize); + +pub struct LRTabelParser<'a, N, T, S> +where + N: Eq + Hash + Clone, + T: Eq + Hash + Clone + Ord, + S: Into + PartialEq + Clone, +{ + pub start_rule: &'a N, + pub rules: &'a HashMap>>>, + pub parse_table: &'a LrParseTable, + pub input: &'a mut dyn Iterator>, +} + +impl<'a, N, T, S> LRTabelParser<'a, N, T, S> +where + N: Eq + Hash + Clone, + T: Eq + Hash + Clone + Ord, + S: Into + PartialEq + Clone, +{ + pub fn get_action( + &self, + state: &usize, + next: &Option, + ) -> Option<&LrAction> { + self.parse_table.0.get(&(*state, next.clone())) + } + pub fn get_goto(&self, state: &usize, next: &N) -> Option<&usize> { + self.parse_table.1.get(&(*state, next.clone())) + } +} + +impl<'a, N, T, S> LRTabelParser<'a, N, T, S> +where + N: Eq + Hash + Clone + Debug + Ord, + T: Eq + Hash + Clone + Debug + Ord, + S: Into + PartialEq + Clone + Debug, +{ + pub fn parse(&mut self) -> Result, String> { + let mut stack: Vec<(NodeChild, usize)> = Vec::new(); + + let mut next = match self.input.next() { + Some(Ok(d)) => Some(d), + Some(Err(err)) => return Err(format!("Invalid token: {}", err)), + None => None, + }; + let mut first = true; + loop { + let state = if first { + // start with first state + self.parse_table.2 + } else { + let Some(state) = stack.last() else { + return Err("Unexpected EOS 1".into()); + }; + state.1 + }; + first = false; + let Some(current_state) = + self.get_action(&state, &next.as_ref().map(|f| f.clone().into())) + else { + return Err(format!( + "Unexpected Token: {}", + next.map(|f| format!("{f:?}")) + .unwrap_or("end of file".to_string()) + )); + }; + + println!("next: {next:?}, state: {current_state:?}, stack: {stack:?}"); + + match current_state { + LrAction::Shift(to) => { + stack.push((NodeChild::Data(next.expect("Can not shift on EOF.")), *to)); + next = match self.input.next() { + Some(Ok(d)) => Some(d), + Some(Err(err)) => return Err(format!("Invalid token: {}", err)), + None => None, + }; + } + LrAction::Reduce((rule, ind)) => { + let Some(r) = self.rules.get(rule).and_then(|e| e.get(*ind)) else { + return Err(format!("Invalid rule: {:?}-{}", rule, ind)); + }; + let mut childs = Vec::new(); + for elem in r.iter().rev() { + let Some(last) = stack.pop() else { + return Err("Unexpected EOF".into()); + }; + if last.0 != *elem { + return Err(format!("Broken Stack: {:?} and {:?}", last.0, elem)); + } + childs.push(last); + } + if *self.start_rule == *rule { + return Ok(ParseTree { + rule: Some((rule.clone(), *ind)), + childs: childs.into_iter().rev().map(|(a, _)| a.clone()).collect(), + }); + } + let Some(state) = stack.last() else { + return Err("Unexpected EOS 2".into()); + }; + let Some(next) = self.get_goto(&state.1, rule) else { + return Err(format!( + "Invalid reduction: state: {} rule: {:?}", + state.1, rule + )); + }; + stack.push(( + NodeChild::Child(ParseTree { + rule: Some((rule.clone(), *ind)), + childs: childs.into_iter().rev().map(|(d, _)| d).collect(), + }), + *next, + )); + } + } + } + } +} diff --git a/src/cfg/mod.rs b/src/cfg/mod.rs index 32b9323..8d09c61 100644 --- a/src/cfg/mod.rs +++ b/src/cfg/mod.rs @@ -4,10 +4,14 @@ use std::{ hash::Hash, }; -use lr0_grammar::{LR0Start, RL0Automaton, SlrParseTable}; +use lr0_grammar::{LR0Start, RL0Automaton}; +use lr1_grammar::{LR1Start, RL1Automaton}; +use lr_parser::LrParseTable; pub mod ll_grammar; pub mod lr0_grammar; +pub mod lr1_grammar; +pub mod lr_parser; #[macro_export] macro_rules! cfg_grammar { @@ -27,15 +31,7 @@ macro_rules! cfg_grammar { } map.get_mut(&$left).unwrap().push(vec![$($right.into()),*]); })* - $crate::cfg::Grammar { - start: $start, - rules: map, - first: None, - follow: None, - ll_parse_table: None, - lr0_automaton: None, - slr_parse_table: None, - } + $crate::cfg::Grammar::new($start, map) } }; } @@ -70,9 +66,14 @@ impl Ord for Sentential { pub type RuleIndex = (N, usize); -pub struct Grammar { +pub struct Grammar +where + N: PartialEq + Eq + Hash + Clone, + T: PartialEq + Eq + Hash + Clone + Ord, +{ pub start: N, pub rules: HashMap>>>, + pub produces_epsilon: Option>, /// none is epsilon pub first: Option>>>, /// none is $ @@ -89,22 +90,73 @@ pub struct Grammar, LR0Start)>, /// - pub slr_parse_table: Option>, + pub slr_parse_table: Option>, + + /// + pub lr1_automaton: Option<(RL1Automaton, LR1Start)>, + + /// + pub lr1_parse_table: Option>, } -impl Grammar { - pub fn can_produce_epsilon(&self, rule: &Sentential) -> bool { - match rule { - Sentential::Terminal(_) => false, - Sentential::NoneTerminal(nt) => self - .rules - .get(nt) - .map(|f| f.iter().any(|v| v.is_empty())) - .unwrap_or(false), +impl Grammar { + pub fn new(start: N, rules: HashMap>>>) -> Self { + Self { + start, + rules, + produces_epsilon: None, + first: None, + follow: None, + ll_parse_table: None, + lr0_automaton: None, + slr_parse_table: None, + lr1_automaton: None, + lr1_parse_table: None, } } + pub fn gen_produces_epsilon(&mut self) { + let mut out: HashSet = HashSet::new(); + loop { + let mut change = false; + for (from, to) in self.rules.iter() { + for to in to.iter() { + if to.iter().all(|sen| match sen { + Sentential::Terminal(_) => false, + Sentential::NoneTerminal(a) => out.contains(a), + }) { + change |= out.insert(from.clone()); + } + } + } + if !change { + break; + } + } + self.produces_epsilon = Some(out); + } + + pub fn can_produce_epsilon(&self, rule: &Sentential) -> bool { + assert!( + self.produces_epsilon.is_some(), + "Please call gen_produces_epsilon before this!" + ); + + match rule { + Sentential::Terminal(_) => false, + Sentential::NoneTerminal(nt) => self.produces_epsilon.as_ref().unwrap().contains(nt), + } + } + + pub fn can_produce_epsilon_sen(&self, rule: &Vec>) -> bool { + rule.iter() + .all(|s: &Sentential| self.can_produce_epsilon(s)) + } + pub fn gen_first(&mut self) { + if self.produces_epsilon.is_none() { + self.gen_produces_epsilon(); + } let mut first: HashMap>> = HashMap::new(); loop { let mut change = false; @@ -165,6 +217,8 @@ impl Grammar self.first = Some(first); } + /// get first of sentential + /// None is e pub fn first(&self, sent: &Vec>) -> HashSet> { assert!(self.first.is_some(), "Please call gen_first before this!"); let mut out = HashSet::>::new(); diff --git a/src/main.rs b/src/main.rs index 6c8bc6d..7ba4c19 100644 --- a/src/main.rs +++ b/src/main.rs @@ -104,7 +104,7 @@ token_scanner!( } ); -#[derive(Debug, PartialEq, Eq, Hash, Clone)] +#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)] enum NoneTerminals { P, // Program, ; separated L, // Line of code @@ -193,9 +193,11 @@ fn main() { let mut grammar = grammer(); grammar.gen_follow(); - println!("first: {:?}", grammar.first); - println!("follow: {:?}", grammar.follow); - let conflict = grammar.gen_ll_parse_table(); + //println!("first: {:?}", grammar.first); + //println!("follow: {:?}", grammar.follow); + grammar.gen_lr1_automaton(); + println!("conflict: {:?}", grammar.lr1_automaton); + /* let conflict = grammar.gen_ll_parse_table(); println!("conflict: {conflict}"); println!("prase table: {:?}", grammar.ll_parse_table); println!("parse\n\n"); @@ -205,5 +207,5 @@ fn main() { .ll_parser(&mut m.iter_mut()) .parse() .map(|tree| tree.clean()) - ) + ) */ }