slr parser

This commit is contained in:
jusax23 2024-11-09 16:18:15 +01:00
parent 8d52c340bd
commit a59ac9360c
Signed by: jusax23
GPG key ID: 499E2AA870C1CD41
5 changed files with 230 additions and 54 deletions

View file

@ -69,8 +69,8 @@ fn grammer() -> Grammar<NoneTerminals, BareTokens> {
} }
fn main() { fn main() {
//let code = String::from("a = b()+c+(d+e())"); let code = String::from("a = b(f)+c+(d+e(f))");
//let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace); let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);
let mut grammar = grammer(); let mut grammar = grammer();
grammar.gen_follow(); grammar.gen_follow();
@ -78,7 +78,10 @@ fn main() {
println!("follow: {:?}", grammar.follow); println!("follow: {:?}", grammar.follow);
grammar.gen_lr0_automaton(); grammar.gen_lr0_automaton();
println!("automaton: {:?}", grammar.lr0_automaton); println!("automaton: {:?}", grammar.lr0_automaton);
grammar.gen_slr_parse_table(); println!("conflict: {}", grammar.gen_slr_parse_table());
println!("parse_table: {:?}", grammar.slr_parse_table); println!("parse_table: {:?}", grammar.slr_parse_table);
println!(
"parsed: {:?}",
grammar.slr_parser(&mut m.iter_mut()).parse()
)
} }

View file

@ -1,6 +1,6 @@
use std::{collections::HashMap, fmt::Debug, hash::Hash}; use std::{collections::HashMap, fmt::Debug, hash::Hash};
use super::{Grammar, Sentential}; use super::{Grammar, NodeChild, ParseTree, Sentential};
impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar<N, T> { impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar<N, T> {
pub fn gen_ll_parse_table(&mut self) -> bool { pub fn gen_ll_parse_table(&mut self) -> bool {
@ -15,7 +15,7 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar
} }
let mut conflict = false; let mut conflict = false;
// left derivation // left derivation
// when hidding N and T is next (None = e) // when hiding N and T is next (None = e)
// then the n'th (usize) variant of N can be used. // then the n'th (usize) variant of N can be used.
let mut parse_table: HashMap<(N, Option<T>), usize> = HashMap::new(); let mut parse_table: HashMap<(N, Option<T>), usize> = HashMap::new();
for (from, to) in self.rules.iter() { for (from, to) in self.rules.iter() {
@ -85,23 +85,21 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar
} }
} }
/// Just checks a program. Does not generates output. pub struct LLTabelParser<'a, N, T, S>
pub struct LLTabelParser< where
'a,
N: PartialEq + Eq + Hash + Clone, N: PartialEq + Eq + Hash + Clone,
T: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone,
S: Into<T> + PartialEq<T> + Clone, S: Into<T> + PartialEq<T> + Clone,
> { {
grammar: &'a Grammar<N, T>, grammar: &'a Grammar<N, T>,
input: &'a mut dyn Iterator<Item = Result<S, String>>, input: &'a mut dyn Iterator<Item = Result<S, String>>,
} }
impl< impl<'a, N, T, S> LLTabelParser<'a, N, T, S>
'a, where
N: PartialEq + Eq + Hash + Clone + Debug, N: PartialEq + Eq + Hash + Clone + Debug,
T: PartialEq + Eq + Hash + Clone + Debug, T: PartialEq + Eq + Hash + Clone + Debug,
S: Into<T> + PartialEq<T> + Clone + Debug, S: Into<T> + PartialEq<T> + Clone + Debug,
> LLTabelParser<'a, N, T, S>
{ {
pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> { pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> {
// stack of table driven parser // stack of table driven parser
@ -205,29 +203,6 @@ pub trait Skippable {
false false
} }
} }
/// One child slot of a [`ParseTree`] node.
#[derive(Debug, Clone)]
pub enum NodeChild<N, S> {
/// A nested subtree.
Child(ParseTree<N, S>),
/// A single input item of type `S`, stored as-is.
Data(S),
}
/// A node of a parse tree.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ParseTree<N, S> {
/// Optional pair of a value of type `N` and an index.
/// NOTE(review): presumably the non-terminal and the index of the applied rule variant — confirm.
pub rule: Option<(N, usize)>,
/// Children of this node (subtrees or raw data items).
pub childs: Vec<NodeChild<N, S>>,
}
impl<N, S> ParseTree<N, S> {
pub fn new(rule: Option<(N, usize)>) -> Self {
Self {
rule,
childs: Vec::new(),
}
}
}
impl<N: Skippable + Debug, S: Debug> ParseTree<N, S> { impl<N: Skippable + Debug, S: Debug> ParseTree<N, S> {
/// cleanup the parse tree /// cleanup the parse tree
/// does not work on a subtree /// does not work on a subtree

View file

@ -1,16 +1,18 @@
use std::{ use std::{
collections::{HashMap, HashSet}, collections::{HashMap, HashSet},
fmt::Debug,
hash::{Hash, Hasher}, hash::{Hash, Hasher},
rc::{Rc, Weak}, rc::{Rc, Weak},
}; };
use super::{Grammar, RuleIndex, Sentential}; use super::{Grammar, NodeChild, ParseTree, RuleIndex, Sentential};
pub type RL0Automaton<N, T> = pub type RL0Automaton<N, T> =
HashMap<Rc<LR0State<N>>, HashMap<Sentential<N, T>, Weak<LR0State<N>>>>; HashMap<Rc<LR0State<N>>, HashMap<Sentential<N, T>, Weak<LR0State<N>>>>;
#[derive(Debug, Eq, PartialEq)] #[derive(Debug, Eq, PartialEq)]
pub struct LR0State<N: Hash + Eq>(HashSet<(RuleIndex<N>, usize)>); pub struct LR0State<N: Hash + Eq>(HashSet<(RuleIndex<N>, usize)>);
pub type LR0Start<N> = Weak<LR0State<N>>;
impl<N: Hash + Eq + Ord> Hash for LR0State<N> { impl<N: Hash + Eq + Ord> Hash for LR0State<N> {
fn hash<H: Hasher>(&self, state: &mut H) { fn hash<H: Hasher>(&self, state: &mut H) {
@ -27,9 +29,10 @@ pub enum SlrAction<Shift, Reduce> {
Reduce(Reduce), Reduce(Reduce),
} }
/// None is $
pub type SlrActionTable<N, T> = HashMap<(usize, Option<T>), SlrAction<usize, RuleIndex<N>>>; pub type SlrActionTable<N, T> = HashMap<(usize, Option<T>), SlrAction<usize, RuleIndex<N>>>;
pub type SlrGotoTable<N> = HashMap<(usize, N), usize>; pub type SlrGotoTable<N> = HashMap<(usize, N), usize>;
pub type SlrParseTable<N, T> = (SlrActionTable<N, T>, SlrGotoTable<N>); pub type SlrParseTable<N, T> = (SlrActionTable<N, T>, SlrGotoTable<N>, usize);
impl<N, T> Grammar<N, T> impl<N, T> Grammar<N, T>
where where
@ -100,6 +103,7 @@ where
} }
// add state to graph and mark for todo // add state to graph and mark for todo
let rc = Rc::new(self.lr0_clozure(start_state)); let rc = Rc::new(self.lr0_clozure(start_state));
let start = Rc::downgrade(&rc);
let mut todo = vec![Rc::downgrade(&rc)]; let mut todo = vec![Rc::downgrade(&rc)];
out.insert(rc, HashMap::new()); out.insert(rc, HashMap::new());
@ -128,10 +132,10 @@ where
}); });
} }
} }
self.lr0_automaton = Some(out); self.lr0_automaton = Some((out, start));
} }
pub fn gen_slr_parse_table(&mut self) { pub fn gen_slr_parse_table(&mut self) -> bool {
if self.follow.is_none() { if self.follow.is_none() {
self.gen_follow(); self.gen_follow();
} }
@ -143,15 +147,21 @@ where
let ids: HashMap<Rc<LR0State<N>>, usize> = HashMap::from_iter( let ids: HashMap<Rc<LR0State<N>>, usize> = HashMap::from_iter(
lr0_automaton lr0_automaton
.0
.iter() .iter()
.enumerate() .enumerate()
.map(|(id, (a, _))| (a.clone(), id)), .map(|(id, (a, _))| (a.clone(), id)),
); );
// none is $ let start = *lr0_automaton
.1
.upgrade()
.and_then(|rc| ids.get(&rc))
.expect("Found broken state in slr parse table gen.");
let mut action: SlrActionTable<N, T> = HashMap::new(); let mut action: SlrActionTable<N, T> = HashMap::new();
let mut goto: SlrGotoTable<N> = HashMap::new(); let mut goto: SlrGotoTable<N> = HashMap::new();
for (state, to) in lr0_automaton { let mut conflict = false;
for (state, to) in lr0_automaton.0.iter() {
let id = ids let id = ids
.get(state) .get(state)
.expect("Found broken state in slr parse table gen."); .expect("Found broken state in slr parse table gen.");
@ -167,10 +177,12 @@ where
match go { match go {
Sentential::Terminal(t) => { Sentential::Terminal(t) => {
action.insert((*id, Some(t)), SlrAction::Shift(*to_id)); conflict |= action
.insert((*id, Some(t)), SlrAction::Shift(*to_id))
.is_some();
} }
Sentential::NoneTerminal(nt) => { Sentential::NoneTerminal(nt) => {
goto.insert((*id, nt), *to_id); conflict |= goto.insert((*id, nt), *to_id).is_some();
} }
}; };
} }
@ -181,11 +193,151 @@ where
}; };
if to.len() <= *dot { if to.len() <= *dot {
for follow in self.follow(from) { for follow in self.follow(from) {
action.insert((*id, follow), SlrAction::Reduce((from.clone(), *rule_id))); conflict |= action
.insert((*id, follow), SlrAction::Reduce((from.clone(), *rule_id)))
.is_some();
} }
} }
} }
} }
self.slr_parse_table = Some((action, goto)); self.slr_parse_table = Some((action, goto, start));
conflict
}
/// Creates a table-driven SLR parser that reads its tokens from `iter`.
///
/// The returned parser borrows both this grammar and the token iterator for `'a`.
///
/// # Panics
///
/// Panics if `slr_parse_table` is `None`, i.e. `gen_slr_parse_table` has not been
/// called on this grammar yet.
pub fn slr_parser<'a, S: Into<T> + PartialEq<T> + Clone>(
    &'a self,
    iter: &'a mut dyn Iterator<Item = Result<S, String>>,
) -> SLRTabelParser<'a, N, T, S> {
    assert!(
        self.slr_parse_table.is_some(),
        "Please call gen_slr_parse_table before this!"
    );
    SLRTabelParser {
        grammar: self,
        input: iter,
    }
}
/// Looks up the action-table entry for `state` and the lookahead `next`.
///
/// Returns `None` when the table has no entry for that pair.
///
/// # Panics
///
/// Panics if `slr_parse_table` is `None`.
pub fn get_slr_action(
    &self,
    state: &usize,
    next: &Option<T>,
) -> Option<&SlrAction<usize, (N, usize)>> {
    // The action table is the first component of the parse-table tuple.
    let (actions, ..) = self
        .slr_parse_table
        .as_ref()
        .expect("Please call gen_slr_parse_table before this!");
    let key = (*state, next.clone());
    actions.get(&key)
}
/// Looks up the goto-table entry for `state` and the non-terminal `next`.
///
/// Returns `None` when the table has no entry for that pair.
///
/// # Panics
///
/// Panics if `slr_parse_table` is `None`.
pub fn get_slr_goto(&self, state: &usize, next: &N) -> Option<&usize> {
    // The goto table is the second component of the parse-table tuple.
    let (_, gotos, ..) = self
        .slr_parse_table
        .as_ref()
        .expect("Please call gen_slr_parse_table before this!");
    let key = (*state, next.clone());
    gotos.get(&key)
}
}
/// Table-driven SLR parser over a borrowed grammar and a borrowed token stream.
///
/// - `N`: non-terminal type of the grammar.
/// - `T`: terminal type of the grammar.
/// - `S`: item type produced by the input; convertible into and comparable with `T`.
pub struct SLRTabelParser<'a, N, T, S>
where
N: PartialEq + Eq + Hash + Clone,
T: PartialEq + Eq + Hash + Clone,
S: Into<T> + PartialEq<T> + Clone,
{
/// Grammar whose parse tables drive the parser.
grammar: &'a Grammar<N, T>,
/// Token source; an `Err` item carries an error message for an invalid token.
input: &'a mut dyn Iterator<Item = Result<S, String>>,
}
impl<'a, N, T, S> SLRTabelParser<'a, N, T, S>
where
N: PartialEq + Eq + Hash + Clone + Debug + Ord,
T: PartialEq + Eq + Hash + Clone + Debug + Ord,
S: Into<T> + PartialEq<T> + Clone + Debug,
{
pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> {
let mut stack: Vec<(NodeChild<N, S>, usize)> = Vec::new();
let mut next = match self.input.next() {
Some(Ok(d)) => Some(d),
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
None => None,
};
let mut first = true;
loop {
let state = if first {
self.grammar.slr_parse_table.as_ref().unwrap().2
} else {
let Some(state) = stack.last() else {
return Err("Unexpected EOS".into());
};
state.1
};
first = false;
let Some(current_state) = self
.grammar
.get_slr_action(&state, &next.as_ref().map(|f| f.clone().into()))
else {
return Err(format!(
"Unexpected Token: {}",
next.map(|f| format!("{f:?}"))
.unwrap_or("end of file".to_string())
));
};
match current_state {
SlrAction::Shift(to) => {
stack.push((NodeChild::Data(next.expect("Can not shift on EOF.")), *to));
next = match self.input.next() {
Some(Ok(d)) => Some(d),
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
None => None,
};
}
SlrAction::Reduce((rule, ind)) => {
let Some(r) = self.grammar.rules.get(rule).and_then(|e| e.get(*ind)) else {
return Err(format!("Invalid rule: {:?}-{}", rule, ind));
};
let mut childs = Vec::new();
for elem in r.iter().rev() {
let Some(last) = stack.pop() else {
return Err("Unexpected EOF".into());
};
if last.0 != *elem {
return Err(format!("Broken Stack: {:?} and {:?}", last.0, elem));
}
childs.push(last);
}
if self.grammar.start == *rule {
return Ok(ParseTree {
rule: Some((rule.clone(), *ind)),
childs: childs.into_iter().rev().map(|(a, _)| a.clone()).collect(),
});
}
let Some(state) = stack.last() else {
return Err("Unexpected EOS".into());
};
let Some(next) = self.grammar.get_slr_goto(&state.1, rule) else {
return Err(format!(
"Invalid reduction: state: {} rule: {:?}",
state.1, rule
));
};
stack.push((
NodeChild::Child(ParseTree {
rule: Some((rule.clone(), *ind)),
childs: childs.into_iter().rev().map(|(d, _)| d).collect(),
}),
*next,
));
}
}
}
} }
} }

View file

@ -1,12 +1,13 @@
use std::{ use std::{
collections::{HashMap, HashSet}, collections::{HashMap, HashSet},
fmt::Debug,
hash::Hash, hash::Hash,
}; };
use lr0::{RL0Automaton, SlrParseTable}; use lr0_grammar::{LR0Start, RL0Automaton, SlrParseTable};
pub mod ll_grammar; pub mod ll_grammar;
pub mod lr0; pub mod lr0_grammar;
#[macro_export] #[macro_export]
macro_rules! cfg_grammar { macro_rules! cfg_grammar {
@ -85,7 +86,7 @@ pub struct Grammar<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash +
/// Graph, defined through this adjacency list. /// Graph, defined through this adjacency list.
/// - key: states /// - key: states
/// - value: list with read symbol and linked node. /// - value: list with read symbol and linked node.
pub lr0_automaton: Option<RL0Automaton<N, T>>, pub lr0_automaton: Option<(RL0Automaton<N, T>, LR0Start<N>)>,
/// ///
pub slr_parse_table: Option<SlrParseTable<N, T>>, pub slr_parse_table: Option<SlrParseTable<N, T>>,
@ -278,3 +279,48 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar
.unwrap_or(HashSet::new()) .unwrap_or(HashSet::new())
} }
} }
/// One child slot of a [`ParseTree`] node.
#[derive(Debug, Clone)]
pub enum NodeChild<N, S> {
/// A nested subtree.
Child(ParseTree<N, S>),
/// A single input item of type `S`, stored as-is.
Data(S),
}
/// A node of a parse tree.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ParseTree<N, S> {
/// Optional rule index attached to this node.
/// NOTE(review): presumably the non-terminal and the index of the applied rule variant — confirm.
pub rule: Option<RuleIndex<N>>,
/// Children of this node (subtrees or raw data items).
pub childs: Vec<NodeChild<N, S>>,
}
impl<N, S> ParseTree<N, S> {
pub fn new(rule: Option<RuleIndex<N>>) -> Self {
Self {
rule,
childs: Vec::new(),
}
}
}
impl<N, T, S> PartialEq<Sentential<N, T>> for NodeChild<N, S>
where
N: PartialEq + Eq,
T: PartialEq + Eq,
S: PartialEq<T>,
{
fn eq(&self, other: &Sentential<N, T>) -> bool {
use NodeChild::*;
use Sentential::*;
match (self, other) {
(Data(s), Terminal(t)) if *s == *t => true,
(
Child(ParseTree {
rule: Some((rule, _)),
childs: _,
}),
NoneTerminal(nt),
) if *rule == *nt => true,
_ => false,
}
}
}

View file

@ -5,7 +5,7 @@ pub mod scanner;
pub mod prelude { pub mod prelude {
pub use crate::cfg::*; pub use crate::cfg::*;
pub use crate::cfg::ll_grammar::*; pub use crate::cfg::ll_grammar::*;
pub use crate::cfg::lr0::*; pub use crate::cfg::lr0_grammar::*;
pub use crate::cfg_grammar; pub use crate::cfg_grammar;
pub use crate::double_enum; pub use crate::double_enum;
pub use crate::scanner::*; pub use crate::scanner::*;