slr parser
This commit is contained in:
parent
8d52c340bd
commit
a59ac9360c
5 changed files with 230 additions and 54 deletions
|
@ -69,8 +69,8 @@ fn grammer() -> Grammar<NoneTerminals, BareTokens> {
|
|||
}
|
||||
|
||||
fn main() {
|
||||
//let code = String::from("a = b()+c+(d+e())");
|
||||
//let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);
|
||||
let code = String::from("a = b(f)+c+(d+e(f))");
|
||||
let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);
|
||||
|
||||
let mut grammar = grammer();
|
||||
grammar.gen_follow();
|
||||
|
@ -78,7 +78,10 @@ fn main() {
|
|||
println!("follow: {:?}", grammar.follow);
|
||||
grammar.gen_lr0_automaton();
|
||||
println!("automaton: {:?}", grammar.lr0_automaton);
|
||||
grammar.gen_slr_parse_table();
|
||||
println!("conflict: {}", grammar.gen_slr_parse_table());
|
||||
println!("parse_table: {:?}", grammar.slr_parse_table);
|
||||
|
||||
println!(
|
||||
"parsed: {:?}",
|
||||
grammar.slr_parser(&mut m.iter_mut()).parse()
|
||||
)
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::{collections::HashMap, fmt::Debug, hash::Hash};
|
||||
|
||||
use super::{Grammar, Sentential};
|
||||
use super::{Grammar, NodeChild, ParseTree, Sentential};
|
||||
|
||||
impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar<N, T> {
|
||||
pub fn gen_ll_parse_table(&mut self) -> bool {
|
||||
|
@ -15,7 +15,7 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar
|
|||
}
|
||||
let mut conflict = false;
|
||||
// left derivation
|
||||
// when hidding N and T is next (None = e)
|
||||
// when hiding N and T is next (None = e)
|
||||
// then the n'th (usize) variant of N can be used.
|
||||
let mut parse_table: HashMap<(N, Option<T>), usize> = HashMap::new();
|
||||
for (from, to) in self.rules.iter() {
|
||||
|
@ -85,23 +85,21 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar
|
|||
}
|
||||
}
|
||||
|
||||
/// Just checks a program. Does not generate output.
|
||||
pub struct LLTabelParser<
|
||||
'a,
|
||||
pub struct LLTabelParser<'a, N, T, S>
|
||||
where
|
||||
N: PartialEq + Eq + Hash + Clone,
|
||||
T: PartialEq + Eq + Hash + Clone,
|
||||
S: Into<T> + PartialEq<T> + Clone,
|
||||
> {
|
||||
{
|
||||
grammar: &'a Grammar<N, T>,
|
||||
input: &'a mut dyn Iterator<Item = Result<S, String>>,
|
||||
}
|
||||
|
||||
impl<
|
||||
'a,
|
||||
N: PartialEq + Eq + Hash + Clone + Debug,
|
||||
T: PartialEq + Eq + Hash + Clone + Debug,
|
||||
S: Into<T> + PartialEq<T> + Clone + Debug,
|
||||
> LLTabelParser<'a, N, T, S>
|
||||
impl<'a, N, T, S> LLTabelParser<'a, N, T, S>
|
||||
where
|
||||
N: PartialEq + Eq + Hash + Clone + Debug,
|
||||
T: PartialEq + Eq + Hash + Clone + Debug,
|
||||
S: Into<T> + PartialEq<T> + Clone + Debug,
|
||||
{
|
||||
pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> {
|
||||
// stack of table driven parser
|
||||
|
@ -205,29 +203,6 @@ pub trait Skippable {
|
|||
false
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum NodeChild<N, S> {
|
||||
Child(ParseTree<N, S>),
|
||||
Data(S),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(dead_code)]
|
||||
pub struct ParseTree<N, S> {
|
||||
pub rule: Option<(N, usize)>,
|
||||
pub childs: Vec<NodeChild<N, S>>,
|
||||
}
|
||||
|
||||
impl<N, S> ParseTree<N, S> {
|
||||
pub fn new(rule: Option<(N, usize)>) -> Self {
|
||||
Self {
|
||||
rule,
|
||||
childs: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<N: Skippable + Debug, S: Debug> ParseTree<N, S> {
|
||||
/// cleanup the parse tree
|
||||
/// does not work on a subtree
|
||||
|
|
|
@ -1,16 +1,18 @@
|
|||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
fmt::Debug,
|
||||
hash::{Hash, Hasher},
|
||||
rc::{Rc, Weak},
|
||||
};
|
||||
|
||||
use super::{Grammar, RuleIndex, Sentential};
|
||||
use super::{Grammar, NodeChild, ParseTree, RuleIndex, Sentential};
|
||||
|
||||
pub type RL0Automaton<N, T> =
|
||||
HashMap<Rc<LR0State<N>>, HashMap<Sentential<N, T>, Weak<LR0State<N>>>>;
|
||||
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
pub struct LR0State<N: Hash + Eq>(HashSet<(RuleIndex<N>, usize)>);
|
||||
pub type LR0Start<N> = Weak<LR0State<N>>;
|
||||
|
||||
impl<N: Hash + Eq + Ord> Hash for LR0State<N> {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
|
@ -27,9 +29,10 @@ pub enum SlrAction<Shift, Reduce> {
|
|||
Reduce(Reduce),
|
||||
}
|
||||
|
||||
/// None is $
|
||||
pub type SlrActionTable<N, T> = HashMap<(usize, Option<T>), SlrAction<usize, RuleIndex<N>>>;
|
||||
pub type SlrGotoTable<N> = HashMap<(usize, N), usize>;
|
||||
pub type SlrParseTable<N, T> = (SlrActionTable<N, T>, SlrGotoTable<N>);
|
||||
pub type SlrParseTable<N, T> = (SlrActionTable<N, T>, SlrGotoTable<N>, usize);
|
||||
|
||||
impl<N, T> Grammar<N, T>
|
||||
where
|
||||
|
@ -100,6 +103,7 @@ where
|
|||
}
|
||||
// add state to graph and mark for todo
|
||||
let rc = Rc::new(self.lr0_clozure(start_state));
|
||||
let start = Rc::downgrade(&rc);
|
||||
let mut todo = vec![Rc::downgrade(&rc)];
|
||||
out.insert(rc, HashMap::new());
|
||||
|
||||
|
@ -128,10 +132,10 @@ where
|
|||
});
|
||||
}
|
||||
}
|
||||
self.lr0_automaton = Some(out);
|
||||
self.lr0_automaton = Some((out, start));
|
||||
}
|
||||
|
||||
pub fn gen_slr_parse_table(&mut self) {
|
||||
pub fn gen_slr_parse_table(&mut self) -> bool {
|
||||
if self.follow.is_none() {
|
||||
self.gen_follow();
|
||||
}
|
||||
|
@ -143,15 +147,21 @@ where
|
|||
|
||||
let ids: HashMap<Rc<LR0State<N>>, usize> = HashMap::from_iter(
|
||||
lr0_automaton
|
||||
.0
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(id, (a, _))| (a.clone(), id)),
|
||||
);
|
||||
|
||||
// none is $
|
||||
let start = *lr0_automaton
|
||||
.1
|
||||
.upgrade()
|
||||
.and_then(|rc| ids.get(&rc))
|
||||
.expect("Found broken state in slr parse table gen.");
|
||||
let mut action: SlrActionTable<N, T> = HashMap::new();
|
||||
let mut goto: SlrGotoTable<N> = HashMap::new();
|
||||
for (state, to) in lr0_automaton {
|
||||
let mut conflict = false;
|
||||
for (state, to) in lr0_automaton.0.iter() {
|
||||
let id = ids
|
||||
.get(state)
|
||||
.expect("Found broken state in slr parse table gen.");
|
||||
|
@ -167,10 +177,12 @@ where
|
|||
|
||||
match go {
|
||||
Sentential::Terminal(t) => {
|
||||
action.insert((*id, Some(t)), SlrAction::Shift(*to_id));
|
||||
conflict |= action
|
||||
.insert((*id, Some(t)), SlrAction::Shift(*to_id))
|
||||
.is_some();
|
||||
}
|
||||
Sentential::NoneTerminal(nt) => {
|
||||
goto.insert((*id, nt), *to_id);
|
||||
conflict |= goto.insert((*id, nt), *to_id).is_some();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -181,11 +193,151 @@ where
|
|||
};
|
||||
if to.len() <= *dot {
|
||||
for follow in self.follow(from) {
|
||||
action.insert((*id, follow), SlrAction::Reduce((from.clone(), *rule_id)));
|
||||
conflict |= action
|
||||
.insert((*id, follow), SlrAction::Reduce((from.clone(), *rule_id)))
|
||||
.is_some();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
self.slr_parse_table = Some((action, goto));
|
||||
self.slr_parse_table = Some((action, goto, start));
|
||||
conflict
|
||||
}
|
||||
|
||||
pub fn slr_parser<'a, S: Into<T> + PartialEq<T> + Clone>(
|
||||
&'a self,
|
||||
iter: &'a mut dyn Iterator<Item = Result<S, String>>,
|
||||
) -> SLRTabelParser<N, T, S> {
|
||||
assert!(
|
||||
self.slr_parse_table.is_some(),
|
||||
"Please call gen_slr_parse_table before this!"
|
||||
);
|
||||
SLRTabelParser {
|
||||
input: iter,
|
||||
grammar: self,
|
||||
}
|
||||
}
|
||||
pub fn get_slr_action(
|
||||
&self,
|
||||
state: &usize,
|
||||
next: &Option<T>,
|
||||
) -> Option<&SlrAction<usize, (N, usize)>> {
|
||||
assert!(
|
||||
self.slr_parse_table.is_some(),
|
||||
"Please call gen_slr_parse_table before this!"
|
||||
);
|
||||
self.slr_parse_table
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.0
|
||||
.get(&(*state, next.clone()))
|
||||
}
|
||||
pub fn get_slr_goto(&self, state: &usize, next: &N) -> Option<&usize> {
|
||||
assert!(
|
||||
self.slr_parse_table.is_some(),
|
||||
"Please call gen_slr_parse_table before this!"
|
||||
);
|
||||
self.slr_parse_table
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.1
|
||||
.get(&(*state, next.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SLRTabelParser<'a, N, T, S>
|
||||
where
|
||||
N: PartialEq + Eq + Hash + Clone,
|
||||
T: PartialEq + Eq + Hash + Clone,
|
||||
S: Into<T> + PartialEq<T> + Clone,
|
||||
{
|
||||
grammar: &'a Grammar<N, T>,
|
||||
input: &'a mut dyn Iterator<Item = Result<S, String>>,
|
||||
}
|
||||
|
||||
impl<'a, N, T, S> SLRTabelParser<'a, N, T, S>
|
||||
where
|
||||
N: PartialEq + Eq + Hash + Clone + Debug + Ord,
|
||||
T: PartialEq + Eq + Hash + Clone + Debug + Ord,
|
||||
S: Into<T> + PartialEq<T> + Clone + Debug,
|
||||
{
|
||||
pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> {
|
||||
let mut stack: Vec<(NodeChild<N, S>, usize)> = Vec::new();
|
||||
|
||||
let mut next = match self.input.next() {
|
||||
Some(Ok(d)) => Some(d),
|
||||
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
|
||||
None => None,
|
||||
};
|
||||
let mut first = true;
|
||||
loop {
|
||||
let state = if first {
|
||||
self.grammar.slr_parse_table.as_ref().unwrap().2
|
||||
} else {
|
||||
let Some(state) = stack.last() else {
|
||||
return Err("Unexpected EOS".into());
|
||||
};
|
||||
state.1
|
||||
};
|
||||
first = false;
|
||||
let Some(current_state) = self
|
||||
.grammar
|
||||
.get_slr_action(&state, &next.as_ref().map(|f| f.clone().into()))
|
||||
else {
|
||||
return Err(format!(
|
||||
"Unexpected Token: {}",
|
||||
next.map(|f| format!("{f:?}"))
|
||||
.unwrap_or("end of file".to_string())
|
||||
));
|
||||
};
|
||||
|
||||
match current_state {
|
||||
SlrAction::Shift(to) => {
|
||||
stack.push((NodeChild::Data(next.expect("Can not shift on EOF.")), *to));
|
||||
next = match self.input.next() {
|
||||
Some(Ok(d)) => Some(d),
|
||||
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
|
||||
None => None,
|
||||
};
|
||||
}
|
||||
SlrAction::Reduce((rule, ind)) => {
|
||||
let Some(r) = self.grammar.rules.get(rule).and_then(|e| e.get(*ind)) else {
|
||||
return Err(format!("Invalid rule: {:?}-{}", rule, ind));
|
||||
};
|
||||
let mut childs = Vec::new();
|
||||
for elem in r.iter().rev() {
|
||||
let Some(last) = stack.pop() else {
|
||||
return Err("Unexpected EOF".into());
|
||||
};
|
||||
if last.0 != *elem {
|
||||
return Err(format!("Broken Stack: {:?} and {:?}", last.0, elem));
|
||||
}
|
||||
childs.push(last);
|
||||
}
|
||||
if self.grammar.start == *rule {
|
||||
return Ok(ParseTree {
|
||||
rule: Some((rule.clone(), *ind)),
|
||||
childs: childs.into_iter().rev().map(|(a, _)| a.clone()).collect(),
|
||||
});
|
||||
}
|
||||
let Some(state) = stack.last() else {
|
||||
return Err("Unexpected EOS".into());
|
||||
};
|
||||
let Some(next) = self.grammar.get_slr_goto(&state.1, rule) else {
|
||||
return Err(format!(
|
||||
"Invalid reduction: state: {} rule: {:?}",
|
||||
state.1, rule
|
||||
));
|
||||
};
|
||||
stack.push((
|
||||
NodeChild::Child(ParseTree {
|
||||
rule: Some((rule.clone(), *ind)),
|
||||
childs: childs.into_iter().rev().map(|(d, _)| d).collect(),
|
||||
}),
|
||||
*next,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,12 +1,13 @@
|
|||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
fmt::Debug,
|
||||
hash::Hash,
|
||||
};
|
||||
|
||||
use lr0::{RL0Automaton, SlrParseTable};
|
||||
use lr0_grammar::{LR0Start, RL0Automaton, SlrParseTable};
|
||||
|
||||
pub mod ll_grammar;
|
||||
pub mod lr0;
|
||||
pub mod lr0_grammar;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! cfg_grammar {
|
||||
|
@ -33,7 +34,7 @@ macro_rules! cfg_grammar {
|
|||
follow: None,
|
||||
ll_parse_table: None,
|
||||
lr0_automaton: None,
|
||||
slr_parse_table: None,
|
||||
slr_parse_table: None,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -85,7 +86,7 @@ pub struct Grammar<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash +
|
|||
/// Graph, defined through this adjacency list.
|
||||
/// - key: states
|
||||
/// - value: list with read symbol and linked node.
|
||||
pub lr0_automaton: Option<RL0Automaton<N, T>>,
|
||||
pub lr0_automaton: Option<(RL0Automaton<N, T>, LR0Start<N>)>,
|
||||
|
||||
///
|
||||
pub slr_parse_table: Option<SlrParseTable<N, T>>,
|
||||
|
@ -278,3 +279,48 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar
|
|||
.unwrap_or(HashSet::new())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum NodeChild<N, S> {
|
||||
Child(ParseTree<N, S>),
|
||||
Data(S),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(dead_code)]
|
||||
pub struct ParseTree<N, S> {
|
||||
pub rule: Option<RuleIndex<N>>,
|
||||
pub childs: Vec<NodeChild<N, S>>,
|
||||
}
|
||||
|
||||
impl<N, S> ParseTree<N, S> {
|
||||
pub fn new(rule: Option<RuleIndex<N>>) -> Self {
|
||||
Self {
|
||||
rule,
|
||||
childs: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<N, T, S> PartialEq<Sentential<N, T>> for NodeChild<N, S>
|
||||
where
|
||||
N: PartialEq + Eq,
|
||||
T: PartialEq + Eq,
|
||||
S: PartialEq<T>,
|
||||
{
|
||||
fn eq(&self, other: &Sentential<N, T>) -> bool {
|
||||
use NodeChild::*;
|
||||
use Sentential::*;
|
||||
match (self, other) {
|
||||
(Data(s), Terminal(t)) if *s == *t => true,
|
||||
(
|
||||
Child(ParseTree {
|
||||
rule: Some((rule, _)),
|
||||
childs: _,
|
||||
}),
|
||||
NoneTerminal(nt),
|
||||
) if *rule == *nt => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ pub mod scanner;
|
|||
pub mod prelude {
|
||||
pub use crate::cfg::*;
|
||||
pub use crate::cfg::ll_grammar::*;
|
||||
pub use crate::cfg::lr0::*;
|
||||
pub use crate::cfg::lr0_grammar::*;
|
||||
pub use crate::cfg_grammar;
|
||||
pub use crate::double_enum;
|
||||
pub use crate::scanner::*;
|
||||
|
|
Loading…
Reference in a new issue