lr1 parser, unstable

This commit is contained in:
jusax23 2024-11-09 20:52:24 +01:00
parent a59ac9360c
commit 3339bf8fb0
Signed by: jusax23
GPG key ID: 499E2AA870C1CD41
8 changed files with 632 additions and 178 deletions

View file

@ -13,3 +13,6 @@ path = "src/main.rs"
[[bin]] [[bin]]
name = "book" name = "book"
[[bin]]
name = "g10"

87
src/bin/g10.rs Normal file
View file

@ -0,0 +1,87 @@
use rcompiler::prelude::*;
use regex::Match;
use std::collections::HashMap;
// Declares the token enums `BareTokens` and `Tokens` through the crate's
// `double_enum!` macro. In this file `Tokens` is what the scanner produces and
// `BareTokens` is the terminal type of the grammar.
// NOTE(review): presumably `BareTokens` is the payload-free mirror of `Tokens`
// (e.g. `Ident` without its `String`) — confirm against the macro definition.
double_enum!(
BareTokens, Tokens {
WhiteSpace,
Assign,
Add,
LBrace,
RBrace,
Ident(String),
}
);
// Scanner rules for `Tokens`: every entry pairs a regex with a closure that
// yields the token to emit for a match.
token_scanner!(
    Tokens,
    // `\s` already matches tab, newline and carriage return. The previous
    // pattern `^\s|\t|\n|\r` anchored only its first alternative, so the other
    // three could match anywhere in the remaining input.
    r"^\s" : |_,_| {
        Some(WhiteSpace)
    }
    r"^\+" : |_,_| {
        Some(Add)
    }
    r"^=" : |_,_| {
        Some(Assign)
    }
    r"^\(" : |_,_| {
        Some(LBrace)
    }
    r"^\)" : |_,_| {
        Some(RBrace)
    }
    // Identifier: a letter followed by any number of word characters.
    r"^[a-zA-Z](\w)*" : |_, m: Match<'_>| {
        Some(Ident(String::from(m.as_str())))
    }
);
/// Non-terminal symbols of the demo grammar defined in `grammer`.
#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
enum NoneTerminals {
/// `E -> E Add T | T`
E,
/// Start symbol: `P -> E`
P,
/// `T -> Ident LBrace E RBrace | Ident`
T,
}
impl<N> From<NoneTerminals> for Sentential<NoneTerminals, N> {
fn from(value: NoneTerminals) -> Self {
Sentential::NoneTerminal(value)
}
}
impl<T> From<BareTokens> for Sentential<T, BareTokens> {
fn from(value: BareTokens) -> Self {
Sentential::Terminal(value)
}
}
/// Builds the demo grammar over `NoneTerminals` and `BareTokens`.
///
/// The rules describe `+`-separated terms, where a term is either an
/// identifier or an identifier followed by a parenthesised expression.
fn grammer() -> Grammar<NoneTerminals, BareTokens> {
use BareTokens::*;
use NoneTerminals::*;
cfg_grammar![
// `P` is the start symbol and only wraps an expression.
start: P;
P -> E;
// Left-recursive list of terms joined by `Add`.
E -> E, Add, T;
E -> T;
// Call-like term `Ident ( E )` or a plain identifier.
T -> Ident, LBrace, E, RBrace;
T -> Ident;
]
}
/// Demo driver: scans `b(f)+c`, builds the FIRST/FOLLOW sets, the LR(1)
/// automaton and both the SLR and LR(1) parse tables for the demo grammar,
/// prints each of them and finally parses the input.
fn main() {
    let code = String::from("b(f)+c");
    // Whitespace tokens are dropped by the scanner before they reach the parser.
    let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);

    let mut grammar = grammer();
    grammar.gen_follow();
    println!("first: {:?}", grammar.first);
    println!("follow: {:?}", grammar.follow);
    grammar.gen_lr1_automaton();
    println!("automaton: {:?}", grammar.lr1_automaton);
    println!("conflict: {}", grammar.gen_slr_parse_table());
    println!("conflict: {}", grammar.gen_lr1_parse_table());
    // Print the generated table itself; this line used to print the automaton
    // a second time under the `parse_table` label.
    println!("parse_table: {:?}", grammar.lr1_parse_table);
    println!(
        "parsed: {:?}",
        grammar.slr_parser(&mut m.iter_mut()).parse()
    )
}

View file

@ -2,7 +2,7 @@ use std::{collections::HashMap, fmt::Debug, hash::Hash};
use super::{Grammar, NodeChild, ParseTree, Sentential}; use super::{Grammar, NodeChild, ParseTree, Sentential};
impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar<N, T> { impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone + Ord> Grammar<N, T> {
pub fn gen_ll_parse_table(&mut self) -> bool { pub fn gen_ll_parse_table(&mut self) -> bool {
if self.first.is_none() { if self.first.is_none() {
self.gen_first(); self.gen_first();
@ -10,9 +10,6 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar
if self.follow.is_none() { if self.follow.is_none() {
self.gen_follow(); self.gen_follow();
} }
if self.ll_parse_table.is_some() {
return false;
}
let mut conflict = false; let mut conflict = false;
// left derivation // left derivation
// when hiding N and T is next (None = e) // when hiding N and T is next (None = e)
@ -88,7 +85,7 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar
pub struct LLTabelParser<'a, N, T, S> pub struct LLTabelParser<'a, N, T, S>
where where
N: PartialEq + Eq + Hash + Clone, N: PartialEq + Eq + Hash + Clone,
T: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone + Ord,
S: Into<T> + PartialEq<T> + Clone, S: Into<T> + PartialEq<T> + Clone,
{ {
grammar: &'a Grammar<N, T>, grammar: &'a Grammar<N, T>,
@ -98,7 +95,7 @@ where
impl<'a, N, T, S> LLTabelParser<'a, N, T, S> impl<'a, N, T, S> LLTabelParser<'a, N, T, S>
where where
N: PartialEq + Eq + Hash + Clone + Debug, N: PartialEq + Eq + Hash + Clone + Debug,
T: PartialEq + Eq + Hash + Clone + Debug, T: PartialEq + Eq + Hash + Clone + Debug + Ord,
S: Into<T> + PartialEq<T> + Clone + Debug, S: Into<T> + PartialEq<T> + Clone + Debug,
{ {
pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> { pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> {

View file

@ -5,13 +5,20 @@ use std::{
rc::{Rc, Weak}, rc::{Rc, Weak},
}; };
use super::{Grammar, NodeChild, ParseTree, RuleIndex, Sentential}; use super::{
lr_parser::{LRTabelParser, LrAction, LrActionTable, LrGotoTable},
Grammar, RuleIndex, Sentential,
};
pub type RL0Automaton<N, T> = pub type RL0Automaton<N, T> =
HashMap<Rc<LR0State<N>>, HashMap<Sentential<N, T>, Weak<LR0State<N>>>>; HashMap<Rc<LR0State<N>>, HashMap<Sentential<N, T>, Weak<LR0State<N>>>>;
#[derive(Debug, Eq, PartialEq)] #[derive(Debug, Eq, PartialEq)]
/// general state of lr0 automaton
/// rule and reading point
pub struct LR0State<N: Hash + Eq>(HashSet<(RuleIndex<N>, usize)>); pub struct LR0State<N: Hash + Eq>(HashSet<(RuleIndex<N>, usize)>);
/// start state of lr0 automaton
pub type LR0Start<N> = Weak<LR0State<N>>; pub type LR0Start<N> = Weak<LR0State<N>>;
impl<N: Hash + Eq + Ord> Hash for LR0State<N> { impl<N: Hash + Eq + Ord> Hash for LR0State<N> {
@ -23,23 +30,13 @@ impl<N: Hash + Eq + Ord> Hash for LR0State<N> {
} }
} }
} }
#[derive(Debug)]
pub enum SlrAction<Shift, Reduce> {
Shift(Shift),
Reduce(Reduce),
}
/// None is $
pub type SlrActionTable<N, T> = HashMap<(usize, Option<T>), SlrAction<usize, RuleIndex<N>>>;
pub type SlrGotoTable<N> = HashMap<(usize, N), usize>;
pub type SlrParseTable<N, T> = (SlrActionTable<N, T>, SlrGotoTable<N>, usize);
impl<N, T> Grammar<N, T> impl<N, T> Grammar<N, T>
where where
N: PartialEq + Eq + Hash + Clone + Ord, N: PartialEq + Eq + Hash + Clone + Ord,
T: PartialEq + Eq + Hash + Clone + Ord, T: PartialEq + Eq + Hash + Clone + Ord,
{ {
pub fn next_kernel(&self, state: &LR0State<N>, read: &Sentential<N, T>) -> LR0State<N> { pub fn lr0_next_kernel(&self, state: &LR0State<N>, read: &Sentential<N, T>) -> LR0State<N> {
let mut next_state: LR0State<N> = LR0State(HashSet::new()); let mut next_state: LR0State<N> = LR0State(HashSet::new());
for ((from, rule_id), dot) in state.0.iter() { for ((from, rule_id), dot) in state.0.iter() {
let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else { let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else {
@ -51,7 +48,7 @@ where
} }
next_state next_state
} }
pub fn readable(&self, state: &LR0State<N>) -> HashSet<Sentential<N, T>> { pub fn lr0_readable(&self, state: &LR0State<N>) -> HashSet<Sentential<N, T>> {
let mut readables = HashSet::new(); let mut readables = HashSet::new();
for ((from, rule_id), dot) in state.0.iter() { for ((from, rule_id), dot) in state.0.iter() {
let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else { let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else {
@ -114,8 +111,8 @@ where
let mut vec = Vec::new(); let mut vec = Vec::new();
// add clozures from the kernels from all readable symbols // add clozures from the kernels from all readable symbols
for none_terminal in self.readable(&state) { for none_terminal in self.lr0_readable(&state) {
let next_state = self.lr0_clozure(self.next_kernel(&state, &none_terminal)); let next_state = self.lr0_clozure(self.lr0_next_kernel(&state, &none_terminal));
let rc = Rc::new(next_state); let rc = Rc::new(next_state);
if let Some((k, _)) = out.get_key_value(&rc) { if let Some((k, _)) = out.get_key_value(&rc) {
vec.push((none_terminal, Rc::downgrade(k))); vec.push((none_terminal, Rc::downgrade(k)));
@ -136,6 +133,9 @@ where
} }
pub fn gen_slr_parse_table(&mut self) -> bool { pub fn gen_slr_parse_table(&mut self) -> bool {
if self.first.is_none() {
self.gen_first();
}
if self.follow.is_none() { if self.follow.is_none() {
self.gen_follow(); self.gen_follow();
} }
@ -158,15 +158,15 @@ where
.upgrade() .upgrade()
.and_then(|rc| ids.get(&rc)) .and_then(|rc| ids.get(&rc))
.expect("Found broken state in slr parse table gen."); .expect("Found broken state in slr parse table gen.");
let mut action: SlrActionTable<N, T> = HashMap::new(); let mut action: LrActionTable<N, T> = HashMap::new();
let mut goto: SlrGotoTable<N> = HashMap::new(); let mut goto: LrGotoTable<N> = HashMap::new();
let mut conflict = false; let mut conflict = false;
for (state, to) in lr0_automaton.0.iter() { for (state, to) in lr0_automaton.0.iter() {
let id = ids let id = ids
.get(state) .get(state)
.expect("Found broken state in slr parse table gen."); .expect("Found broken state in slr parse table gen.");
for go in self.readable(state) { for go in self.lr0_readable(state) {
let Some(to) = to.get(&go).and_then(|to| to.upgrade()) else { let Some(to) = to.get(&go).and_then(|to| to.upgrade()) else {
continue; continue;
}; };
@ -178,7 +178,7 @@ where
match go { match go {
Sentential::Terminal(t) => { Sentential::Terminal(t) => {
conflict |= action conflict |= action
.insert((*id, Some(t)), SlrAction::Shift(*to_id)) .insert((*id, Some(t)), LrAction::Shift(*to_id))
.is_some(); .is_some();
} }
Sentential::NoneTerminal(nt) => { Sentential::NoneTerminal(nt) => {
@ -194,7 +194,7 @@ where
if to.len() <= *dot { if to.len() <= *dot {
for follow in self.follow(from) { for follow in self.follow(from) {
conflict |= action conflict |= action
.insert((*id, follow), SlrAction::Reduce((from.clone(), *rule_id))) .insert((*id, follow), LrAction::Reduce((from.clone(), *rule_id)))
.is_some(); .is_some();
} }
} }
@ -207,137 +207,16 @@ where
pub fn slr_parser<'a, S: Into<T> + PartialEq<T> + Clone>( pub fn slr_parser<'a, S: Into<T> + PartialEq<T> + Clone>(
&'a self, &'a self,
iter: &'a mut dyn Iterator<Item = Result<S, String>>, iter: &'a mut dyn Iterator<Item = Result<S, String>>,
) -> SLRTabelParser<N, T, S> { ) -> LRTabelParser<N, T, S> {
assert!( assert!(
self.slr_parse_table.is_some(), self.slr_parse_table.is_some(),
"Please call gen_slr_parse_table before this!" "Please call gen_slr_parse_table before this!"
); );
SLRTabelParser { LRTabelParser {
input: iter, input: iter,
grammar: self, start_rule: &self.start,
} rules: &self.rules,
} parse_table: self.slr_parse_table.as_ref().unwrap(),
pub fn get_slr_action(
&self,
state: &usize,
next: &Option<T>,
) -> Option<&SlrAction<usize, (N, usize)>> {
assert!(
self.slr_parse_table.is_some(),
"Please call gen_slr_parse_table before this!"
);
self.slr_parse_table
.as_ref()
.unwrap()
.0
.get(&(*state, next.clone()))
}
pub fn get_slr_goto(&self, state: &usize, next: &N) -> Option<&usize> {
assert!(
self.slr_parse_table.is_some(),
"Please call gen_slr_parse_table before this!"
);
self.slr_parse_table
.as_ref()
.unwrap()
.1
.get(&(*state, next.clone()))
}
}
pub struct SLRTabelParser<'a, N, T, S>
where
N: PartialEq + Eq + Hash + Clone,
T: PartialEq + Eq + Hash + Clone,
S: Into<T> + PartialEq<T> + Clone,
{
grammar: &'a Grammar<N, T>,
input: &'a mut dyn Iterator<Item = Result<S, String>>,
}
impl<'a, N, T, S> SLRTabelParser<'a, N, T, S>
where
N: PartialEq + Eq + Hash + Clone + Debug + Ord,
T: PartialEq + Eq + Hash + Clone + Debug + Ord,
S: Into<T> + PartialEq<T> + Clone + Debug,
{
pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> {
let mut stack: Vec<(NodeChild<N, S>, usize)> = Vec::new();
let mut next = match self.input.next() {
Some(Ok(d)) => Some(d),
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
None => None,
};
let mut first = true;
loop {
let state = if first {
self.grammar.slr_parse_table.as_ref().unwrap().2
} else {
let Some(state) = stack.last() else {
return Err("Unexpected EOS".into());
};
state.1
};
first = false;
let Some(current_state) = self
.grammar
.get_slr_action(&state, &next.as_ref().map(|f| f.clone().into()))
else {
return Err(format!(
"Unexpected Token: {}",
next.map(|f| format!("{f:?}"))
.unwrap_or("end of file".to_string())
));
};
match current_state {
SlrAction::Shift(to) => {
stack.push((NodeChild::Data(next.expect("Can not shift on EOF.")), *to));
next = match self.input.next() {
Some(Ok(d)) => Some(d),
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
None => None,
};
}
SlrAction::Reduce((rule, ind)) => {
let Some(r) = self.grammar.rules.get(rule).and_then(|e| e.get(*ind)) else {
return Err(format!("Invalid rule: {:?}-{}", rule, ind));
};
let mut childs = Vec::new();
for elem in r.iter().rev() {
let Some(last) = stack.pop() else {
return Err("Unexpected EOF".into());
};
if last.0 != *elem {
return Err(format!("Broken Stack: {:?} and {:?}", last.0, elem));
}
childs.push(last);
}
if self.grammar.start == *rule {
return Ok(ParseTree {
rule: Some((rule.clone(), *ind)),
childs: childs.into_iter().rev().map(|(a, _)| a.clone()).collect(),
});
}
let Some(state) = stack.last() else {
return Err("Unexpected EOS".into());
};
let Some(next) = self.grammar.get_slr_goto(&state.1, rule) else {
return Err(format!(
"Invalid reduction: state: {} rule: {:?}",
state.1, rule
));
};
stack.push((
NodeChild::Child(ParseTree {
rule: Some((rule.clone(), *ind)),
childs: childs.into_iter().rev().map(|(d, _)| d).collect(),
}),
*next,
));
}
}
} }
} }
} }

299
src/cfg/lr1_grammar.rs Normal file
View file

@ -0,0 +1,299 @@
use super::{
lr_parser::{LRTabelParser, LrAction, LrActionTable, LrGotoTable},
Grammar, RuleIndex, Sentential,
};
use std::{
cmp::Ordering,
collections::{HashMap, HashSet},
hash::{Hash, Hasher},
rc::{Rc, Weak},
};
/// Lookahead set of an LR(1) item; the entry `None` stands for `$`.
///
/// Ordering and hashing are both derived from the entries in ascending order,
/// so they do not depend on the iteration order of the underlying `HashSet`
/// and agree with the derived set equality.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct LR1Lookahead<T: Hash + Eq>(HashSet<Option<T>>);

impl<T: Hash + Eq + Ord> LR1Lookahead<T> {
    /// Returns references to all entries, sorted ascending.
    fn sorted_entries(&self) -> Vec<&Option<T>> {
        let mut entries = self.0.iter().collect::<Vec<_>>();
        entries.sort();
        entries
    }
}

impl<T: Hash + Eq> Default for LR1Lookahead<T> {
    /// Creates an empty lookahead set.
    fn default() -> Self {
        LR1Lookahead(HashSet::default())
    }
}

impl<T: Hash + Eq + Ord> Ord for LR1Lookahead<T> {
    /// Compares the sorted entry lists of both sets lexicographically.
    fn cmp(&self, other: &Self) -> Ordering {
        let lhs = self.sorted_entries();
        let rhs = other.sorted_entries();
        lhs.cmp(&rhs)
    }
}

impl<T: Hash + Eq + Ord> PartialOrd for LR1Lookahead<T> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl<T: Hash + Eq + Ord> Hash for LR1Lookahead<T> {
    /// Feeds the entries to the hasher in ascending order.
    fn hash<H: Hasher>(&self, state: &mut H) {
        for entry in self.sorted_entries() {
            entry.hash(state);
        }
    }
}
/// General state of the LR(1) automaton.
///
/// A state is a set of items; each item holds the rule index, the lookahead
/// set and the position of the reading point (dot) inside the rule.
#[derive(Debug, Eq, PartialEq)]
pub struct LR1State<N: Hash + Eq, T: Hash + Eq + Ord>(
    HashSet<(RuleIndex<N>, LR1Lookahead<T>, usize)>,
);

impl<N: Hash + Eq + Ord, T: Hash + Eq + Ord> Hash for LR1State<N, T> {
    /// Hashes the items in ascending order so the result does not depend on
    /// the iteration order of the underlying `HashSet`.
    fn hash<H: Hasher>(&self, state: &mut H) {
        let mut items = self.0.iter().collect::<Vec<_>>();
        items.sort();
        for item in items {
            item.hash(state);
        }
    }
}
/// Start state of the LR(1) automaton, held weakly; the owning `Rc` is a key
/// of the `RL1Automaton` map.
pub type LR1Start<N, T> = Weak<LR1State<N, T>>;
/// LR(1) automaton as an adjacency map: every state owns a table from the
/// symbol read to a weak reference of the successor state.
pub type RL1Automaton<N, T> =
HashMap<Rc<LR1State<N, T>>, HashMap<Sentential<N, T>, Weak<LR1State<N, T>>>>;
impl<N, T> Grammar<N, T>
where
N: PartialEq + Eq + Hash + Clone + Ord,
T: PartialEq + Eq + Hash + Clone + Ord,
{
/// Computes the kernel reached from `state` by reading `read`.
///
/// Every item whose symbol after the dot equals `read` is copied with the dot
/// moved one position forward and its lookahead unchanged. Items that refer
/// to an unknown rule are ignored.
pub fn lr1_next_kernel(
    &self,
    state: &LR1State<N, T>,
    read: &Sentential<N, T>,
) -> LR1State<N, T> {
    let advanced = state
        .0
        .iter()
        .filter_map(|((from, rule_id), lookahead, dot)| {
            let body = self.rules.get(from)?.get(*rule_id)?;
            // `then` yields `None` for items that cannot read `read`.
            (body.get(*dot)? == read)
                .then(|| ((from.clone(), *rule_id), lookahead.clone(), dot + 1))
        });
    LR1State(advanced.collect())
}
/// Collects every symbol (terminal or non-terminal) that appears directly
/// after the dot of some item in `state`.
///
/// Items with the dot at the end of their rule, or referring to an unknown
/// rule, contribute nothing.
pub fn lr1_readable(&self, state: &LR1State<N, T>) -> HashSet<Sentential<N, T>> {
    state
        .0
        .iter()
        .filter_map(|((from, rule_id), _, dot)| {
            let body = self.rules.get(from)?.get(*rule_id)?;
            body.get(*dot).cloned()
        })
        .collect()
}
/// Extends `state` to its LR(1) closure and returns it.
///
/// For every item with a non-terminal `B` directly after the dot, all rules
/// of `B` are added with the dot at position 0. Their lookahead is the FIRST
/// set of the symbols following `B` (without epsilon), plus the lookahead of
/// the originating item when those symbols can all produce epsilon. The
/// process repeats until no new item is inserted.
///
/// # Panics
/// Panics if `gen_first` or `gen_produces_epsilon` has not been called.
pub fn lr1_clozure(&self, mut state: LR1State<N, T>) -> LR1State<N, T> {
    assert!(self.first.is_some(), "Please call gen_first before this!");
    assert!(
        self.produces_epsilon.is_some(),
        "Please call gen_produces_epsilon before this!"
    );
    loop {
        // Snapshot the items to expand first; the state is mutated below.
        let mut pending = Vec::new();
        for ((from, rule_id), lookahead, dot) in state.0.iter() {
            let Some(body) = self.rules.get(from).and_then(|alts| alts.get(*rule_id)) else {
                continue;
            };
            if let Some(Sentential::NoneTerminal(after_dot)) = body.get(*dot) {
                pending.push((
                    after_dot.clone(),
                    body[dot + 1..].to_vec(),
                    lookahead.clone(),
                ));
            }
        }

        let mut grew = false;
        for (nt, rest, inherited) in pending {
            let Some(alternatives) = self.rules.get(&nt) else {
                continue;
            };
            // The parent's lookahead is visible only through a nullable rest.
            let mut lookahead = if self.can_produce_epsilon_sen(&rest) {
                inherited
            } else {
                LR1Lookahead::default()
            };
            // Add FIRST(rest); `flatten` skips the epsilon entry (`None`).
            for terminal in self.first(&rest).iter().flatten() {
                lookahead.0.insert(Some(terminal.clone()));
            }
            for alternative in 0..alternatives.len() {
                grew |= state
                    .0
                    .insert(((nt.clone(), alternative), lookahead.clone(), 0));
            }
        }

        if !grew {
            return state;
        }
    }
}
/// Builds the LR(1) automaton and stores it, together with a weak handle to
/// its start state, in `self.lr1_automaton`.
///
/// FIRST sets and the epsilon set are generated on demand. The start state is
/// the closure of all rules of the start symbol with lookahead `$`; further
/// states are discovered with a worklist.
pub fn gen_lr1_automaton(&mut self) {
    if self.first.is_none() {
        self.gen_first();
    }
    if self.produces_epsilon.is_none() {
        self.gen_produces_epsilon();
    }

    // State zero: every rule of the start symbol, dot at 0, lookahead `$`.
    let start_rules = self.rules.get(&self.start).map_or(0, |alts| alts.len());
    let kernel: HashSet<_> = (0..start_rules)
        .map(|alternative| {
            (
                (self.start.clone(), alternative),
                LR1Lookahead(HashSet::from([None])),
                0,
            )
        })
        .collect();
    let initial = Rc::new(self.lr1_clozure(LR1State(kernel)));
    let start = Rc::downgrade(&initial);

    let mut automaton: RL1Automaton<N, T> = HashMap::new();
    let mut worklist = vec![Rc::downgrade(&initial)];
    automaton.insert(initial, HashMap::new());

    // Expand states for as long as unprocessed ones remain.
    while let Some(handle) = worklist.pop() {
        let Some(current) = handle.upgrade() else {
            continue;
        };
        // Outgoing edges of `current`, one per readable symbol.
        let mut edges = Vec::new();
        for symbol in self.lr1_readable(&current) {
            let target = Rc::new(self.lr1_clozure(self.lr1_next_kernel(&current, &symbol)));
            if let Some((known, _)) = automaton.get_key_value(&target) {
                // Already known: link to the stored instance.
                edges.push((symbol, Rc::downgrade(known)));
            } else {
                // New state: register it and schedule it for expansion.
                worklist.push(Rc::downgrade(&target));
                edges.push((symbol, Rc::downgrade(&target)));
                automaton.insert(target, HashMap::new());
            }
        }
        // No duplicate check needed: `lr1_readable` returns a set.
        if let Some(outgoing) = automaton.get_mut(&current) {
            outgoing.extend(edges);
        }
    }
    self.lr1_automaton = Some((automaton, start));
}
/// Builds the LR(1) action and goto tables from the LR(1) automaton and
/// stores them, together with the start state id, in `self.lr1_parse_table`.
///
/// Missing prerequisites (FIRST, FOLLOW, the automaton) are generated first.
/// State ids are assigned by enumerating the automaton's states.
///
/// Returns `true` if the grammar has a conflict, i.e. a table cell received
/// two different entries. Writing the same reduction twice into one cell is
/// not a conflict: it happens when a state holds two items with the same rule
/// and dot whose lookahead sets overlap.
///
/// # Panics
/// Panics if the automaton refers to a state that is not one of its keys.
pub fn gen_lr1_parse_table(&mut self) -> bool {
    if self.first.is_none() {
        self.gen_first();
    }
    // FOLLOW is not consulted below; it is still generated so the cached sets
    // are populated exactly as before this fix.
    if self.follow.is_none() {
        self.gen_follow();
    }
    if self.lr1_automaton.is_none() {
        self.gen_lr1_automaton();
    }
    let lr1_automaton = self
        .lr1_automaton
        .as_ref()
        .expect("lr1 automaton was generated above");

    let ids: HashMap<Rc<LR1State<N, T>>, usize> = HashMap::from_iter(
        lr1_automaton
            .0
            .iter()
            .enumerate()
            .map(|(id, (state, _))| (state.clone(), id)),
    );

    let start = *lr1_automaton
        .1
        .upgrade()
        .and_then(|rc| ids.get(&rc))
        .expect("Found broken state in lr1 parse table gen.");

    let mut action: LrActionTable<N, T> = HashMap::new();
    let mut goto: LrGotoTable<N> = HashMap::new();
    let mut conflict = false;
    for (state, edges) in lr1_automaton.0.iter() {
        let id = ids
            .get(state)
            .expect("Found broken state in lr1 parse table gen.");

        // Shift and goto entries: one per outgoing edge.
        for go in self.lr1_readable(state) {
            let Some(target) = edges.get(&go).and_then(|weak| weak.upgrade()) else {
                continue;
            };
            let to_id = ids
                .get(&target)
                .expect("Found broken state in lr1 parse table gen.");

            match go {
                Sentential::Terminal(t) => {
                    conflict |= action
                        .insert((*id, Some(t)), LrAction::Shift(*to_id))
                        .is_some();
                }
                Sentential::NoneTerminal(nt) => {
                    conflict |= goto.insert((*id, nt), *to_id).is_some();
                }
            };
        }

        // Reduce entries: one per lookahead of every completed item.
        for ((from, rule_id), lookahead, dot) in state.0.iter() {
            let Some(body) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else {
                continue;
            };
            if body.len() > *dot {
                continue;
            }
            for terminal in lookahead.0.iter() {
                let displaced = action.insert(
                    (*id, terminal.clone()),
                    LrAction::Reduce((from.clone(), *rule_id)),
                );
                if let Some(previous) = displaced {
                    // Only a different entry in the same cell is a conflict.
                    let same_reduction = matches!(
                        &previous,
                        LrAction::Reduce((n, i)) if n == from && i == rule_id
                    );
                    conflict |= !same_reduction;
                }
            }
        }
    }
    self.lr1_parse_table = Some((action, goto, start));
    conflict
}
/// Creates a table-driven parser over `iter` that uses the LR(1) parse table
/// of this grammar.
///
/// # Panics
/// Panics if `gen_lr1_parse_table` has not been called before.
pub fn lr1_parser<'a, S: Into<T> + PartialEq<T> + Clone>(
    &'a self,
    iter: &'a mut dyn Iterator<Item = Result<S, String>>,
) -> LRTabelParser<N, T, S> {
    assert!(
        self.lr1_parse_table.is_some(),
        "Please call gen_lr1_parse_table before this!"
    );
    // Cannot fail: presence was asserted above.
    let parse_table = self.lr1_parse_table.as_ref().unwrap();
    LRTabelParser {
        start_rule: &self.start,
        rules: &self.rules,
        parse_table,
        input: iter,
    }
}
}

133
src/cfg/lr_parser.rs Normal file
View file

@ -0,0 +1,133 @@
use std::{collections::HashMap, fmt::Debug, hash::Hash};
use super::{NodeChild, ParseTree, RuleIndex, Sentential};
/// Entry of an LR action table.
///
/// In `LrActionTable` the shift payload is the id of the state to enter and
/// the reduce payload is the index of the rule to reduce by.
#[derive(Debug)]
pub enum LrAction<Shift, Reduce> {
/// Consume the next token and enter the given state.
Shift(Shift),
/// Replace the right-hand side on top of the stack by the given rule.
Reduce(Reduce),
}
/// Action table keyed by (state id, lookahead terminal); a `None` lookahead is `$`.
pub type LrActionTable<N, T> = HashMap<(usize, Option<T>), LrAction<usize, RuleIndex<N>>>;
/// Goto table keyed by (state id, non-terminal), yielding the successor state id.
pub type LrGotoTable<N> = HashMap<(usize, N), usize>;
/// Complete parse table: action table, goto table and the id of the start state.
pub type LrParseTable<N, T> = (LrActionTable<N, T>, LrGotoTable<N>, usize);
/// Table-driven LR parser shared by the SLR and LR(1) table generators.
///
/// It borrows the grammar data it needs and reads tokens of type `S`, which
/// are converted into terminals `T` for the table lookups.
pub struct LRTabelParser<'a, N, T, S>
where
N: Eq + Hash + Clone,
T: Eq + Hash + Clone + Ord,
S: Into<T> + PartialEq<T> + Clone,
{
/// Start symbol of the grammar; reducing to it ends the parse.
pub start_rule: &'a N,
/// Production rules, indexed by non-terminal and alternative.
pub rules: &'a HashMap<N, Vec<Vec<Sentential<N, T>>>>,
/// Action table, goto table and start state id.
pub parse_table: &'a LrParseTable<N, T>,
/// Token source; an `Err` item aborts the parse as an invalid token.
pub input: &'a mut dyn Iterator<Item = Result<S, String>>,
}
impl<'a, N, T, S> LRTabelParser<'a, N, T, S>
where
    N: Eq + Hash + Clone,
    T: Eq + Hash + Clone + Ord,
    S: Into<T> + PartialEq<T> + Clone,
{
    /// Looks up the action for `state` under the lookahead `next`
    /// (`None` is `$`); returns `None` if the cell is empty.
    pub fn get_action(
        &self,
        state: &usize,
        next: &Option<T>,
    ) -> Option<&LrAction<usize, (N, usize)>> {
        let (actions, _, _) = self.parse_table;
        let key = (*state, next.clone());
        actions.get(&key)
    }

    /// Looks up the state entered from `state` after reducing to `next`;
    /// returns `None` if the cell is empty.
    pub fn get_goto(&self, state: &usize, next: &N) -> Option<&usize> {
        let (_, gotos, _) = self.parse_table;
        let key = (*state, next.clone());
        gotos.get(&key)
    }
}
impl<'a, N, T, S> LRTabelParser<'a, N, T, S>
where
N: Eq + Hash + Clone + Debug + Ord,
T: Eq + Hash + Clone + Debug + Ord,
S: Into<T> + PartialEq<T> + Clone + Debug,
{
pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> {
let mut stack: Vec<(NodeChild<N, S>, usize)> = Vec::new();
let mut next = match self.input.next() {
Some(Ok(d)) => Some(d),
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
None => None,
};
let mut first = true;
loop {
let state = if first {
// start with first state
self.parse_table.2
} else {
let Some(state) = stack.last() else {
return Err("Unexpected EOS 1".into());
};
state.1
};
first = false;
let Some(current_state) =
self.get_action(&state, &next.as_ref().map(|f| f.clone().into()))
else {
return Err(format!(
"Unexpected Token: {}",
next.map(|f| format!("{f:?}"))
.unwrap_or("end of file".to_string())
));
};
println!("next: {next:?}, state: {current_state:?}, stack: {stack:?}");
match current_state {
LrAction::Shift(to) => {
stack.push((NodeChild::Data(next.expect("Can not shift on EOF.")), *to));
next = match self.input.next() {
Some(Ok(d)) => Some(d),
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
None => None,
};
}
LrAction::Reduce((rule, ind)) => {
let Some(r) = self.rules.get(rule).and_then(|e| e.get(*ind)) else {
return Err(format!("Invalid rule: {:?}-{}", rule, ind));
};
let mut childs = Vec::new();
for elem in r.iter().rev() {
let Some(last) = stack.pop() else {
return Err("Unexpected EOF".into());
};
if last.0 != *elem {
return Err(format!("Broken Stack: {:?} and {:?}", last.0, elem));
}
childs.push(last);
}
if *self.start_rule == *rule {
return Ok(ParseTree {
rule: Some((rule.clone(), *ind)),
childs: childs.into_iter().rev().map(|(a, _)| a.clone()).collect(),
});
}
let Some(state) = stack.last() else {
return Err("Unexpected EOS 2".into());
};
let Some(next) = self.get_goto(&state.1, rule) else {
return Err(format!(
"Invalid reduction: state: {} rule: {:?}",
state.1, rule
));
};
stack.push((
NodeChild::Child(ParseTree {
rule: Some((rule.clone(), *ind)),
childs: childs.into_iter().rev().map(|(d, _)| d).collect(),
}),
*next,
));
}
}
}
}
}

View file

@ -4,10 +4,14 @@ use std::{
hash::Hash, hash::Hash,
}; };
use lr0_grammar::{LR0Start, RL0Automaton, SlrParseTable}; use lr0_grammar::{LR0Start, RL0Automaton};
use lr1_grammar::{LR1Start, RL1Automaton};
use lr_parser::LrParseTable;
pub mod ll_grammar; pub mod ll_grammar;
pub mod lr0_grammar; pub mod lr0_grammar;
pub mod lr1_grammar;
pub mod lr_parser;
#[macro_export] #[macro_export]
macro_rules! cfg_grammar { macro_rules! cfg_grammar {
@ -27,15 +31,7 @@ macro_rules! cfg_grammar {
} }
map.get_mut(&$left).unwrap().push(vec![$($right.into()),*]); map.get_mut(&$left).unwrap().push(vec![$($right.into()),*]);
})* })*
$crate::cfg::Grammar { $crate::cfg::Grammar::new($start, map)
start: $start,
rules: map,
first: None,
follow: None,
ll_parse_table: None,
lr0_automaton: None,
slr_parse_table: None,
}
} }
}; };
} }
@ -70,9 +66,14 @@ impl<T: Ord, N: Ord> Ord for Sentential<N, T> {
pub type RuleIndex<N> = (N, usize); pub type RuleIndex<N> = (N, usize);
pub struct Grammar<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> { pub struct Grammar<N, T>
where
N: PartialEq + Eq + Hash + Clone,
T: PartialEq + Eq + Hash + Clone + Ord,
{
pub start: N, pub start: N,
pub rules: HashMap<N, Vec<Vec<Sentential<N, T>>>>, pub rules: HashMap<N, Vec<Vec<Sentential<N, T>>>>,
pub produces_epsilon: Option<HashSet<N>>,
/// none is epsilon /// none is epsilon
pub first: Option<HashMap<N, HashSet<Option<T>>>>, pub first: Option<HashMap<N, HashSet<Option<T>>>>,
/// none is $ /// none is $
@ -89,22 +90,73 @@ pub struct Grammar<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash +
pub lr0_automaton: Option<(RL0Automaton<N, T>, LR0Start<N>)>, pub lr0_automaton: Option<(RL0Automaton<N, T>, LR0Start<N>)>,
/// ///
pub slr_parse_table: Option<SlrParseTable<N, T>>, pub slr_parse_table: Option<LrParseTable<N, T>>,
///
pub lr1_automaton: Option<(RL1Automaton<N, T>, LR1Start<N, T>)>,
///
pub lr1_parse_table: Option<LrParseTable<N, T>>,
}
impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone + Ord> Grammar<N, T> {
/// Creates a grammar from its start symbol and production rules.
///
/// All derived data (epsilon set, FIRST and FOLLOW sets, automata and parse
/// tables) starts out as `None` and is filled in by the `gen_*` methods.
pub fn new(start: N, rules: HashMap<N, Vec<Vec<Sentential<N, T>>>>) -> Self {
Self {
start,
rules,
produces_epsilon: None,
first: None,
follow: None,
ll_parse_table: None,
lr0_automaton: None,
slr_parse_table: None,
lr1_automaton: None,
lr1_parse_table: None,
}
}
pub fn gen_produces_epsilon(&mut self) {
let mut out: HashSet<N> = HashSet::new();
loop {
let mut change = false;
for (from, to) in self.rules.iter() {
for to in to.iter() {
if to.iter().all(|sen| match sen {
Sentential::Terminal(_) => false,
Sentential::NoneTerminal(a) => out.contains(a),
}) {
change |= out.insert(from.clone());
}
}
}
if !change {
break;
}
}
self.produces_epsilon = Some(out);
} }
impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar<N, T> {
pub fn can_produce_epsilon(&self, rule: &Sentential<N, T>) -> bool { pub fn can_produce_epsilon(&self, rule: &Sentential<N, T>) -> bool {
assert!(
self.produces_epsilon.is_some(),
"Please call gen_produces_epsilon before this!"
);
match rule { match rule {
Sentential::Terminal(_) => false, Sentential::Terminal(_) => false,
Sentential::NoneTerminal(nt) => self Sentential::NoneTerminal(nt) => self.produces_epsilon.as_ref().unwrap().contains(nt),
.rules
.get(nt)
.map(|f| f.iter().any(|v| v.is_empty()))
.unwrap_or(false),
} }
} }
/// Returns `true` if every symbol of `rule` can produce epsilon; an empty
/// sequence therefore yields `true`.
///
/// # Panics
/// Panics (through `can_produce_epsilon`) if a symbol is checked before
/// `gen_produces_epsilon` has been called.
pub fn can_produce_epsilon_sen(&self, rule: &Vec<Sentential<N, T>>) -> bool {
    for symbol in rule {
        if !self.can_produce_epsilon(symbol) {
            return false;
        }
    }
    true
}
pub fn gen_first(&mut self) { pub fn gen_first(&mut self) {
if self.produces_epsilon.is_none() {
self.gen_produces_epsilon();
}
let mut first: HashMap<N, HashSet<Option<T>>> = HashMap::new(); let mut first: HashMap<N, HashSet<Option<T>>> = HashMap::new();
loop { loop {
let mut change = false; let mut change = false;
@ -165,6 +217,8 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar
self.first = Some(first); self.first = Some(first);
} }
/// get first of sentential
/// None is e
pub fn first(&self, sent: &Vec<Sentential<N, T>>) -> HashSet<Option<T>> { pub fn first(&self, sent: &Vec<Sentential<N, T>>) -> HashSet<Option<T>> {
assert!(self.first.is_some(), "Please call gen_first before this!"); assert!(self.first.is_some(), "Please call gen_first before this!");
let mut out = HashSet::<Option<T>>::new(); let mut out = HashSet::<Option<T>>::new();

View file

@ -104,7 +104,7 @@ token_scanner!(
} }
); );
#[derive(Debug, PartialEq, Eq, Hash, Clone)] #[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
enum NoneTerminals { enum NoneTerminals {
P, // Program, ; separated P, // Program, ; separated
L, // Line of code L, // Line of code
@ -193,9 +193,11 @@ fn main() {
let mut grammar = grammer(); let mut grammar = grammer();
grammar.gen_follow(); grammar.gen_follow();
println!("first: {:?}", grammar.first); //println!("first: {:?}", grammar.first);
println!("follow: {:?}", grammar.follow); //println!("follow: {:?}", grammar.follow);
let conflict = grammar.gen_ll_parse_table(); grammar.gen_lr1_automaton();
println!("conflict: {:?}", grammar.lr1_automaton);
/* let conflict = grammar.gen_ll_parse_table();
println!("conflict: {conflict}"); println!("conflict: {conflict}");
println!("prase table: {:?}", grammar.ll_parse_table); println!("prase table: {:?}", grammar.ll_parse_table);
println!("parse\n\n"); println!("parse\n\n");
@ -205,5 +207,5 @@ fn main() {
.ll_parser(&mut m.iter_mut()) .ll_parser(&mut m.iter_mut())
.parse() .parse()
.map(|tree| tree.clean()) .map(|tree| tree.clean())
) ) */
} }