lr1 parser, unstable
This commit is contained in:
parent
a59ac9360c
commit
3339bf8fb0
8 changed files with 632 additions and 178 deletions
|
@ -13,3 +13,6 @@ path = "src/main.rs"
|
|||
|
||||
[[bin]]
|
||||
name = "book"
|
||||
|
||||
[[bin]]
|
||||
name = "g10"
|
||||
|
|
87
src/bin/g10.rs
Normal file
87
src/bin/g10.rs
Normal file
|
@ -0,0 +1,87 @@
|
|||
use rcompiler::prelude::*;
|
||||
use regex::Match;
|
||||
use std::collections::HashMap;
|
||||
|
||||
double_enum!(
|
||||
BareTokens, Tokens {
|
||||
WhiteSpace,
|
||||
Assign,
|
||||
Add,
|
||||
LBrace,
|
||||
RBrace,
|
||||
Ident(String),
|
||||
}
|
||||
);
|
||||
|
||||
token_scanner!(
|
||||
Tokens,
|
||||
r"^\s|\t|\n|\r" : |_,_| {
|
||||
Some(WhiteSpace)
|
||||
}
|
||||
r"^\+" : |_,_| {
|
||||
Some(Add)
|
||||
}
|
||||
r"^=" : |_,_| {
|
||||
Some(Assign)
|
||||
}
|
||||
r"^\(" : |_,_| {
|
||||
Some(LBrace)
|
||||
}
|
||||
r"^\)" : |_,_| {
|
||||
Some(RBrace)
|
||||
}
|
||||
r"^[a-zA-Z](\w)*" : |_, m: Match<'_>| {
|
||||
Some(Ident(String::from(m.as_str())))
|
||||
}
|
||||
);
|
||||
|
||||
/// Non-terminal symbols of the demo grammar built in `grammer()`.
///
/// The declaration order is significant: the derived `PartialOrd`/`Ord`
/// compare variants by position (`E < P < T`).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum NoneTerminals {
    /// Expression: `E -> E Add T | T`.
    E,
    /// Program; the start symbol: `P -> E`.
    P,
    /// Term: `T -> Ident LBrace E RBrace | Ident`.
    T,
}
|
||||
|
||||
/// Lets a bare non-terminal be used wherever a sentential symbol is expected
/// (e.g. via `.into()`), independent of the terminal type `N`.
impl<N> From<NoneTerminals> for Sentential<NoneTerminals, N> {
    fn from(value: NoneTerminals) -> Self {
        Self::NoneTerminal(value)
    }
}
|
||||
|
||||
/// Lets a bare token be used wherever a sentential symbol is expected
/// (e.g. via `.into()`), independent of the non-terminal type `T`.
impl<T> From<BareTokens> for Sentential<T, BareTokens> {
    fn from(value: BareTokens) -> Self {
        Self::Terminal(value)
    }
}
|
||||
|
||||
fn grammer() -> Grammar<NoneTerminals, BareTokens> {
|
||||
use BareTokens::*;
|
||||
use NoneTerminals::*;
|
||||
cfg_grammar![
|
||||
start: P;
|
||||
P -> E;
|
||||
E -> E, Add, T;
|
||||
E -> T;
|
||||
T -> Ident, LBrace, E, RBrace;
|
||||
T -> Ident;
|
||||
]
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let code = String::from("b(f)+c");
|
||||
let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);
|
||||
|
||||
let mut grammar = grammer();
|
||||
grammar.gen_follow();
|
||||
println!("first: {:?}", grammar.first);
|
||||
println!("follow: {:?}", grammar.follow);
|
||||
grammar.gen_lr1_automaton();
|
||||
println!("automaton: {:?}", grammar.lr1_automaton);
|
||||
println!("conflict: {}", grammar.gen_slr_parse_table());
|
||||
println!("conflict: {}", grammar.gen_lr1_parse_table());
|
||||
println!("parse_table: {:?}", grammar.lr1_automaton);
|
||||
println!(
|
||||
"parsed: {:?}",
|
||||
grammar.slr_parser(&mut m.iter_mut()).parse()
|
||||
)
|
||||
}
|
|
@ -2,7 +2,7 @@ use std::{collections::HashMap, fmt::Debug, hash::Hash};
|
|||
|
||||
use super::{Grammar, NodeChild, ParseTree, Sentential};
|
||||
|
||||
impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar<N, T> {
|
||||
impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone + Ord> Grammar<N, T> {
|
||||
pub fn gen_ll_parse_table(&mut self) -> bool {
|
||||
if self.first.is_none() {
|
||||
self.gen_first();
|
||||
|
@ -10,9 +10,6 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar
|
|||
if self.follow.is_none() {
|
||||
self.gen_follow();
|
||||
}
|
||||
if self.ll_parse_table.is_some() {
|
||||
return false;
|
||||
}
|
||||
let mut conflict = false;
|
||||
// left derivation
|
||||
// when hiding N and T is next (None = e)
|
||||
|
@ -88,7 +85,7 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar
|
|||
pub struct LLTabelParser<'a, N, T, S>
|
||||
where
|
||||
N: PartialEq + Eq + Hash + Clone,
|
||||
T: PartialEq + Eq + Hash + Clone,
|
||||
T: PartialEq + Eq + Hash + Clone + Ord,
|
||||
S: Into<T> + PartialEq<T> + Clone,
|
||||
{
|
||||
grammar: &'a Grammar<N, T>,
|
||||
|
@ -98,7 +95,7 @@ where
|
|||
impl<'a, N, T, S> LLTabelParser<'a, N, T, S>
|
||||
where
|
||||
N: PartialEq + Eq + Hash + Clone + Debug,
|
||||
T: PartialEq + Eq + Hash + Clone + Debug,
|
||||
T: PartialEq + Eq + Hash + Clone + Debug + Ord,
|
||||
S: Into<T> + PartialEq<T> + Clone + Debug,
|
||||
{
|
||||
pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> {
|
||||
|
|
|
@ -5,13 +5,20 @@ use std::{
|
|||
rc::{Rc, Weak},
|
||||
};
|
||||
|
||||
use super::{Grammar, NodeChild, ParseTree, RuleIndex, Sentential};
|
||||
use super::{
|
||||
lr_parser::{LRTabelParser, LrAction, LrActionTable, LrGotoTable},
|
||||
Grammar, RuleIndex, Sentential,
|
||||
};
|
||||
|
||||
pub type RL0Automaton<N, T> =
|
||||
HashMap<Rc<LR0State<N>>, HashMap<Sentential<N, T>, Weak<LR0State<N>>>>;
|
||||
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
/// general state of lr0 automaton
|
||||
/// rule and reading point
|
||||
pub struct LR0State<N: Hash + Eq>(HashSet<(RuleIndex<N>, usize)>);
|
||||
|
||||
/// start state of lr0 automaton
|
||||
pub type LR0Start<N> = Weak<LR0State<N>>;
|
||||
|
||||
impl<N: Hash + Eq + Ord> Hash for LR0State<N> {
|
||||
|
@ -23,23 +30,13 @@ impl<N: Hash + Eq + Ord> Hash for LR0State<N> {
|
|||
}
|
||||
}
|
||||
}
|
||||
#[derive(Debug)]
|
||||
pub enum SlrAction<Shift, Reduce> {
|
||||
Shift(Shift),
|
||||
Reduce(Reduce),
|
||||
}
|
||||
|
||||
/// None is $
|
||||
pub type SlrActionTable<N, T> = HashMap<(usize, Option<T>), SlrAction<usize, RuleIndex<N>>>;
|
||||
pub type SlrGotoTable<N> = HashMap<(usize, N), usize>;
|
||||
pub type SlrParseTable<N, T> = (SlrActionTable<N, T>, SlrGotoTable<N>, usize);
|
||||
|
||||
impl<N, T> Grammar<N, T>
|
||||
where
|
||||
N: PartialEq + Eq + Hash + Clone + Ord,
|
||||
T: PartialEq + Eq + Hash + Clone + Ord,
|
||||
{
|
||||
pub fn next_kernel(&self, state: &LR0State<N>, read: &Sentential<N, T>) -> LR0State<N> {
|
||||
pub fn lr0_next_kernel(&self, state: &LR0State<N>, read: &Sentential<N, T>) -> LR0State<N> {
|
||||
let mut next_state: LR0State<N> = LR0State(HashSet::new());
|
||||
for ((from, rule_id), dot) in state.0.iter() {
|
||||
let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else {
|
||||
|
@ -51,7 +48,7 @@ where
|
|||
}
|
||||
next_state
|
||||
}
|
||||
pub fn readable(&self, state: &LR0State<N>) -> HashSet<Sentential<N, T>> {
|
||||
pub fn lr0_readable(&self, state: &LR0State<N>) -> HashSet<Sentential<N, T>> {
|
||||
let mut readables = HashSet::new();
|
||||
for ((from, rule_id), dot) in state.0.iter() {
|
||||
let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else {
|
||||
|
@ -114,8 +111,8 @@ where
|
|||
let mut vec = Vec::new();
|
||||
|
||||
// add clozures from the kernels from all readable symbols
|
||||
for none_terminal in self.readable(&state) {
|
||||
let next_state = self.lr0_clozure(self.next_kernel(&state, &none_terminal));
|
||||
for none_terminal in self.lr0_readable(&state) {
|
||||
let next_state = self.lr0_clozure(self.lr0_next_kernel(&state, &none_terminal));
|
||||
let rc = Rc::new(next_state);
|
||||
if let Some((k, _)) = out.get_key_value(&rc) {
|
||||
vec.push((none_terminal, Rc::downgrade(k)));
|
||||
|
@ -136,6 +133,9 @@ where
|
|||
}
|
||||
|
||||
pub fn gen_slr_parse_table(&mut self) -> bool {
|
||||
if self.first.is_none() {
|
||||
self.gen_first();
|
||||
}
|
||||
if self.follow.is_none() {
|
||||
self.gen_follow();
|
||||
}
|
||||
|
@ -158,15 +158,15 @@ where
|
|||
.upgrade()
|
||||
.and_then(|rc| ids.get(&rc))
|
||||
.expect("Found broken state in slr parse table gen.");
|
||||
let mut action: SlrActionTable<N, T> = HashMap::new();
|
||||
let mut goto: SlrGotoTable<N> = HashMap::new();
|
||||
let mut action: LrActionTable<N, T> = HashMap::new();
|
||||
let mut goto: LrGotoTable<N> = HashMap::new();
|
||||
let mut conflict = false;
|
||||
for (state, to) in lr0_automaton.0.iter() {
|
||||
let id = ids
|
||||
.get(state)
|
||||
.expect("Found broken state in slr parse table gen.");
|
||||
|
||||
for go in self.readable(state) {
|
||||
for go in self.lr0_readable(state) {
|
||||
let Some(to) = to.get(&go).and_then(|to| to.upgrade()) else {
|
||||
continue;
|
||||
};
|
||||
|
@ -178,7 +178,7 @@ where
|
|||
match go {
|
||||
Sentential::Terminal(t) => {
|
||||
conflict |= action
|
||||
.insert((*id, Some(t)), SlrAction::Shift(*to_id))
|
||||
.insert((*id, Some(t)), LrAction::Shift(*to_id))
|
||||
.is_some();
|
||||
}
|
||||
Sentential::NoneTerminal(nt) => {
|
||||
|
@ -194,7 +194,7 @@ where
|
|||
if to.len() <= *dot {
|
||||
for follow in self.follow(from) {
|
||||
conflict |= action
|
||||
.insert((*id, follow), SlrAction::Reduce((from.clone(), *rule_id)))
|
||||
.insert((*id, follow), LrAction::Reduce((from.clone(), *rule_id)))
|
||||
.is_some();
|
||||
}
|
||||
}
|
||||
|
@ -207,137 +207,16 @@ where
|
|||
pub fn slr_parser<'a, S: Into<T> + PartialEq<T> + Clone>(
|
||||
&'a self,
|
||||
iter: &'a mut dyn Iterator<Item = Result<S, String>>,
|
||||
) -> SLRTabelParser<N, T, S> {
|
||||
) -> LRTabelParser<N, T, S> {
|
||||
assert!(
|
||||
self.slr_parse_table.is_some(),
|
||||
"Please call gen_slr_parse_table before this!"
|
||||
);
|
||||
SLRTabelParser {
|
||||
LRTabelParser {
|
||||
input: iter,
|
||||
grammar: self,
|
||||
}
|
||||
}
|
||||
pub fn get_slr_action(
|
||||
&self,
|
||||
state: &usize,
|
||||
next: &Option<T>,
|
||||
) -> Option<&SlrAction<usize, (N, usize)>> {
|
||||
assert!(
|
||||
self.slr_parse_table.is_some(),
|
||||
"Please call gen_slr_parse_table before this!"
|
||||
);
|
||||
self.slr_parse_table
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.0
|
||||
.get(&(*state, next.clone()))
|
||||
}
|
||||
pub fn get_slr_goto(&self, state: &usize, next: &N) -> Option<&usize> {
|
||||
assert!(
|
||||
self.slr_parse_table.is_some(),
|
||||
"Please call gen_slr_parse_table before this!"
|
||||
);
|
||||
self.slr_parse_table
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.1
|
||||
.get(&(*state, next.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SLRTabelParser<'a, N, T, S>
|
||||
where
|
||||
N: PartialEq + Eq + Hash + Clone,
|
||||
T: PartialEq + Eq + Hash + Clone,
|
||||
S: Into<T> + PartialEq<T> + Clone,
|
||||
{
|
||||
grammar: &'a Grammar<N, T>,
|
||||
input: &'a mut dyn Iterator<Item = Result<S, String>>,
|
||||
}
|
||||
|
||||
impl<'a, N, T, S> SLRTabelParser<'a, N, T, S>
|
||||
where
|
||||
N: PartialEq + Eq + Hash + Clone + Debug + Ord,
|
||||
T: PartialEq + Eq + Hash + Clone + Debug + Ord,
|
||||
S: Into<T> + PartialEq<T> + Clone + Debug,
|
||||
{
|
||||
pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> {
|
||||
let mut stack: Vec<(NodeChild<N, S>, usize)> = Vec::new();
|
||||
|
||||
let mut next = match self.input.next() {
|
||||
Some(Ok(d)) => Some(d),
|
||||
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
|
||||
None => None,
|
||||
};
|
||||
let mut first = true;
|
||||
loop {
|
||||
let state = if first {
|
||||
self.grammar.slr_parse_table.as_ref().unwrap().2
|
||||
} else {
|
||||
let Some(state) = stack.last() else {
|
||||
return Err("Unexpected EOS".into());
|
||||
};
|
||||
state.1
|
||||
};
|
||||
first = false;
|
||||
let Some(current_state) = self
|
||||
.grammar
|
||||
.get_slr_action(&state, &next.as_ref().map(|f| f.clone().into()))
|
||||
else {
|
||||
return Err(format!(
|
||||
"Unexpected Token: {}",
|
||||
next.map(|f| format!("{f:?}"))
|
||||
.unwrap_or("end of file".to_string())
|
||||
));
|
||||
};
|
||||
|
||||
match current_state {
|
||||
SlrAction::Shift(to) => {
|
||||
stack.push((NodeChild::Data(next.expect("Can not shift on EOF.")), *to));
|
||||
next = match self.input.next() {
|
||||
Some(Ok(d)) => Some(d),
|
||||
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
|
||||
None => None,
|
||||
};
|
||||
}
|
||||
SlrAction::Reduce((rule, ind)) => {
|
||||
let Some(r) = self.grammar.rules.get(rule).and_then(|e| e.get(*ind)) else {
|
||||
return Err(format!("Invalid rule: {:?}-{}", rule, ind));
|
||||
};
|
||||
let mut childs = Vec::new();
|
||||
for elem in r.iter().rev() {
|
||||
let Some(last) = stack.pop() else {
|
||||
return Err("Unexpected EOF".into());
|
||||
};
|
||||
if last.0 != *elem {
|
||||
return Err(format!("Broken Stack: {:?} and {:?}", last.0, elem));
|
||||
}
|
||||
childs.push(last);
|
||||
}
|
||||
if self.grammar.start == *rule {
|
||||
return Ok(ParseTree {
|
||||
rule: Some((rule.clone(), *ind)),
|
||||
childs: childs.into_iter().rev().map(|(a, _)| a.clone()).collect(),
|
||||
});
|
||||
}
|
||||
let Some(state) = stack.last() else {
|
||||
return Err("Unexpected EOS".into());
|
||||
};
|
||||
let Some(next) = self.grammar.get_slr_goto(&state.1, rule) else {
|
||||
return Err(format!(
|
||||
"Invalid reduction: state: {} rule: {:?}",
|
||||
state.1, rule
|
||||
));
|
||||
};
|
||||
stack.push((
|
||||
NodeChild::Child(ParseTree {
|
||||
rule: Some((rule.clone(), *ind)),
|
||||
childs: childs.into_iter().rev().map(|(d, _)| d).collect(),
|
||||
}),
|
||||
*next,
|
||||
));
|
||||
}
|
||||
}
|
||||
start_rule: &self.start,
|
||||
rules: &self.rules,
|
||||
parse_table: self.slr_parse_table.as_ref().unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
299
src/cfg/lr1_grammar.rs
Normal file
299
src/cfg/lr1_grammar.rs
Normal file
|
@ -0,0 +1,299 @@
|
|||
use super::{
|
||||
lr_parser::{LRTabelParser, LrAction, LrActionTable, LrGotoTable},
|
||||
Grammar, RuleIndex, Sentential,
|
||||
};
|
||||
use std::{
|
||||
cmp::Ordering,
|
||||
collections::{HashMap, HashSet},
|
||||
hash::{Hash, Hasher},
|
||||
rc::{Rc, Weak},
|
||||
};
|
||||
|
||||
/// Lookahead set of an LR(1) item. `None` stands for the end-of-input
/// marker `$`.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct LR1Lookahead<T: Hash + Eq>(HashSet<Option<T>>);

impl<T: Hash + Eq> Default for LR1Lookahead<T> {
    /// The empty lookahead set.
    fn default() -> Self {
        Self(HashSet::new())
    }
}

impl<T: Hash + Eq + Ord> LR1Lookahead<T> {
    /// Elements in ascending order. A `HashSet` iterates in arbitrary order,
    /// so ordering and hashing both work on this canonical view.
    fn sorted(&self) -> Vec<&Option<T>> {
        let mut items: Vec<&Option<T>> = self.0.iter().collect();
        // Set elements are distinct, so an unstable sort gives the same result.
        items.sort_unstable();
        items
    }
}

impl<T: Hash + Eq + Ord> Ord for LR1Lookahead<T> {
    /// Lexicographic comparison of the sorted element lists.
    fn cmp(&self, other: &Self) -> Ordering {
        self.sorted().cmp(&other.sorted())
    }
}

impl<T: Hash + Eq + Ord> PartialOrd for LR1Lookahead<T> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl<T: Hash + Eq + Ord> Hash for LR1Lookahead<T> {
    /// Hashes the element count followed by the elements in sorted order, so
    /// equal sets hash equally regardless of insertion order.
    ///
    /// The length prefix keeps the encoding prefix-free: this type is hashed
    /// as one field of a tuple, and without the prefix an empty set would
    /// contribute nothing to the hasher at all.
    fn hash<H: Hasher>(&self, state: &mut H) {
        let items = self.sorted();
        state.write_usize(items.len());
        for item in items {
            item.hash(state);
        }
    }
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
/// general state of lr1 automaton
|
||||
pub struct LR1State<N: Hash + Eq, T: Hash + Eq + Ord>(
|
||||
HashSet<(RuleIndex<N>, LR1Lookahead<T>, usize)>,
|
||||
);
|
||||
|
||||
impl<N: Hash + Eq + Ord, T: Hash + Eq + Ord> Hash for LR1State<N, T> {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
let mut a: Vec<&(RuleIndex<N>, LR1Lookahead<T>, usize)> = self.0.iter().collect();
|
||||
a.sort();
|
||||
for s in a.iter() {
|
||||
s.hash(state);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// start state of lr1 automaton
|
||||
pub type LR1Start<N, T> = Weak<LR1State<N, T>>;
|
||||
|
||||
pub type RL1Automaton<N, T> =
|
||||
HashMap<Rc<LR1State<N, T>>, HashMap<Sentential<N, T>, Weak<LR1State<N, T>>>>;
|
||||
|
||||
impl<N, T> Grammar<N, T>
|
||||
where
|
||||
N: PartialEq + Eq + Hash + Clone + Ord,
|
||||
T: PartialEq + Eq + Hash + Clone + Ord,
|
||||
{
|
||||
pub fn lr1_next_kernel(
|
||||
&self,
|
||||
state: &LR1State<N, T>,
|
||||
read: &Sentential<N, T>,
|
||||
) -> LR1State<N, T> {
|
||||
let mut next_state: LR1State<N, T> = LR1State(HashSet::new());
|
||||
for ((from, rule_id), lookahead, dot) in state.0.iter() {
|
||||
let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else {
|
||||
continue;
|
||||
};
|
||||
if to.get(*dot).map(|elem| *elem == *read).unwrap_or(false) {
|
||||
next_state
|
||||
.0
|
||||
.insert(((from.clone(), *rule_id), lookahead.clone(), dot + 1));
|
||||
}
|
||||
}
|
||||
next_state
|
||||
}
|
||||
|
||||
/// Collects every symbol that appears directly after the dot of some item in
/// `state`, i.e. the symbols that can be read next. Items with the dot at
/// the end, or that refer to an unknown rule, contribute nothing.
pub fn lr1_readable(&self, state: &LR1State<N, T>) -> HashSet<Sentential<N, T>> {
    state
        .0
        .iter()
        .filter_map(|((head, alt), _, dot)| {
            self.rules.get(head)?.get(*alt)?.get(*dot).cloned()
        })
        .collect()
}
|
||||
|
||||
pub fn lr1_clozure(&self, mut state: LR1State<N, T>) -> LR1State<N, T> {
|
||||
assert!(self.first.is_some(), "Please call gen_first before this!");
|
||||
assert!(
|
||||
self.produces_epsilon.is_some(),
|
||||
"Please call gen_produces_epsilon before this!"
|
||||
);
|
||||
loop {
|
||||
let mut change = false;
|
||||
|
||||
let relevant = state
|
||||
.0
|
||||
.iter()
|
||||
.filter_map(|((from, rule_id), lookahead, dot)| {
|
||||
self.rules
|
||||
.get(from)
|
||||
.and_then(|v| v.get(*rule_id))
|
||||
.and_then(|to| match to.get(*dot) {
|
||||
Some(Sentential::NoneTerminal(b)) => {
|
||||
Some((b.clone(), to[dot + 1..].to_vec(), lookahead.clone()))
|
||||
}
|
||||
Some(Sentential::Terminal(_)) | None => None,
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
for (nt, rest, lookahead) in relevant {
|
||||
if let Some(rule) = self.rules.get(&nt) {
|
||||
let mut lookahead = if self.can_produce_epsilon_sen(&rest) {
|
||||
lookahead
|
||||
} else {
|
||||
LR1Lookahead::default()
|
||||
};
|
||||
lookahead.0.extend(
|
||||
// extend with first set of rest, except e
|
||||
self.first(&rest)
|
||||
.iter()
|
||||
.filter_map(|t| t.as_ref().map(|elem| Some(elem.clone()))),
|
||||
);
|
||||
for to in 0..rule.len() {
|
||||
change |= state.0.insert(((nt.clone(), to), lookahead.clone(), 0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !change {
|
||||
return state;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn gen_lr1_automaton(&mut self) {
|
||||
if self.first.is_none() {
|
||||
self.gen_first();
|
||||
}
|
||||
if self.produces_epsilon.is_none() {
|
||||
self.gen_produces_epsilon();
|
||||
}
|
||||
let mut out: RL1Automaton<N, T> = HashMap::new();
|
||||
// add state zero
|
||||
let mut start_state = LR1State(HashSet::new());
|
||||
if let Some(rule) = self.rules.get(&self.start) {
|
||||
for to in 0..rule.len() {
|
||||
start_state.0.insert((
|
||||
(self.start.clone(), to),
|
||||
LR1Lookahead(HashSet::from([None])),
|
||||
0,
|
||||
));
|
||||
}
|
||||
}
|
||||
// add state to graph and mark for todo
|
||||
let rc = Rc::new(self.lr1_clozure(start_state));
|
||||
let start = Rc::downgrade(&rc);
|
||||
let mut todo = vec![Rc::downgrade(&rc)];
|
||||
out.insert(rc, HashMap::new());
|
||||
|
||||
// add states while marked states exists
|
||||
while let Some(state) = todo.pop() {
|
||||
if let Some(state) = state.upgrade() {
|
||||
// new adjacent list
|
||||
let mut vec = Vec::new();
|
||||
|
||||
// add clozures from the kernels from all readable symbols
|
||||
for none_terminal in self.lr1_readable(&state) {
|
||||
let next_state = self.lr1_clozure(self.lr1_next_kernel(&state, &none_terminal));
|
||||
let rc = Rc::new(next_state);
|
||||
if let Some((k, _)) = out.get_key_value(&rc) {
|
||||
vec.push((none_terminal, Rc::downgrade(k)));
|
||||
} else {
|
||||
todo.push(Rc::downgrade(&rc));
|
||||
vec.push((none_terminal, Rc::downgrade(&rc)));
|
||||
out.insert(rc, HashMap::new());
|
||||
}
|
||||
}
|
||||
// write adjacent list to state
|
||||
// does not check duplicates. Is not needed, because `readable` returns a set
|
||||
out.entry(state).and_modify(|elem| {
|
||||
elem.extend(vec);
|
||||
});
|
||||
}
|
||||
}
|
||||
self.lr1_automaton = Some((out, start));
|
||||
}
|
||||
|
||||
pub fn gen_lr1_parse_table(&mut self) -> bool {
|
||||
if self.first.is_none() {
|
||||
self.gen_first();
|
||||
}
|
||||
if self.follow.is_none() {
|
||||
self.gen_follow();
|
||||
}
|
||||
if self.lr1_automaton.is_none() {
|
||||
self.gen_lr1_automaton();
|
||||
}
|
||||
|
||||
let lr1_automaton = self.lr1_automaton.as_ref().unwrap();
|
||||
|
||||
let ids: HashMap<Rc<LR1State<N, T>>, usize> = HashMap::from_iter(
|
||||
lr1_automaton
|
||||
.0
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(id, (a, _))| (a.clone(), id)),
|
||||
);
|
||||
|
||||
let start = *lr1_automaton
|
||||
.1
|
||||
.upgrade()
|
||||
.and_then(|rc| ids.get(&rc))
|
||||
.expect("Found broken state in slr parse table gen.");
|
||||
let mut action: LrActionTable<N, T> = HashMap::new();
|
||||
let mut goto: LrGotoTable<N> = HashMap::new();
|
||||
let mut conflict = false;
|
||||
for (state, to) in lr1_automaton.0.iter() {
|
||||
let id = ids
|
||||
.get(state)
|
||||
.expect("Found broken state in slr parse table gen.");
|
||||
|
||||
for go in self.lr1_readable(state) {
|
||||
let Some(to) = to.get(&go).and_then(|to| to.upgrade()) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let to_id = ids
|
||||
.get(&to)
|
||||
.expect("Found broken state in slr parse table gen.");
|
||||
|
||||
match go {
|
||||
Sentential::Terminal(t) => {
|
||||
conflict |= action
|
||||
.insert((*id, Some(t)), LrAction::Shift(*to_id))
|
||||
.is_some();
|
||||
}
|
||||
Sentential::NoneTerminal(nt) => {
|
||||
conflict |= goto.insert((*id, nt), *to_id).is_some();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
for ((from, rule_id), lookahead, dot) in state.0.iter() {
|
||||
let Some(to) = self.rules.get(from).and_then(|v| v.get(*rule_id)) else {
|
||||
continue;
|
||||
};
|
||||
if to.len() <= *dot {
|
||||
for follow in lookahead.0.iter() {
|
||||
conflict |= action
|
||||
.insert(
|
||||
(*id, follow.clone()),
|
||||
LrAction::Reduce((from.clone(), *rule_id)),
|
||||
)
|
||||
.is_some();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
self.lr1_parse_table = Some((action, goto, start));
|
||||
conflict
|
||||
}
|
||||
|
||||
pub fn lr1_parser<'a, S: Into<T> + PartialEq<T> + Clone>(
|
||||
&'a self,
|
||||
iter: &'a mut dyn Iterator<Item = Result<S, String>>,
|
||||
) -> LRTabelParser<N, T, S> {
|
||||
assert!(
|
||||
self.lr1_parse_table.is_some(),
|
||||
"Please call gen_lr1_parse_table before this!"
|
||||
);
|
||||
LRTabelParser {
|
||||
input: iter,
|
||||
start_rule: &self.start,
|
||||
rules: &self.rules,
|
||||
parse_table: self.lr1_parse_table.as_ref().unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
133
src/cfg/lr_parser.rs
Normal file
133
src/cfg/lr_parser.rs
Normal file
|
@ -0,0 +1,133 @@
|
|||
use std::{collections::HashMap, fmt::Debug, hash::Hash};
|
||||
|
||||
use super::{NodeChild, ParseTree, RuleIndex, Sentential};
|
||||
|
||||
/// One cell of an LR action table.
///
/// Comparable and clonable so that table generators can tell a genuine
/// conflict (two different actions for one cell) from a repeated insertion of
/// the same action.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LrAction<Shift, Reduce> {
    /// Consume the next token and continue with the carried value (the
    /// action table instantiates it with the target state id).
    Shift(Shift),
    /// Reduce by the carried value (the action table instantiates it with a
    /// rule index).
    Reduce(Reduce),
}
|
||||
|
||||
/// None is $
|
||||
pub type LrActionTable<N, T> = HashMap<(usize, Option<T>), LrAction<usize, RuleIndex<N>>>;
|
||||
pub type LrGotoTable<N> = HashMap<(usize, N), usize>;
|
||||
pub type LrParseTable<N, T> = (LrActionTable<N, T>, LrGotoTable<N>, usize);
|
||||
|
||||
pub struct LRTabelParser<'a, N, T, S>
|
||||
where
|
||||
N: Eq + Hash + Clone,
|
||||
T: Eq + Hash + Clone + Ord,
|
||||
S: Into<T> + PartialEq<T> + Clone,
|
||||
{
|
||||
pub start_rule: &'a N,
|
||||
pub rules: &'a HashMap<N, Vec<Vec<Sentential<N, T>>>>,
|
||||
pub parse_table: &'a LrParseTable<N, T>,
|
||||
pub input: &'a mut dyn Iterator<Item = Result<S, String>>,
|
||||
}
|
||||
|
||||
impl<'a, N, T, S> LRTabelParser<'a, N, T, S>
|
||||
where
|
||||
N: Eq + Hash + Clone,
|
||||
T: Eq + Hash + Clone + Ord,
|
||||
S: Into<T> + PartialEq<T> + Clone,
|
||||
{
|
||||
pub fn get_action(
|
||||
&self,
|
||||
state: &usize,
|
||||
next: &Option<T>,
|
||||
) -> Option<&LrAction<usize, (N, usize)>> {
|
||||
self.parse_table.0.get(&(*state, next.clone()))
|
||||
}
|
||||
pub fn get_goto(&self, state: &usize, next: &N) -> Option<&usize> {
|
||||
self.parse_table.1.get(&(*state, next.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, N, T, S> LRTabelParser<'a, N, T, S>
|
||||
where
|
||||
N: Eq + Hash + Clone + Debug + Ord,
|
||||
T: Eq + Hash + Clone + Debug + Ord,
|
||||
S: Into<T> + PartialEq<T> + Clone + Debug,
|
||||
{
|
||||
pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> {
|
||||
let mut stack: Vec<(NodeChild<N, S>, usize)> = Vec::new();
|
||||
|
||||
let mut next = match self.input.next() {
|
||||
Some(Ok(d)) => Some(d),
|
||||
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
|
||||
None => None,
|
||||
};
|
||||
let mut first = true;
|
||||
loop {
|
||||
let state = if first {
|
||||
// start with first state
|
||||
self.parse_table.2
|
||||
} else {
|
||||
let Some(state) = stack.last() else {
|
||||
return Err("Unexpected EOS 1".into());
|
||||
};
|
||||
state.1
|
||||
};
|
||||
first = false;
|
||||
let Some(current_state) =
|
||||
self.get_action(&state, &next.as_ref().map(|f| f.clone().into()))
|
||||
else {
|
||||
return Err(format!(
|
||||
"Unexpected Token: {}",
|
||||
next.map(|f| format!("{f:?}"))
|
||||
.unwrap_or("end of file".to_string())
|
||||
));
|
||||
};
|
||||
|
||||
println!("next: {next:?}, state: {current_state:?}, stack: {stack:?}");
|
||||
|
||||
match current_state {
|
||||
LrAction::Shift(to) => {
|
||||
stack.push((NodeChild::Data(next.expect("Can not shift on EOF.")), *to));
|
||||
next = match self.input.next() {
|
||||
Some(Ok(d)) => Some(d),
|
||||
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
|
||||
None => None,
|
||||
};
|
||||
}
|
||||
LrAction::Reduce((rule, ind)) => {
|
||||
let Some(r) = self.rules.get(rule).and_then(|e| e.get(*ind)) else {
|
||||
return Err(format!("Invalid rule: {:?}-{}", rule, ind));
|
||||
};
|
||||
let mut childs = Vec::new();
|
||||
for elem in r.iter().rev() {
|
||||
let Some(last) = stack.pop() else {
|
||||
return Err("Unexpected EOF".into());
|
||||
};
|
||||
if last.0 != *elem {
|
||||
return Err(format!("Broken Stack: {:?} and {:?}", last.0, elem));
|
||||
}
|
||||
childs.push(last);
|
||||
}
|
||||
if *self.start_rule == *rule {
|
||||
return Ok(ParseTree {
|
||||
rule: Some((rule.clone(), *ind)),
|
||||
childs: childs.into_iter().rev().map(|(a, _)| a.clone()).collect(),
|
||||
});
|
||||
}
|
||||
let Some(state) = stack.last() else {
|
||||
return Err("Unexpected EOS 2".into());
|
||||
};
|
||||
let Some(next) = self.get_goto(&state.1, rule) else {
|
||||
return Err(format!(
|
||||
"Invalid reduction: state: {} rule: {:?}",
|
||||
state.1, rule
|
||||
));
|
||||
};
|
||||
stack.push((
|
||||
NodeChild::Child(ParseTree {
|
||||
rule: Some((rule.clone(), *ind)),
|
||||
childs: childs.into_iter().rev().map(|(d, _)| d).collect(),
|
||||
}),
|
||||
*next,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -4,10 +4,14 @@ use std::{
|
|||
hash::Hash,
|
||||
};
|
||||
|
||||
use lr0_grammar::{LR0Start, RL0Automaton, SlrParseTable};
|
||||
use lr0_grammar::{LR0Start, RL0Automaton};
|
||||
use lr1_grammar::{LR1Start, RL1Automaton};
|
||||
use lr_parser::LrParseTable;
|
||||
|
||||
pub mod ll_grammar;
|
||||
pub mod lr0_grammar;
|
||||
pub mod lr1_grammar;
|
||||
pub mod lr_parser;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! cfg_grammar {
|
||||
|
@ -27,15 +31,7 @@ macro_rules! cfg_grammar {
|
|||
}
|
||||
map.get_mut(&$left).unwrap().push(vec![$($right.into()),*]);
|
||||
})*
|
||||
$crate::cfg::Grammar {
|
||||
start: $start,
|
||||
rules: map,
|
||||
first: None,
|
||||
follow: None,
|
||||
ll_parse_table: None,
|
||||
lr0_automaton: None,
|
||||
slr_parse_table: None,
|
||||
}
|
||||
$crate::cfg::Grammar::new($start, map)
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -70,9 +66,14 @@ impl<T: Ord, N: Ord> Ord for Sentential<N, T> {
|
|||
|
||||
pub type RuleIndex<N> = (N, usize);
|
||||
|
||||
pub struct Grammar<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> {
|
||||
pub struct Grammar<N, T>
|
||||
where
|
||||
N: PartialEq + Eq + Hash + Clone,
|
||||
T: PartialEq + Eq + Hash + Clone + Ord,
|
||||
{
|
||||
pub start: N,
|
||||
pub rules: HashMap<N, Vec<Vec<Sentential<N, T>>>>,
|
||||
pub produces_epsilon: Option<HashSet<N>>,
|
||||
/// none is epsilon
|
||||
pub first: Option<HashMap<N, HashSet<Option<T>>>>,
|
||||
/// none is $
|
||||
|
@ -89,22 +90,73 @@ pub struct Grammar<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash +
|
|||
pub lr0_automaton: Option<(RL0Automaton<N, T>, LR0Start<N>)>,
|
||||
|
||||
///
|
||||
pub slr_parse_table: Option<SlrParseTable<N, T>>,
|
||||
pub slr_parse_table: Option<LrParseTable<N, T>>,
|
||||
|
||||
///
|
||||
pub lr1_automaton: Option<(RL1Automaton<N, T>, LR1Start<N, T>)>,
|
||||
|
||||
///
|
||||
pub lr1_parse_table: Option<LrParseTable<N, T>>,
|
||||
}
|
||||
|
||||
impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone + Ord> Grammar<N, T> {
|
||||
pub fn new(start: N, rules: HashMap<N, Vec<Vec<Sentential<N, T>>>>) -> Self {
|
||||
Self {
|
||||
start,
|
||||
rules,
|
||||
produces_epsilon: None,
|
||||
first: None,
|
||||
follow: None,
|
||||
ll_parse_table: None,
|
||||
lr0_automaton: None,
|
||||
slr_parse_table: None,
|
||||
lr1_automaton: None,
|
||||
lr1_parse_table: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn gen_produces_epsilon(&mut self) {
|
||||
let mut out: HashSet<N> = HashSet::new();
|
||||
loop {
|
||||
let mut change = false;
|
||||
for (from, to) in self.rules.iter() {
|
||||
for to in to.iter() {
|
||||
if to.iter().all(|sen| match sen {
|
||||
Sentential::Terminal(_) => false,
|
||||
Sentential::NoneTerminal(a) => out.contains(a),
|
||||
}) {
|
||||
change |= out.insert(from.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
if !change {
|
||||
break;
|
||||
}
|
||||
}
|
||||
self.produces_epsilon = Some(out);
|
||||
}
|
||||
|
||||
impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar<N, T> {
|
||||
pub fn can_produce_epsilon(&self, rule: &Sentential<N, T>) -> bool {
|
||||
assert!(
|
||||
self.produces_epsilon.is_some(),
|
||||
"Please call gen_produces_epsilon before this!"
|
||||
);
|
||||
|
||||
match rule {
|
||||
Sentential::Terminal(_) => false,
|
||||
Sentential::NoneTerminal(nt) => self
|
||||
.rules
|
||||
.get(nt)
|
||||
.map(|f| f.iter().any(|v| v.is_empty()))
|
||||
.unwrap_or(false),
|
||||
Sentential::NoneTerminal(nt) => self.produces_epsilon.as_ref().unwrap().contains(nt),
|
||||
}
|
||||
}
|
||||
|
||||
/// Tells whether a whole symbol sequence can derive the empty word, which is
/// the case when every single symbol can. The empty sequence yields `true`.
pub fn can_produce_epsilon_sen(&self, rule: &Vec<Sentential<N, T>>) -> bool {
    for symbol in rule {
        if !self.can_produce_epsilon(symbol) {
            return false;
        }
    }
    true
}
|
||||
|
||||
pub fn gen_first(&mut self) {
|
||||
if self.produces_epsilon.is_none() {
|
||||
self.gen_produces_epsilon();
|
||||
}
|
||||
let mut first: HashMap<N, HashSet<Option<T>>> = HashMap::new();
|
||||
loop {
|
||||
let mut change = false;
|
||||
|
@ -165,6 +217,8 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar
|
|||
self.first = Some(first);
|
||||
}
|
||||
|
||||
/// get first of sentential
|
||||
/// None is e
|
||||
pub fn first(&self, sent: &Vec<Sentential<N, T>>) -> HashSet<Option<T>> {
|
||||
assert!(self.first.is_some(), "Please call gen_first before this!");
|
||||
let mut out = HashSet::<Option<T>>::new();
|
||||
|
|
12
src/main.rs
12
src/main.rs
|
@ -104,7 +104,7 @@ token_scanner!(
|
|||
}
|
||||
);
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
|
||||
enum NoneTerminals {
|
||||
P, // Program, ; separated
|
||||
L, // Line of code
|
||||
|
@ -193,9 +193,11 @@ fn main() {
|
|||
|
||||
let mut grammar = grammer();
|
||||
grammar.gen_follow();
|
||||
println!("first: {:?}", grammar.first);
|
||||
println!("follow: {:?}", grammar.follow);
|
||||
let conflict = grammar.gen_ll_parse_table();
|
||||
//println!("first: {:?}", grammar.first);
|
||||
//println!("follow: {:?}", grammar.follow);
|
||||
grammar.gen_lr1_automaton();
|
||||
println!("conflict: {:?}", grammar.lr1_automaton);
|
||||
/* let conflict = grammar.gen_ll_parse_table();
|
||||
println!("conflict: {conflict}");
|
||||
println!("prase table: {:?}", grammar.ll_parse_table);
|
||||
println!("parse\n\n");
|
||||
|
@ -205,5 +207,5 @@ fn main() {
|
|||
.ll_parser(&mut m.iter_mut())
|
||||
.parse()
|
||||
.map(|tree| tree.clean())
|
||||
)
|
||||
) */
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue