From 46ca5ecc50ec4b68dd3f966615ea77df42773814 Mon Sep 17 00:00:00 2001 From: jusax23 Date: Tue, 5 Nov 2024 16:12:47 +0100 Subject: [PATCH] rl0 automaton, lib --- Cargo.toml | 8 + src/bin/book.rs | 82 +++++++ src/cfg/ll_grammar.rs | 261 ++++++++++++++++++++++ src/cfg/lr0.rs | 104 +++++++++ src/cfg/mod.rs | 282 +++++++++++++++++++++++ src/double_enum.rs | 2 +- src/lib.rs | 13 ++ src/ll_grammar.rs | 503 ------------------------------------------ src/main.rs | 25 +-- src/scanner.rs | 3 +- 10 files changed, 764 insertions(+), 519 deletions(-) create mode 100644 src/bin/book.rs create mode 100644 src/cfg/ll_grammar.rs create mode 100644 src/cfg/lr0.rs create mode 100644 src/cfg/mod.rs create mode 100644 src/lib.rs delete mode 100644 src/ll_grammar.rs diff --git a/Cargo.toml b/Cargo.toml index 4ffb99f..c4f6532 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,14 @@ name = "rcompiler" version = "0.1.0" edition = "2021" +default-run = "main" [dependencies] regex = "1.11.1" + +[[bin]] +name = "main" +path = "src/main.rs" + +[[bin]] +name = "book" diff --git a/src/bin/book.rs b/src/bin/book.rs new file mode 100644 index 0000000..95a704f --- /dev/null +++ b/src/bin/book.rs @@ -0,0 +1,82 @@ +use ll_grammar::Skippable; +use rcompiler::prelude::*; +use regex::Match; +use std::collections::HashMap; + +double_enum!( + BareTokens, Tokens { + WhiteSpace, + Assign, + Add, + LBrace, + RBrace, + Ident(String), + } +); + +token_scanner!( + Tokens, + r"^\s|\t|\n|\r" : |_,_| { + Some(WhiteSpace) + } + r"^\+" : |_,_| { + Some(Add) + } + r"^=" : |_,_| { + Some(Assign) + } + r"^\(" : |_,_| { + Some(LBrace) + } + r"^\)" : |_,_| { + Some(RBrace) + } + r"^[a-zA-Z](\w)*" : |_, m: Match<'_>| { + Some(Ident(String::from(m.as_str()))) + } +); + +#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)] +enum NoneTerminals { + S, + E, + P, +} + +impl From for Sentential { + fn from(value: NoneTerminals) -> Self { + Sentential::NoneTerminal(value) + } +} + +impl From for Sentential { + fn from(value: BareTokens) -> Self { + Sentential::Terminal(value) + } +} + +fn grammer() -> Grammar { + use BareTokens::*; + use NoneTerminals::*; + cfg_grammar![ + start: S; + S -> Ident, Assign, E; + E -> E, Add, P; + E -> P; + P -> Ident; + P -> LBrace, E, RBrace; + P -> Ident, LBrace, E, RBrace; + ] +} + +fn main() { + //let code = String::from("a = b()+c+(d+e())"); + //let mut m = Scanner::::new(code).with_skipping(Tokens::WhiteSpace); + + let mut grammar = grammer(); + grammar.gen_follow(); + println!("first: {:?}", grammar.first); + println!("follow: {:?}", grammar.follow); + grammar.gen_lr0_automaton(); + println!("automaton: {:?}", grammar.lr0_automaton); +} diff --git a/src/cfg/ll_grammar.rs b/src/cfg/ll_grammar.rs new file mode 100644 index 0000000..eded17a --- /dev/null +++ b/src/cfg/ll_grammar.rs @@ -0,0 +1,261 @@ +use std::{collections::HashMap, fmt::Debug, hash::Hash}; + +use super::{Grammar, Sentential}; + +impl Grammar { + pub fn gen_ll_parse_table(&mut self) -> bool { + if self.follow.is_none() { + self.gen_follow(); + } + if self.ll_parse_table.is_some() { + return false; + } + let mut conflict = false; + let mut parse_table: HashMap<(N, Option), usize> = HashMap::new(); + for (from, to) in self.rules.iter() { + for (id, to) in to.iter().enumerate() { + // rule is A -> al + // terminal == None means epsilon + for terminal in self.first(to) { + match terminal { + // let a be in First(al) -> add to T[A,a] = A->al (using the index of al) + Some(terminal) => { + conflict |= parse_table + .insert((from.clone(), Some(terminal.clone())), id) + .is_some(); + } + // if first contains epsilon then + // let b be in Follow(A) -> add to T[A,b] = A->al (using the index of al) + None => { + for terminal in self.follow(from).iter() { + conflict |= parse_table + .insert((from.clone(), terminal.clone()), id) + .is_some() + } + } + } + } + } + } + self.ll_parse_table = Some(parse_table); + conflict + } + + /// get parse_table rule + /// None means error. + pub fn ll_parse_table( + &self, + none_terminal: &N, + terminal: &Option, + ) -> Option<(usize, &Vec>)> { + assert!( + self.ll_parse_table.is_some(), + "Please call gen_parse_table before this!" + ); + self.ll_parse_table + .as_ref() + .unwrap() + .get(&(none_terminal.clone(), terminal.clone())) + .and_then(|f| { + self.rules + .get(none_terminal) + .and_then(|rule| rule.get(*f)) + .map(|rule| (*f, rule)) + }) + } + + pub fn ll_parser<'a, S: Into + PartialEq + Clone>( + &'a self, + iter: &'a mut dyn Iterator>, + ) -> LLTabelParser { + assert!( + self.ll_parse_table.is_some(), + "Please call gen_parse_table before this!" + ); + LLTabelParser { + input: iter, + grammar: self, + } + } +} + +/// Just checks a program. Does not generates output. +pub struct LLTabelParser< + 'a, + N: PartialEq + Eq + Hash + Clone, + T: PartialEq + Eq + Hash + Clone, + S: Into + PartialEq + Clone, +> { + grammar: &'a Grammar, + input: &'a mut dyn Iterator>, +} + +impl< + 'a, + N: PartialEq + Eq + Hash + Clone + Debug, + T: PartialEq + Eq + Hash + Clone + Debug, + S: Into + PartialEq + Clone + Debug, + > LLTabelParser<'a, N, T, S> +{ + pub fn parse(&mut self) -> Result, String> { + // stack of table driven parser + // content of the vec: + // - first element: all of them combined represent the complete stack, of the parser. + // - secount element: rule has to able to derive the code defined, by its inner childs and the unparsed code from the accompanying first element. + let mut stack: Vec<(Vec>, ParseTree)> = vec![( + vec![Sentential::NoneTerminal(self.grammar.start.clone())], + ParseTree::new(None), + )]; + + let mut next = match self.input.next() { + Some(Ok(d)) => Some(d), + Some(Err(err)) => return Err(format!("Invalid token: {}", err)), + None => None, + }; + + loop { + // look at current state + let mut state = stack.pop(); + match state.as_mut() { + // processing inner state, of tracked rules + Some((inner_stack, rule)) => { + let inner_state = inner_stack.pop(); + match inner_state { + // match terminal, check if equal + Some(Sentential::Terminal(terminal)) => match (next, terminal) { + // actual vs. expected input + (Some(inn), expect) if inn == expect => { + next = match self.input.next() { + Some(Ok(n)) => Some(n), + Some(Err(err)) => { + return Err(format!("Invalid token: {}", err)) + } + None => None, + }; + rule.childs.push(NodeChild::Data(inn)); + stack.push(state.unwrap()); + } + (a, b) => { + return Err(format!("found: {:?} expected: {:?}", a, b)); + } + }, + // take next none terminal and apply rule from parse table. + Some(Sentential::NoneTerminal(none_term)) => { + // load rule + let Some((id, new_rule)) = self + .grammar + .ll_parse_table(&none_term, &next.as_ref().map(|f| f.clone().into())) + else { + // no rule + return Err(format!( + "Unexpected token: {}", + next.map(|f| format!("{f:?}")) + .unwrap_or("end of file".to_string()) + )); + }; + + // reverse rule: because, uses vec as stack, but reversed + let new_rule_rev = + new_rule.iter().rev().map(|f| f.clone()).collect::>(); + // memorize current state/rule for later + stack.push(state.unwrap()); + // process next rule + stack.push(( + new_rule_rev, + ParseTree { + rule: Some((none_term, id)), + childs: Vec::new(), + }, + )); + } + // inner state is empty: current rule is finished + None => { + // if stack is empty, this is the initial state: finish or err + let Some(last) = stack.last_mut() else { + // ok: input has ended + if next.is_none() { + return Ok(state.unwrap().1); + } + // still code left, but not excepted + return Err(format!("Expected end of file.")); + }; + last.1.childs.push(NodeChild::Child(state.unwrap().1)); + } + } + } + // should not be possible, because every other path pushes to the stack back or returns + None => { + return Err(format!("Err: EOS")); + } + } + } + } +} + +// + +pub trait Skippable { + fn skippable(&self) -> bool { + false + } +} + +#[derive(Debug, Clone)] +pub enum NodeChild { + Child(ParseTree), + Data(S), +} + +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct ParseTree { + pub rule: Option<(N, usize)>, + pub childs: Vec>, +} + +impl ParseTree { + pub fn new(rule: Option<(N, usize)>) -> Self { + Self { + rule, + childs: Vec::new(), + } + } +} + +impl ParseTree { + /// cleanup the parse tree + /// does not work on a subtree + pub fn clean(self) -> Self { + self.clean_internal() + .expect("Clean only works on the main tree.") + } + + /// internal clean + /// main node must not have a rule. + fn clean_internal(self) -> Result>> { + let childs = self + .childs + .into_iter() + .flat_map(|elem| match elem { + NodeChild::Child(parse_tree) => match parse_tree.clean_internal() { + Ok(tree) => [NodeChild::Child(tree)].into(), + Err(content) => content, + }, + NodeChild::Data(d) => [NodeChild::Data(d)].into(), + }) + .collect(); + if let Some((rule, _)) = &self.rule { + if rule.skippable() { + return Err(childs); + } + + if childs.is_empty() { + return Err(childs); + } + } + Ok(Self { + rule: self.rule, + childs, + }) + } +} diff --git a/src/cfg/lr0.rs b/src/cfg/lr0.rs new file mode 100644 index 0000000..84d1633 --- /dev/null +++ b/src/cfg/lr0.rs @@ -0,0 +1,104 @@ +use std::{ + collections::{HashMap, HashSet}, + hash::{Hash, Hasher}, + rc::{Rc, Weak}, +}; + +use super::{Grammar, Sentential}; + +#[derive(Debug, Eq, PartialEq)] +pub struct LR0State(HashSet<(N, Vec>, usize)>); + +impl LR0State { + pub fn next_kernel(&self, read: &Sentential) -> Self { + let mut next_state: LR0State = LR0State(HashSet::new()); + for (from, to, dot) in self.0.iter() { + if to.get(*dot).map(|elem| *elem == *read).unwrap_or(false) { + next_state.0.insert((from.clone(), to.clone(), dot + 1)); + } + } + next_state + } + pub fn readable(&self) -> HashSet> { + let mut readbles = HashSet::new(); + for (_, to, dot) in self.0.iter() { + if let Some(l) = to.get(*dot) { + readbles.insert(l.clone()); + } + } + readbles + } +} +impl Hash for LR0State { + fn hash(&self, state: &mut H) { + let mut a: Vec<&(N, Vec>, usize)> = self.0.iter().collect(); + a.sort(); + for s in a.iter() { + s.hash(state); + } + } +} + +impl Grammar +where + N: PartialEq + Eq + Hash + Clone + Ord, + T: PartialEq + Eq + Hash + Clone + Ord, +{ + pub fn lr0_clozure(&self, mut state: LR0State) -> LR0State { + loop { + let mut change = false; + let nt = state + .0 + .iter() + .filter_map(|(_, to, dot)| to.get(*dot).cloned()) + .collect::>(); + for n in nt { + if let Sentential::NoneTerminal(n) = n { + if let Some(rule) = self.rules.get(&n) { + for to in rule { + change |= state.0.insert((n.clone(), to.clone(), 0)); + } + } + } + } + + if !change { + return state; + } + } + } + + pub fn gen_lr0_automaton(&mut self) { + let mut out: HashMap>, Vec<(Sentential, Weak>)>> = + HashMap::new(); + let mut start_state = LR0State(HashSet::new()); + if let Some(rule) = self.rules.get(&self.start) { + for to in rule { + start_state.0.insert((self.start.clone(), to.clone(), 0)); + } + } + let rc = Rc::new(self.lr0_clozure(start_state)); + let mut todo = vec![Rc::downgrade(&rc)]; + out.insert(rc, Vec::new()); + while let Some(elem) = todo.pop() { + if let Some(elem) = elem.upgrade() { + let mut vec = Vec::new(); + for none_terminal in elem.readable() { + let next_state = self.lr0_clozure(elem.next_kernel(&none_terminal)); + let rc = Rc::new(next_state); + if let Some((k, _)) = out.get_key_value(&rc) { + vec.push((none_terminal, Rc::downgrade(k))); + } else { + todo.push(Rc::downgrade(&rc)); + vec.push((none_terminal, Rc::downgrade(&rc))); + out.insert(rc, Vec::new()); + } + } + out.entry(elem).and_modify(|elem| { + elem.extend(vec); + }); + } + } + self.lr0_automaton = Some(out); + } +} diff --git a/src/cfg/mod.rs b/src/cfg/mod.rs new file mode 100644 index 0000000..71071a3 --- /dev/null +++ b/src/cfg/mod.rs @@ -0,0 +1,282 @@ +use std::{ + collections::{HashMap, HashSet}, + hash::Hash, + rc::{Rc, Weak}, +}; + +use lr0::LR0State; + +pub mod ll_grammar; +pub mod lr0; + +#[macro_export] +macro_rules! cfg_grammar { + ( + start: $start:ident; + $( + $left:ident -> $( + $right:ident + ),* + );* $(;)? + ) => { + { + let mut map = HashMap::new(); + $({ + if !map.contains_key(&$left) { + map.insert($left, Vec::new()); + } + map.get_mut(&$left).unwrap().push(vec![$($right.into()),*]); + })* + $crate::cfg::Grammar { + start: $start, + rules: map, + first: None, + follow: None, + ll_parse_table: None, + lr0_automaton: None, + } + } + }; +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Sentential { + Terminal(T), + NoneTerminal(N), +} + +impl PartialOrd for Sentential { + fn partial_cmp(&self, other: &Self) -> Option { + match (self, other) { + (Self::Terminal(_), Self::NoneTerminal(_)) => Some(std::cmp::Ordering::Less), + (Self::NoneTerminal(_), Self::Terminal(_)) => Some(std::cmp::Ordering::Greater), + (Self::Terminal(a), Self::Terminal(b)) => a.partial_cmp(b), + (Self::NoneTerminal(a), Self::NoneTerminal(b)) => a.partial_cmp(b), + } + } +} + +impl Ord for Sentential { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + match (self, other) { + (Self::Terminal(_), Self::NoneTerminal(_)) => std::cmp::Ordering::Less, + (Self::NoneTerminal(_), Self::Terminal(_)) => std::cmp::Ordering::Greater, + (Self::Terminal(a), Self::Terminal(b)) => a.cmp(b), + (Self::NoneTerminal(a), Self::NoneTerminal(b)) => a.cmp(b), + } + } +} + +pub struct Grammar { + pub start: N, + pub rules: HashMap>>>, + /// none is epsilon + pub first: Option>>>, + /// none is $ + pub follow: Option>>>, + + // When in State N and reading T, then apply the usize'th rule of N. + /// none is $ + pub ll_parse_table: Option), usize>>, + + /// is a lr0 automaton + /// Graph, defined throw this adjacent list. + /// - key: states + /// - value: list with read symbol and linked node. + pub lr0_automaton: + Option>, Vec<(Sentential, Weak>)>>>, +} + +impl Grammar { + pub fn can_produce_epsilon(&self, rule: &Sentential) -> bool { + match rule { + Sentential::Terminal(_) => false, + Sentential::NoneTerminal(nt) => self + .rules + .get(&nt) + .map(|f| f.iter().any(|v| v.is_empty())) + .unwrap_or(false), + } + } + + pub fn gen_first(&mut self) { + let mut first: HashMap>> = HashMap::new(); + loop { + let mut change = false; + for (from, to) in self.rules.iter() { + 'rule: for to in to.iter() { + // for each rule from -> to = X -> Y1...Yk + // add First(Yn) to First(X) if Y1...Yn-1 => e // n can be 1, disregarding the if + // add e to First(X) if Y1...Yk => e + for symbol in to { + match symbol { + Sentential::Terminal(a) => { + first + .entry(from.clone()) + .and_modify(|e| { + change |= e.insert(Some(a.clone())); + }) + .or_insert_with(|| { + change = true; + HashSet::from([Some(a.clone())]) + }); + } + Sentential::NoneTerminal(nt) => { + if let Some(set) = first.get(nt).cloned() { + first + .entry(from.clone()) + .and_modify(|e| { + for val in set.iter() { + change |= e.insert(val.clone()); + } + }) + .or_insert_with(|| { + change = true; + set + }); + } + } + } + if !self.can_produce_epsilon(symbol) { + continue 'rule; + } + } + first + .entry(from.clone()) + .and_modify(|e| { + change |= e.insert(None); + }) + .or_insert_with(|| { + change = true; + HashSet::from([None]) + }); + } + } + if !change { + break; + } + } + + self.first = Some(first); + } + + pub fn first(&self, sent: &Vec>) -> HashSet> { + assert!(self.first.is_some(), "Please call gen_first before this!"); + let mut out = HashSet::>::new(); + + // Y1Y2...Yk = al + // add First(Yn) to First(al) if Y1...Yn-1 => e // n can be 1, disregarding the if + // add e to First(al) if Y1...Yk => e + 'rule: { + for symbol in sent { + match symbol { + Sentential::Terminal(a) => { + out.insert(Some(a.clone())); + } + Sentential::NoneTerminal(nt) => { + if let Some(set) = self.first.as_ref().unwrap().get(nt).cloned() { + out.extend(set); + } + } + } + if !self.can_produce_epsilon(symbol) { + break 'rule; + } + } + out.insert(None); + } + out + } + + pub fn gen_follow(&mut self) { + if self.first == None { + self.gen_first(); + } + let mut follow: HashMap>> = HashMap::new(); + follow.insert(self.start.clone(), HashSet::from([None])); + loop { + let mut change = false; + + for (from, to) in self.rules.iter() { + for to in to.iter() { + // a + // if A -> aBb then add First(b) - {e} to Follow(B) + // and if A -> aBb and e in First(b) add Follow(A) to Follow(B) + if to.len() >= 2 { + for i in 0..(to.len() - 1) { + let slice = to[i + 1..].iter().map(|f| f.clone()).collect::>(); + match to.get(i) { + Some(Sentential::NoneTerminal(b)) => { + let mut set = self.first(&slice); + if set.contains(&None) { + if let Some(set) = follow.get(from).cloned() { + follow + .entry(b.clone()) + .and_modify(|e| { + for val in set.iter() { + change |= e.insert(val.clone()); + } + }) + .or_insert_with(|| { + change = true; + set + }); + } + } + set.remove(&None); + follow + .entry(b.clone()) + .and_modify(|e| { + for val in set.iter() { + change |= e.insert(val.clone()); + } + }) + .or_insert_with(|| { + change = true; + set + }); + } + _ => (), + } + } + } + // b + // and if A -> aB add Follow(A) to Follow(B) + match to.last() { + Some(Sentential::NoneTerminal(b)) => { + if let Some(set) = follow.get(from).cloned() { + follow + .entry(b.clone()) + .and_modify(|e| { + for val in set.iter() { + change |= e.insert(val.clone()); + } + }) + .or_insert_with(|| { + change = true; + set + }); + } + } + _ => (), + } + } + } + + if !change { + break; + } + } + self.follow = Some(follow); + } + + pub fn follow(&self, none_termianl: &N) -> HashSet> { + assert!(self.follow.is_some(), "Please call gen_follow before this!"); + self.follow + .as_ref() + .unwrap() + .get(&none_termianl) + .cloned() + .unwrap_or(HashSet::new()) + } +} diff --git a/src/double_enum.rs b/src/double_enum.rs index cd8abd4..23d7af8 100644 --- a/src/double_enum.rs +++ b/src/double_enum.rs @@ -13,7 +13,7 @@ macro_rules! double_enum { ( $($args),+ ) )? ),*} - #[derive(Debug, Clone, PartialEq, Eq, Hash)] + #[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] pub enum $bare_name {$($variant),*} impl PartialEq<$name> for $bare_name { diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..aca18f7 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,13 @@ +pub mod cfg; +pub mod double_enum; +pub mod scanner; + +pub mod prelude { + pub use crate::cfg::*; + pub use crate::cfg::ll_grammar::*; + pub use crate::cfg::lr0::*; + pub use crate::cfg_grammar; + pub use crate::double_enum; + pub use crate::scanner::*; + pub use crate::token_scanner; +} diff --git a/src/ll_grammar.rs b/src/ll_grammar.rs deleted file mode 100644 index e6569f5..0000000 --- a/src/ll_grammar.rs +++ /dev/null @@ -1,503 +0,0 @@ -use std::{ - collections::{HashMap, HashSet}, - fmt::Debug, - hash::Hash, -}; - -#[macro_export] -macro_rules! ll_grammar { - ( - start: $start:ident; - $( - $left:ident -> $( - $right:ident - ),* - );* $(;)? - ) => { - { - let mut map = HashMap::new(); - $({ - if !map.contains_key(&$left) { - map.insert($left, Vec::new()); - } - map.get_mut(&$left).unwrap().push(vec![$($right.into()),*]); - })* - $crate::ll_grammar::LLGrammar { - start: $start, - rules: map, - first: None, - follow: None, - parse_table: None, - } - } - }; -} - -#[derive(Debug, Clone)] -pub enum Sentential { - Terminal(T), - NoneTerminal(N), -} - -pub struct LLGrammar { - pub start: N, - pub rules: HashMap>>>, - /// none is epsilon - pub first: Option>>>, - /// none is $ - pub follow: Option>>>, - - // When in State N and reading T, then apply the usize'th rule of N. - /// none is $ - pub parse_table: Option), usize>>, -} - -impl LLGrammar { - pub fn can_produce_epsilon(&self, rule: &Sentential) -> bool { - match rule { - Sentential::Terminal(_) => false, - Sentential::NoneTerminal(nt) => self - .rules - .get(&nt) - .map(|f| f.iter().any(|v| v.is_empty())) - .unwrap_or(false), - } - } - - pub fn gen_first(&mut self) { - let mut first: HashMap>> = HashMap::new(); - loop { - let mut change = false; - for (from, to) in self.rules.iter() { - 'rule: for to in to.iter() { - // for each rule from -> to = X -> Y1...Yk - // add First(Yn) to First(X) if Y1...Yn-1 => e // n can be 1, disregarding the if - // add e to First(X) if Y1...Yk => e - for symbol in to { - match symbol { - Sentential::Terminal(a) => { - first - .entry(from.clone()) - .and_modify(|e| { - change |= e.insert(Some(a.clone())); - }) - .or_insert_with(|| { - change = true; - HashSet::from([Some(a.clone())]) - }); - } - Sentential::NoneTerminal(nt) => { - if let Some(set) = first.get(nt).cloned() { - first - .entry(from.clone()) - .and_modify(|e| { - for val in set.iter() { - change |= e.insert(val.clone()); - } - }) - .or_insert_with(|| { - change = true; - set - }); - } - } - } - if !self.can_produce_epsilon(symbol) { - continue 'rule; - } - } - first - .entry(from.clone()) - .and_modify(|e| { - change |= e.insert(None); - }) - .or_insert_with(|| { - change = true; - HashSet::from([None]) - }); - } - } - if !change { - break; - } - } - - self.first = Some(first); - } - - pub fn first(&self, sent: &Vec>) -> HashSet> { - assert!(self.first.is_some(), "Please call gen_first before this!"); - let mut out = HashSet::>::new(); - - // Y1Y2...Yk = al - // add First(Yn) to First(al) if Y1...Yn-1 => e // n can be 1, disregarding the if - // add e to First(al) if Y1...Yk => e - 'rule: { - for symbol in sent { - match symbol { - Sentential::Terminal(a) => { - out.insert(Some(a.clone())); - } - Sentential::NoneTerminal(nt) => { - if let Some(set) = self.first.as_ref().unwrap().get(nt).cloned() { - out.extend(set); - } - } - } - if !self.can_produce_epsilon(symbol) { - break 'rule; - } - } - out.insert(None); - } - out - } - - pub fn gen_follow(&mut self) { - if self.first == None { - self.gen_first(); - } - let mut follow: HashMap>> = HashMap::new(); - follow.insert(self.start.clone(), HashSet::from([None])); - loop { - let mut change = false; - - for (from, to) in self.rules.iter() { - for to in to.iter() { - // a - // if A -> aBb then add First(b) - {e} to Follow(B) - // and if A -> aBb and e in First(b) add Follow(A) to Follow(B) - if to.len() >= 2 { - for i in 0..(to.len() - 1) { - let slice = to[i + 1..].iter().map(|f| f.clone()).collect::>(); - match to.get(i) { - Some(Sentential::NoneTerminal(b)) => { - let mut set = self.first(&slice); - if set.contains(&None) { - if let Some(set) = follow.get(from).cloned() { - follow - .entry(b.clone()) - .and_modify(|e| { - for val in set.iter() { - change |= e.insert(val.clone()); - } - }) - .or_insert_with(|| { - change = true; - set - }); - } - } - set.remove(&None); - follow - .entry(b.clone()) - .and_modify(|e| { - for val in set.iter() { - change |= e.insert(val.clone()); - } - }) - .or_insert_with(|| { - change = true; - set - }); - } - _ => (), - } - } - } - // b - // and if A -> aB add Follow(A) to Follow(B) - match to.last() { - Some(Sentential::NoneTerminal(b)) => { - if let Some(set) = follow.get(from).cloned() { - follow - .entry(b.clone()) - .and_modify(|e| { - for val in set.iter() { - change |= e.insert(val.clone()); - } - }) - .or_insert_with(|| { - change = true; - set - }); - } - } - _ => (), - } - } - } - - if !change { - break; - } - } - self.follow = Some(follow); - } - - pub fn follow(&self, none_termianl: &N) -> HashSet> { - assert!(self.follow.is_some(), "Please call gen_follow before this!"); - self.follow - .as_ref() - .unwrap() - .get(&none_termianl) - .cloned() - .unwrap_or(HashSet::new()) - } - - pub fn gen_parse_table(&mut self) -> bool { - if self.follow.is_none() { - self.gen_follow(); - } - if self.parse_table.is_some() { - return false; - } - let mut conflict = false; - let mut parse_table: HashMap<(N, Option), usize> = HashMap::new(); - for (from, to) in self.rules.iter() { - for (id, to) in to.iter().enumerate() { - // rule is A -> al - // terminal == None means epsilon - for terminal in self.first(to) { - match terminal { - // let a be in First(al) -> add to T[A,a] = A->al (using the index of al) - Some(terminal) => { - conflict |= parse_table - .insert((from.clone(), Some(terminal.clone())), id) - .is_some(); - } - // if first contains epsilon then - // let b be in Follow(A) -> add to T[A,b] = A->al (using the index of al) - None => { - for terminal in self.follow(from).iter() { - conflict |= parse_table - .insert((from.clone(), terminal.clone()), id) - .is_some() - } - } - } - } - } - } - self.parse_table = Some(parse_table); - conflict - } - - /// get parse_table rule - /// None means error. - pub fn parse_table( - &self, - none_terminal: &N, - terminal: &Option, - ) -> Option<(usize, &Vec>)> { - assert!( - self.parse_table.is_some(), - "Please call gen_parse_table before this!" - ); - self.parse_table - .as_ref() - .unwrap() - .get(&(none_terminal.clone(), terminal.clone())) - .and_then(|f| { - self.rules - .get(none_terminal) - .and_then(|rule| rule.get(*f)) - .map(|rule| (*f, rule)) - }) - } - - pub fn parser<'a, S: Into + PartialEq + Clone>( - &'a self, - iter: &'a mut dyn Iterator>, - ) -> LLTabelParser { - assert!( - self.parse_table.is_some(), - "Please call gen_parse_table before this!" - ); - LLTabelParser { - input: iter, - grammar: self, - } - } -} - -/// Just checks a program. Does not generates output. -pub struct LLTabelParser< - 'a, - N: PartialEq + Eq + Hash + Clone, - T: PartialEq + Eq + Hash + Clone, - S: Into + PartialEq + Clone, -> { - grammar: &'a LLGrammar, - input: &'a mut dyn Iterator>, -} - -impl< - 'a, - N: PartialEq + Eq + Hash + Clone + Debug, - T: PartialEq + Eq + Hash + Clone + Debug, - S: Into + PartialEq + Clone + Debug, - > LLTabelParser<'a, N, T, S> -{ - pub fn parse(&mut self) -> Result, String> { - // stack of table driven parser - // content of the vec: - // - first element: all of them combined represent the complete stack, of the parser. - // - secount element: rule has to able to derive the code defined, by its inner childs and the unparsed code from the accompanying first element. - let mut stack: Vec<(Vec>, ParseTree)> = vec![( - vec![Sentential::NoneTerminal(self.grammar.start.clone())], - ParseTree::new(None), - )]; - - let mut next = match self.input.next() { - Some(Ok(d)) => Some(d), - Some(Err(err)) => return Err(format!("Invalid token: {}", err)), - None => None, - }; - - loop { - // look at current state - let mut state = stack.pop(); - match state.as_mut() { - // processing inner state, of tracked rules - Some((inner_stack, rule)) => { - let inner_state = inner_stack.pop(); - match inner_state { - // match terminal, check if equal - Some(Sentential::Terminal(terminal)) => match (next, terminal) { - // actual vs. expected input - (Some(inn), expect) if inn == expect => { - next = match self.input.next() { - Some(Ok(n)) => Some(n), - Some(Err(err)) => { - return Err(format!("Invalid token: {}", err)) - } - None => None, - }; - rule.childs.push(NodeChild::Data(inn)); - stack.push(state.unwrap()); - } - (a, b) => { - return Err(format!("found: {:?} expected: {:?}", a, b)); - } - }, - // take next none terminal and apply rule from parse table. - Some(Sentential::NoneTerminal(none_term)) => { - // load rule - let Some((id, new_rule)) = self - .grammar - .parse_table(&none_term, &next.as_ref().map(|f| f.clone().into())) - else { - // no rule - return Err(format!( - "Unexpected token: {}", - next.map(|f| format!("{f:?}")) - .unwrap_or("end of file".to_string()) - )); - }; - - // reverse rule: because, uses vec as stack, but reversed - let new_rule_rev = - new_rule.iter().rev().map(|f| f.clone()).collect::>(); - // memorize current state/rule for later - stack.push(state.unwrap()); - // process next rule - stack.push(( - new_rule_rev, - ParseTree { - rule: Some((none_term, id)), - childs: Vec::new(), - }, - )); - } - // inner state is empty: current rule is finished - None => { - // if stack is empty, this is the initial state: finish or err - let Some(last) = stack.last_mut() else { - // ok: input has ended - if next.is_none() { - return Ok(state.unwrap().1); - } - // still code left, but not excepted - return Err(format!("Expected end of file.")); - }; - last.1.childs.push(NodeChild::Child(state.unwrap().1)); - } - } - } - // should not be possible, because every other path pushes to the stack back or returns - None => { - return Err(format!("Err: EOS")); - } - } - } - } -} - -// - -pub trait Skippable { - fn skippable(&self) -> bool { - false - } -} - -#[derive(Debug, Clone)] -pub enum NodeChild { - Child(ParseTree), - Data(S), -} - -#[derive(Debug, Clone)] -#[allow(dead_code)] -pub struct ParseTree { - pub rule: Option<(N, usize)>, - pub childs: Vec>, -} - -impl ParseTree { - pub fn new(rule: Option<(N, usize)>) -> Self { - Self { - rule, - childs: Vec::new(), - } - } -} - -impl ParseTree { - /// cleanup the parse tree - /// does not work on a subtree - pub fn clean(self) -> Self { - self.clean_internal() - .expect("Clean only works on the main tree.") - } - - /// internal clean - /// main node must not have a rule. - fn clean_internal(self) -> Result>> { - let childs = self - .childs - .into_iter() - .flat_map(|elem| match elem { - NodeChild::Child(parse_tree) => match parse_tree.clean_internal() { - Ok(tree) => [NodeChild::Child(tree)].into(), - Err(content) => content, - }, - NodeChild::Data(d) => [NodeChild::Data(d)].into(), - }) - .collect(); - if let Some((rule, _)) = &self.rule { - if rule.skippable() { - return Err(childs); - } - - if childs.is_empty() { - return Err(childs); - } - } - Ok(Self { - rule: self.rule, - childs, - }) - } -} diff --git a/src/main.rs b/src/main.rs index 8bd06f0..8119a72 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,6 @@ -mod double_enum; -mod ll_grammar; -mod scanner; -use ll_grammar::{LLGrammar, Sentential, Skippable}; -use regex::{Match, Regex}; -use scanner::Scanner; +use ll_grammar::Skippable; +use rcompiler::prelude::*; +use regex::Match; use std::collections::HashMap; double_enum!( @@ -34,7 +31,7 @@ double_enum!( } ); -scanner!( +token_scanner!( Tokens, r"^\s|\t|\n|\r" : |_,_| { Some(WhiteSpace) @@ -134,22 +131,22 @@ impl Skippable for NoneTerminals { } } -impl From for Sentential { +impl From for Sentential { fn from(value: NoneTerminals) -> Self { Sentential::NoneTerminal(value) } } -impl From for Sentential { +impl From for Sentential { fn from(value: BareTokens) -> Self { Sentential::Terminal(value) } } -fn grammer() -> LLGrammar { +fn grammer() -> Grammar { use BareTokens::*; use NoneTerminals::*; - ll_grammar![ + cfg_grammar![ start: P; P -> L,P; P -> ; @@ -201,14 +198,14 @@ fn main() { grammar.gen_follow(); println!("first: {:?}", grammar.first); println!("follow: {:?}", grammar.follow); - let conflict = grammar.gen_parse_table(); + let conflict = grammar.gen_ll_parse_table(); println!("conflict: {conflict}"); - println!("prase table: {:?}", grammar.parse_table); + println!("prase table: {:?}", grammar.ll_parse_table); println!("parse\n\n"); println!( "parsed: {:?}", grammar - .parser(&mut m.iter_mut()) + .ll_parser(&mut m.iter_mut()) .parse() .map(|tree| tree.clean()) ) diff --git a/src/scanner.rs b/src/scanner.rs index c7025ea..f1976bd 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,13 +1,14 @@ use std::marker::PhantomData; #[macro_export] -macro_rules! scanner { +macro_rules! token_scanner { ($name:ident,$( $regex:tt : $code:expr )*) => { impl $crate::scanner::MatchNext<$name> for $name { fn match_next(code: &String) -> Option<(Self, usize)> { use $name::*; + use regex::Regex; $( if let Some(capture) = Regex::new($regex).unwrap().captures(&code) { if let Some(main_capture) = capture.get(0) {