From f35ecee4cbf62d398564cdafa77834558af67fbd Mon Sep 17 00:00:00 2001 From: jusax23 Date: Sun, 27 Oct 2024 01:32:18 +0200 Subject: [PATCH] working llgrammar --- src/double_enum.rs | 2 +- src/ll_grammar.rs | 228 +++++++++++++++++++++++++++++++++++++++++++-- src/main.rs | 83 ++++++++++------- src/scanner.rs | 33 +------ 4 files changed, 268 insertions(+), 78 deletions(-) diff --git a/src/double_enum.rs b/src/double_enum.rs index 559c844..28cc815 100644 --- a/src/double_enum.rs +++ b/src/double_enum.rs @@ -13,7 +13,7 @@ macro_rules! double_enum { ( $($args),+ ) )? ),*} - #[derive(Debug, Clone, PartialEq)] + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum $bare_name {$($variant),*} impl PartialEq<$name> for $bare_name { diff --git a/src/ll_grammar.rs b/src/ll_grammar.rs index 0bbf32f..039ab9c 100644 --- a/src/ll_grammar.rs +++ b/src/ll_grammar.rs @@ -1,23 +1,231 @@ +use std::{ + collections::{HashMap, HashSet}, + fmt::Debug, + hash::Hash, +}; + #[macro_export] macro_rules! ll_grammar { ( - $tokens:ident, - $bare_tokens:ident, - $grammar:ident, - $non_term:ident, - [$( + $( $left:ident -> $( $right:ident ),* - );* $(;)?] - ) => {}; + );* $(;)? + ) => { + { + let mut map = HashMap::new(); + $({ + if !map.contains_key(&$left) { + map.insert($left, Vec::new()); + } + map.get_mut(&$left).unwrap().push(vec![$($right.into()),*]); + })* + $crate::ll_grammar::LLGrammar { + rules: map, + first: None, + follow: None, + } + } + }; } -enum Sentential { +#[derive(Debug, Clone)] +pub enum Sentential { Terminal(T), NoneTerminal(N), } -pub struct Grammar { - rules: Vec<(T, Vec>)>, +pub struct LLGrammar { + pub rules: HashMap>>>, + /// none is epsilon + pub first: Option>>>, + /// none is $ + pub follow: Option>>>, +} + +impl + LLGrammar +{ + pub fn can_produce_epsilon(&self, rule: &Sentential) -> bool { + match rule { + Sentential::Terminal(_) => false, + Sentential::NoneTerminal(nt) => self + .rules + .get(&nt) + .map(|f| f.iter().any(|v| v.is_empty())) + .unwrap_or(false), + } + } + + pub fn gen_first(&mut self) { + let mut first: HashMap>> = HashMap::new(); + loop { + let mut change = false; + for (from, to) in self.rules.iter() { + 'rule: for to in to.iter() { + // for each rule from -> to = X -> Y1...Yk + // add First(Yn) to First(X) if Y1...Yn-1 => e // n can be 1, disregarding the if + // add e to First(X) if Y1...Yk => e + for symbol in to { + match symbol { + Sentential::Terminal(a) => { + first + .entry(from.clone()) + .and_modify(|e| { + change |= e.insert(Some(a.clone())); + }) + .or_insert_with(|| { + change = true; + HashSet::from([Some(a.clone())]) + }); + } + Sentential::NoneTerminal(nt) => { + if let Some(set) = first.get(nt).cloned() { + first + .entry(from.clone()) + .and_modify(|e| { + for val in set.iter() { + change |= e.insert(val.clone()); + } + }) + .or_insert_with(|| { + change = true; + set + }); + } + } + } + if !self.can_produce_epsilon(symbol) { + continue 'rule; + } + } + first + .entry(from.clone()) + .and_modify(|e| { + change |= e.insert(None); + }) + .or_insert_with(|| { + change = true; + HashSet::from([None]) + }); + } + } + if !change { + break; + } + } + + self.first = Some(first); + } + + pub fn first(&self, sent: &Vec>) -> HashSet> { + assert!(self.first.is_some(), "Please call gen_first before this!"); + let mut out = HashSet::>::new(); + + // Y1Y2...Yk = al + // add First(Yn) to First(al) if Y1...Yn-1 => e // n can be 1, disregarding the if + // add e to First(al) if Y1...Yk => e + 'rule: { + for symbol in sent { + match symbol { + Sentential::Terminal(a) => { + out.insert(Some(a.clone())); + } + Sentential::NoneTerminal(nt) => { + if let Some(set) = self.first.as_ref().unwrap().get(nt).cloned() { + out.extend(set); + } + } + } + if !self.can_produce_epsilon(symbol) { + break 'rule; + } + } + out.insert(None); + } + out + } + + pub fn gen_follow(&mut self, start: N) { + if self.first == None { + self.gen_first(); + } + let mut follow: HashMap>> = HashMap::new(); + follow.insert(start, HashSet::from([None])); + loop { + let mut change = false; + + for (from, to) in self.rules.iter() { + for to in to.iter() { + // a + // if A -> aBb then add First(b) - {e} to Follow(B) + // and if A -> aBb and e in First(b) add Follow(A) to Follow(B) + if to.len() >= 2 { + for i in 0..(to.len() - 1) { + let slice = to[i + 1..].iter().map(|f| f.clone()).collect::>(); + match to.get(i) { + Some(Sentential::NoneTerminal(b)) => { + let mut set = self.first(&slice); + if set.contains(&None) { + if let Some(set) = follow.get(from).cloned() { + follow + .entry(b.clone()) + .and_modify(|e| { + for val in set.iter() { + change |= e.insert(val.clone()); + } + }) + .or_insert_with(|| { + change = true; + set + }); + } + } + set.remove(&None); + follow + .entry(b.clone()) + .and_modify(|e| { + for val in set.iter() { + change |= e.insert(val.clone()); + } + }) + .or_insert_with(|| { + change = true; + set + }); + } + _ => (), + } + } + } + // b + // and if A -> aB add Follow(A) to Follow(B) + match to.last() { + Some(Sentential::NoneTerminal(b)) => { + if let Some(set) = follow.get(from).cloned() { + follow + .entry(b.clone()) + .and_modify(|e| { + for val in set.iter() { + change |= e.insert(val.clone()); + } + }) + .or_insert_with(|| { + change = true; + set + }); + } + } + _ => (), + } + } + } + + if !change { + break; + } + } + self.follow = Some(follow); + } } diff --git a/src/main.rs b/src/main.rs index e12df40..15732bd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,12 +1,12 @@ +mod double_enum; mod ll_grammar; mod scanner; -mod double_enum; +use ll_grammar::{LLGrammar, Sentential}; use regex::{Match, Regex}; use scanner::Scanner; +use std::collections::HashMap; - - -scanner!( +double_enum!( BareTokens, Tokens { WhiteSpace, Add, @@ -14,11 +14,17 @@ scanner!( Mul, Div, While, + LBrace, + RBrace, Ident(String), Int(i64), Float(f64), } - r"^\s|\t|\n" : |_,_|{ +); + +scanner!( + Tokens, + r"^\s|\t|\n" : |_,_| { Some(WhiteSpace) } r"^\+" : |_,_| { @@ -36,6 +42,12 @@ scanner!( r"^while" : |_,_| { Some(While) } + r"\(" : |_,_| { + Some(LBrace) + } + r"\)" : |_,_| { + Some(RBrace) + } r"^[a-zA-Z](\w)*" : |_, m: Match<'_>| { Some(Ident(String::from(m.as_str()))) } @@ -47,28 +59,45 @@ scanner!( } ); +#[derive(Debug, PartialEq, Eq, Hash, Clone)] enum NoneTerminals { P, E, Ei, T, + Ti, + F, } -ll_grammar!( - Tokens, - BareTokens, - Grammar, - NoneTerminals, - [ +impl From for Sentential { + fn from(value: NoneTerminals) -> Self { + Sentential::NoneTerminal(value) + } +} + +impl From for Sentential { + fn from(value: BareTokens) -> Self { + Sentential::Terminal(value) + } +} + +fn grammer() -> LLGrammar { + use BareTokens::*; + use NoneTerminals::*; + ll_grammar![ P -> E; E -> T,Ei; Ei -> Add,T,Ei; Ei -> ; - T -> Ident; - T -> Int; + T -> F,Ti; + Ti -> Mul,F,Ti; + Ti -> ; + F -> LBrace, E, RBrace; + F -> Int; + F -> Float; + F -> Ident; ] -); - +} fn main() { let code = String::from("while 12 + a - 3.4 / 0. * 4"); let mut m = Scanner::::new(code).with_skipping(Tokens::WhiteSpace); @@ -78,25 +107,9 @@ fn main() { if !m.is_empty() { println!("Error"); } -} -#[derive(Debug, Clone, PartialEq)] -pub enum Test { - A, - B, -} -#[derive(Debug, Clone)] -pub enum BareTest { - A, - B, -} - -impl PartialEq for BareTest { - fn eq(&self, other: &Test) -> bool { - match (self, other){ - (BareTest::A, Test::A) => true, - (BareTest::B, Test::B)=> true, - _=> false - } - } + let mut grammar = grammer(); + grammar.gen_follow(NoneTerminals::P); + println!("first: {:?}", grammar.first); + println!("follow: {:?}", grammar.follow); } diff --git a/src/scanner.rs b/src/scanner.rs index 1869465..b49ae51 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -2,40 +2,9 @@ use std::marker::PhantomData; #[macro_export] macro_rules! scanner { - ($bare_name:ident, $name:ident { - $( - $variant:ident$( - ( $($args:ty),+ $(,)? ) - )? - ),* $(,)? - } - $( + ($name:ident,$( $regex:tt : $code:expr )*) => { - #[derive(Debug, Clone, PartialEq)] - pub enum $name {$( - $variant$( - ( $($args),+ ) - )? - ),*} - #[derive(Debug, Clone, PartialEq)] - pub enum $bare_name {$($variant),*} - - impl PartialEq<$name> for $bare_name { - fn eq(&self, other: &$name) -> bool { - match (self, other){ - $(($bare_name::$variant, $name::$variant{ .. }) => true,)* - _=> false - } - } - } - - impl PartialEq<$bare_name> for $name { - fn eq(&self, other: &$bare_name) -> bool { - other.eq(self) - } - } - impl $crate::scanner::MatchNext<$name> for $name { fn match_next(code: &String) -> Option<(Self, usize)> { use $name::*;