diff --git a/src/double_enum.rs b/src/double_enum.rs index 28cc815..cd8abd4 100644 --- a/src/double_enum.rs +++ b/src/double_enum.rs @@ -24,11 +24,18 @@ macro_rules! double_enum { } } } - + impl PartialEq<$bare_name> for $name { fn eq(&self, other: &$bare_name) -> bool { other.eq(self) } } + impl From<$name> for $bare_name{ + fn from(value: $name) -> Self { + match value { + $($name::$variant{ .. } => $bare_name::$variant,)* + } + } + } } -} \ No newline at end of file +} diff --git a/src/ll_grammar.rs b/src/ll_grammar.rs index 039ab9c..75a24aa 100644 --- a/src/ll_grammar.rs +++ b/src/ll_grammar.rs @@ -7,6 +7,7 @@ use std::{ #[macro_export] macro_rules! ll_grammar { ( + start: $start:ident; $( $left:ident -> $( $right:ident @@ -22,9 +23,11 @@ macro_rules! ll_grammar { map.get_mut(&$left).unwrap().push(vec![$($right.into()),*]); })* $crate::ll_grammar::LLGrammar { + start: $start, rules: map, first: None, follow: None, + parse_table: None, } } }; @@ -37,16 +40,19 @@ pub enum Sentential { } pub struct LLGrammar { + pub start: N, pub rules: HashMap>>>, /// none is epsilon pub first: Option>>>, /// none is $ pub follow: Option>>>, + + // When in State N and reading T, then apply the usize'th rule of N. + /// none is $ + pub parse_table: Option), usize>>, } -impl - LLGrammar -{ +impl LLGrammar { pub fn can_produce_epsilon(&self, rule: &Sentential) -> bool { match rule { Sentential::Terminal(_) => false, @@ -147,12 +153,12 @@ impl>> = HashMap::new(); - follow.insert(start, HashSet::from([None])); + follow.insert(self.start.clone(), HashSet::from([None])); loop { let mut change = false; @@ -228,4 +234,149 @@ impl HashSet> { + assert!(self.follow.is_some(), "Please call gen_follow before this!"); + self.follow + .as_ref() + .unwrap() + .get(&none_termianl) + .cloned() + .unwrap_or(HashSet::new()) + } + + pub fn gen_parse_table(&mut self) -> bool { + if self.follow.is_none() { + self.gen_follow(); + } + if self.parse_table.is_some() { + return false; + } + let mut conflict = false; + let mut parse_table: HashMap<(N, Option), usize> = HashMap::new(); + for (from, to) in self.rules.iter() { + for (id, to) in to.iter().enumerate() { + // rule is A -> al + // terminal == None means epsilon + for terminal in self.first(to) { + match terminal { + // let a be in First(al) -> add to T[A,a] = A->al (using the index of al) + Some(terminal) => { + conflict |= parse_table + .insert((from.clone(), Some(terminal.clone())), id) + .is_some(); + } + // if first contains epsilon then + // let b be in Follow(A) -> add to T[A,b] = A->al (using the index of al) + None => { + for terminal in self.follow(from).iter() { + conflict |= parse_table + .insert((from.clone(), terminal.clone()), id) + .is_some() + } + } + } + } + } + } + self.parse_table = Some(parse_table); + conflict + } + + /// get parse_table rule + /// None means error. + pub fn parse_table( + &self, + none_terminal: &N, + terminal: &Option, + ) -> Option<&Vec>> { + assert!( + self.parse_table.is_some(), + "Please call gen_parse_table before this!" + ); + self.parse_table + .as_ref() + .unwrap() + .get(&(none_terminal.clone(), terminal.clone())) + .and_then(|f| self.rules.get(none_terminal).and_then(|rule| rule.get(*f))) + } + + pub fn create_checker<'a, S: Into + PartialEq + Clone>( + &'a self, + iter: &'a mut dyn Iterator, + ) -> LLTabelParser { + assert!( + self.parse_table.is_some(), + "Please call gen_parse_table before this!" + ); + LLTabelParser { + input: iter, + grammar: self, + } + } +} + +/// Just checks a program. Does not generates output. +pub struct LLTabelParser< + 'a, + N: PartialEq + Eq + Hash + Clone, + T: PartialEq + Eq + Hash + Clone, + S: Into + PartialEq + Clone, +> { + grammar: &'a LLGrammar, + input: &'a mut dyn Iterator, +} + +impl< + 'a, + N: PartialEq + Eq + Hash + Clone + Debug, + T: PartialEq + Eq + Hash + Clone + Debug, + S: Into + PartialEq + Clone + Debug, + > LLTabelParser<'a, N, T, S> +{ + pub fn parse(&mut self) -> bool { + let mut stack: Vec, N>> = vec![ + Sentential::Terminal(None), + Sentential::NoneTerminal(self.grammar.start.clone()), + ]; + let mut next = self.input.next(); + loop { + if next.is_none() { + println!("EOF"); + return self.input.size_hint().0 == 0; + } + let state = stack.pop(); + match state { + Some(Sentential::Terminal(t)) => match (next, t) { + (Some(a), Some(b)) if a == b => { + next = self.input.next(); + } + (None, None) => { + next = self.input.next(); + } + (a, b) => { + println!("not matching terminals: {a:?}, {b:?}"); + return false; + } + }, + Some(Sentential::NoneTerminal(nt)) => { + let Some(a) = self + .grammar + .parse_table(&nt, &next.as_ref().map(|f| f.clone().into())) + else { + println!("no parse table entry: {nt:?} next: {next:?}"); + return false; + }; + stack.extend(a.iter().rev().map(|f| match f { + Sentential::Terminal(t) => Sentential::Terminal(Some(t.clone())), + Sentential::NoneTerminal(nt) => Sentential::NoneTerminal(nt.clone()), + })); + } + None => { + println!("EOS"); + return false; + } + } + } + } } diff --git a/src/main.rs b/src/main.rs index 18c9d50..c79990d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,13 +9,23 @@ use std::collections::HashMap; double_enum!( BareTokens, Tokens { WhiteSpace, + Semicolon, Add, Sub, Mul, Div, + Eq, + Neq, + Assign, While, + If, LBrace, RBrace, + LSBrace, + RSBrace, + LQBrace, + RQBrace, + Dot, Ident(String), Int(i64), Float(f64), @@ -27,6 +37,9 @@ scanner!( r"^\s|\t|\n" : |_,_| { Some(WhiteSpace) } + r"^;" : |_,_| { + Some(Semicolon) + } r"^\+" : |_,_| { Some(Add) } @@ -39,18 +52,42 @@ scanner!( r"^/" : |_,_| { Some(Div) } + r"^==" : |_,_| { + Some(Eq) + } + r"^!=" : |_,_| { + Some(Neq) + } + r"^=" : |_,_| { + Some(Assign) + } r"^while" : |_,_| { Some(While) } - r"\(" : |_,_| { + r"^\(" : |_,_| { Some(LBrace) } - r"\)" : |_,_| { + r"^\)" : |_,_| { Some(RBrace) } + r"^\[" : |_,_| { + Some(LSBrace) + } + r"^\]" : |_,_| { + Some(RSBrace) + } + r"^\{" : |_,_| { + Some(LQBrace) + } + r"^\}" : |_,_| { + Some(RQBrace) + } r"^[a-zA-Z](\w)*" : |_, m: Match<'_>| { Some(Ident(String::from(m.as_str()))) } + r"^\." : |_, _| { + Some(Dot) + } r"^[0-9]+.[0-9]*" : |_, m: Match<'_>| { m.as_str().parse::<_>().ok().map(|f| Float(f)) } @@ -61,12 +98,13 @@ scanner!( #[derive(Debug, PartialEq, Eq, Hash, Clone)] enum NoneTerminals { - P, - E, - Ei, - T, - Ti, - F, + P, // Program, ; separated + E, // Expression one line + Ei, // Expression extended additive + T, // Term, only containing Factors + Ti, // Term extend multiplicative + F, // Factor + FI, // Factor extended with complex types and operators } impl From for Sentential { @@ -85,8 +123,14 @@ fn grammer() -> LLGrammar { use BareTokens::*; use NoneTerminals::*; ll_grammar![ - P -> E; + start: P; + P -> E,Semicolon,P; + P -> ; E -> T,Ei; + E -> While,LBrace,E,RBrace,LQBrace,P,RQBrace; + Ei -> Assign,T,Ei; + Ei -> Eq,T,Ei; + Ei -> Neq,T,Ei; Ei -> Add,T,Ei; Ei -> Sub,T,Ei; Ei -> ; @@ -97,21 +141,30 @@ fn grammer() -> LLGrammar { F -> LBrace, E, RBrace; F -> Int; F -> Float; - F -> Ident; + F -> Ident,FI; + FI -> ; + FI -> LBrace,E,RBrace; + FI -> Dot, FI; + FI -> LSBrace,E,RSBrace; ] } + fn main() { - let code = String::from("while 12 + a - 3.4 / 0. * 4"); + let code = String::from("a = 4; while(a != 5) {a = a+1;};"); let mut m = Scanner::::new(code).with_skipping(Tokens::WhiteSpace); - for token in m.iter_mut() { + /* for token in m.iter_mut() { println!("{:?}", token); - } - if !m.is_empty() { - println!("Error"); - } + } */ let mut grammar = grammer(); - grammar.gen_follow(NoneTerminals::P); + grammar.gen_follow(); println!("first: {:?}", grammar.first); println!("follow: {:?}", grammar.follow); + let conflict = grammar.gen_parse_table(); + println!("conflict: {conflict}"); + println!("prase table: {:?}", grammar.parse_table); + println!( + "parsed: {}", + grammar.create_checker(&mut m.iter_mut()).parse() + ) } diff --git a/src/scanner.rs b/src/scanner.rs index b49ae51..bf2223f 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,4 +1,4 @@ -use std::marker::PhantomData; +use std::{iter::Peekable, marker::PhantomData}; #[macro_export] macro_rules! scanner { @@ -73,4 +73,10 @@ impl<'a, T: MatchNext + PartialEq> Iterator for ScannerIter<'a, T> { return None; } } + fn size_hint(&self) -> (usize, Option) { + ( + if self.0.code.is_empty() { 0 } else { 1 }, + Some(self.0.code.len()), + ) + } }