From 5fafa6f342527bea757b55fa32c3d922a018cd2d Mon Sep 17 00:00:00 2001 From: jusax23 Date: Sun, 27 Oct 2024 20:45:43 +0100 Subject: [PATCH] working ll parser --- src/ll_grammar.rs | 161 ++++++++++++++++++++++++++++++++++------------ src/main.rs | 31 +++++---- src/scanner.rs | 17 +++-- 3 files changed, 148 insertions(+), 61 deletions(-) diff --git a/src/ll_grammar.rs b/src/ll_grammar.rs index 75a24aa..819607f 100644 --- a/src/ll_grammar.rs +++ b/src/ll_grammar.rs @@ -289,7 +289,7 @@ impl LLGramm &self, none_terminal: &N, terminal: &Option, - ) -> Option<&Vec>> { + ) -> Option<(usize, &Vec>)> { assert!( self.parse_table.is_some(), "Please call gen_parse_table before this!" @@ -298,12 +298,17 @@ impl LLGramm .as_ref() .unwrap() .get(&(none_terminal.clone(), terminal.clone())) - .and_then(|f| self.rules.get(none_terminal).and_then(|rule| rule.get(*f))) + .and_then(|f| { + self.rules + .get(none_terminal) + .and_then(|rule| rule.get(*f)) + .map(|rule| (*f, rule)) + }) } - pub fn create_checker<'a, S: Into + PartialEq + Clone>( + pub fn parser<'a, S: Into + PartialEq + Clone>( &'a self, - iter: &'a mut dyn Iterator, + iter: &'a mut dyn Iterator>, ) -> LLTabelParser { assert!( self.parse_table.is_some(), @@ -324,7 +329,7 @@ pub struct LLTabelParser< S: Into + PartialEq + Clone, > { grammar: &'a LLGrammar, - input: &'a mut dyn Iterator, + input: &'a mut dyn Iterator>, } impl< @@ -334,49 +339,121 @@ impl< S: Into + PartialEq + Clone + Debug, > LLTabelParser<'a, N, T, S> { - pub fn parse(&mut self) -> bool { - let mut stack: Vec, N>> = vec![ - Sentential::Terminal(None), - Sentential::NoneTerminal(self.grammar.start.clone()), - ]; - let mut next = self.input.next(); + pub fn parse(&mut self) -> Result, String> { + // stack of table driven parser + // content of the vec: + // - first element: all of them combined represent the complete stack, of the parser. + // - secount element: rule has to able to derive the code defined, by its inner childs and the unparsed code from the accompanying first element. + let mut stack: Vec<(Vec>, ParseTree)> = vec![( + vec![Sentential::NoneTerminal(self.grammar.start.clone())], + ParseTree::new(None), + )]; + + let mut next = match self.input.next() { + Some(Ok(d)) => Some(d), + Some(Err(err)) => return Err(format!("Invalid token: {}", err)), + None => None, + }; + loop { - if next.is_none() { - println!("EOF"); - return self.input.size_hint().0 == 0; - } - let state = stack.pop(); - match state { - Some(Sentential::Terminal(t)) => match (next, t) { - (Some(a), Some(b)) if a == b => { - next = self.input.next(); + // look at current state + let mut state = stack.pop(); + match state.as_mut() { + // processing inner state, of tracked rules + Some((inner_stack, rule)) => { + let inner_state = inner_stack.pop(); + match inner_state { + // match terminal, check if equal + Some(Sentential::Terminal(terminal)) => match (next, terminal) { + // actual vs. expected input + (Some(inn), expect) if inn == expect => { + next = match self.input.next() { + Some(Ok(n)) => Some(n), + Some(Err(err)) => { + return Err(format!("Invalid token: {}", err)) + } + None => None, + }; + rule.childs.push(NodeChild::Data(inn)); + stack.push(state.unwrap()); + } + (a, b) => { + return Err(format!("found: {:?} expected: {:?}", a, b)); + } + }, + // take next none terminal and apply rule from parse table. + Some(Sentential::NoneTerminal(none_term)) => { + // load rule + let Some((id, new_rule)) = self + .grammar + .parse_table(&none_term, &next.as_ref().map(|f| f.clone().into())) + else { + // no rule + return Err(format!( + "Unexpected token: {}", + next.map(|f| format!("{f:?}")) + .unwrap_or("end of file".to_string()) + )); + }; + + // reverse rule: because, uses vec as stack, but reversed + let new_rule_rev = + new_rule.iter().rev().map(|f| f.clone()).collect::>(); + // memorize current state/rule for later + stack.push(state.unwrap()); + // process next rule + stack.push(( + new_rule_rev, + ParseTree { + rule: Some((none_term, id)), + childs: Vec::new(), + }, + )); + } + // inner state is empty: current rule is finished + None => { + // if stack is empty, this is the initial state: finish or err + let Some(last) = stack.last_mut() else { + // ok: input has ended + if next.is_none() { + return Ok(state.unwrap().1); + } + // still code left, but not excepted + return Err(format!("Expected end of file.")); + }; + last.1.childs.push(NodeChild::Child(state.unwrap().1)); + } } - (None, None) => { - next = self.input.next(); - } - (a, b) => { - println!("not matching terminals: {a:?}, {b:?}"); - return false; - } - }, - Some(Sentential::NoneTerminal(nt)) => { - let Some(a) = self - .grammar - .parse_table(&nt, &next.as_ref().map(|f| f.clone().into())) - else { - println!("no parse table entry: {nt:?} next: {next:?}"); - return false; - }; - stack.extend(a.iter().rev().map(|f| match f { - Sentential::Terminal(t) => Sentential::Terminal(Some(t.clone())), - Sentential::NoneTerminal(nt) => Sentential::NoneTerminal(nt.clone()), - })); } + // should not be possible, because every other path pushes to the stack back or returns None => { - println!("EOS"); - return false; + return Err(format!("Err: EOS")); } } } } } + +// + +#[derive(Debug, Clone)] +pub enum NodeChild { + Child(ParseTree), + Data(S), +} + +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct ParseTree { + pub rule: Option<(N, usize)>, + pub childs: Vec>, +} + +impl ParseTree { + pub fn new(rule: Option<(N, usize)>) -> Self { + Self { + rule, + childs: Vec::new(), + } + } +} diff --git a/src/main.rs b/src/main.rs index c79990d..d474a6d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -34,7 +34,7 @@ double_enum!( scanner!( Tokens, - r"^\s|\t|\n" : |_,_| { + r"^\s|\t|\n|\r" : |_,_| { Some(WhiteSpace) } r"^;" : |_,_| { @@ -64,6 +64,9 @@ scanner!( r"^while" : |_,_| { Some(While) } + r"^if" : |_,_| { + Some(If) + } r"^\(" : |_,_| { Some(LBrace) } @@ -98,8 +101,10 @@ scanner!( #[derive(Debug, PartialEq, Eq, Hash, Clone)] enum NoneTerminals { - P, // Program, ; separated - E, // Expression one line + P, // Program, ; separated + L, // Line of code + Li, // line extended for assignments + E, // Expression Ei, // Expression extended additive T, // Term, only containing Factors Ti, // Term extend multiplicative @@ -124,11 +129,14 @@ fn grammer() -> LLGrammar { use NoneTerminals::*; ll_grammar![ start: P; - P -> E,Semicolon,P; + P -> L,Semicolon,P; P -> ; + L -> While,E,LQBrace,P,RQBrace; + L -> If,E,LQBrace,P,RQBrace; + L -> Ident,FI,Li; + Li -> Assign,E; + Li -> ; E -> T,Ei; - E -> While,LBrace,E,RBrace,LQBrace,P,RQBrace; - Ei -> Assign,T,Ei; Ei -> Eq,T,Ei; Ei -> Neq,T,Ei; Ei -> Add,T,Ei; @@ -150,11 +158,8 @@ fn grammer() -> LLGrammar { } fn main() { - let code = String::from("a = 4; while(a != 5) {a = a+1;};"); + let code = String::from("a = 4;while a != 5 { a = a+1; }; if a == 5 { a = 4; };"); let mut m = Scanner::::new(code).with_skipping(Tokens::WhiteSpace); - /* for token in m.iter_mut() { - println!("{:?}", token); - } */ let mut grammar = grammer(); grammar.gen_follow(); @@ -163,8 +168,6 @@ fn main() { let conflict = grammar.gen_parse_table(); println!("conflict: {conflict}"); println!("prase table: {:?}", grammar.parse_table); - println!( - "parsed: {}", - grammar.create_checker(&mut m.iter_mut()).parse() - ) + println!("parse\n\n"); + println!("parsed: {:?}", grammar.parser(&mut m.iter_mut()).parse()) } diff --git a/src/scanner.rs b/src/scanner.rs index bf2223f..c7025ea 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,4 +1,4 @@ -use std::{iter::Peekable, marker::PhantomData}; +use std::marker::PhantomData; #[macro_export] macro_rules! scanner { @@ -33,6 +33,7 @@ pub struct Scanner + PartialEq> { _a: PhantomData, } +#[allow(dead_code)] impl + PartialEq> Scanner { pub fn new(code: String) -> Self { Self { @@ -55,8 +56,8 @@ impl + PartialEq> Scanner { pub struct ScannerIter<'a, T: MatchNext + PartialEq>(&'a mut Scanner); -impl<'a, T: MatchNext + PartialEq> Iterator for ScannerIter<'a, T> { - type Item = T; +impl<'a, T: MatchNext + PartialEq + std::fmt::Debug> Iterator for ScannerIter<'a, T> { + type Item = Result; fn next(&mut self) -> Option { if self.0.code.is_empty() { @@ -68,9 +69,15 @@ impl<'a, T: MatchNext + PartialEq> Iterator for ScannerIter<'a, T> { if self.0.skip.contains(&token) { continue; } - return Some(token); + return Some(Ok(token)); } - return None; + return self + .0 + .code + .split(" ") + .collect::>() + .first() + .map(|f| Err(f.to_string())); } } fn size_hint(&self) -> (usize, Option) {