From 686e43448a29bb489d94deec40eafc3a4718b89b Mon Sep 17 00:00:00 2001 From: jusax23 Date: Sat, 2 Nov 2024 21:07:39 +0100 Subject: [PATCH] cleanup parse tree --- .gitignore | 2 + src/ll_grammar.rs | 44 ++++++++++++++++++++++ src/main.rs | 95 ++++++++++++++++++++++++++++++++--------------- 3 files changed, 111 insertions(+), 30 deletions(-) diff --git a/.gitignore b/.gitignore index ea8c4bf..c2816a4 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ /target + +log* \ No newline at end of file diff --git a/src/ll_grammar.rs b/src/ll_grammar.rs index 819607f..e6569f5 100644 --- a/src/ll_grammar.rs +++ b/src/ll_grammar.rs @@ -436,6 +436,12 @@ impl< // +pub trait Skippable { + fn skippable(&self) -> bool { + false + } +} + #[derive(Debug, Clone)] pub enum NodeChild { Child(ParseTree), @@ -457,3 +463,41 @@ impl ParseTree { } } } + +impl ParseTree { + /// cleanup the parse tree + /// does not work on a subtree + pub fn clean(self) -> Self { + self.clean_internal() + .expect("Clean only works on the main tree.") + } + + /// internal clean + /// main node must not have a rule. + fn clean_internal(self) -> Result>> { + let childs = self + .childs + .into_iter() + .flat_map(|elem| match elem { + NodeChild::Child(parse_tree) => match parse_tree.clean_internal() { + Ok(tree) => [NodeChild::Child(tree)].into(), + Err(content) => content, + }, + NodeChild::Data(d) => [NodeChild::Data(d)].into(), + }) + .collect(); + if let Some((rule, _)) = &self.rule { + if rule.skippable() { + return Err(childs); + } + + if childs.is_empty() { + return Err(childs); + } + } + Ok(Self { + rule: self.rule, + childs, + }) + } +} diff --git a/src/main.rs b/src/main.rs index 7de0486..8bd06f0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,7 @@ mod double_enum; mod ll_grammar; mod scanner; -use ll_grammar::{LLGrammar, Sentential}; +use ll_grammar::{LLGrammar, Sentential, Skippable}; use regex::{Match, Regex}; use scanner::Scanner; use std::collections::HashMap; @@ -29,6 +29,7 @@ double_enum!( Dot, Ident(String), Int(i64), + Str(String), Float(f64), } ); @@ -98,6 +99,9 @@ scanner!( r"^[0-9]+.[0-9]*" : |_, m: Match<'_>| { m.as_str().parse::<_>().ok().map(|f| Float(f)) } + r#"^"(([^"\\]|(\\[a-z\\"]))*)""# : |capture: regex::Captures<'_>, _| { + capture.get(1).map(|m| Str(m.as_str().to_string())) + } r"^[0-9]+" : |_, m: Match<'_>| { m.as_str().parse::<_>().ok().map(|i| Int(i)) } @@ -105,16 +109,29 @@ scanner!( #[derive(Debug, PartialEq, Eq, Hash, Clone)] enum NoneTerminals { - P, // Program, ; separated - L, // Line of code - Li, // line extended for assignments - IF, // if helper - E, // Expression - Ei, // Expression extended additive - T, // Term, only containing Factors - Ti, // Term extend multiplicative - F, // Factor - FI, // Factor extended with complex types and operators + P, // Program, ; separated + L, // Line of code + Li, // line extended for assignments + IF, // if helper + Sem, // optional semicolon + E, // Expression + Ei, // Expression extended additive + T, // math term + Ti, // meth term extended + S, // Summand, only containing Factors + Si, // Summand extend multiplicative + F, // Factor + Fi, // Factor extended with complex types and operators +} + +impl Skippable for NoneTerminals { + fn skippable(&self) -> bool { + use NoneTerminals::*; + match self { + P | Li | Ei | Si | Ti | Fi | IF | Sem => true, + _ => false, + } + } } impl From for Sentential { @@ -134,38 +151,50 @@ fn grammer() -> LLGrammar { use NoneTerminals::*; ll_grammar![ start: P; - P -> L,Semicolon,P; + P -> L,P; P -> ; - L -> While,E,LQBrace,P,RQBrace; + L -> While,E,LQBrace,P,RQBrace,Sem; L -> If,E,LQBrace,P,RQBrace,IF; - IF -> ; - IF -> Else,LQBrace,P,RQBrace; - L -> Ident,FI,Li; - Li -> Assign,E; - Li -> ; + IF -> Sem; + IF -> Else,LQBrace,P,RQBrace,Sem; + Sem -> ; + Sem -> Semicolon; + L -> Ident,Fi,Li; + Li -> Assign,E,Semicolon; + Li -> Semicolon; + E -> T,Ei; Ei -> Eq,T,Ei; Ei -> Neq,T,Ei; - Ei -> Add,T,Ei; - Ei -> Sub,T,Ei; Ei -> ; - T -> F,Ti; - Ti -> Mul,F,Ti; - Ti -> Div,F,Ti; + + T -> S,Ti; + Ti -> Add,S,Ti; + Ti -> Sub,S,Ti; Ti -> ; + + S -> F,Si; + Si -> Mul,F,Si; + Si -> Div,F,Si; + Si -> ; + F -> LBrace, E, RBrace; F -> Int; F -> Float; - F -> Ident,FI; - FI -> ; - FI -> LBrace,E,RBrace; - FI -> Dot, FI; - FI -> LSBrace,E,RSBrace; + F -> Str; + F -> Ident,Fi; + + Fi -> ; + Fi -> LBrace,E,RBrace; + Fi -> Dot, Fi; + Fi -> LSBrace,E,RSBrace; ] } fn main() { - let code = String::from("a = 4; while a != 5 { a = a+1; }; if a == 5 { a = 4; } else {a = 5;};"); + let code = String::from( + "a = 4; while a != 5 { a = (a+1) * 4; }; if a == 5 { a = \"abs123\"; } else {a = 5;}", + ); let mut m = Scanner::::new(code).with_skipping(Tokens::WhiteSpace); let mut grammar = grammer(); @@ -176,5 +205,11 @@ fn main() { println!("conflict: {conflict}"); println!("prase table: {:?}", grammar.parse_table); println!("parse\n\n"); - println!("parsed: {:?}", grammar.parser(&mut m.iter_mut()).parse()) + println!( + "parsed: {:?}", + grammar + .parser(&mut m.iter_mut()) + .parse() + .map(|tree| tree.clean()) + ) }