diff --git a/Cargo.toml b/Cargo.toml
index 0a34098..0ffb61d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,3 +16,6 @@ name = "book"
 
 [[bin]]
 name = "g10"
+
+[[bin]]
+name = "ast"
diff --git a/src/ast_gen.rs b/src/ast_gen.rs
new file mode 100644
index 0000000..1167b57
--- /dev/null
+++ b/src/ast_gen.rs
@@ -0,0 +1,55 @@
+/// One converted child of a parse-tree node, as seen by the arms of `ast_gen!`.
+#[derive(Debug, Clone)]
+pub enum AstGenData<A, B> {
+    /// AST value built by recursively converting a non-terminal child.
+    Ast(A),
+    /// Clone of a terminal token stored directly in the parse tree.
+    Ter(B),
+}
+
+/// Generates `$ast::from_tree`, converting a `ParseTree` into `$ast` bottom-up.
+///
+/// Every `(lhs => [children]) => code` arm is matched against the node's rule
+/// (`start` is used when the node carries no rule) and the slice of already
+/// converted children. `code` must evaluate to, or `return`, the function's
+/// `Result`. A node that matches no arm produces an `Err` naming the rule and
+/// its children.
+#[macro_export]
+macro_rules! ast_gen {
+    (
+        types: $ast:ident,
+        $tokens:ident,
+        $none_terminal:ident;
+        start: $start:expr;
+        $(
+            ($left:pat => $matcher:pat) => $code:stmt
+        ),* $(,)?
+    ) => {
+        impl $ast {
+            pub fn from_tree(tree: &ParseTree<$none_terminal, $tokens>) -> Result<Self, String> {
+                use $crate::ast_gen::AstGenData;
+                use AstGenData::*;
+                use $none_terminal::*;
+                use $tokens::*;
+                use $ast::*;
+                let rule = tree.rule.as_ref().map(|(r,_)| r.clone()).unwrap_or($start);
+
+                // Convert children first so the arms can pattern-match on finished values.
+                let mut args = Vec::new();
+                for node in tree.childs.iter() {
+                    args.push(match node {
+                        NodeChild::Data(d) => AstGenData::<$ast, $tokens>::Ter(d.clone()),
+                        NodeChild::Child(child) => {
+                            AstGenData::<$ast, $tokens>::Ast(Self::from_tree(child)?)
+                        }
+                    });
+                }
+
+                match (rule, args.as_slice()) {
+                    $(($left, $matcher) => {$code},)*
+                    (rule, _) => Err(format!("no rule for: {:?} -> {:?}", rule, args)),
+                }
+            }
+        }
+    };
+}
diff --git a/src/bin/ast.rs b/src/bin/ast.rs
new file mode 100644
index 0000000..14d69bf
--- /dev/null
+++ b/src/bin/ast.rs
@@ -0,0 +1,396 @@
+use rcompiler::prelude::*;
+use regex::Match;
+use std::collections::HashMap;
+
+// Token set of the demo language. Brace naming: `LBrace`/`RBrace` are `(` `)`,
+// `LSBrace`/`RSBrace` are `[` `]`, `LQBrace`/`RQBrace` are `{` `}`.
+double_enum!(
+    BareTokens, Tokens {
+        WhiteSpace,
+        Semicolon,
+        Colon,
+        Comma,
+        Add,
+        Sub,
+        Mul,
+        Div,
+        Eq,
+        Neq,
+        Assign,
+        While,
+        For,
+        If,
+        Else,
+        Return,
+        LBrace,
+        RBrace,
+        LSBrace,
+        RSBrace,
+        LQBrace,
+        RQBrace,
+        Dot,
+        TVoid,
+        TFloat,
+        TBool,
+        TChar,
+        TInt,
+        TStr,
+        TArr,
+        TFun,
+        Ident(String),
+        Int(i64),
+        Str(String),
+        Float(f64),
+    }
+);
+
+// Scanner rules: each closure turns a regex match into an optional token.
+token_scanner!(
+    Tokens,
+    r"^(\s|\t|\n|\r)" : |_,_| {
+        Some(WhiteSpace)
+    }
+    r"^;" : |_,_| {
+        Some(Semicolon)
+    }
+    r"^\:" : |_,_| {
+        Some(Colon)
+    }
+    r"^," : |_,_| {
+        Some(Comma)
+    }
+    r"^\+" : |_,_| {
+        Some(Add)
+    }
+    r"^-" : |_,_| {
+        Some(Sub)
+    }
+    r"^\*" : |_,_| {
+        Some(Mul)
+    }
+    r"^/" : |_,_| {
+        Some(Div)
+    }
+    r"^==" : |_,_| {
+        Some(Eq)
+    }
+    r"^!=" : |_,_| {
+        Some(Neq)
+    }
+    r"^=" : |_,_| {
+        Some(Assign)
+    }
+    r"^while" : |_,_| {
+        Some(While)
+    }
+    r"^if" : |_,_| {
+        Some(If)
+    }
+    r"^for" : |_,_| {
+        Some(For)
+    }
+    r"^else" : |_,_| {
+        Some(Else)
+    }
+    r"^return" : |_,_| {
+        Some(Return)
+    }
+    r"^void" : |_,_|{
+        Some(TVoid)
+    }
+    r"^bool" : |_,_|{
+        Some(TBool)
+    }
+    r"^char" : |_,_|{
+        Some(TChar)
+    }
+    r"^int" : |_,_|{
+        Some(TInt)
+    }
+    r"^float" : |_,_|{
+        Some(TFloat)
+    }
+    r"^str" : |_,_|{
+        Some(TStr)
+    }
+    r"^arr" : |_,_|{
+        Some(TArr)
+    }
+    r"^fun" : |_,_|{
+        Some(TFun)
+    }
+    r"^\(" : |_,_| {
+        Some(LBrace)
+    }
+    r"^\)" : |_,_| {
+        Some(RBrace)
+    }
+    r"^\[" : |_,_| {
+        Some(LSBrace)
+    }
+    r"^\]" : |_,_| {
+        Some(RSBrace)
+    }
+    r"^\{" : |_,_| {
+        Some(LQBrace)
+    }
+    r"^\}" : |_,_| {
+        Some(RQBrace)
+    }
+    r"^[a-zA-Z][a-zA-Z0-9_]*" : |_, m: Match<'_>| {
+        Some(Ident(String::from(m.as_str())))
+    }
+    r"^\." : |_, _| {
+        Some(Dot)
+    }
+    r"^-?[0-9]+\.[0-9]*" : |_, m: Match<'_>| {
+        m.as_str().parse::<_>().ok().map(Float)
+    }
+    r#"^"(([^"\\]|(\\[a-z\\"]))*)""# : |capture: regex::Captures<'_>, _| {
+        capture.get(1).map(|m| Str(m.as_str().to_string()))
+    }
+    r"^-?[0-9]+" : |_, m: Match<'_>| {
+        m.as_str().parse::<_>().ok().map(Int)
+    }
+);
+
+/// Non-terminals of the demo grammar.
+#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
+enum NoneTerminals {
+    P, // Program, ; separated
+    Pi,
+    D, // declaration
+    E, // Expression
+    Ty, // Type
+    Ta, // all types
+    A, // Args
+    S, // Statement
+    Si, // Statement extension
+    T, // Term
+    F, // Factor
+    L, // Literal
+}
+
+impl From<NoneTerminals> for Sentential<NoneTerminals, BareTokens> {
+    fn from(value: NoneTerminals) -> Self {
+        Sentential::NoneTerminal(value)
+    }
+}
+
+impl From<BareTokens> for Sentential<NoneTerminals, BareTokens> {
+    fn from(value: BareTokens) -> Self {
+        Sentential::Terminal(value)
+    }
+}
+
+/// Builds the context-free grammar of the demo language (start symbol `P`).
+fn grammer() -> Grammar<NoneTerminals, BareTokens> {
+    use BareTokens::*;
+    use NoneTerminals::*;
+    cfg_grammar![
+        start: P;
+        P -> Pi;
+        Pi -> D, Pi;
+        Pi -> ;
+        D -> Ident, Colon, Ty, Assign, E, Semicolon;
+        D -> Ident, Colon, TFun, Ta, LBrace, A, RBrace, Assign, S;
+
+        S -> D;
+        S -> E;
+        S -> If, LBrace, E, RBrace, S, Else, S;
+        S -> LQBrace, Si, RQBrace;
+        Si -> S, Si;
+        Si -> ;
+        S -> For, LBrace, E, Comma, E, Comma, E, RBrace, S;
+        S -> Return, E, Semicolon;
+
+        E -> T;
+        T -> F;
+        F -> L;
+
+        T -> T, Add, F;
+        T -> T, Sub, F;
+        F -> F, Mul, L;
+        F -> F, Div, L;
+
+        L -> Ident;
+        L -> Int;
+        L -> Float;
+        L -> Str;
+        L -> L, LSBrace, E, RSBrace;
+        L -> LBrace, E, RBrace;
+
+        Ty -> TVoid;
+        Ty -> TBool;
+        Ty -> TChar;
+        Ty -> TInt;
+        Ty -> TFloat;
+        Ty -> TStr;
+        Ta -> Ty;
+        Ty -> TArr, LSBrace, Int, RSBrace, Ta;
+        Ta -> TFun, Ta, LBrace, A, RBrace;
+
+        A -> Ident, Colon, Ta, Comma, A;
+        A -> Ident, Colon, Ta;
+    ]
+}
+
+/// Types of the demo language.
+#[derive(Debug, Clone)]
+#[allow(dead_code)]
+enum VType {
+    Void,
+    Boolean,
+    Char,
+    Integer,
+    Float,
+    String,
+    Array(Box<(usize, VType)>),
+    Function(Box<VType>, Vec<(String, VType)>),
+}
+
+/// A declaration: a typed name bound to an initial value or to a function body.
+#[derive(Debug, Clone)]
+#[allow(dead_code)]
+struct VDecl {
+    name: String,
+    ttype: VType,
+    value: Option<VExpr>,
+    code: Option<VStmt>,
+}
+
+/// Statements of the demo language.
+#[derive(Debug, Clone)]
+#[allow(dead_code)]
+enum VStmt {
+    Decl(Box<VDecl>),
+    Expr(Box<VExpr>),
+    IfElse(Box<VExpr>, Box<VStmt>, Box<VStmt>),
+    For(Box<VExpr>, Box<VExpr>, Box<VExpr>, Box<VStmt>),
+    Block(Vec<VStmt>),
+    Return(Box<VExpr>),
+}
+
+/// Expressions of the demo language.
+#[derive(Debug, Clone)]
+#[allow(dead_code)]
+enum VExpr {
+    Add(Box<VExpr>, Box<VExpr>),
+    Sub(Box<VExpr>, Box<VExpr>),
+    Mul(Box<VExpr>, Box<VExpr>),
+    Div(Box<VExpr>, Box<VExpr>),
+    Arr(Box<VExpr>, Box<VExpr>),
+    Ident(String),
+    Int(i64),
+    Float(f64),
+    String(String),
+}
+
+/// Result of converting one parse-tree node; one variant per kind of grammar symbol.
+#[derive(Debug, Clone)]
+#[allow(dead_code)]
+enum AST {
+    Prog(Vec<VDecl>),
+    Decl(VDecl),
+    Stmt(VStmt),
+    Expr(VExpr),
+    Type(VType),
+    Args(Vec<(String, VType)>),
+}
+
+// Maps every grammar production to the AST value it builds.
+ast_gen! {
+    types: AST, Tokens, NoneTerminals;
+    start: NoneTerminals::P;
+    (P => [Ast(Prog(v))]) => Ok(AST::Prog(v.to_vec())),
+    (Pi => [Ast(Decl(d)), Ast(Prog(v))]) => {
+        let mut prog = vec![d.clone()];
+        prog.extend(v.clone());
+        return Ok(Prog(prog));
+    },
+    (Pi => []) => Ok(Prog(Vec::new())),
+    (D => [Ter(Ident(name)), Ter(Colon), Ast(Type(ty)), Ter(Assign), Ast(Expr(expr)), Ter(Semicolon)]) =>
+        Ok(Decl(VDecl { name: name.clone(), ttype: ty.clone(), value: Some(expr.clone()), code: None })),
+    (D => [Ter(Ident(name)), Ter(Colon), Ter(TFun), Ast(Type(ty)), Ter(LBrace), Ast(Args(args)), Ter(RBrace), Ter(Assign), Ast(Stmt(stmt))]) =>
+        Ok(Decl(VDecl { name: name.clone(), ttype: VType::Function(Box::new(ty.clone()), args.clone()), value: None, code: Some(stmt.clone()) })),
+    (S => [Ast(Decl(d))]) =>
+        Ok(Stmt(VStmt::Decl(Box::from(d.clone())))),
+    (S => [Ast(Expr(v))]) =>
+        Ok(Stmt(VStmt::Expr(Box::new(v.clone())))),
+    (S => [Ter(If), Ter(LBrace), Ast(Expr(cond)), Ter(RBrace), Ast(Stmt(iff)), Ter(Else), Ast(Stmt(eelse))]) =>
+        Ok(Stmt(VStmt::IfElse(Box::new(cond.clone()), Box::new(iff.clone()), Box::new(eelse.clone())))),
+    (S => [Ter(LQBrace), Ast(Stmt(VStmt::Block(stats))), Ter(RQBrace)]) =>
+        Ok(Stmt(VStmt::Block(stats.clone()))),
+    (Si => [Ast(Stmt(stat)), Ast(Stmt(VStmt::Block(stats)))]) => {
+        let mut st = vec![stat.clone()];
+        st.extend(stats.clone());
+        return Ok(Stmt(VStmt::Block(st)));
+    },
+    (Si => []) => Ok(Stmt(VStmt::Block(Vec::new()))),
+    (S => [Ter(For), Ter(LBrace), Ast(Expr(pre)), Ter(Comma), Ast(Expr(cond)), Ter(Comma), Ast(Expr(post)), Ter(RBrace), Ast(Stmt(body))]) =>
+        Ok(Stmt(VStmt::For(Box::new(pre.clone()), Box::new(cond.clone()), Box::new(post.clone()), Box::new(body.clone())))),
+    (S => [Ter(Return), Ast(Expr(expr)), Ter(Semicolon)]) => Ok(Stmt(VStmt::Return(Box::new(expr.clone())))),
+    (E | T | F => [Ast(Expr(v))]) => Ok(Expr(v.clone())),
+    (T => [Ast(Expr(left)), Ter(Add), Ast(Expr(right))]) =>
+        Ok(Expr(VExpr::Add(Box::new(left.clone()), Box::new(right.clone())))),
+    (T => [Ast(Expr(left)), Ter(Sub), Ast(Expr(right))]) =>
+        Ok(Expr(VExpr::Sub(Box::new(left.clone()), Box::new(right.clone())))),
+    (F => [Ast(Expr(left)), Ter(Mul), Ast(Expr(right))]) =>
+        Ok(Expr(VExpr::Mul(Box::new(left.clone()), Box::new(right.clone())))),
+    (F => [Ast(Expr(left)), Ter(Div), Ast(Expr(right))]) =>
+        Ok(Expr(VExpr::Div(Box::new(left.clone()), Box::new(right.clone())))),
+    (L => [Ter(Ident(n))]) => Ok(Expr(VExpr::Ident(n.clone()))),
+    (L => [Ter(Int(i))]) => Ok(Expr(VExpr::Int(*i))),
+    (L => [Ter(Float(f))]) => Ok(Expr(VExpr::Float(*f))),
+    (L => [Ter(Str(s))]) => Ok(Expr(VExpr::String(s.clone()))),
+    // Indexing uses square brackets (`L -> L, LSBrace, E, RSBrace`) and builds `VExpr::Arr`.
+    (L => [Ast(Expr(arr)), Ter(LSBrace), Ast(Expr(expr)), Ter(RSBrace)]) =>
+        Ok(Expr(VExpr::Arr(Box::new(arr.clone()), Box::new(expr.clone())))),
+    (L => [Ter(LBrace), Ast(Expr(expr)), Ter(RBrace)]) => Ok(Expr(expr.clone())),
+    (Ty => [Ter(TVoid)]) => Ok(Type(VType::Void)),
+    (Ty => [Ter(TBool)]) => Ok(Type(VType::Boolean)),
+    (Ty => [Ter(TChar)]) => Ok(Type(VType::Char)),
+    (Ty => [Ter(TInt)]) => Ok(Type(VType::Integer)),
+    (Ty => [Ter(TFloat)]) => Ok(Type(VType::Float)),
+    (Ty => [Ter(TStr)]) => Ok(Type(VType::String)),
+    (Ta => [Ast(Type(t))]) => Ok(Type(t.clone())),
+    (Ty => [Ter(TArr), Ter(LSBrace), Ter(Int(i)), Ter(RSBrace), Ast(Type(t))]) => {
+        if *i < 0 {
+            return Err(format!("Invalid array size: {i}"));
+        }
+        return Ok(Type(VType::Array(Box::new((*i as usize, t.clone())))));
+    },
+    (Ta => [Ter(TFun), Ast(Type(ty)), Ter(LBrace), Ast(Args(args)), Ter(RBrace)]) =>
+        Ok(Type(VType::Function(Box::new(ty.clone()), args.clone()))),
+    (A => [Ter(Ident(name)), Ter(Colon), Ast(Type(ty)), Ter(Comma), Ast(Args(args))]) => {
+        let mut prog = vec![(name.clone(), ty.clone())];
+        prog.extend(args.clone());
+        return Ok(Args(prog));
+    },
+    (A => [Ter(Ident(name)), Ter(Colon), Ast(Type(ty))]) => Ok(Args(vec![(name.clone(), ty.clone())])),
+}
+
+/// Scans and parses a small sample program, then prints the resulting AST.
+fn main() {
+    let code = String::from(
+        r#"
+        a: int = 5;
+        b: fun int (a:int) = { b: int = a; return a; }
+        c: arr[5] arr[5] int = 5;
+        "#,
+    );
+
+    let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);
+
+    let mut grammar = grammer();
+    grammar.gen_lr1_automaton();
+    let conflict = grammar.gen_lr1_parse_table();
+    println!("conflict: {conflict}");
+    let tree = grammar.lr1_parser(&mut m.iter_mut()).parse();
+    let Ok(tree) = tree else {
+        println!("err: {:?}", tree.unwrap_err());
+        return;
+    };
+    let a = AST::from_tree(&tree);
+    println!("a: {:?}", a);
+}
diff --git a/src/cfg/lr_parser.rs b/src/cfg/lr_parser.rs
index ef8184b..46f367a 100644
--- a/src/cfg/lr_parser.rs
+++ b/src/cfg/lr_parser.rs
@@ -78,7 +78,7 @@ where
                         .unwrap_or("end of file".to_string())
                 ));
             };
-            
+
             match current_state {
                 LrAction::Shift(to) => {
                     stack.push((NodeChild::Data(next.expect("Can not shift on EOF.")), *to));
@@ -102,7 +102,7 @@ where
                         }
                         childs.push(last);
                     }
-                    if *self.start_rule == *rule {
+                    if *self.start_rule == *rule && next.is_none() && stack.is_empty() {
                         return Ok(ParseTree {
                             rule: Some((rule.clone(), *ind)),
                             childs: childs.into_iter().rev().map(|(a, _)| a.clone()).collect(),
diff --git a/src/lib.rs b/src/lib.rs
index f31c4a8..9dc8d65 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,11 +1,16 @@
 pub mod cfg;
 pub mod double_enum;
 pub mod scanner;
+pub mod ast_gen;
 
 pub mod prelude {
     pub use crate::cfg::*;
     pub use crate::cfg::ll_grammar::*;
     pub use crate::cfg::lr0_grammar::*;
+    pub use crate::cfg::lr1_grammar::*;
+    pub use crate::cfg::lr_parser::*;
+    pub use crate::ast_gen::*;
+    pub use crate::ast_gen;
     pub use crate::cfg_grammar;
     pub use crate::double_enum;
     pub use crate::scanner::*;
diff --git a/src/main.rs b/src/main.rs
index 36b8fb0..3001ed5 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -33,7 +33,7 @@ double_enum!(
 
 token_scanner!(
     Tokens,
-    r"^\s|\t|\n|\r" : |_,_| {
+    r"^(\s|\t|\n|\r)" : |_,_| {
         Some(WhiteSpace)
     }
     r"^;" : |_,_| {
@@ -185,6 +185,7 @@ fn grammer() -> Grammar {
     ]
 }
 
+
 fn main() {
     let code = String::from(
         "a = 4; while a != 5 { a = (a+1) * 4; }; if a == 5 { a = \"abs123\"; } else {a = 5;}",
diff --git a/src/scanner.rs b/src/scanner.rs
index 2c21d41..16788a5 100644
--- a/src/scanner.rs
+++ b/src/scanner.rs
@@ -61,10 +61,10 @@ impl<'a, T: MatchNext + PartialEq + std::fmt::Debug> Iterator for ScannerIter
     type Item = Result;
 
     fn next(&mut self) -> Option {
-        if self.0.code.is_empty() {
-            return None;
-        }
         loop {
+            if self.0.code.is_empty() {
+                return None;
+            }
             if let Some((token, len)) = T::match_next(&self.0.code) {
                 self.0.code = self.0.code.split_off(len);
                 if self.0.skip.contains(&token) {