This commit is contained in:
jusax23 2024-11-12 17:09:06 +01:00
parent 3c741567df
commit 58136d6c9d
Signed by: jusax23
GPG key ID: 499E2AA870C1CD41
6 changed files with 517 additions and 3 deletions

View file

@ -16,3 +16,6 @@ name = "book"
[[bin]]
name = "g10"
[[bin]]
name = "ast"

45
src/ast_gen.rs Normal file
View file

@ -0,0 +1,45 @@
/// One already-processed child of a parse-tree node, as handed to the rule
/// patterns of the `ast_gen!` macro.
///
/// `A` is the AST type being built and `B` is the token type.
#[derive(Debug, Clone)]
pub enum AstGenData<A, B> {
/// A child subtree that has already been converted into an AST value.
Ast(A),
/// A terminal (token) that appeared directly as a child of the node.
Ter(B),
}
/// Generates `impl $ast { pub fn from_tree(..) }`, a bottom-up converter from a
/// parse tree into the AST type `$ast`.
///
/// Syntax:
///
/// ```text
/// ast_gen! {
///     types: AstType, TokenType, NonTerminalType;
///     start: <expression yielding the start non-terminal>;
///     (<non-terminal pattern> => <slice pattern over the children>) => <statement>,
///     ...
/// }
/// ```
///
/// For every node the children are converted first (tokens become
/// `AstGenData::Ter`, subtrees become `AstGenData::Ast`), then the pair
/// `(rule, children)` is matched against the given rules in order. The rule
/// statement is expected to `return` a `Result<$ast, String>`.
///
/// The expansion refers to `ParseTree` and `NodeChild` by their bare names, so
/// both must be in scope at the call site. Variants of `$ast`, `$tokens`,
/// `$none_terminal` and `AstGenData` are glob-imported and can be used
/// unqualified inside the rule patterns.
#[macro_export]
macro_rules! ast_gen {
    (
        types: $ast:ident,
        $tokens:ident,
        $none_terminal:ident;
        start: $start:expr;
        $(
            ($left:pat => $matcher:pat) => $code:stmt
        ),* $(,)?
    ) => {
        impl $ast {
            /// Converts `tree` into an AST value, or returns a description of
            /// the first node for which no rule matched.
            pub fn from_tree(tree: &ParseTree<$none_terminal, $tokens>) -> Result<Self, String> {
                // `$crate` always names the crate that defines this macro, so the
                // expansion no longer depends on the caller re-exporting `ast_gen`.
                use $crate::ast_gen::AstGenData::{self, *};
                use $none_terminal::*;
                use $tokens::*;
                use $ast::*;

                // A node without an attached rule is treated as the start symbol.
                let rule = tree.rule.as_ref().map(|(r, _)| r.clone()).unwrap_or($start);

                // Convert the children left to right; the first failing subtree
                // aborts the whole conversion.
                let args = tree
                    .childs
                    .iter()
                    .map(|node| -> Result<AstGenData<$ast, $tokens>, String> {
                        Ok(match node {
                            NodeChild::Data(d) => AstGenData::<$ast, $tokens>::Ter(d.clone()),
                            NodeChild::Child(child) => {
                                AstGenData::<$ast, $tokens>::Ast(Self::from_tree(child)?)
                            }
                        })
                    })
                    .collect::<Result<Vec<_>, String>>()?;

                match (rule, args.as_slice()) {
                    $(($left, $matcher) => {$code},)*
                    (rule, _) => Err(format!("no rule for: {:?} -> {:?}", rule, args)),
                }
            }
        }
    };
}

460
src/bin/ast.rs Normal file
View file

@ -0,0 +1,460 @@
use rcompiler::prelude::*;
use regex::Match;
use std::collections::HashMap;
// Token vocabulary of the demo language.
//
// The rest of this file uses two types from this invocation: `BareTokens`,
// whose variants appear without payload as grammar terminals (e.g. `Ident`,
// `Int`), and `Tokens`, whose variants carry the scanned value (e.g.
// `Ident(name)`, `Int(i)`).
// NOTE(review): the exact items generated by `double_enum!` are defined
// elsewhere - confirm against the macro.
double_enum!(
BareTokens, Tokens {
// Skipped by the scanner in `main` (`with_skipping(Tokens::WhiteSpace)`).
WhiteSpace,
// Punctuation and operators.
Semicolon,
Colon,
Comma,
Add,
Sub,
Mul,
Div,
Eq,
Neq,
Assign,
// Keywords.
While,
For,
If,
Else,
Return,
// Brackets: `(` `)`, `[` `]`, `{` `}` (see the scanner rules).
LBrace,
RBrace,
LSBrace,
RSBrace,
LQBrace,
RQBrace,
Dot,
// Type keywords.
TVoid,
TFloat,
TBool,
TChar,
TInt,
TStr,
TArr,
TFun,
// Tokens carrying a value.
Ident(String),
Int(i64),
Str(String),
Float(f64),
}
);
// Scanner rules for `Tokens`: each entry is a regex anchored at the start of
// the remaining input plus a closure producing the token (or `None`).
// The closure's first argument is the regex captures and the second the
// overall match (see the annotated closures on the identifier and string rules).
//
// NOTE(review): keywords are listed before the identifier rule, and `^-` is
// listed before the numeric rules that accept an optional leading `-`. If the
// scanner picks the first matching rule, identifiers starting with a keyword
// (e.g. `iffy`, `integer`) would be split and negative literals would never
// be produced - confirm the scanner's matching strategy.
token_scanner!(
Tokens,
// `\s` already covers `\t`, `\n` and `\r`; the alternatives are redundant but harmless.
r"^(\s|\t|\n|\r)" : |_,_| {
Some(WhiteSpace)
}
r"^;" : |_,_| {
Some(Semicolon)
}
r"^\:" : |_,_| {
Some(Colon)
}
r"^," : |_,_| {
Some(Comma)
}
r"^\+" : |_,_| {
Some(Add)
}
r"^-" : |_,_| {
Some(Sub)
}
r"^\*" : |_,_| {
Some(Mul)
}
r"^/" : |_,_| {
Some(Div)
}
// `==` and `!=` are listed before the single `=` rule.
r"^==" : |_,_| {
Some(Eq)
}
r"^!=" : |_,_| {
Some(Neq)
}
r"^=" : |_,_| {
Some(Assign)
}
// Control-flow keywords.
r"^while" : |_,_| {
Some(While)
}
r"^if" : |_,_| {
Some(If)
}
r"^for" : |_,_| {
Some(For)
}
r"^else" : |_,_| {
Some(Else)
}
r"^return" : |_,_| {
Some(Return)
}
// Type keywords.
r"^void" : |_,_|{
Some(TVoid)
}
r"^bool" : |_,_|{
Some(TBool)
}
r"^char" : |_,_|{
Some(TChar)
}
r"^int" : |_,_|{
Some(TInt)
}
r"^float" : |_,_|{
Some(TFloat)
}
r"^str" : |_,_|{
Some(TStr)
}
r"^arr" : |_,_|{
Some(TArr)
}
r"^fun" : |_,_|{
Some(TFun)
}
// Brackets.
r"^\(" : |_,_| {
Some(LBrace)
}
r"^\)" : |_,_| {
Some(RBrace)
}
r"^\[" : |_,_| {
Some(LSBrace)
}
r"^\]" : |_,_| {
Some(RSBrace)
}
r"^\{" : |_,_| {
Some(LQBrace)
}
r"^\}" : |_,_| {
Some(RQBrace)
}
// Identifier: a letter followed by letters, digits or underscores.
r"^[a-zA-Z][a-zA-Z0-9_]*" : |_, m: Match<'_>| {
Some(Ident(String::from(m.as_str())))
}
r"^\." : |_, _| {
Some(Dot)
}
// Float literal: digits, a dot, optional fraction digits. Listed before the
// integer rule; yields `None` if the text does not parse.
r"^-?[0-9]+\.[0-9]*" : |_, m: Match<'_>| {
m.as_str().parse::<_>().ok().map(Float)
}
// String literal: capture group 1 is the text between the quotes. Escape
// sequences are kept verbatim, not unescaped.
r#"^"(([^"\\]|(\\[a-z\\"]))*)""# : |capture: regex::Captures<'_>, _| {
capture.get(1).map(|m| Str(m.as_str().to_string()))
}
// Integer literal; yields `None` if the text does not parse (e.g. out of range).
r"^-?[0-9]+" : |_, m: Match<'_>| {
m.as_str().parse::<_>().ok().map(Int)
}
);
/// Non-terminal symbols of the grammar defined in `grammer()`.
#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
enum NoneTerminals {
P, // Program (start symbol): a sequence of declarations
Pi, // Program list: zero or more declarations
D, // Declaration (variable or function)
E, // Expression
Ty, // Type: base types and arrays
Ta, // Any type: `Ty` or a function type
A, // Argument list (name: type, ...)
S, // Statement
Si, // Statement list inside a `{ ... }` block
T, // Term (additive level: `+`, `-`)
F, // Factor (multiplicative level: `*`, `/`)
L, // Literal, identifier, indexing or parenthesised expression
}
impl<N> From<NoneTerminals> for Sentential<NoneTerminals, N> {
fn from(value: NoneTerminals) -> Self {
Sentential::NoneTerminal(value)
}
}
impl<T> From<BareTokens> for Sentential<T, BareTokens> {
fn from(value: BareTokens) -> Self {
Sentential::Terminal(value)
}
}
/// Builds the context-free grammar of the demo language, with `P` as the
/// start symbol and `BareTokens` as terminals.
///
/// NOTE(review): the tokens `While`, `Eq`, `Neq` and `Dot` are scanned but do
/// not appear in any rule below.
fn grammer() -> Grammar<NoneTerminals, BareTokens> {
use BareTokens::*;
use NoneTerminals::*;
cfg_grammar![
start: P;
// Program: a possibly empty list of declarations.
P -> Pi;
Pi -> D, Pi;
Pi -> ;
// Variable declaration: `name: type = expr;`
D -> Ident, Colon, Ty, Assign, E, Semicolon;
// Function declaration: `name: fun type (args) = stmt`
D -> Ident, Colon, TFun, Ta, LBrace, A, RBrace, Assign, S;
// Statements. Note that an expression statement (`S -> E`) has no trailing semicolon.
S -> D;
S -> E;
S -> If, LBrace, E, RBrace, S, Else, S;
S -> LQBrace, Si, RQBrace;
Si -> S, Si;
Si -> ;
S -> For, LBrace, E, Comma, E, Comma, E, RBrace, S;
S -> Return, E, Semicolon;
// Expressions: `T` is the additive level, `F` the multiplicative level;
// both are left-recursive.
E -> T;
T -> F;
F -> L;
T -> T, Add, F;
T -> T, Sub, F;
F -> F, Mul, L;
F -> F, Div, L;
// Literals, identifiers, indexing `l[e]` and parenthesised expressions.
L -> Ident;
L -> Int;
L -> Float;
L -> Str;
L -> L, LSBrace, E, RSBrace;
L -> LBrace, E, RBrace;
// Types.
Ty -> TVoid;
Ty -> TBool;
Ty -> TChar;
Ty -> TInt;
Ty -> TFloat;
Ty -> TStr;
Ta -> Ty;
// Array type: `arr[N] elem`
Ty -> TArr, LSBrace, Int, RSBrace, Ta;
// Function type: `fun type (args)`
Ta -> TFun, Ta, LBrace, A, RBrace;
// Argument list: one or more `name: type`, comma separated (never empty).
A -> Ident, Colon, Ta, Comma, A;
A -> Ident, Colon, Ta;
]
}
/// A type of the demo language as it appears in the AST.
#[derive(Debug, Clone)]
#[allow(dead_code)]
enum VType {
Void,
Boolean,
Char,
Integer,
Float,
String,
/// Array: `(size from the `arr[N]` literal, element type)`.
Array(Box<(usize, VType)>),
/// Function: the type written after `fun` (presumably the return type -
/// confirm) and the named arguments in source order.
Function(Box<VType>, Vec<(String, VType)>),
}
/// A declaration `name: type = ...`.
///
/// The AST builder in this file sets exactly one of `value` / `code`:
/// `value` for variable declarations, `code` for function declarations.
#[derive(Debug, Clone)]
#[allow(dead_code)]
struct VDecl {
/// Declared identifier.
name: String,
/// Declared type (a `VType::Function` for function declarations).
ttype: VType,
/// Initialiser expression of a variable declaration.
value: Option<VExpr>,
/// Body statement of a function declaration.
code: Option<VStmt>,
}
/// A statement of the demo language.
#[derive(Debug, Clone)]
#[allow(dead_code)]
enum VStmt {
/// A nested declaration.
Decl(Box<VDecl>),
/// An expression used as a statement.
Expr(Box<VExpr>),
/// `if (cond) then_stmt else else_stmt`.
IfElse(Box<VExpr>, Box<VStmt>, Box<VStmt>),
/// `for (first, second, third) body` - the three expressions in source order.
For(Box<VExpr>, Box<VExpr>, Box<VExpr>, Box<VStmt>),
/// `{ ... }`: statements in source order.
Block(Vec<VStmt>),
/// `return expr;`
Return(Box<VExpr>),
}
/// An expression of the demo language. Binary variants hold
/// `(left operand, right operand)`.
#[derive(Debug, Clone)]
#[allow(dead_code)]
enum VExpr {
Add(Box<VExpr>, Box<VExpr>),
Sub(Box<VExpr>, Box<VExpr>),
Mul(Box<VExpr>, Box<VExpr>),
Div(Box<VExpr>, Box<VExpr>),
/// Presumably array indexing `(array, index)` - confirm against the AST builder.
Arr(Box<VExpr>, Box<VExpr>),
Ident(String),
Int(i64),
Float(f64),
String(String),
}
/// Result type of the AST builder (`AST::from_tree`): every parse-tree node is
/// converted into one of these variants, and parent rules pattern-match on the
/// variants of their children.
#[derive(Debug, Clone)]
#[allow(dead_code)]
enum AST {
/// Whole program / list of declarations.
Prog(Vec<VDecl>),
/// A single declaration.
Decl(VDecl),
/// A statement (statement lists are carried as `VStmt::Block`).
Stmt(VStmt),
/// An expression.
Expr(VExpr),
/// A type.
Type(VType),
/// A function argument list `(name, type)` in source order.
Args(Vec<(String, VType)>),
}
// AST construction rules: one entry per grammar production in `grammer()`.
// Each entry matches `(non-terminal => children)` and returns the AST value
// for that node. Children are matched by reference, hence the clones.
ast_gen! {
    types: AST, Tokens, NoneTerminals;
    start: NoneTerminals::P;

    // --- Program -----------------------------------------------------------
    (P => [Ast(Prog(v))]) => {
        return Ok(AST::Prog(v.to_vec()));
    },
    (Pi => [Ast(Decl(d)), Ast(Prog(v))]) => {
        let mut prog = vec![d.clone()];
        prog.extend(v.clone());
        return Ok(Prog(prog));
    },
    (Pi => []) => {
        return Ok(Prog(Vec::new()));
    },

    // --- Declarations ------------------------------------------------------
    // Variable: `name: type = expr;`
    (D => [Ter(Ident(name)), Ter(Colon), Ast(Type(ty)), Ter(Assign), Ast(Expr(expr)), Ter(Semicolon)]) => {
        return Ok(Decl(VDecl { name: name.clone(), ttype: ty.clone(), value: Some(expr.clone()), code: None }));
    },
    // Function: `name: fun type (args) = stmt`
    (D => [Ter(Ident(name)), Ter(Colon), Ter(TFun), Ast(Type(ty)), Ter(LBrace), Ast(Args(args)), Ter(RBrace), Ter(Assign), Ast(Stmt(stmt))]) => {
        return Ok(Decl(VDecl { name: name.clone(), ttype: VType::Function(Box::new(ty.clone()), args.clone()), value: None, code: Some(stmt.clone()) }));
    },

    // --- Statements --------------------------------------------------------
    (S => [Ast(Decl(d))]) => {
        return Ok(Stmt(VStmt::Decl(Box::new(d.clone()))));
    },
    (S => [Ast(Expr(v))]) => {
        return Ok(Stmt(VStmt::Expr(Box::new(v.clone()))));
    },
    (S => [Ter(If), Ter(LBrace), Ast(Expr(cond)), Ter(RBrace), Ast(Stmt(iff)), Ter(Else), Ast(Stmt(eelse))]) => {
        return Ok(Stmt(VStmt::IfElse(Box::new(cond.clone()), Box::new(iff.clone()), Box::new(eelse.clone()))));
    },
    // `{ ... }`: the statement list `Si` is carried as a `VStmt::Block`.
    (S => [Ter(LQBrace), Ast(Stmt(VStmt::Block(stats))), Ter(RQBrace)]) => {
        return Ok(Stmt(VStmt::Block(stats.clone())));
    },
    (Si => [Ast(Stmt(stat)), Ast(Stmt(VStmt::Block(stats)))]) => {
        let mut st = vec![stat.clone()];
        st.extend(stats.clone());
        return Ok(Stmt(VStmt::Block(st)));
    },
    (Si => []) => {
        return Ok(Stmt(VStmt::Block(Vec::new())));
    },
    (S => [Ter(For), Ter(LBrace), Ast(Expr(pre)), Ter(Comma), Ast(Expr(cond)), Ter(Comma), Ast(Expr(post)), Ter(RBrace), Ast(Stmt(body))]) => {
        return Ok(Stmt(VStmt::For(Box::new(pre.clone()), Box::new(cond.clone()), Box::new(post.clone()), Box::new(body.clone()))));
    },
    (S => [Ter(Return), Ast(Expr(expr)), Ter(Semicolon)]) => {
        return Ok(Stmt(VStmt::Return(Box::new(expr.clone()))));
    },

    // --- Expressions -------------------------------------------------------
    // Pass-through productions `E -> T`, `T -> F`, `F -> L`.
    (E | T | F => [Ast(Expr(v))]) => {
        return Ok(Expr(v.clone()));
    },
    (T => [Ast(Expr(left)), Ter(Add), Ast(Expr(right))]) => {
        return Ok(Expr(VExpr::Add(Box::new(left.clone()), Box::new(right.clone()))));
    },
    (T => [Ast(Expr(left)), Ter(Sub), Ast(Expr(right))]) => {
        return Ok(Expr(VExpr::Sub(Box::new(left.clone()), Box::new(right.clone()))));
    },
    (F => [Ast(Expr(left)), Ter(Mul), Ast(Expr(right))]) => {
        return Ok(Expr(VExpr::Mul(Box::new(left.clone()), Box::new(right.clone()))));
    },
    (F => [Ast(Expr(left)), Ter(Div), Ast(Expr(right))]) => {
        return Ok(Expr(VExpr::Div(Box::new(left.clone()), Box::new(right.clone()))));
    },
    (L => [Ter(Ident(n))]) => {
        return Ok(Expr(VExpr::Ident(n.clone())));
    },
    (L => [Ter(Int(i))]) => {
        return Ok(Expr(VExpr::Int(*i)));
    },
    (L => [Ter(Float(f))]) => {
        return Ok(Expr(VExpr::Float(*f)));
    },
    (L => [Ter(Str(s))]) => {
        return Ok(Expr(VExpr::String(s.clone())));
    },
    // Indexing `arr[expr]`: the grammar production is
    // `L -> L, LSBrace, E, RSBrace`, so the square-bracket tokens must be
    // matched here and the result is an `Arr` node.
    (L => [Ast(Expr(arr)), Ter(LSBrace), Ast(Expr(expr)), Ter(RSBrace)]) => {
        return Ok(Expr(VExpr::Arr(Box::new(arr.clone()), Box::new(expr.clone()))));
    },
    // Parentheses only group; they leave no trace in the AST.
    (L => [Ter(LBrace), Ast(Expr(expr)), Ter(RBrace)]) => {
        return Ok(Expr(expr.clone()));
    },

    // --- Types -------------------------------------------------------------
    (Ty => [Ter(TVoid)]) => {
        return Ok(Type(VType::Void));
    },
    (Ty => [Ter(TBool)]) => {
        return Ok(Type(VType::Boolean));
    },
    (Ty => [Ter(TChar)]) => {
        return Ok(Type(VType::Char));
    },
    (Ty => [Ter(TInt)]) => {
        return Ok(Type(VType::Integer));
    },
    (Ty => [Ter(TFloat)]) => {
        return Ok(Type(VType::Float));
    },
    (Ty => [Ter(TStr)]) => {
        return Ok(Type(VType::String));
    },
    (Ta => [Ast(Type(t))]) => {
        return Ok(Type(t.clone()));
    },
    // Array type `arr[N] elem`; a negative size is rejected before the cast.
    (Ty => [Ter(TArr), Ter(LSBrace), Ter(Int(i)), Ter(RSBrace), Ast(Type(t))]) => {
        if *i < 0 {
            return Err(format!("Invalid array size: {i}"));
        }
        return Ok(Type(VType::Array(Box::new((*i as usize, t.clone())))));
    },
    (Ta => [Ter(TFun), Ast(Type(ty)), Ter(LBrace), Ast(Args(args)), Ter(RBrace)]) => {
        return Ok(Type(VType::Function(Box::new(ty.clone()), args.clone())));
    },

    // --- Argument lists ----------------------------------------------------
    (A => [Ter(Ident(name)), Ter(Colon), Ast(Type(ty)), Ter(Comma), Ast(Args(args))]) => {
        let mut prog = vec![(name.clone(), ty.clone())];
        prog.extend(args.clone());
        return Ok(Args(prog));
    },
    (A => [Ter(Ident(name)), Ter(Colon), Ast(Type(ty))]) => {
        return Ok(Args(vec![(name.clone(), ty.clone())]));
    }
}
/// Demo driver: scans a small sample program, builds the LR(1) tables for
/// `grammer()`, parses the token stream and prints the resulting AST (or the
/// parse error).
fn main() {
    // Sample program, concatenated without separators.
    let code = [
        "a: int = 5;",
        "b: fun int (a:int) = { b: int = a; return a; }",
        "c: arr[5] arr[5] int = 5;",
    ]
    .concat();
    /*
    Shape of the AST noted for the sample above:

    Prog([
        VDecl {
            name: "a",
            ttype: Integer,
            value: Some(Int(5)),
            code: None,
        },
        VDecl {
            name: "b",
            ttype: Function(Integer, [("a", Integer)]),
            value: None,
            code: Some(Block([
                Decl(VDecl {
                    name: "b",
                    ttype: Integer,
                    value: Some(Ident("a")),
                    code: None,
                }),
                Return(Ident("a")),
            ])),
        },
        VDecl {
            name: "c",
            ttype: Array((5, Array((5, Integer)))),
            value: Some(Int(5)),
            code: None,
        },
    ])
    */
    let mut scanner = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);

    let mut grammar = grammer();
    grammar.gen_lr1_automaton();
    let conflict = grammar.gen_lr1_parse_table();
    println!("conflict: {conflict}");

    let parsed = grammar.lr1_parser(&mut scanner.iter_mut()).parse();
    let tree = match parsed {
        Ok(tree) => tree,
        Err(err) => {
            println!("err: {:?}", err);
            return;
        }
    };

    let ast = AST::from_tree(&tree);
    println!("a: {:?}", ast);
}

View file

@ -102,7 +102,7 @@ where
}
childs.push(last);
}
if *self.start_rule == *rule {
if *self.start_rule == *rule && next.is_none() && stack.is_empty() {
return Ok(ParseTree {
rule: Some((rule.clone(), *ind)),
childs: childs.into_iter().rev().map(|(a, _)| a.clone()).collect(),

View file

@ -1,11 +1,16 @@
pub mod cfg;
pub mod double_enum;
pub mod scanner;
pub mod ast_gen;
pub mod prelude {
pub use crate::cfg::*;
pub use crate::cfg::ll_grammar::*;
pub use crate::cfg::lr0_grammar::*;
pub use crate::cfg::lr1_grammar::*;
pub use crate::cfg::lr_parser::*;
pub use crate::ast_gen::*;
pub use crate::ast_gen;
pub use crate::cfg_grammar;
pub use crate::double_enum;
pub use crate::scanner::*;

View file

@ -33,7 +33,7 @@ double_enum!(
token_scanner!(
Tokens,
r"^\s|\t|\n|\r" : |_,_| {
r"^(\s|\t|\n|\r)" : |_,_| {
Some(WhiteSpace)
}
r"^;" : |_,_| {
@ -185,6 +185,7 @@ fn grammer() -> Grammar<NoneTerminals, BareTokens> {
]
}
fn main() {
let code = String::from(
"a = 4; while a != 5 { a = (a+1) * 4; }; if a == 5 { a = \"abs123\"; } else {a = 5;}",