This commit is contained in:
jusax23 2024-11-12 17:33:48 +01:00
parent 3c741567df
commit 4e5905b6fd
Signed by: jusax23
GPG key ID: 499E2AA870C1CD41
7 changed files with 443 additions and 6 deletions

View file

@ -16,3 +16,6 @@ name = "book"
[[bin]] [[bin]]
name = "g10" name = "g10"
[[bin]]
name = "ast"

45
src/ast_gen.rs Normal file
View file

@ -0,0 +1,45 @@
/// One child of a parse-tree node, in the form the `ast_gen!` rule matchers see it.
///
/// `A` is the AST type produced from sub-trees and `B` is the token (terminal) type.
#[derive(Debug, Clone)]
pub enum AstGenData<A, B> {
/// A child sub-tree that has already been converted to an AST value.
Ast(A),
/// A terminal taken (cloned) from the parse tree.
Ter(B),
}
/// Generates `impl $ast { pub fn from_tree(..) }`, a bottom-up conversion from a
/// parse tree into an AST value.
///
/// # Input
/// * `types: $ast, $tokens, $none_terminal;` - the AST enum to implement
///   `from_tree` on, the token type stored in the tree and the non-terminal type.
/// * `start: $start;` - the non-terminal used as the rule when a node carries no
///   rule of its own (`tree.rule` is `None`).
/// * A comma-separated list of `(non_terminal_pattern => child_slice_pattern) => code`
///   rules. `code` must evaluate to (or `return`) a `Result<$ast, String>`.
///
/// # Generated behaviour
/// Every child of the node is converted first: terminals are cloned into
/// `AstGenData::Ter`, sub-trees are converted recursively into `AstGenData::Ast`
/// (the first error aborts the conversion). The pair `(rule, children)` is then
/// matched against the rules in order; if none matches, an
/// `Err("no rule for: ..")` is returned.
///
/// # Requirements at the call site
/// * `ParseTree` and `NodeChild` are referenced unqualified and therefore must be
///   in scope where the macro is invoked.
/// * The variants of `$ast`, `$tokens`, `$none_terminal` and `AstGenData` are
///   glob-imported inside `from_tree`, so rule patterns may use them unqualified.
/// * `$ast`, `$tokens` and `$none_terminal` must be `Clone` and `Debug`.
#[macro_export]
macro_rules! ast_gen {
    (
        types: $ast:ident,
        $tokens:ident,
        $none_terminal:ident;
        start: $start:expr;
        $(
            ($left:pat => $matcher:pat) => $code:stmt
        ),* $(,)?
    ) => {
        impl $ast {
            pub fn from_tree(tree: &ParseTree<$none_terminal, $tokens>) -> Result<Self, String> {
                // `$crate` always names the crate that defines this macro, so the
                // expansion does not depend on what the caller has at its crate root.
                use $crate::ast_gen::AstGenData;
                use AstGenData::*;
                use $none_terminal::*;
                use $tokens::*;
                use $ast::*;

                // Nodes without an attached rule are treated as the start symbol.
                let rule = tree.rule.as_ref().map(|(r, _)| r.clone()).unwrap_or($start);

                // Convert children first so the rules only ever see finished AST values.
                let mut args = Vec::new();
                for node in tree.childs.iter() {
                    args.push(match node {
                        NodeChild::Data(d) => AstGenData::<$ast, $tokens>::Ter(d.clone()),
                        NodeChild::Child(child) => {
                            AstGenData::<$ast, $tokens>::Ast(Self::from_tree(child)?)
                        }
                    });
                }

                match (rule, args.as_slice()) {
                    $(($left, $matcher) => {$code},)*
                    (rule, _) => Err(format!("no rule for: {:?} -> {:?}", rule, args)),
                }
            }
        }
    };
}

383
src/bin/ast.rs Normal file
View file

@ -0,0 +1,383 @@
use rcompiler::prelude::*;
use regex::Match;
use std::collections::HashMap;
// Token set of the example language.
// NOTE(review): `double_enum!` is defined elsewhere; judging by its use in this file it
// presumably generates a payload-free `BareTokens` (used as grammar terminals) and a
// payload-carrying `Tokens` (produced by the scanner) — confirm against the macro.
double_enum!(
BareTokens, Tokens {
WhiteSpace,
// Punctuation and operators.
Semicolon,
Colon,
Comma,
Add,
Sub,
Mul,
Div,
Eq,
Neq,
Assign,
// Keywords.
While,
For,
If,
Else,
Return,
// Brackets: the scanner maps `(`/`)` to LBrace/RBrace, `[`/`]` to LSBrace/RSBrace
// and `{`/`}` to LQBrace/RQBrace.
LBrace,
RBrace,
LSBrace,
RSBrace,
LQBrace,
RQBrace,
Dot,
// Type keywords (`void`, `float`, `bool`, `char`, `int`, `str`, `arr`, `fun`).
TVoid,
TFloat,
TBool,
TChar,
TInt,
TStr,
TArr,
TFun,
// Tokens that carry a value.
Ident(String),
Int(i64),
Str(String),
Float(f64),
}
);
// Scanner rules for `Tokens`: each entry is an anchored regex followed by a closure
// that builds the token from the captures / the overall match.
//
// Keyword and type-keyword patterns end in `\b` so that they only match whole words;
// without it an identifier such as `iffy` or `integer` could be split into a keyword
// followed by the rest of the identifier, because these rules are listed before the
// identifier rule.
token_scanner!(
    Tokens,
    r"^(\s|\t|\n|\r)" : |_,_| {
        Some(WhiteSpace)
    }
    r"^;" : |_,_| {
        Some(Semicolon)
    }
    r"^\:" : |_,_| {
        Some(Colon)
    }
    r"^," : |_,_| {
        Some(Comma)
    }
    r"^\+" : |_,_| {
        Some(Add)
    }
    r"^-" : |_,_| {
        Some(Sub)
    }
    r"^\*" : |_,_| {
        Some(Mul)
    }
    r"^/" : |_,_| {
        Some(Div)
    }
    // `==` and `!=` are listed before `=` so the two-character operators are tried first.
    r"^==" : |_,_| {
        Some(Eq)
    }
    r"^!=" : |_,_| {
        Some(Neq)
    }
    r"^=" : |_,_| {
        Some(Assign)
    }
    r"^while\b" : |_,_| {
        Some(While)
    }
    r"^if\b" : |_,_| {
        Some(If)
    }
    r"^for\b" : |_,_| {
        Some(For)
    }
    r"^else\b" : |_,_| {
        Some(Else)
    }
    r"^return\b" : |_,_| {
        Some(Return)
    }
    r"^void\b" : |_,_| {
        Some(TVoid)
    }
    r"^bool\b" : |_,_| {
        Some(TBool)
    }
    r"^char\b" : |_,_| {
        Some(TChar)
    }
    r"^int\b" : |_,_| {
        Some(TInt)
    }
    r"^float\b" : |_,_| {
        Some(TFloat)
    }
    r"^str\b" : |_,_| {
        Some(TStr)
    }
    r"^arr\b" : |_,_| {
        Some(TArr)
    }
    r"^fun\b" : |_,_| {
        Some(TFun)
    }
    r"^\(" : |_,_| {
        Some(LBrace)
    }
    r"^\)" : |_,_| {
        Some(RBrace)
    }
    r"^\[" : |_,_| {
        Some(LSBrace)
    }
    r"^\]" : |_,_| {
        Some(RSBrace)
    }
    r"^\{" : |_,_| {
        Some(LQBrace)
    }
    r"^\}" : |_,_| {
        Some(RQBrace)
    }
    r"^[a-zA-Z][a-zA-Z0-9_]*" : |_, m: Match<'_>| {
        Some(Ident(String::from(m.as_str())))
    }
    r"^\." : |_, _| {
        Some(Dot)
    }
    // The float rule precedes the integer rule so `1.5` is not read as `1`, `.`, `5`.
    r"^-?[0-9]+\.[0-9]*" : |_, m: Match<'_>| {
        m.as_str().parse::<_>().ok().map(Float)
    }
    // Capture group 1 is the string content without the surrounding quotes;
    // escape sequences are kept verbatim.
    r#"^"(([^"\\]|(\\[a-z\\"]))*)""# : |capture: regex::Captures<'_>, _| {
        capture.get(1).map(|m| Str(m.as_str().to_string()))
    }
    r"^-?[0-9]+" : |_, m: Match<'_>| {
        m.as_str().parse::<_>().ok().map(Int)
    }
);
// Non-terminal symbols of the example grammar (see `grammer()` for the productions).
#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
enum NoneTerminals {
P, // Program: the start symbol, expands to the declaration list `Pi`
Pi, // Program items: possibly empty, right-recursive list of declarations
D, // Declaration: variable (`name: type = expr;`) or function (`name: fun ...`)
E, // Expression
Ty, // Type: a scalar type or an array type
Ta, // Any type: `Ty` or a function type
A, // Argument list of a function (at least one `name: type` entry)
S, // Statement
Si, // Statement list inside a `{ ... }` block, possibly empty
T, // Term: additive level (`+`, `-`)
F, // Factor: multiplicative level (`*`, `/`)
L, // Literal / primary: identifier, literal, indexing or parenthesised expression
}
impl<N> From<NoneTerminals> for Sentential<NoneTerminals, N> {
fn from(value: NoneTerminals) -> Self {
Sentential::NoneTerminal(value)
}
}
impl<T> From<BareTokens> for Sentential<T, BareTokens> {
fn from(value: BareTokens) -> Self {
Sentential::Terminal(value)
}
}
/// Builds the context-free grammar of the example language.
///
/// Terminals are `BareTokens`, non-terminals are `NoneTerminals`, and `P` is the
/// start symbol. Each line inside `cfg_grammar!` is one production; an empty
/// right-hand side (`Pi -> ;`) is an epsilon production.
// NOTE(review): the order of the productions may determine rule indices in the
// generated tables — keep it stable unless the macro is known not to care.
fn grammer() -> Grammar<NoneTerminals, BareTokens> {
use BareTokens::*;
use NoneTerminals::*;
cfg_grammar![
start: P;
// Program: a (possibly empty) list of declarations.
P -> Pi;
Pi -> D, Pi;
Pi -> ;
// Declarations: `name: type = expr;` and `name: fun ret (args) = stmt`.
D -> Ident, Colon, Ty, Assign, E, Semicolon;
D -> Ident, Colon, TFun, Ta, LBrace, A, RBrace, Assign, S;
// Statements.
S -> D;
S -> E;
S -> If, LBrace, E, RBrace, S, Else, S;
S -> LQBrace, Si, RQBrace;
Si -> S, Si;
Si -> ;
S -> For, LBrace, E, Comma, E, Comma, E, RBrace, S;
S -> Return, E, Semicolon;
// Expressions: T is the additive level, F the multiplicative level, L the primary level.
E -> T;
T -> F;
F -> L;
T -> T, Add, F;
T -> T, Sub, F;
F -> F, Mul, L;
F -> F, Div, L;
L -> Ident;
L -> Int;
L -> Float;
L -> Str;
// Indexing uses square brackets: `value[index]`.
L -> L, LSBrace, E, RSBrace;
L -> LBrace, E, RBrace;
// Types: scalars, sized arrays (`arr[n] elem`) and function types.
Ty -> TVoid;
Ty -> TBool;
Ty -> TChar;
Ty -> TInt;
Ty -> TFloat;
Ty -> TStr;
Ta -> Ty;
Ty -> TArr, LSBrace, Int, RSBrace, Ta;
Ta -> TFun, Ta, LBrace, A, RBrace;
// Argument list: one or more comma-separated `name: type` entries.
A -> Ident, Colon, Ta, Comma, A;
A -> Ident, Colon, Ta;
]
}
/// A type of the example language as it appears in the AST.
#[derive(Debug, Clone)]
// NOTE(review): presumably needed because the AST is only printed via `Debug` in this
// example, so not every variant is constructed or read — confirm.
#[allow(dead_code)]
enum VType {
Void,
Boolean,
Char,
Integer,
Float,
String,
/// Array type: `(length, element type)`.
Array(Box<(usize, VType)>),
/// Function type: return type followed by the `(name, type)` argument list.
Function(Box<VType>, Vec<(String, VType)>),
}
/// A declaration: either a variable with an initial value or a function with a body.
#[derive(Debug, Clone)]
// NOTE(review): presumably needed because the fields are only read through the derived
// `Debug` output in this example — confirm.
#[allow(dead_code)]
struct VDecl {
// Declared identifier.
name: String,
// Declared type; for functions this is a `VType::Function`.
ttype: VType,
// Initial value; set by the variable-declaration rule, `None` for functions.
value: Option<VExpr>,
// Function body; set by the function-declaration rule, `None` for variables.
code: Option<VStmt>,
}
/// A statement of the example language.
#[derive(Debug, Clone)]
// NOTE(review): presumably needed because the payloads are only read through the derived
// `Debug` output in this example — confirm.
#[allow(dead_code)]
enum VStmt {
/// A declaration used as a statement.
Decl(Box<VDecl>),
/// An expression used as a statement.
Expr(Box<VExpr>),
/// `if (cond) then_stmt else else_stmt`, stored as (condition, then, else).
IfElse(Box<VExpr>, Box<VStmt>, Box<VStmt>),
/// `for (pre, cond, post) body`, stored as (pre, condition, post, body).
For(Box<VExpr>, Box<VExpr>, Box<VExpr>, Box<VStmt>),
/// `{ ... }`: the statements of a block in source order.
Block(Vec<VStmt>),
/// `return expr;`
Return(Box<VExpr>),
}
/// An expression of the example language.
#[derive(Debug, Clone)]
// NOTE(review): presumably needed because not every variant is constructed or read in
// this example — confirm.
#[allow(dead_code)]
enum VExpr {
/// Binary operations, stored as (left operand, right operand).
Add(Box<VExpr>, Box<VExpr>),
Sub(Box<VExpr>, Box<VExpr>),
Mul(Box<VExpr>, Box<VExpr>),
Div(Box<VExpr>, Box<VExpr>),
// NOTE(review): presumably array indexing as (array, index) — confirm against the
// `L -> L [ E ]` rule.
Arr(Box<VExpr>, Box<VExpr>),
/// Reference to a named value.
Ident(String),
/// Integer literal.
Int(i64),
/// Floating-point literal.
Float(f64),
/// String literal (content as captured by the scanner).
String(String),
}
/// Result type of `AST::from_tree`: every grammar rule produces one of these
/// variants, and parent rules pattern-match on the variants of their children.
#[derive(Debug, Clone)]
// NOTE(review): presumably needed because the payloads are only read through the derived
// `Debug` output in this example — confirm.
#[allow(dead_code)]
enum AST {
/// A whole program or the tail of its declaration list.
Prog(Vec<VDecl>),
/// A single declaration.
Decl(VDecl),
/// A statement (statement lists are carried as `VStmt::Block`).
Stmt(VStmt),
/// An expression.
Expr(VExpr),
/// A type.
Type(VType),
/// A function argument list as `(name, type)` pairs.
Args(Vec<(String, VType)>),
}
// Rules that turn the parse tree of `grammer()` into an `AST`.
//
// Each rule is `(non-terminal => pattern over the converted children) => result`.
// `Ast(..)` matches an already converted sub-tree, `Ter(..)` matches a terminal.
// Multi-statement rule bodies are written as blocks and leave via `return`.
ast_gen! {
    types: AST, Tokens, NoneTerminals;
    start: NoneTerminals::P;

    // --- Program -----------------------------------------------------------------
    (P => [Ast(Prog(v))]) => Ok(AST::Prog(v.to_vec())),
    (Pi => [Ast(Decl(d)), Ast(Prog(v))]) => {
        let mut prog = vec![d.clone()];
        prog.extend(v.clone());
        return Ok(Prog(prog));
    },
    (Pi => []) => Ok(Prog(Vec::new())),

    // --- Declarations ------------------------------------------------------------
    (D => [Ter(Ident(name)), Ter(Colon), Ast(Type(ty)), Ter(Assign), Ast(Expr(expr)), Ter(Semicolon)]) =>
        Ok(Decl(VDecl { name: name.clone(), ttype: ty.clone(), value: Some(expr.clone()), code: None })),
    (D => [Ter(Ident(name)), Ter(Colon), Ter(TFun), Ast(Type(ty)), Ter(LBrace), Ast(Args(args)), Ter(RBrace), Ter(Assign), Ast(Stmt(stmt))]) =>
        Ok(Decl(VDecl { name: name.clone(), ttype: VType::Function(Box::new(ty.clone()), args.clone()), value: None, code: Some(stmt.clone()) })),

    // --- Statements --------------------------------------------------------------
    (S => [Ast(Decl(d))]) =>
        Ok(Stmt(VStmt::Decl(Box::from(d.clone())))),
    (S => [Ast(Expr(v))]) =>
        Ok(Stmt(VStmt::Expr(Box::new(v.clone())))),
    (S => [Ter(If), Ter(LBrace), Ast(Expr(cond)), Ter(RBrace), Ast(Stmt(iff)), Ter(Else), Ast(Stmt(eelse))]) =>
        Ok(Stmt(VStmt::IfElse(Box::new(cond.clone()), Box::new(iff.clone()), Box::new(eelse.clone())))),
    // A statement list (`Si`) is carried upwards as a `VStmt::Block`.
    (S => [Ter(LQBrace), Ast(Stmt(VStmt::Block(stats))), Ter(RQBrace)]) =>
        Ok(Stmt(VStmt::Block(stats.clone()))),
    (Si => [Ast(Stmt(stat)), Ast(Stmt(VStmt::Block(stats)))]) => {
        let mut st = vec![stat.clone()];
        st.extend(stats.clone());
        return Ok(Stmt(VStmt::Block(st)));
    },
    (Si => []) => Ok(Stmt(VStmt::Block(Vec::new()))),
    (S => [Ter(For), Ter(LBrace), Ast(Expr(pre)), Ter(Comma), Ast(Expr(cond)), Ter(Comma), Ast(Expr(post)), Ter(RBrace), Ast(Stmt(body))]) =>
        Ok(Stmt(VStmt::For(Box::new(pre.clone()), Box::new(cond.clone()), Box::new(post.clone()), Box::new(body.clone())))),
    (S => [Ter(Return), Ast(Expr(expr)), Ter(Semicolon)]) => Ok(Stmt(VStmt::Return(Box::new(expr.clone())))),

    // --- Expressions -------------------------------------------------------------
    // Pure pass-through productions (E -> T, T -> F, F -> L).
    (E | T | F => [Ast(Expr(v))]) => Ok(Expr(v.clone())),
    (T => [Ast(Expr(left)), Ter(Add), Ast(Expr(right))]) =>
        Ok(Expr(VExpr::Add(Box::new(left.clone()), Box::new(right.clone())))),
    (T => [Ast(Expr(left)), Ter(Sub), Ast(Expr(right))]) =>
        Ok(Expr(VExpr::Sub(Box::new(left.clone()), Box::new(right.clone())))),
    (F => [Ast(Expr(left)), Ter(Mul), Ast(Expr(right))]) =>
        Ok(Expr(VExpr::Mul(Box::new(left.clone()), Box::new(right.clone())))),
    (F => [Ast(Expr(left)), Ter(Div), Ast(Expr(right))]) =>
        Ok(Expr(VExpr::Div(Box::new(left.clone()), Box::new(right.clone())))),
    (L => [Ter(Ident(n))]) => Ok(Expr(VExpr::Ident(n.clone()))),
    (L => [Ter(Int(i))]) => Ok(Expr(VExpr::Int(*i))),
    (L => [Ter(Float(f))]) => Ok(Expr(VExpr::Float(*f))),
    (L => [Ter(Str(s))]) => Ok(Expr(VExpr::String(s.clone()))),
    // Indexing: the grammar production is `L -> L [ E ]`, i.e. square brackets,
    // and the result is an array access (not a division).
    (L => [Ast(Expr(arr)), Ter(LSBrace), Ast(Expr(expr)), Ter(RSBrace)]) =>
        Ok(Expr(VExpr::Arr(Box::new(arr.clone()), Box::new(expr.clone())))),
    (L => [Ter(LBrace), Ast(Expr(expr)), Ter(RBrace)]) => Ok(Expr(expr.clone())),

    // --- Types -------------------------------------------------------------------
    (Ty => [Ter(TVoid)]) => Ok(Type(VType::Void)),
    (Ty => [Ter(TBool)]) => Ok(Type(VType::Boolean)),
    (Ty => [Ter(TChar)]) => Ok(Type(VType::Char)),
    (Ty => [Ter(TInt)]) => Ok(Type(VType::Integer)),
    (Ty => [Ter(TFloat)]) => Ok(Type(VType::Float)),
    (Ty => [Ter(TStr)]) => Ok(Type(VType::String)),
    (Ta => [Ast(Type(t))]) => Ok(Type(t.clone())),
    (Ty => [Ter(TArr), Ter(LSBrace), Ter(Int(i)), Ter(RSBrace), Ast(Type(t))]) => {
        // The checked conversion rejects negative sizes as well as sizes that do
        // not fit into `usize` on the target platform.
        let Ok(len) = usize::try_from(*i) else {
            return Err(format!("Invalid array size: {i}"));
        };
        return Ok(Type(VType::Array(Box::new((len, t.clone())))));
    },
    (Ta => [Ter(TFun), Ast(Type(ty)), Ter(LBrace), Ast(Args(args)), Ter(RBrace)]) =>
        Ok(Type(VType::Function(Box::new(ty.clone()), args.clone()))),

    // --- Function arguments --------------------------------------------------------
    (A => [Ter(Ident(name)), Ter(Colon), Ast(Type(ty)), Ter(Comma), Ast(Args(args))]) => {
        let mut prog = vec![(name.clone(), ty.clone())];
        prog.extend(args.clone());
        return Ok(Args(prog));
    },
    (A => [Ter(Ident(name)), Ter(Colon), Ast(Type(ty))]) => Ok(Args(vec![(name.clone(), ty.clone())])),
}
/// Demo driver: scans a small sample program, builds the LR(1) tables for the
/// grammar, parses the token stream and prints the resulting AST (or the parse error).
fn main() {
    // Sample input; whitespace tokens are dropped by the scanner before parsing.
    let source = String::from(
        r#"
a: int = 5;
b: fun int (a:int) = { b: int = a; return a; }
c: arr[5] arr[5] int = 5;
"#,
    );
    let mut scanner = Scanner::<Tokens>::new(source).with_skipping(Tokens::WhiteSpace);

    // Build the automaton first, then the parse table derived from it.
    let mut grammar = grammer();
    grammar.gen_lr1_automaton();
    let conflict = grammar.gen_lr1_parse_table();
    println!("conflict: {conflict}");

    let parsed = grammar.lr1_parser(&mut scanner.iter_mut()).parse();
    let tree = match parsed {
        Ok(tree) => tree,
        Err(err) => {
            println!("err: {:?}", err);
            return;
        }
    };

    let ast = AST::from_tree(&tree);
    println!("a: {:?}", ast);
}

View file

@ -102,7 +102,7 @@ where
} }
childs.push(last); childs.push(last);
} }
if *self.start_rule == *rule { if *self.start_rule == *rule && next.is_none() && stack.is_empty() {
return Ok(ParseTree { return Ok(ParseTree {
rule: Some((rule.clone(), *ind)), rule: Some((rule.clone(), *ind)),
childs: childs.into_iter().rev().map(|(a, _)| a.clone()).collect(), childs: childs.into_iter().rev().map(|(a, _)| a.clone()).collect(),

View file

@ -1,11 +1,16 @@
pub mod cfg; pub mod cfg;
pub mod double_enum; pub mod double_enum;
pub mod scanner; pub mod scanner;
pub mod ast_gen;
pub mod prelude { pub mod prelude {
pub use crate::cfg::*; pub use crate::cfg::*;
pub use crate::cfg::ll_grammar::*; pub use crate::cfg::ll_grammar::*;
pub use crate::cfg::lr0_grammar::*; pub use crate::cfg::lr0_grammar::*;
pub use crate::cfg::lr1_grammar::*;
pub use crate::cfg::lr_parser::*;
pub use crate::ast_gen::*;
pub use crate::ast_gen;
pub use crate::cfg_grammar; pub use crate::cfg_grammar;
pub use crate::double_enum; pub use crate::double_enum;
pub use crate::scanner::*; pub use crate::scanner::*;

View file

@ -33,7 +33,7 @@ double_enum!(
token_scanner!( token_scanner!(
Tokens, Tokens,
r"^\s|\t|\n|\r" : |_,_| { r"^(\s|\t|\n|\r)" : |_,_| {
Some(WhiteSpace) Some(WhiteSpace)
} }
r"^;" : |_,_| { r"^;" : |_,_| {
@ -185,6 +185,7 @@ fn grammer() -> Grammar<NoneTerminals, BareTokens> {
] ]
} }
fn main() { fn main() {
let code = String::from( let code = String::from(
"a = 4; while a != 5 { a = (a+1) * 4; }; if a == 5 { a = \"abs123\"; } else {a = 5;}", "a = 4; while a != 5 { a = (a+1) * 4; }; if a == 5 { a = \"abs123\"; } else {a = 5;}",

View file

@ -61,10 +61,10 @@ impl<'a, T: MatchNext<T> + PartialEq + std::fmt::Debug> Iterator for ScannerIter
type Item = Result<T, String>; type Item = Result<T, String>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
loop {
if self.0.code.is_empty() { if self.0.code.is_empty() {
return None; return None;
} }
loop {
if let Some((token, len)) = T::match_next(&self.0.code) { if let Some((token, len)) = T::match_next(&self.0.code) {
self.0.code = self.0.code.split_off(len); self.0.code = self.0.code.split_off(len);
if self.0.skip.contains(&token) { if self.0.skip.contains(&token) {