ast_gen
This commit is contained in:
parent
3c741567df
commit
4e5905b6fd
7 changed files with 443 additions and 6 deletions
|
@ -16,3 +16,6 @@ name = "book"
|
|||
|
||||
[[bin]]
|
||||
name = "g10"
|
||||
|
||||
[[bin]]
|
||||
name = "ast"
|
||||
|
|
45
src/ast_gen.rs
Normal file
45
src/ast_gen.rs
Normal file
|
@ -0,0 +1,45 @@
|
|||
/// One already-processed child of a parse tree node, as seen by the rule
/// patterns of the `ast_gen!` macro.
///
/// The generated `from_tree` function turns every child of a node into one of
/// these values and then matches the resulting slice against the user rules.
#[derive(Debug, Clone)]
pub enum AstGenData<A, B> {
    /// A child subtree that has already been converted into an AST value.
    Ast(A),
    /// A terminal (token) stored directly in the parse tree node.
    Ter(B),
}
|
||||
|
||||
/// Generates `impl $ast { pub fn from_tree(..) }`, a bottom-up converter from a
/// `ParseTree<$none_terminal, $tokens>` into the AST type `$ast`.
///
/// Syntax:
///
/// ```text
/// ast_gen! {
///     types: Ast, Tokens, NonTerminals;
///     start: NonTerminals::Start;
///     (NonTerminalPattern => [child patterns..]) => statement,
///     ...
/// }
/// ```
///
/// * `start` is the rule assumed for a node whose `rule` field is `None`.
/// * Each rule is tried in order against `(rule, children)`. The children are
///   a slice of `AstGenData`: terminals appear as `Ter(token)` and converted
///   subtrees as `Ast(value)`.
/// * The right-hand side is a single statement that must evaluate to (or
///   `return`) a `Result<$ast, String>`.
///
/// The variants of `AstGenData`, `$none_terminal`, `$tokens` and `$ast` are
/// glob-imported inside the generated function so rules can name them
/// unqualified. `ParseTree` and `NodeChild` must be in scope at the call site.
#[macro_export]
macro_rules! ast_gen {
    (
        types: $ast:ident,
        $tokens:ident,
        $none_terminal:ident;
        start: $start:expr;
        $(
            ($left:pat => $matcher:pat) => $code:stmt
        ),* $(,)?
    ) => {
        impl $ast {
            /// Converts `tree` into an AST value by first converting all child
            /// subtrees and then applying the first matching rule.
            ///
            /// Returns `Err` with a description of the node when no rule
            /// matches, or the first error produced by a child subtree.
            pub fn from_tree(tree: &ParseTree<$none_terminal, $tokens>) -> Result<Self, String> {
                // `$crate` makes the path resolve to the crate defining this
                // macro instead of whatever crate happens to invoke it.
                use $crate::ast_gen::AstGenData::{self, *};
                use $none_terminal::*;
                use $tokens::*;
                use $ast::*;

                // A node without an explicit rule is treated as the start rule.
                let rule = tree.rule.as_ref().map(|(r, _)| r.clone()).unwrap_or($start);

                // Children are converted left to right; the first failing
                // subtree aborts the whole conversion.
                let args: Vec<AstGenData<$ast, $tokens>> = tree
                    .childs
                    .iter()
                    .map(|node| match node {
                        NodeChild::Data(d) => Ok(AstGenData::Ter(d.clone())),
                        NodeChild::Child(child) => Self::from_tree(child).map(AstGenData::Ast),
                    })
                    .collect::<Result<_, String>>()?;

                match (rule, args.as_slice()) {
                    $(($left, $matcher) => {$code},)*
                    (rule, _) => Err(format!("no rule for: {:?} -> {:?}", rule, args)),
                }
            }
        }
    };
}
|
383
src/bin/ast.rs
Normal file
383
src/bin/ast.rs
Normal file
|
@ -0,0 +1,383 @@
|
|||
use rcompiler::prelude::*;
|
||||
use regex::Match;
|
||||
use std::collections::HashMap;
|
||||
|
||||
// Token definitions for the example language.
//
// In this file `Tokens` is the type the scanner produces (its variants carry
// payloads such as `Ident(String)`), while `BareTokens` is the terminal type of
// the grammar, whose productions name the same variants without payloads.
// NOTE(review): presumably `double_enum!` derives `BareTokens` by stripping the
// payloads from the variants listed here; confirm against the macro.
double_enum!(
    BareTokens, Tokens {
        WhiteSpace,
        // Punctuation
        Semicolon,
        Colon,
        Comma,
        // Arithmetic operators
        Add,
        Sub,
        Mul,
        Div,
        // Comparison and assignment
        Eq,
        Neq,
        Assign,
        // Keywords
        While,
        For,
        If,
        Else,
        Return,
        // Brackets: `(` `)`, `[` `]`, `{` `}`
        LBrace,
        RBrace,
        LSBrace,
        RSBrace,
        LQBrace,
        RQBrace,
        Dot,
        // Type keywords
        TVoid,
        TFloat,
        TBool,
        TChar,
        TInt,
        TStr,
        TArr,
        TFun,
        // Tokens carrying a value
        Ident(String),
        Int(i64),
        Str(String),
        Float(f64),
    }
);
|
||||
|
||||
// Scanner rules for `Tokens`: each entry pairs a regex anchored at the start of
// the remaining input with a callback producing the token.
//
// The callbacks receive the regex captures first and the overall match second
// (see the string and identifier rules).
//
// NOTE(review): the order of the entries looks significant (`==` before `=`,
// keywords before identifiers, floats before integers); confirm how
// `token_scanner!` resolves overlapping matches.
// NOTE(review): keyword patterns have no word boundary, so e.g. `^int` also
// matches the start of `integer`. Depending on the answer above, a `\b` may be
// needed.
token_scanner!(
    Tokens,
    r"^(\s|\t|\n|\r)" : |_, _| {
        Some(WhiteSpace)
    }
    r"^;" : |_, _| {
        Some(Semicolon)
    }
    r"^\:" : |_, _| {
        Some(Colon)
    }
    r"^," : |_, _| {
        Some(Comma)
    }
    r"^\+" : |_, _| {
        Some(Add)
    }
    r"^-" : |_, _| {
        Some(Sub)
    }
    r"^\*" : |_, _| {
        Some(Mul)
    }
    r"^/" : |_, _| {
        Some(Div)
    }
    r"^==" : |_, _| {
        Some(Eq)
    }
    r"^!=" : |_, _| {
        Some(Neq)
    }
    r"^=" : |_, _| {
        Some(Assign)
    }
    // Keywords
    r"^while" : |_, _| {
        Some(While)
    }
    r"^if" : |_, _| {
        Some(If)
    }
    r"^for" : |_, _| {
        Some(For)
    }
    r"^else" : |_, _| {
        Some(Else)
    }
    r"^return" : |_, _| {
        Some(Return)
    }
    // Type keywords
    r"^void" : |_, _| {
        Some(TVoid)
    }
    r"^bool" : |_, _| {
        Some(TBool)
    }
    r"^char" : |_, _| {
        Some(TChar)
    }
    r"^int" : |_, _| {
        Some(TInt)
    }
    r"^float" : |_, _| {
        Some(TFloat)
    }
    r"^str" : |_, _| {
        Some(TStr)
    }
    r"^arr" : |_, _| {
        Some(TArr)
    }
    r"^fun" : |_, _| {
        Some(TFun)
    }
    // Brackets
    r"^\(" : |_, _| {
        Some(LBrace)
    }
    r"^\)" : |_, _| {
        Some(RBrace)
    }
    r"^\[" : |_, _| {
        Some(LSBrace)
    }
    r"^\]" : |_, _| {
        Some(RSBrace)
    }
    r"^\{" : |_, _| {
        Some(LQBrace)
    }
    r"^\}" : |_, _| {
        Some(RQBrace)
    }
    // Identifiers: a letter followed by letters, digits or underscores.
    r"^[a-zA-Z][a-zA-Z0-9_]*" : |_, m: Match<'_>| {
        Some(Ident(String::from(m.as_str())))
    }
    r"^\." : |_, _| {
        Some(Dot)
    }
    // Floats require digits before the dot; digits after it are optional.
    r"^-?[0-9]+\.[0-9]*" : |_, m: Match<'_>| {
        m.as_str().parse::<_>().ok().map(Float)
    }
    // String literal: capture group 1 is the text between the quotes, stored
    // as written (escape sequences are not decoded here).
    r#"^"(([^"\\]|(\\[a-z\\"]))*)""# : |capture: regex::Captures<'_>, _| {
        capture.get(1).map(|m| Str(m.as_str().to_string()))
    }
    // Integers; a literal that does not fit the target type yields `None`.
    r"^-?[0-9]+" : |_, m: Match<'_>| {
        m.as_str().parse::<_>().ok().map(Int)
    }
);
|
||||
|
||||
/// Non-terminal symbols of the example grammar built by `grammer()`.
#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
enum NoneTerminals {
    /// Program: the start symbol.
    P,
    /// Program items: a possibly empty sequence of declarations.
    Pi,
    /// Declaration of a variable or a function.
    D,
    /// Expression.
    E,
    /// Type: primitive types and arrays.
    Ty,
    /// All types: everything `Ty` covers plus function types.
    Ta,
    /// Argument list: one or more comma separated `name: type` entries.
    A,
    /// Statement.
    S,
    /// Statement extension: the possibly empty statement list inside a block.
    Si,
    /// Term: chains of `+` / `-`.
    T,
    /// Factor: chains of `*` / `/`.
    F,
    /// Literal: identifiers, literals, indexing and parenthesised expressions.
    L,
}
|
||||
|
||||
impl<N> From<NoneTerminals> for Sentential<NoneTerminals, N> {
|
||||
fn from(value: NoneTerminals) -> Self {
|
||||
Sentential::NoneTerminal(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> From<BareTokens> for Sentential<T, BareTokens> {
|
||||
fn from(value: BareTokens) -> Self {
|
||||
Sentential::Terminal(value)
|
||||
}
|
||||
}
|
||||
|
||||
fn grammer() -> Grammar<NoneTerminals, BareTokens> {
|
||||
use BareTokens::*;
|
||||
use NoneTerminals::*;
|
||||
cfg_grammar![
|
||||
start: P;
|
||||
P -> Pi;
|
||||
Pi -> D, Pi;
|
||||
Pi -> ;
|
||||
D -> Ident, Colon, Ty, Assign, E, Semicolon;
|
||||
D -> Ident, Colon, TFun, Ta, LBrace, A, RBrace, Assign, S;
|
||||
|
||||
S -> D;
|
||||
S -> E;
|
||||
S -> If, LBrace, E, RBrace, S, Else, S;
|
||||
S -> LQBrace, Si, RQBrace;
|
||||
Si -> S, Si;
|
||||
Si -> ;
|
||||
S -> For, LBrace, E, Comma, E, Comma, E, RBrace, S;
|
||||
S -> Return, E, Semicolon;
|
||||
|
||||
E -> T;
|
||||
T -> F;
|
||||
F -> L;
|
||||
|
||||
T -> T, Add, F;
|
||||
T -> T, Sub, F;
|
||||
F -> F, Mul, L;
|
||||
F -> F, Div, L;
|
||||
|
||||
L -> Ident;
|
||||
L -> Int;
|
||||
L -> Float;
|
||||
L -> Str;
|
||||
L -> L, LSBrace, E, RSBrace;
|
||||
L -> LBrace, E, RBrace;
|
||||
|
||||
Ty -> TVoid;
|
||||
Ty -> TBool;
|
||||
Ty -> TChar;
|
||||
Ty -> TInt;
|
||||
Ty -> TFloat;
|
||||
Ty -> TStr;
|
||||
Ta -> Ty;
|
||||
Ty -> TArr, LSBrace, Int, RSBrace, Ta;
|
||||
Ta -> TFun, Ta, LBrace, A, RBrace;
|
||||
|
||||
A -> Ident, Colon, Ta, Comma, A;
|
||||
A -> Ident, Colon, Ta;
|
||||
]
|
||||
}
|
||||
|
||||
/// A type of the example language as it appears in the AST.
#[derive(Debug, Clone)]
// In this binary the values are only inspected through their `Debug` output.
#[allow(dead_code)]
enum VType {
    Void,
    Boolean,
    Char,
    Integer,
    Float,
    String,
    /// Fixed-size array: `(length, element type)`.
    Array(Box<(usize, VType)>),
    /// Function type: return type and named, typed parameters.
    Function(Box<VType>, Vec<(String, VType)>),
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(dead_code)]
|
||||
struct VDecl {
|
||||
name: String,
|
||||
ttype: VType,
|
||||
value: Option<VExpr>,
|
||||
code: Option<VStmt>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(dead_code)]
|
||||
enum VStmt {
|
||||
Decl(Box<VDecl>),
|
||||
Expr(Box<VExpr>),
|
||||
IfElse(Box<VExpr>, Box<VStmt>, Box<VStmt>),
|
||||
For(Box<VExpr>, Box<VExpr>, Box<VExpr>, Box<VStmt>),
|
||||
Block(Vec<VStmt>),
|
||||
Return(Box<VExpr>),
|
||||
}
|
||||
|
||||
/// An expression of the example language.
#[derive(Debug, Clone)]
// In this binary the values are only inspected through their `Debug` output.
#[allow(dead_code)]
enum VExpr {
    /// `left + right`.
    Add(Box<VExpr>, Box<VExpr>),
    /// `left - right`.
    Sub(Box<VExpr>, Box<VExpr>),
    /// `left * right`.
    Mul(Box<VExpr>, Box<VExpr>),
    /// `left / right`.
    Div(Box<VExpr>, Box<VExpr>),
    /// Array indexing `array[index]`.
    Arr(Box<VExpr>, Box<VExpr>),
    /// A name.
    Ident(String),
    /// Integer literal.
    Int(i64),
    /// Float literal.
    Float(f64),
    /// String literal.
    String(String),
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(dead_code)]
|
||||
enum AST {
|
||||
Prog(Vec<VDecl>),
|
||||
Decl(VDecl),
|
||||
Stmt(VStmt),
|
||||
Expr(VExpr),
|
||||
Type(VType),
|
||||
Args(Vec<(String, VType)>),
|
||||
}
|
||||
|
||||
// Conversion rules from the parse tree to `AST`, one rule per grammar
// production of `grammer()`. Each rule matches the producing non-terminal and
// the already-converted children (`Ter` = token, `Ast` = converted subtree).
// Rules written as blocks use `return` to leave the generated function.
ast_gen! {
    types: AST, Tokens, NoneTerminals;
    start: NoneTerminals::P;

    // Program and declaration list.
    (P => [Ast(Prog(v))]) => Ok(AST::Prog(v.to_vec())),
    (Pi => [Ast(Decl(d)), Ast(Prog(v))]) => {
        let mut prog = vec![d.clone()];
        prog.extend(v.clone());
        return Ok(Prog(prog));
    },
    (Pi => []) => Ok(Prog(Vec::new())),

    // Declarations.
    (D => [Ter(Ident(name)), Ter(Colon), Ast(Type(ty)), Ter(Assign), Ast(Expr(expr)), Ter(Semicolon)]) =>
        Ok(Decl(VDecl { name: name.clone(), ttype: ty.clone(), value: Some(expr.clone()), code: None })),
    (D => [Ter(Ident(name)), Ter(Colon), Ter(TFun), Ast(Type(ty)), Ter(LBrace), Ast(Args(args)), Ter(RBrace), Ter(Assign), Ast(Stmt(stmt))]) =>
        Ok(Decl(VDecl { name: name.clone(), ttype: VType::Function(Box::new(ty.clone()), args.clone()), value: None, code: Some(stmt.clone()) })),

    // Statements.
    (S => [Ast(Decl(d))]) =>
        Ok(Stmt(VStmt::Decl(Box::from(d.clone())))),
    (S => [Ast(Expr(v))]) =>
        Ok(Stmt(VStmt::Expr(Box::new(v.clone())))),
    (S => [Ter(If), Ter(LBrace), Ast(Expr(cond)), Ter(RBrace), Ast(Stmt(iff)), Ter(Else), Ast(Stmt(eelse))]) =>
        Ok(Stmt(VStmt::IfElse(Box::new(cond.clone()), Box::new(iff.clone()), Box::new(eelse.clone())))),
    (S => [Ter(LQBrace), Ast(Stmt(VStmt::Block(stats))), Ter(RQBrace)]) =>
        Ok(Stmt(VStmt::Block(stats.clone()))),
    // Statement lists are accumulated as a `VStmt::Block`.
    (Si => [Ast(Stmt(stat)), Ast(Stmt(VStmt::Block(stats)))]) => {
        let mut st = vec![stat.clone()];
        st.extend(stats.clone());
        return Ok(Stmt(VStmt::Block(st)));
    },
    (Si => []) => Ok(Stmt(VStmt::Block(Vec::new()))),
    (S => [Ter(For), Ter(LBrace), Ast(Expr(pre)), Ter(Comma), Ast(Expr(cond)), Ter(Comma), Ast(Expr(post)), Ter(RBrace), Ast(Stmt(body))]) =>
        Ok(Stmt(VStmt::For(Box::new(pre.clone()), Box::new(cond.clone()), Box::new(post.clone()), Box::new(body.clone())))),
    (S => [Ter(Return), Ast(Expr(expr)), Ter(Semicolon)]) => Ok(Stmt(VStmt::Return(Box::new(expr.clone())))),

    // Expressions: unit productions just pass the inner expression through.
    (E | T | F => [Ast(Expr(v))]) => Ok(Expr(v.clone())),
    (T => [Ast(Expr(left)), Ter(Add), Ast(Expr(right))]) =>
        Ok(Expr(VExpr::Add(Box::new(left.clone()), Box::new(right.clone())))),
    (T => [Ast(Expr(left)), Ter(Sub), Ast(Expr(right))]) =>
        Ok(Expr(VExpr::Sub(Box::new(left.clone()), Box::new(right.clone())))),
    (F => [Ast(Expr(left)), Ter(Mul), Ast(Expr(right))]) =>
        Ok(Expr(VExpr::Mul(Box::new(left.clone()), Box::new(right.clone())))),
    (F => [Ast(Expr(left)), Ter(Div), Ast(Expr(right))]) =>
        Ok(Expr(VExpr::Div(Box::new(left.clone()), Box::new(right.clone())))),

    // Literals.
    (L => [Ter(Ident(n))]) => Ok(Expr(VExpr::Ident(n.clone()))),
    (L => [Ter(Int(i))]) => Ok(Expr(VExpr::Int(*i))),
    (L => [Ter(Float(f))]) => Ok(Expr(VExpr::Float(*f))),
    (L => [Ter(Str(s))]) => Ok(Expr(VExpr::String(s.clone()))),
    // Indexing uses square brackets in the grammar (`L -> L [ E ]`) and
    // produces an `Arr` node.
    (L => [Ast(Expr(arr)), Ter(LSBrace), Ast(Expr(expr)), Ter(RSBrace)]) =>
        Ok(Expr(VExpr::Arr(Box::new(arr.clone()), Box::new(expr.clone())))),
    (L => [Ter(LBrace), Ast(Expr(expr)), Ter(RBrace)]) => Ok(Expr(expr.clone())),

    // Types.
    (Ty => [Ter(TVoid)]) => Ok(Type(VType::Void)),
    (Ty => [Ter(TBool)]) => Ok(Type(VType::Boolean)),
    (Ty => [Ter(TChar)]) => Ok(Type(VType::Char)),
    (Ty => [Ter(TInt)]) => Ok(Type(VType::Integer)),
    (Ty => [Ter(TFloat)]) => Ok(Type(VType::Float)),
    (Ty => [Ter(TStr)]) => Ok(Type(VType::String)),
    (Ta => [Ast(Type(t))]) => Ok(Type(t.clone())),
    (Ty => [Ter(TArr), Ter(LSBrace), Ter(Int(i)), Ter(RSBrace), Ast(Type(t))]) => {
        // The scanner accepts negative integer literals, which are not valid
        // array lengths.
        if *i < 0 {
            return Err(format!("Invalid array size: {i}"));
        }
        return Ok(Type(VType::Array(Box::new((*i as usize, t.clone())))));
    },
    (Ta => [Ter(TFun), Ast(Type(ty)), Ter(LBrace), Ast(Args(args)), Ter(RBrace)]) =>
        Ok(Type(VType::Function(Box::new(ty.clone()), args.clone()))),

    // Argument lists.
    (A => [Ter(Ident(name)), Ter(Colon), Ast(Type(ty)), Ter(Comma), Ast(Args(args))]) => {
        let mut prog = vec![(name.clone(), ty.clone())];
        prog.extend(args.clone());
        return Ok(Args(prog));
    },
    (A => [Ter(Ident(name)), Ter(Colon), Ast(Type(ty))]) => Ok(Args(vec![(name.clone(), ty.clone())])),
}
|
||||
|
||||
fn main() {
|
||||
let code = String::from(
|
||||
r#"
|
||||
a: int = 5;
|
||||
b: fun int (a:int) = { b: int = a; return a; }
|
||||
c: arr[5] arr[5] int = 5;
|
||||
"#,
|
||||
);
|
||||
|
||||
let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);
|
||||
|
||||
let mut grammar = grammer();
|
||||
grammar.gen_lr1_automaton();
|
||||
let conflict = grammar.gen_lr1_parse_table();
|
||||
println!("conflict: {conflict}");
|
||||
let tree = grammar.lr1_parser(&mut m.iter_mut()).parse();
|
||||
let Ok(tree) = tree else {
|
||||
println!("err: {:?}", tree.unwrap_err());
|
||||
return;
|
||||
};
|
||||
let a = AST::from_tree(&tree);
|
||||
println!("a: {:?}", a);
|
||||
}
|
|
@ -102,7 +102,7 @@ where
|
|||
}
|
||||
childs.push(last);
|
||||
}
|
||||
if *self.start_rule == *rule {
|
||||
if *self.start_rule == *rule && next.is_none() && stack.is_empty() {
|
||||
return Ok(ParseTree {
|
||||
rule: Some((rule.clone(), *ind)),
|
||||
childs: childs.into_iter().rev().map(|(a, _)| a.clone()).collect(),
|
||||
|
|
|
@ -1,11 +1,16 @@
|
|||
pub mod cfg;
|
||||
pub mod double_enum;
|
||||
pub mod scanner;
|
||||
pub mod ast_gen;
|
||||
|
||||
pub mod prelude {
|
||||
pub use crate::cfg::*;
|
||||
pub use crate::cfg::ll_grammar::*;
|
||||
pub use crate::cfg::lr0_grammar::*;
|
||||
pub use crate::cfg::lr1_grammar::*;
|
||||
pub use crate::cfg::lr_parser::*;
|
||||
pub use crate::ast_gen::*;
|
||||
pub use crate::ast_gen;
|
||||
pub use crate::cfg_grammar;
|
||||
pub use crate::double_enum;
|
||||
pub use crate::scanner::*;
|
||||
|
|
|
@ -33,7 +33,7 @@ double_enum!(
|
|||
|
||||
token_scanner!(
|
||||
Tokens,
|
||||
r"^\s|\t|\n|\r" : |_,_| {
|
||||
r"^(\s|\t|\n|\r)" : |_,_| {
|
||||
Some(WhiteSpace)
|
||||
}
|
||||
r"^;" : |_,_| {
|
||||
|
@ -185,6 +185,7 @@ fn grammer() -> Grammar<NoneTerminals, BareTokens> {
|
|||
]
|
||||
}
|
||||
|
||||
|
||||
fn main() {
|
||||
let code = String::from(
|
||||
"a = 4; while a != 5 { a = (a+1) * 4; }; if a == 5 { a = \"abs123\"; } else {a = 5;}",
|
||||
|
|
|
@ -61,10 +61,10 @@ impl<'a, T: MatchNext<T> + PartialEq + std::fmt::Debug> Iterator for ScannerIter
|
|||
type Item = Result<T, String>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.0.code.is_empty() {
|
||||
return None;
|
||||
}
|
||||
loop {
|
||||
if self.0.code.is_empty() {
|
||||
return None;
|
||||
}
|
||||
if let Some((token, len)) = T::match_next(&self.0.code) {
|
||||
self.0.code = self.0.code.split_off(len);
|
||||
if self.0.skip.contains(&token) {
|
||||
|
|
Loading…
Reference in a new issue