From 31cf3b0e0859db038b5744f8344199f178f8525c Mon Sep 17 00:00:00 2001 From: jusax23 Date: Thu, 14 Nov 2024 11:55:23 +0100 Subject: [PATCH] json --- Cargo.toml | 3 ++ src/bin/ast.rs | 2 +- src/bin/g10.rs | 10 +++- src/bin/json.rs | 141 ++++++++++++++++++++++++++++++++++++++++++++++++ src/cfg/mod.rs | 1 + src/lib.rs | 8 +-- src/main.rs | 3 +- 7 files changed, 159 insertions(+), 9 deletions(-) create mode 100644 src/bin/json.rs diff --git a/Cargo.toml b/Cargo.toml index 0ffb61d..ee353f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,3 +19,6 @@ name = "g10" [[bin]] name = "ast" + +[[bin]] +name = "json" diff --git a/src/bin/ast.rs b/src/bin/ast.rs index 14d69bf..58576e0 100644 --- a/src/bin/ast.rs +++ b/src/bin/ast.rs @@ -361,7 +361,7 @@ ast_gen! { fn main() { let code = String::from( r#" - a: int = 5; + a: int = 1 + 2 * 3 + 4; b: fun int (a:int) = { b: int = a; return a; } c: arr[5] arr[5] int = 5; "#, diff --git a/src/bin/g10.rs b/src/bin/g10.rs index 8ad117f..bc533b9 100644 --- a/src/bin/g10.rs +++ b/src/bin/g10.rs @@ -79,8 +79,14 @@ fn main() { println!("automaton: {:?}", grammar.lr1_automaton); println!("conflict: {}", grammar.gen_slr_parse_table()); println!("conflict: {}", grammar.gen_lr1_parse_table()); - println!("parse_table: {:?}", grammar.slr_parse_table.as_ref().unwrap().0.len()); - println!("parse_table: {:?}", grammar.lr1_automaton.as_ref().unwrap().0.len()); + println!( + "parse_table: {:?}", + grammar.slr_parse_table.as_ref().unwrap().0.len() + ); + println!( + "parse_table: {:?}", + grammar.lr1_automaton.as_ref().unwrap().0.len() + ); println!( "parsed: {:?}", grammar.slr_parser(&mut m.iter_mut()).parse() diff --git a/src/bin/json.rs b/src/bin/json.rs new file mode 100644 index 0000000..c40a56c --- /dev/null +++ b/src/bin/json.rs @@ -0,0 +1,141 @@ +use cfg::{Grammar, Sentential}; +use rcompiler::*; +use regex::Match; +use scanner::Scanner; + +double_enum!(BareTokens, Tokens { + WhiteSpace, + TokenString(String), + TokenObjectBegin, + TokenColon, + TokenComma, + TokenNumber(f64), + TokenTrue, + TokenFalse, + TokenNull, + TokenArrayBegin, + TokenArrayEnd, + TokenObjectEnd +}); + +token_scanner!( + Tokens, + r"^(\s|\t|\n|\r)" : |_,_| { + Some(WhiteSpace) + } + r"^\{" : |_,_|{ + Some(TokenObjectBegin) + } + r"^\}" : |_,_|{ + Some(TokenObjectEnd) + } + r"^\[" : |_,_|{ + Some(TokenArrayBegin) + + } + r"^\]" : |_,_|{ + Some(TokenArrayEnd) + } + r#"^"(([^"\\\x00-\x1f]|\\(["\\/bfnrt]|(u[0-9a-f]{4})))*)""# : |capture: regex::Captures<'_>, _| { + capture.get(1).map(|m| TokenString(m.as_str().to_string())) + } + r"^-?([1-9][0-9]*|0)(\.[0-9]+)?([eE][-\+]?[0-9]+)?" : |_, m: Match<'_>| { + m.as_str().parse::<_>().ok().map(TokenNumber) + } + r"^," : |_,_|{ + Some(TokenComma) + } + r"^:" : |_,_|{ + Some(TokenColon) + } + r"^false" : |_,_|{ + Some(TokenFalse) + } + r"^true" : |_,_|{ + Some(TokenTrue) + } + r"^null" : |_,_|{ + Some(TokenNull) + } +); + +impl From for Sentential { + fn from(value: NoneTerminals) -> Self { + Sentential::NoneTerminal(value) + } +} + +impl From for Sentential { + fn from(value: BareTokens) -> Self { + Sentential::Terminal(value) + } +} + +#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)] +enum NoneTerminals { + O, + Oi, + Oii, + A, + Ai, + Aii, + V, +} + +fn grammer() -> Grammar { + use BareTokens::*; + use NoneTerminals::*; + cfg_grammar![ + start: V; + V -> TokenString; + V -> TokenNumber; + V -> O; + V -> A; + V -> TokenTrue; + V -> TokenFalse; + V -> TokenNull; + A -> TokenArrayBegin, Ai, TokenArrayEnd; + Ai -> V, Aii; + Aii -> TokenComma, V, Aii; + Aii -> ; + O -> TokenObjectBegin, Oi, TokenObjectEnd; + Oi -> TokenString, TokenColon, V, Oii; + Oii -> TokenComma, TokenString, TokenColon, V, Oii; + Oii -> ; + ] +} + +fn main() { + let json = r#" + { + "hello?": "Chinese Keyboard - 中文鍵盤/中文键盘 ", + "string": "abc\"d\u231ee\\f\/g\fh\ni\rj\tk", + "number": -123.12E+22, + "array": [ + 0, + 1, + 10, + -10, + -1E-34 + ], + "object": { + "1337": "187" + }, + "true": true, + "false": false, + "null": null +} + "#; + let mut m = Scanner::::new(json.to_string()).with_skipping(Tokens::WhiteSpace); + + let mut grammar = grammer(); + let conflict_ll = grammar.gen_ll_parse_table(); + let conflict_slr = grammar.gen_slr_parse_table(); + let conflict_lr1 = grammar.gen_lr1_parse_table(); + println!( + "is grammar: \n ll: {}, slr: {}, lr1: {}", + !conflict_ll, !conflict_slr, !conflict_lr1 + ); + let tree = grammar.ll_parser(&mut m.iter_mut()).parse(); + println!("{tree:?}"); +} diff --git a/src/cfg/mod.rs b/src/cfg/mod.rs index 8d09c61..fc61d4e 100644 --- a/src/cfg/mod.rs +++ b/src/cfg/mod.rs @@ -24,6 +24,7 @@ macro_rules! cfg_grammar { );* $(;)? ) => { { + use std::collections::HashMap; let mut map = HashMap::new(); $({ if !map.contains_key(&$left) { diff --git a/src/lib.rs b/src/lib.rs index 9dc8d65..563c838 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,16 +1,16 @@ +pub mod ast_gen; pub mod cfg; pub mod double_enum; pub mod scanner; -pub mod ast_gen; pub mod prelude { - pub use crate::cfg::*; + pub use crate::ast_gen; + pub use crate::ast_gen::*; pub use crate::cfg::ll_grammar::*; pub use crate::cfg::lr0_grammar::*; pub use crate::cfg::lr1_grammar::*; pub use crate::cfg::lr_parser::*; - pub use crate::ast_gen::*; - pub use crate::ast_gen; + pub use crate::cfg::*; pub use crate::cfg_grammar; pub use crate::double_enum; pub use crate::scanner::*; diff --git a/src/main.rs b/src/main.rs index 3001ed5..70f5333 100644 --- a/src/main.rs +++ b/src/main.rs @@ -185,7 +185,6 @@ fn grammer() -> Grammar { ] } - fn main() { let code = String::from( "a = 4; while a != 5 { a = (a+1) * 4; }; if a == 5 { a = \"abs123\"; } else {a = 5;}", @@ -197,7 +196,7 @@ fn main() { //println!("first: {:?}", grammar.first); //println!("follow: {:?}", grammar.follow); grammar.gen_lr1_automaton(); - println!("conflict: {:?}", grammar.lr1_automaton); + println!("conflict: {:?}", grammar.lr1_automaton); let conflict = grammar.gen_ll_parse_table(); println!("conflict: {conflict}"); println!("prase table: {:?}", grammar.ll_parse_table);