commit eb202642ade1633d983e29d4a50a13708fab103c
Author: jusax23
Date:   Sat Oct 26 23:03:27 2024 +0200

    scanner

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..0e3e7c3
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,54 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "memchr"
+version = "2.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
+
+[[package]]
+name = "rcompiler"
+version = "0.1.0"
+dependencies = [
+ "regex",
+]
+
+[[package]]
+name = "regex"
+version = "1.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..4ffb99f
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "rcompiler"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+regex = "1.11.1"
diff --git a/src/double_enum.rs b/src/double_enum.rs
new file mode 100644
index 0000000..559c844
--- /dev/null
+++ b/src/double_enum.rs
@@ -0,0 +1,41 @@
+/// Declares a payload-carrying enum `$name` and a payload-free mirror
+/// `$bare_name` with the same variant names.
+///
+/// Both enums derive `Debug`, `Clone` and `PartialEq`. They can also be
+/// compared with each other in either order; that comparison looks at the
+/// variant only and ignores any payload.
+#[macro_export]
+macro_rules! double_enum {
+    ($bare_name:ident, $name:ident {
+        $(
+            $variant:ident$(
+                ( $($args:ty),+ $(,)? )
+            )?
+        ),* $(,)?
+    }) => {
+        #[derive(Debug, Clone, PartialEq)]
+        pub enum $name {$(
+            $variant$(
+                ( $($args),+ )
+            )?
+        ),*}
+        #[derive(Debug, Clone, PartialEq)]
+        pub enum $bare_name {$($variant),*}
+
+        impl PartialEq<$name> for $bare_name {
+            fn eq(&self, other: &$name) -> bool {
+                match (self, other){
+                    // `{ .. }` matches unit and tuple variants alike.
+                    $(($bare_name::$variant, $name::$variant{ .. }) => true,)*
+                    _=> false
+                }
+            }
+        }
+
+        impl PartialEq<$bare_name> for $name {
+            fn eq(&self, other: &$bare_name) -> bool {
+                other.eq(self)
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/ll_grammar.rs b/src/ll_grammar.rs
new file mode 100644
index 0000000..0bbf32f
--- /dev/null
+++ b/src/ll_grammar.rs
@@ -0,0 +1,32 @@
+/// Placeholder for declaring an LL grammar.
+///
+/// Takes the token enum, its bare mirror, a grammar name, the non-terminal
+/// enum and a `;`-separated list of `Left -> a, b, c` rules. For now it only
+/// checks that shape and expands to nothing.
+#[macro_export]
+macro_rules! ll_grammar {
+    (
+        $tokens:ident,
+        $bare_tokens:ident,
+        $grammar:ident,
+        $non_term:ident,
+        [$(
+            $left:ident -> $(
+                $right:ident
+            ),*
+        );* $(;)?]
+    ) => {};
+}
+
+/// Either a terminal or a non-terminal symbol.
+enum Sentential<T, N> {
+    Terminal(T),
+    NoneTerminal(N),
+}
+
+// NOTE(review): the generic parameters in this file were stripped when the
+// patch was extracted and have been reconstructed; confirm against the repo.
+/// Holds the rules of a grammar.
+pub struct Grammar<T, N> {
+    rules: Vec<(T, Vec<Sentential<T, N>>)>,
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..e12df40
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,111 @@
+mod ll_grammar;
+mod scanner;
+mod double_enum;
+use regex::{Match, Regex};
+use scanner::Scanner;
+
+// Rules are tried from top to bottom and the first closure that returns
+// `Some` wins, so the keyword and float rules come before the more general
+// identifier and integer rules.
+scanner!(
+    BareTokens, Tokens {
+        WhiteSpace,
+        Add,
+        Sub,
+        Mul,
+        Div,
+        While,
+        Ident(String),
+        Int(i64),
+        Float(f64),
+    }
+    // `\s` already covers tabs and newlines. The previous `^\s|\t|\n` anchored
+    // only its first alternative, so a later tab or newline could match too.
+    r"^\s+" : |_,_|{
+        Some(WhiteSpace)
+    }
+    r"^\+" : |_,_| {
+        Some(Add)
+    }
+    r"^-" : |_,_| {
+        Some(Sub)
+    }
+    r"^\*" : |_,_| {
+        Some(Mul)
+    }
+    r"^/" : |_,_| {
+        Some(Div)
+    }
+    // `\b` keeps identifiers that merely start with `while` out of this rule.
+    r"^while\b" : |_,_| {
+        Some(While)
+    }
+    r"^[a-zA-Z](\w)*" : |_, m: Match<'_>| {
+        Some(Ident(String::from(m.as_str())))
+    }
+    // The dot is escaped; unescaped it matched any character, so a lone `12`
+    // was lexed as a float.
+    r"^[0-9]+\.[0-9]*" : |_, m: Match<'_>| {
+        m.as_str().parse::<_>().ok().map(|f| Float(f))
+    }
+    r"^[0-9]+" : |_, m: Match<'_>| {
+        m.as_str().parse::<_>().ok().map(|i| Int(i))
+    }
+);
+
+/// Non-terminals used by the grammar declared below.
+enum NoneTerminals {
+    P,
+    E,
+    Ei,
+    T,
+}
+
+ll_grammar!(
+    Tokens,
+    BareTokens,
+    Grammar,
+    NoneTerminals,
+    [
+        P -> E;
+        E -> T,Ei;
+        Ei -> Add,T,Ei;
+        Ei -> ;
+        T -> Ident;
+        T -> Int;
+    ]
+);
+
+/// Lexes a sample expression, prints each token and reports leftover input.
+fn main() {
+    let code = String::from("while 12 + a - 3.4 / 0. * 4");
+    let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);
+    for token in m.iter_mut() {
+        println!("{:?}", token);
+    }
+    if !m.is_empty() {
+        println!("Error");
+    }
+}
+
+// Hand-written counterpart of the cross-enum `PartialEq` the macros generate.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Test {
+    A,
+    B,
+}
+#[derive(Debug, Clone)]
+pub enum BareTest {
+    A,
+    B,
+}
+
+impl PartialEq<Test> for BareTest {
+    fn eq(&self, other: &Test) -> bool {
+        match (self, other){
+            (BareTest::A, Test::A) => true,
+            (BareTest::B, Test::B)=> true,
+            _=> false
+        }
+    }
+}
diff --git a/src/scanner.rs b/src/scanner.rs
new file mode 100644
index 0000000..1869465
--- /dev/null
+++ b/src/scanner.rs
@@ -0,0 +1,137 @@
+use std::marker::PhantomData;
+
+/// Declares a token enum, its payload-free mirror and a regex-driven lexer.
+///
+/// The enum part has the same shape as in `double_enum!`. It is followed by
+/// `regex : closure` rules. A closure receives the `regex::Captures` and the
+/// overall `regex::Match` and returns `Option<$name>`; `None` rejects the match
+/// so that the next rule is tried. Rules run in the order they are written.
+///
+/// Anchor every pattern with `^`: the length of the match is what gets removed
+/// from the front of the input. `regex::Regex` must be in scope at the call site.
+#[macro_export]
+macro_rules! scanner {
+    ($bare_name:ident, $name:ident {
+        $(
+            $variant:ident$(
+                ( $($args:ty),+ $(,)? )
+            )?
+        ),* $(,)?
+    }
+    $(
+        $regex:tt : $code:expr
+    )*) => {
+        #[derive(Debug, Clone, PartialEq)]
+        pub enum $name {$(
+            $variant$(
+                ( $($args),+ )
+            )?
+        ),*}
+        #[derive(Debug, Clone, PartialEq)]
+        pub enum $bare_name {$($variant),*}
+
+        impl PartialEq<$name> for $bare_name {
+            fn eq(&self, other: &$name) -> bool {
+                match (self, other){
+                    $(($bare_name::$variant, $name::$variant{ .. }) => true,)*
+                    _=> false
+                }
+            }
+        }
+
+        impl PartialEq<$bare_name> for $name {
+            fn eq(&self, other: &$bare_name) -> bool {
+                other.eq(self)
+            }
+        }
+
+        impl $crate::scanner::MatchNext<$name> for $name {
+            fn match_next(code: &str) -> Option<(Self, usize)> {
+                use $name::*;
+                $({
+                    // Compile each pattern once and cache it; this used to be
+                    // recompiled for every rule on every call.
+                    static RE: ::std::sync::OnceLock<Regex> = ::std::sync::OnceLock::new();
+                    let re = RE.get_or_init(|| {
+                        Regex::new($regex).expect("scanner rule must be a valid regex")
+                    });
+                    if let Some(capture) = re.captures(code) {
+                        if let Some(main_capture) = capture.get(0) {
+                            if let Some(token) = $code(capture, main_capture) {
+                                return Some((token, main_capture.len()));
+                            }
+                        }
+                    }
+                })*
+                None
+            }
+        }
+    };
+}
+
+/// Implemented by token types that can lex themselves from the front of a string.
+pub trait MatchNext<T> {
+    /// Tries to read one token at the start of `code`.
+    ///
+    /// Returns the token and the number of bytes to consume, or `None` when no
+    /// rule produces a token.
+    fn match_next(code: &str) -> Option<(T, usize)>;
+}
+
+/// Holds the input that is still to be lexed and the tokens to skip.
+pub struct Scanner<T: MatchNext<T> + PartialEq> {
+    code: String,
+    skip: Vec<T>,
+    _a: PhantomData<T>,
+}
+
+impl<T: MatchNext<T> + PartialEq> Scanner<T> {
+    /// Creates a scanner over `code` that skips nothing.
+    pub fn new(code: String) -> Self {
+        Self {
+            code,
+            skip: vec![],
+            _a: PhantomData,
+        }
+    }
+    /// Returns an iterator that consumes the input one token at a time.
+    pub fn iter_mut(&mut self) -> ScannerIter<'_, T> {
+        ScannerIter(self)
+    }
+    /// Adds `state` to the tokens that are consumed without being yielded.
+    pub fn with_skipping(mut self, state: T) -> Self {
+        self.skip.push(state);
+        self
+    }
+    /// Returns `true` when no unconsumed input is left.
+    pub fn is_empty(&self) -> bool {
+        self.code.is_empty()
+    }
+}
+
+/// Iterator created by `Scanner::iter_mut`.
+pub struct ScannerIter<'a, T: MatchNext<T> + PartialEq>(&'a mut Scanner<T>);
+
+impl<'a, T: MatchNext<T> + PartialEq> Iterator for ScannerIter<'a, T> {
+    type Item = T;
+
+    /// Yields the next token that is not on the skip list.
+    ///
+    /// Stops at the end of the input or where no rule matches; in the latter
+    /// case `Scanner::is_empty` keeps returning `false`.
+    fn next(&mut self) -> Option<T> {
+        while !self.0.code.is_empty() {
+            let (token, len) = T::match_next(&self.0.code)?;
+            if len == 0 {
+                // An empty match makes no progress; stop instead of looping forever.
+                return None;
+            }
+            // Remove the matched prefix in place rather than reallocating the tail.
+            self.0.code.drain(..len);
+            if !self.0.skip.contains(&token) {
+                return Some(token);
+            }
+        }
+        None
+    }
+}