scanner
This commit is contained in:
commit
eb202642ad
7 changed files with 328 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
/target
|
54
Cargo.lock
generated
Normal file
54
Cargo.lock
generated
Normal file
|
@ -0,0 +1,54 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
||||
|
||||
[[package]]
|
||||
name = "rcompiler"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
7
Cargo.toml
Normal file
7
Cargo.toml
Normal file
|
@ -0,0 +1,7 @@
|
|||
[package]
|
||||
name = "rcompiler"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
regex = "1.11.1"
|
34
src/double_enum.rs
Normal file
34
src/double_enum.rs
Normal file
|
@ -0,0 +1,34 @@
|
|||
/// Declares two enums that share a single list of variant names.
///
/// `$full` keeps whatever payload each variant was declared with, while
/// `$bare` repeats the same variant names with every payload removed.
/// Values of the two types compare equal, in either direction, exactly when
/// they name the same variant; payload contents are ignored.
#[macro_export]
macro_rules! double_enum {
    (
        $bare:ident,
        $full:ident {
            $( $case:ident $( ( $( $field:ty ),+ $(,)? ) )? ),*
            $(,)?
        }
    ) => {
        #[derive(Debug, Clone, PartialEq)]
        pub enum $full {
            $( $case $( ( $( $field ),+ ) )? ),*
        }

        #[derive(Debug, Clone, PartialEq)]
        pub enum $bare {
            $( $case ),*
        }

        impl PartialEq<$full> for $bare {
            fn eq(&self, rhs: &$full) -> bool {
                // `{ .. }` matches unit and tuple variants alike.
                match (self, rhs) {
                    $( ($bare::$case, $full::$case { .. }) => true, )*
                    _ => false,
                }
            }
        }

        impl PartialEq<$bare> for $full {
            fn eq(&self, rhs: &$bare) -> bool {
                // Delegate to the bare-vs-full comparison with the sides swapped.
                <$bare as PartialEq<$full>>::eq(rhs, self)
            }
        }
    };
}
|
23
src/ll_grammar.rs
Normal file
23
src/ll_grammar.rs
Normal file
|
@ -0,0 +1,23 @@
|
|||
/// Placeholder front end for declaring an LL grammar.
///
/// Takes four identifiers followed by a bracketed list of productions written
/// as `Left -> Sym, Sym, ...` and separated by `;`. A trailing `;` is accepted
/// and a right-hand side may be empty. The input is only checked for this
/// shape: the macro currently expands to nothing.
#[macro_export]
macro_rules! ll_grammar {
    (
        $tokens:ident,
        $bare_tokens:ident,
        $grammar:ident,
        $non_term:ident,
        [
            $( $lhs:ident -> $( $sym:ident ),* );*
            $(;)?
        ]
    ) => {};
}
|
||||
|
||||
/// One symbol on the right-hand side of a production.
///
/// `T` is the terminal type and `N` the non-terminal type.
enum Sentential<T, N> {
    /// A terminal symbol.
    Terminal(T),
    /// A non-terminal symbol (variant spelling kept as-is; it is part of the type).
    NoneTerminal(N),
}

/// A list of production rules over terminals `T` and non-terminals `N`.
pub struct Grammar<T, N> {
    /// Productions stored as `(left-hand side, right-hand side)`.
    ///
    /// The left-hand side is a non-terminal, so it is typed `N`; it was
    /// previously typed `T`, which could not hold the rule heads used by the
    /// grammar in `main.rs` (`P`, `E`, `Ei`, `T` are all non-terminals).
    rules: Vec<(N, Vec<Sentential<T, N>>)>,
}
|
102
src/main.rs
Normal file
102
src/main.rs
Normal file
|
@ -0,0 +1,102 @@
|
|||
mod ll_grammar;
|
||||
mod scanner;
|
||||
mod double_enum;
|
||||
use regex::{Match, Regex};
|
||||
use scanner::Scanner;
|
||||
|
||||
|
||||
|
||||
// Token vocabulary plus the ordered lexer rules that produce it. Each rule is
// `pattern : closure`; the closure receives the captures and the whole match and
// returns `Some(token)` to accept or `None` to fall through to the next rule.
// Rules are tried top to bottom, so the keyword precedes identifiers and the
// float pattern precedes the integer pattern.
scanner!(
    BareTokens, Tokens {
        WhiteSpace,
        Add,
        Sub,
        Mul,
        Div,
        While,
        Ident(String),
        Int(i64),
        Float(f64),
    }
    // `\s` already covers tabs and newlines. The previous `^\s|\t|\n` anchored
    // only its first alternative, so a tab or newline anywhere later in the
    // input produced a match. A whole run of whitespace is now one token.
    r"^\s+" : |_, _| {
        Some(WhiteSpace)
    }
    r"^\+" : |_, _| {
        Some(Add)
    }
    r"^-" : |_, _| {
        Some(Sub)
    }
    r"^\*" : |_, _| {
        Some(Mul)
    }
    r"^/" : |_, _| {
        Some(Div)
    }
    // `\b` keeps an identifier such as `whilex` from being split into `while` + `x`.
    r"^while\b" : |_, _| {
        Some(While)
    }
    r"^[a-zA-Z](\w)*" : |_, m: Match<'_>| {
        Some(Ident(String::from(m.as_str())))
    }
    // The dot is escaped: unescaped it matched any character, so a plain
    // integer such as `123` was lexed as `Float(123.0)`.
    r"^[0-9]+\.[0-9]*" : |_, m: Match<'_>| {
        m.as_str().parse::<_>().ok().map(|f| Float(f))
    }
    r"^[0-9]+" : |_, m: Match<'_>| {
        m.as_str().parse::<_>().ok().map(|i| Int(i))
    }
);
|
||||
|
||||
/// Non-terminal symbols used as rule heads in the `ll_grammar!` invocation in this file.
enum NoneTerminals {
    /// Head of `P -> E`.
    P,
    /// Head of `E -> T, Ei`.
    E,
    /// Head of `Ei -> Add, T, Ei` and of the empty production `Ei -> `.
    Ei,
    /// Head of `T -> Ident` and `T -> Int`.
    T,
}
|
||||
|
||||
// Productions of the expression grammar. `ll_grammar!` currently expands to
// nothing, so this only checks that the rules are well-formed.
ll_grammar! {
    Tokens,
    BareTokens,
    Grammar,
    NoneTerminals,
    [
        P -> E;
        E -> T, Ei;
        Ei -> Add, T, Ei;
        Ei -> ;
        T -> Ident;
        T -> Int;
    ]
}
|
||||
|
||||
fn main() {
|
||||
let code = String::from("while 12 + a - 3.4 / 0. * 4");
|
||||
let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);
|
||||
for token in m.iter_mut() {
|
||||
println!("{:?}", token);
|
||||
}
|
||||
if !m.is_empty() {
|
||||
println!("Error");
|
||||
}
|
||||
}
|
||||
|
||||
/// Two-variant enum; the right-hand side of the `PartialEq<Test> for BareTest` comparison.
#[derive(Clone, Debug, PartialEq)]
pub enum Test {
    /// First variant.
    A,
    /// Second variant.
    B,
}
|
||||
/// Two-variant enum with the same variant names as `Test`.
///
/// Derives `PartialEq` so two `BareTest` values can be compared with each
/// other, matching `Test` and the bare enums generated by the macros in this
/// crate, which all derive it.
#[derive(Debug, Clone, PartialEq)]
pub enum BareTest {
    /// First variant.
    A,
    /// Second variant.
    B,
}
|
||||
|
||||
impl PartialEq<Test> for BareTest {
|
||||
fn eq(&self, other: &Test) -> bool {
|
||||
match (self, other){
|
||||
(BareTest::A, Test::A) => true,
|
||||
(BareTest::B, Test::B)=> true,
|
||||
_=> false
|
||||
}
|
||||
}
|
||||
}
|
107
src/scanner.rs
Normal file
107
src/scanner.rs
Normal file
|
@ -0,0 +1,107 @@
|
|||
use std::marker::PhantomData;
|
||||
|
||||
/// Declares a token enum, its payload-free twin, and a regex-driven lexer for it.
///
/// The first part (`$bare_name, $name { ... }`) lists the variants. `$name`
/// keeps the declared payloads, `$bare_name` drops them, and the two compare
/// equal whenever they name the same variant.
///
/// The second part is a list of `pattern : closure` rules. `match_next` tries
/// them in order. A rule applies only when its pattern matches starting at
/// byte 0 of the input; the closure is then called with the captures and the
/// whole match, and returns `Some(token)` to accept or `None` to fall through
/// to the next rule. On success the token is returned together with the
/// number of bytes it covers.
///
/// The call site must have `Regex` in scope (the macro refers to it by bare
/// name). Each pattern is compiled once, on first use.
///
/// # Panics
///
/// `match_next` panics the first time it reaches a pattern that is not a
/// valid regular expression.
#[macro_export]
macro_rules! scanner {
    (
        $bare_name:ident, $name:ident {
            $(
                $variant:ident $( ( $($args:ty),+ $(,)? ) )?
            ),* $(,)?
        }
        $(
            $regex:tt : $code:expr
        )*
    ) => {
        #[derive(Debug, Clone, PartialEq)]
        pub enum $name {
            $( $variant $( ( $($args),+ ) )? ),*
        }

        #[derive(Debug, Clone, PartialEq)]
        pub enum $bare_name {
            $( $variant ),*
        }

        impl PartialEq<$name> for $bare_name {
            fn eq(&self, other: &$name) -> bool {
                // `{ .. }` matches unit and tuple variants alike.
                match (self, other) {
                    $( ($bare_name::$variant, $name::$variant { .. }) => true, )*
                    _ => false,
                }
            }
        }

        impl PartialEq<$bare_name> for $name {
            fn eq(&self, other: &$bare_name) -> bool {
                other.eq(self)
            }
        }

        impl $crate::scanner::MatchNext<$name> for $name {
            fn match_next(code: &String) -> Option<(Self, usize)> {
                use $name::*;
                $(
                    {
                        // One lazily compiled regex per rule instead of a
                        // fresh compilation on every call.
                        static PATTERN: ::std::sync::OnceLock<Regex> =
                            ::std::sync::OnceLock::new();
                        let pattern = PATTERN.get_or_init(|| {
                            Regex::new($regex).expect("scanner! pattern must be a valid regex")
                        });
                        if let Some(capture) = pattern.captures(code.as_str()) {
                            if let Some(main_capture) = capture.get(0) {
                                // A lexer rule only applies at the cursor. A hit
                                // further into the input (unanchored pattern)
                                // must not consume the text in front of it.
                                if main_capture.start() == 0 {
                                    let consumed = main_capture.end();
                                    if let Some(token) = $code(capture, main_capture) {
                                        return Some((token, consumed));
                                    }
                                }
                            }
                        }
                    }
                )*
                None
            }
        }
    };
}
|
||||
|
||||
/// Lexing hook: tries to read one `T` from the front of `code`.
pub trait MatchNext<T> {
    /// Returns the recognised value together with the number of bytes of
    /// `code` it covers, or `None` when nothing can be recognised.
    fn match_next(code: &String) -> Option<(T, usize)>;
}
|
||||
|
||||
pub struct Scanner<T: MatchNext<T> + PartialEq> {
|
||||
code: String,
|
||||
skip: Vec<T>,
|
||||
_a: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T: MatchNext<T> + PartialEq> Scanner<T> {
|
||||
pub fn new(code: String) -> Self {
|
||||
Self {
|
||||
code,
|
||||
skip: vec![],
|
||||
_a: PhantomData,
|
||||
}
|
||||
}
|
||||
pub fn iter_mut(&mut self) -> ScannerIter<T> {
|
||||
ScannerIter(self)
|
||||
}
|
||||
pub fn with_skipping(mut self, state: T) -> Self {
|
||||
self.skip.push(state);
|
||||
self
|
||||
}
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.code.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator over the tokens of a mutably borrowed [`Scanner`]; advancing it
/// consumes the scanner's remaining input.
pub struct ScannerIter<'a, T>(&'a mut Scanner<T>)
where
    T: MatchNext<T> + PartialEq;
|
||||
|
||||
impl<'a, T: MatchNext<T> + PartialEq> Iterator for ScannerIter<'a, T> {
    type Item = T;

    /// Yields the next token that is not in the scanner's skip list.
    ///
    /// Returns `None` when the input is exhausted, when `T::match_next`
    /// recognises nothing at the front of the remaining input, or when it
    /// reports a zero-length match. In the latter two cases the unconsumed
    /// input stays in the scanner, so `Scanner::is_empty` reports `false`.
    ///
    /// # Panics
    ///
    /// Panics if `T::match_next` reports a length that is past the end of the
    /// remaining input or not on a `char` boundary.
    fn next(&mut self) -> Option<Self::Item> {
        let scanner = &mut *self.0;
        while !scanner.code.is_empty() {
            let (token, len) = T::match_next(&scanner.code)?;
            if len == 0 {
                // A zero-width match makes no progress: looping on it (for a
                // skipped token) or yielding it again and again would never end.
                return None;
            }
            // Remove the consumed prefix in place rather than reallocating the
            // whole remainder for every token.
            scanner.code.replace_range(..len, "");
            if !scanner.skip.contains(&token) {
                return Some(token);
            }
        }
        None
    }
}
|
Loading…
Reference in a new issue