working llgrammar

This commit is contained in:
jusax23 2024-10-27 01:32:18 +02:00
parent eb202642ad
commit f35ecee4cb
Signed by: jusax23
GPG key ID: 499E2AA870C1CD41
4 changed files with 268 additions and 78 deletions

View file

@ -13,7 +13,7 @@ macro_rules! double_enum {
( $($args),+ ) ( $($args),+ )
)? )?
),*} ),*}
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum $bare_name {$($variant),*} pub enum $bare_name {$($variant),*}
impl PartialEq<$name> for $bare_name { impl PartialEq<$name> for $bare_name {

View file

@ -1,23 +1,231 @@
use std::{
collections::{HashMap, HashSet},
fmt::Debug,
hash::Hash,
};
#[macro_export] #[macro_export]
macro_rules! ll_grammar { macro_rules! ll_grammar {
( (
$tokens:ident, $(
$bare_tokens:ident,
$grammar:ident,
$non_term:ident,
[$(
$left:ident -> $( $left:ident -> $(
$right:ident $right:ident
),* ),*
);* $(;)?] );* $(;)?
) => {}; ) => {
{
let mut map = HashMap::new();
$({
if !map.contains_key(&$left) {
map.insert($left, Vec::new());
}
map.get_mut(&$left).unwrap().push(vec![$($right.into()),*]);
})*
$crate::ll_grammar::LLGrammar {
rules: map,
first: None,
follow: None,
}
}
};
} }
enum Sentential<T, N> { #[derive(Debug, Clone)]
pub enum Sentential<T, N> {
Terminal(T), Terminal(T),
NoneTerminal(N), NoneTerminal(N),
} }
pub struct Grammar<T, N> { pub struct LLGrammar<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> {
rules: Vec<(T, Vec<Sentential<T, N>>)>, pub rules: HashMap<N, Vec<Vec<Sentential<T, N>>>>,
/// none is epsilon
pub first: Option<HashMap<N, HashSet<Option<T>>>>,
/// none is $
pub follow: Option<HashMap<N, HashSet<Option<T>>>>,
}
impl<N: PartialEq + Eq + Hash + Clone + Debug, T: PartialEq + Eq + Hash + Clone + Debug>
LLGrammar<N, T>
{
/// Returns `true` if `rule` can derive the empty word (epsilon).
///
/// A terminal never derives epsilon. A non-terminal does when at least one of
/// its productions consists only of symbols that derive epsilon themselves;
/// an empty production trivially qualifies. This makes the check transitive,
/// so with `A -> B` and `B -> ` both `A` and `B` are reported as nullable.
/// A non-terminal without any production never derives epsilon.
pub fn can_produce_epsilon(&self, rule: &Sentential<T, N>) -> bool {
    let target = match rule {
        Sentential::Terminal(_) => return false,
        Sentential::NoneTerminal(nt) => nt,
    };
    // Least fixpoint: grow the set of nullable non-terminals until either the
    // target is in it or a full pass over the rules adds nothing new.
    let mut nullable: HashSet<&N> = HashSet::new();
    loop {
        let mut changed = false;
        for (from, productions) in self.rules.iter() {
            if nullable.contains(from) {
                continue;
            }
            // `all` on an empty production is `true`, which covers `X -> `.
            let derives_epsilon = productions.iter().any(|production| {
                production.iter().all(|symbol| match symbol {
                    Sentential::Terminal(_) => false,
                    Sentential::NoneTerminal(inner) => nullable.contains(inner),
                })
            });
            if derives_epsilon {
                changed |= nullable.insert(from);
            }
        }
        if nullable.contains(target) {
            return true;
        }
        if !changed {
            return false;
        }
    }
}
/// Computes the FIRST set of every non-terminal and stores it in `self.first`.
///
/// `None` inside a set stands for epsilon. The sets are grown until a full
/// pass over all productions changes nothing:
///
/// * for `X -> Y1...Yk`, add `First(Yn)` without epsilon to `First(X)` as long
///   as `Y1...Yn-1` can all derive epsilon,
/// * add epsilon to `First(X)` only if every `Y1...Yk` can derive epsilon
///   (which includes the empty production).
///
/// Whether a non-terminal can derive epsilon is read from the sets built so
/// far, so nullability through chains of rules is picked up as well.
pub fn gen_first(&mut self) {
    let mut first: HashMap<N, HashSet<Option<T>>> = HashMap::new();
    loop {
        let mut change = false;
        for (from, productions) in self.rules.iter() {
            for production in productions.iter() {
                // Everything this production contributes to First(from).
                let mut additions: HashSet<Option<T>> = HashSet::new();
                let mut all_nullable = true;
                for symbol in production {
                    let nullable = match symbol {
                        Sentential::Terminal(a) => {
                            additions.insert(Some(a.clone()));
                            false
                        }
                        Sentential::NoneTerminal(nt) => match first.get(nt) {
                            Some(set) => {
                                // Epsilon of an inner symbol must not leak into
                                // First(from); it is only added below when the
                                // whole production can vanish.
                                additions.extend(set.iter().filter(|t| t.is_some()).cloned());
                                set.contains(&None)
                            }
                            // Nothing known yet; a later pass revisits this
                            // production once the set exists.
                            None => false,
                        },
                    };
                    if !nullable {
                        all_nullable = false;
                        break;
                    }
                }
                if all_nullable {
                    additions.insert(None);
                }
                if additions.is_empty() {
                    // Do not create empty entries for non-terminals about
                    // which nothing is known.
                    continue;
                }
                let entry = first.entry(from.clone()).or_default();
                for val in additions {
                    change |= entry.insert(val);
                }
            }
        }
        if !change {
            break;
        }
    }
    self.first = Some(first);
}
/// Computes the FIRST set of the sentential form `sent`.
///
/// `None` in the result stands for epsilon and is present only if every symbol
/// of `sent` can derive epsilon (so it is always present for an empty `sent`).
/// Accepts any slice; a `&Vec<_>` argument coerces automatically.
///
/// # Panics
///
/// Panics if `self.first` has not been computed yet (see `gen_first`).
pub fn first(&self, sent: &[Sentential<T, N>]) -> HashSet<Option<T>> {
    let first_sets = self
        .first
        .as_ref()
        .expect("Please call gen_first before this!");
    let mut out = HashSet::<Option<T>>::new();
    // Y1Y2...Yk = al
    // add First(Yn) - {e} to First(al) if Y1...Yn-1 => e
    // add e to First(al) if Y1...Yk => e
    for symbol in sent {
        let nullable = match symbol {
            Sentential::Terminal(a) => {
                out.insert(Some(a.clone()));
                false
            }
            Sentential::NoneTerminal(nt) => match first_sets.get(nt) {
                Some(set) => {
                    out.extend(set.iter().filter(|t| t.is_some()).cloned());
                    set.contains(&None)
                }
                None => false,
            },
        };
        if !nullable {
            // A symbol that cannot vanish hides everything behind it.
            return out;
        }
    }
    out.insert(None);
    out
}
pub fn gen_follow(&mut self, start: N) {
if self.first == None {
self.gen_first();
}
let mut follow: HashMap<N, HashSet<Option<T>>> = HashMap::new();
follow.insert(start, HashSet::from([None]));
loop {
let mut change = false;
for (from, to) in self.rules.iter() {
for to in to.iter() {
// a
// if A -> aBb then add First(b) - {e} to Follow(B)
// and if A -> aBb and e in First(b) add Follow(A) to Follow(B)
if to.len() >= 2 {
for i in 0..(to.len() - 1) {
let slice = to[i + 1..].iter().map(|f| f.clone()).collect::<Vec<_>>();
match to.get(i) {
Some(Sentential::NoneTerminal(b)) => {
let mut set = self.first(&slice);
if set.contains(&None) {
if let Some(set) = follow.get(from).cloned() {
follow
.entry(b.clone())
.and_modify(|e| {
for val in set.iter() {
change |= e.insert(val.clone());
}
})
.or_insert_with(|| {
change = true;
set
});
}
}
set.remove(&None);
follow
.entry(b.clone())
.and_modify(|e| {
for val in set.iter() {
change |= e.insert(val.clone());
}
})
.or_insert_with(|| {
change = true;
set
});
}
_ => (),
}
}
}
// b
// and if A -> aB add Follow(A) to Follow(B)
match to.last() {
Some(Sentential::NoneTerminal(b)) => {
if let Some(set) = follow.get(from).cloned() {
follow
.entry(b.clone())
.and_modify(|e| {
for val in set.iter() {
change |= e.insert(val.clone());
}
})
.or_insert_with(|| {
change = true;
set
});
}
}
_ => (),
}
}
}
if !change {
break;
}
}
self.follow = Some(follow);
}
} }

View file

@ -1,12 +1,12 @@
mod double_enum;
mod ll_grammar; mod ll_grammar;
mod scanner; mod scanner;
mod double_enum; use ll_grammar::{LLGrammar, Sentential};
use regex::{Match, Regex}; use regex::{Match, Regex};
use scanner::Scanner; use scanner::Scanner;
use std::collections::HashMap;
double_enum!(
scanner!(
BareTokens, Tokens { BareTokens, Tokens {
WhiteSpace, WhiteSpace,
Add, Add,
@ -14,11 +14,17 @@ scanner!(
Mul, Mul,
Div, Div,
While, While,
LBrace,
RBrace,
Ident(String), Ident(String),
Int(i64), Int(i64),
Float(f64), Float(f64),
} }
r"^\s|\t|\n" : |_,_|{ );
scanner!(
Tokens,
r"^\s|\t|\n" : |_,_| {
Some(WhiteSpace) Some(WhiteSpace)
} }
r"^\+" : |_,_| { r"^\+" : |_,_| {
@ -36,6 +42,12 @@ scanner!(
r"^while" : |_,_| { r"^while" : |_,_| {
Some(While) Some(While)
} }
// Anchored with `^` for consistency with the other patterns in this scanner;
// without the anchor these regexes could match a parenthesis further into the
// input instead of only at its start.
r"^\(" : |_,_| {
    Some(LBrace)
}
r"^\)" : |_,_| {
    Some(RBrace)
}
r"^[a-zA-Z](\w)*" : |_, m: Match<'_>| { r"^[a-zA-Z](\w)*" : |_, m: Match<'_>| {
Some(Ident(String::from(m.as_str()))) Some(Ident(String::from(m.as_str())))
} }
@ -47,28 +59,45 @@ scanner!(
} }
); );
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
enum NoneTerminals { enum NoneTerminals {
P, P,
E, E,
Ei, Ei,
T, T,
Ti,
F,
} }
ll_grammar!( impl<T> From<NoneTerminals> for Sentential<T, NoneTerminals> {
Tokens, fn from(value: NoneTerminals) -> Self {
BareTokens, Sentential::NoneTerminal(value)
Grammar, }
NoneTerminals, }
[
impl<N> From<BareTokens> for Sentential<BareTokens, N> {
fn from(value: BareTokens) -> Self {
Sentential::Terminal(value)
}
}
fn grammer() -> LLGrammar<NoneTerminals, BareTokens> {
use BareTokens::*;
use NoneTerminals::*;
ll_grammar![
P -> E; P -> E;
E -> T,Ei; E -> T,Ei;
Ei -> Add,T,Ei; Ei -> Add,T,Ei;
Ei -> ; Ei -> ;
T -> Ident; T -> F,Ti;
T -> Int; Ti -> Mul,F,Ti;
Ti -> ;
F -> LBrace, E, RBrace;
F -> Int;
F -> Float;
F -> Ident;
] ]
); }
fn main() { fn main() {
let code = String::from("while 12 + a - 3.4 / 0. * 4"); let code = String::from("while 12 + a - 3.4 / 0. * 4");
let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace); let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);
@ -78,25 +107,9 @@ fn main() {
if !m.is_empty() { if !m.is_empty() {
println!("Error"); println!("Error");
} }
}
#[derive(Debug, Clone, PartialEq)] let mut grammar = grammer();
pub enum Test { grammar.gen_follow(NoneTerminals::P);
A, println!("first: {:?}", grammar.first);
B, println!("follow: {:?}", grammar.follow);
}
#[derive(Debug, Clone)]
pub enum BareTest {
A,
B,
}
impl PartialEq<Test> for BareTest {
fn eq(&self, other: &Test) -> bool {
match (self, other){
(BareTest::A, Test::A) => true,
(BareTest::B, Test::B)=> true,
_=> false
}
}
} }

View file

@ -2,40 +2,9 @@ use std::marker::PhantomData;
#[macro_export] #[macro_export]
macro_rules! scanner { macro_rules! scanner {
($bare_name:ident, $name:ident { ($name:ident,$(
$(
$variant:ident$(
( $($args:ty),+ $(,)? )
)?
),* $(,)?
}
$(
$regex:tt : $code:expr $regex:tt : $code:expr
)*) => { )*) => {
#[derive(Debug, Clone, PartialEq)]
pub enum $name {$(
$variant$(
( $($args),+ )
)?
),*}
#[derive(Debug, Clone, PartialEq)]
pub enum $bare_name {$($variant),*}
impl PartialEq<$name> for $bare_name {
fn eq(&self, other: &$name) -> bool {
match (self, other){
$(($bare_name::$variant, $name::$variant{ .. }) => true,)*
_=> false
}
}
}
impl PartialEq<$bare_name> for $name {
fn eq(&self, other: &$bare_name) -> bool {
other.eq(self)
}
}
impl $crate::scanner::MatchNext<$name> for $name { impl $crate::scanner::MatchNext<$name> for $name {
fn match_next(code: &String) -> Option<(Self, usize)> { fn match_next(code: &String) -> Option<(Self, usize)> {
use $name::*; use $name::*;