ll checker

This commit is contained in:
jusax23 2024-10-27 14:07:38 +01:00
parent 4548283ba8
commit db87495a63
Signed by: jusax23
GPG key ID: 499E2AA870C1CD41
4 changed files with 242 additions and 25 deletions

View file

@ -30,5 +30,12 @@ macro_rules! double_enum {
other.eq(self) other.eq(self)
} }
} }
impl From<$name> for $bare_name{
fn from(value: $name) -> Self {
match value {
$($name::$variant{ .. } => $bare_name::$variant,)*
}
}
}
} }
} }

View file

@ -7,6 +7,7 @@ use std::{
#[macro_export] #[macro_export]
macro_rules! ll_grammar { macro_rules! ll_grammar {
( (
start: $start:ident;
$( $(
$left:ident -> $( $left:ident -> $(
$right:ident $right:ident
@ -22,9 +23,11 @@ macro_rules! ll_grammar {
map.get_mut(&$left).unwrap().push(vec![$($right.into()),*]); map.get_mut(&$left).unwrap().push(vec![$($right.into()),*]);
})* })*
$crate::ll_grammar::LLGrammar { $crate::ll_grammar::LLGrammar {
start: $start,
rules: map, rules: map,
first: None, first: None,
follow: None, follow: None,
parse_table: None,
} }
} }
}; };
@ -37,16 +40,19 @@ pub enum Sentential<T, N> {
} }
pub struct LLGrammar<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> { pub struct LLGrammar<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> {
pub start: N,
pub rules: HashMap<N, Vec<Vec<Sentential<T, N>>>>, pub rules: HashMap<N, Vec<Vec<Sentential<T, N>>>>,
/// none is epsilon /// none is epsilon
pub first: Option<HashMap<N, HashSet<Option<T>>>>, pub first: Option<HashMap<N, HashSet<Option<T>>>>,
/// none is $ /// none is $
pub follow: Option<HashMap<N, HashSet<Option<T>>>>, pub follow: Option<HashMap<N, HashSet<Option<T>>>>,
/// Parse table: for a non-terminal `N` and a lookahead terminal `T`, stores the index of the rule of `N` to apply.
/// A lookahead of `None` stands for the end-of-input marker `$`.
pub parse_table: Option<HashMap<(N, Option<T>), usize>>,
} }
impl<N: PartialEq + Eq + Hash + Clone + Debug, T: PartialEq + Eq + Hash + Clone + Debug> impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> LLGrammar<N, T> {
LLGrammar<N, T>
{
pub fn can_produce_epsilon(&self, rule: &Sentential<T, N>) -> bool { pub fn can_produce_epsilon(&self, rule: &Sentential<T, N>) -> bool {
match rule { match rule {
Sentential::Terminal(_) => false, Sentential::Terminal(_) => false,
@ -147,12 +153,12 @@ impl<N: PartialEq + Eq + Hash + Clone + Debug, T: PartialEq + Eq + Hash + Clone
out out
} }
pub fn gen_follow(&mut self, start: N) { pub fn gen_follow(&mut self) {
if self.first == None { if self.first == None {
self.gen_first(); self.gen_first();
} }
let mut follow: HashMap<N, HashSet<Option<T>>> = HashMap::new(); let mut follow: HashMap<N, HashSet<Option<T>>> = HashMap::new();
follow.insert(start, HashSet::from([None])); follow.insert(self.start.clone(), HashSet::from([None]));
loop { loop {
let mut change = false; let mut change = false;
@ -228,4 +234,149 @@ impl<N: PartialEq + Eq + Hash + Clone + Debug, T: PartialEq + Eq + Hash + Clone
} }
self.follow = Some(follow); self.follow = Some(follow);
} }
/// Returns a copy of the follow set stored for `none_termianl`.
///
/// A `None` element of the set stands for the end-of-input marker `$`.
/// A non-terminal without a stored follow set yields an empty set.
///
/// # Panics
/// Panics if no follow sets have been generated yet (`self.follow` is `None`).
pub fn follow(&self, none_termianl: &N) -> HashSet<Option<T>> {
    let follow_sets = self
        .follow
        .as_ref()
        .expect("Please call gen_follow before this!");
    match follow_sets.get(none_termianl) {
        Some(set) => set.clone(),
        None => HashSet::new(),
    }
}
/// Builds the parse table from the first and follow sets and stores it in `self.parse_table`.
///
/// Follow sets are generated first if they are missing. If a parse table is
/// already stored, nothing is rebuilt and `false` is returned.
///
/// For every rule `A -> al` (identified by its index among the rules of `A`):
/// * each terminal `a` in `First(al)` maps `(A, a)` to that rule;
/// * if `First(al)` contains epsilon (`None`), each `b` in `Follow(A)` maps
///   `(A, b)` to that rule.
///
/// Returns `true` if some table cell was written more than once. In that case
/// the most recently inserted rule stays in the cell.
pub fn gen_parse_table(&mut self) -> bool {
    if self.follow.is_none() {
        self.gen_follow();
    }
    if self.parse_table.is_some() {
        return false;
    }

    let mut table: HashMap<(N, Option<T>), usize> = HashMap::new();
    let mut conflict = false;

    for (left, alternatives) in self.rules.iter() {
        for (index, alternative) in alternatives.iter().enumerate() {
            for lookahead in self.first(alternative) {
                if let Some(symbol) = lookahead {
                    // A real terminal starts this alternative.
                    let previous = table.insert((left.clone(), Some(symbol.clone())), index);
                    conflict |= previous.is_some();
                } else {
                    // The alternative can vanish: whatever may follow `left` selects it.
                    for follower in self.follow(left).iter() {
                        let previous = table.insert((left.clone(), follower.clone()), index);
                        conflict |= previous.is_some();
                    }
                }
            }
        }
    }

    self.parse_table = Some(table);
    conflict
}
/// Looks up the rule to apply for `none_terminal` when the lookahead is `terminal`.
///
/// A `terminal` of `None` stands for the end-of-input marker `$`.
/// Returns the right-hand side of the selected rule, or `None` when the table
/// has no entry for this pair (a parse error) or the stored index does not
/// resolve to a rule.
///
/// # Panics
/// Panics if no parse table has been generated yet (`self.parse_table` is `None`).
pub fn parse_table(
    &self,
    none_terminal: &N,
    terminal: &Option<T>,
) -> Option<&Vec<Sentential<T, N>>> {
    let table = self
        .parse_table
        .as_ref()
        .expect("Please call gen_parse_table before this!");
    let index = *table.get(&(none_terminal.clone(), terminal.clone()))?;
    self.rules.get(none_terminal)?.get(index)
}
/// Creates a checker that validates the symbols produced by `iter` against this grammar.
///
/// The returned parser borrows both the grammar and the iterator.
///
/// # Panics
/// Panics if no parse table has been generated yet (`self.parse_table` is `None`).
pub fn create_checker<'a, S: Into<T> + PartialEq<T> + Clone>(
    &'a self,
    iter: &'a mut dyn Iterator<Item = S>,
) -> LLTabelParser<N, T, S> {
    let table_ready = self.parse_table.is_some();
    assert!(table_ready, "Please call gen_parse_table before this!");
    LLTabelParser {
        grammar: self,
        input: iter,
    }
}
}
/// Table-driven checker: it only validates a program and does not generate any output.
///
/// `N` is the non-terminal type and `T` the terminal type of the grammar;
/// `S` is the item type of the input, which must be convertible into `T`
/// and comparable with it.
pub struct LLTabelParser<
'a,
N: PartialEq + Eq + Hash + Clone,
T: PartialEq + Eq + Hash + Clone,
S: Into<T> + PartialEq<T> + Clone,
> {
// Grammar whose parse table drives the check.
grammar: &'a LLGrammar<N, T>,
// Source of the input symbols; borrowed mutably because it is advanced while parsing.
input: &'a mut dyn Iterator<Item = S>,
}
impl<
'a,
N: PartialEq + Eq + Hash + Clone + Debug,
T: PartialEq + Eq + Hash + Clone + Debug,
S: Into<T> + PartialEq<T> + Clone + Debug,
> LLTabelParser<'a, N, T, S>
{
/// Checks whether the complete input can be derived from the grammar's start symbol.
///
/// Runs a table-driven predictive parse. The stack starts as `$ start`
/// (`Terminal(None)` is the end marker `$`). A terminal on top of the stack
/// must equal the current input symbol; a non-terminal is replaced by the
/// rule the parse table selects for the current lookahead. Running out of
/// input is not an immediate accept: the lookahead becomes `$` (`None`) and
/// the remaining stack must still reduce to the end marker, so truncated
/// input is rejected.
///
/// Returns `true` only when the end marker on the stack meets the end of the
/// input and the lower bound of the input's `size_hint` is `0`. Diagnostic
/// messages are printed to stdout.
///
/// # Panics
/// Panics if the grammar has no parse table (see `LLGrammar::parse_table`).
pub fn parse(&mut self) -> bool {
    // Bottom of the stack is the end marker `$`, above it the start symbol.
    let mut stack: Vec<Sentential<Option<T>, N>> = vec![
        Sentential::Terminal(None),
        Sentential::NoneTerminal(self.grammar.start.clone()),
    ];
    // Current lookahead; `None` means the input is exhausted (`$`).
    let mut next = self.input.next();
    loop {
        match stack.pop() {
            Some(Sentential::Terminal(expected)) => match (next, expected) {
                // Input symbol matches the expected terminal: consume it.
                (Some(found), Some(wanted)) if found == wanted => {
                    next = self.input.next();
                }
                // End marker meets end of input: the derivation is complete.
                (None, None) => {
                    println!("EOF");
                    // A non-zero lower bound means the iterator stopped while it still
                    // holds unconsumed input.
                    // NOTE(review): presumably this is how the scanner reports text it
                    // could not tokenize; confirm against the iterator in use.
                    return self.input.size_hint().0 == 0;
                }
                (found, wanted) => {
                    println!("not matching terminals: {found:?}, {wanted:?}");
                    return false;
                }
            },
            Some(Sentential::NoneTerminal(nt)) => {
                let lookahead: Option<T> = next.as_ref().map(|symbol| symbol.clone().into());
                let Some(rule) = self.grammar.parse_table(&nt, &lookahead) else {
                    println!("no parse table entry: {nt:?} next: {next:?}");
                    return false;
                };
                // Push the right-hand side reversed so its first symbol ends up on top.
                stack.extend(rule.iter().rev().map(|symbol| match symbol {
                    Sentential::Terminal(t) => Sentential::Terminal(Some(t.clone())),
                    Sentential::NoneTerminal(n) => Sentential::NoneTerminal(n.clone()),
                }));
            }
            // Defensive: the end marker is handled above, so the stack should never run dry.
            None => {
                println!("EOS");
                return false;
            }
        }
    }
}
} }

View file

@ -9,13 +9,23 @@ use std::collections::HashMap;
double_enum!( double_enum!(
BareTokens, Tokens { BareTokens, Tokens {
WhiteSpace, WhiteSpace,
Semicolon,
Add, Add,
Sub, Sub,
Mul, Mul,
Div, Div,
Eq,
Neq,
Assign,
While, While,
If,
LBrace, LBrace,
RBrace, RBrace,
LSBrace,
RSBrace,
LQBrace,
RQBrace,
Dot,
Ident(String), Ident(String),
Int(i64), Int(i64),
Float(f64), Float(f64),
@ -27,6 +37,9 @@ scanner!(
r"^\s|\t|\n" : |_,_| { r"^\s|\t|\n" : |_,_| {
Some(WhiteSpace) Some(WhiteSpace)
} }
r"^;" : |_,_| {
Some(Semicolon)
}
r"^\+" : |_,_| { r"^\+" : |_,_| {
Some(Add) Some(Add)
} }
@ -39,18 +52,42 @@ scanner!(
r"^/" : |_,_| { r"^/" : |_,_| {
Some(Div) Some(Div)
} }
r"^==" : |_,_| {
Some(Eq)
}
r"^!=" : |_,_| {
Some(Neq)
}
r"^=" : |_,_| {
Some(Assign)
}
r"^while" : |_,_| { r"^while" : |_,_| {
Some(While) Some(While)
} }
r"\(" : |_,_| { r"^\(" : |_,_| {
Some(LBrace) Some(LBrace)
} }
r"\)" : |_,_| { r"^\)" : |_,_| {
Some(RBrace) Some(RBrace)
} }
r"^\[" : |_,_| {
Some(LSBrace)
}
r"^\]" : |_,_| {
Some(RSBrace)
}
r"^\{" : |_,_| {
Some(LQBrace)
}
r"^\}" : |_,_| {
Some(RQBrace)
}
r"^[a-zA-Z](\w)*" : |_, m: Match<'_>| { r"^[a-zA-Z](\w)*" : |_, m: Match<'_>| {
Some(Ident(String::from(m.as_str()))) Some(Ident(String::from(m.as_str())))
} }
r"^\." : |_, _| {
Some(Dot)
}
r"^[0-9]+.[0-9]*" : |_, m: Match<'_>| { r"^[0-9]+.[0-9]*" : |_, m: Match<'_>| {
m.as_str().parse::<_>().ok().map(|f| Float(f)) m.as_str().parse::<_>().ok().map(|f| Float(f))
} }
@ -61,12 +98,13 @@ scanner!(
#[derive(Debug, PartialEq, Eq, Hash, Clone)] #[derive(Debug, PartialEq, Eq, Hash, Clone)]
enum NoneTerminals { enum NoneTerminals {
P, P, // Program, ; separated
E, E, // Expression one line
Ei, Ei, // Expression extended additive
T, T, // Term, only containing Factors
Ti, Ti, // Term extend multiplicative
F, F, // Factor
FI, // Factor extended with complex types and operators
} }
impl<T> From<NoneTerminals> for Sentential<T, NoneTerminals> { impl<T> From<NoneTerminals> for Sentential<T, NoneTerminals> {
@ -85,8 +123,14 @@ fn grammer() -> LLGrammar<NoneTerminals, BareTokens> {
use BareTokens::*; use BareTokens::*;
use NoneTerminals::*; use NoneTerminals::*;
ll_grammar![ ll_grammar![
P -> E; start: P;
P -> E,Semicolon,P;
P -> ;
E -> T,Ei; E -> T,Ei;
E -> While,LBrace,E,RBrace,LQBrace,P,RQBrace;
Ei -> Assign,T,Ei;
Ei -> Eq,T,Ei;
Ei -> Neq,T,Ei;
Ei -> Add,T,Ei; Ei -> Add,T,Ei;
Ei -> Sub,T,Ei; Ei -> Sub,T,Ei;
Ei -> ; Ei -> ;
@ -97,21 +141,30 @@ fn grammer() -> LLGrammar<NoneTerminals, BareTokens> {
F -> LBrace, E, RBrace; F -> LBrace, E, RBrace;
F -> Int; F -> Int;
F -> Float; F -> Float;
F -> Ident; F -> Ident,FI;
FI -> ;
FI -> LBrace,E,RBrace;
FI -> Dot, FI;
FI -> LSBrace,E,RSBrace;
] ]
} }
fn main() { fn main() {
let code = String::from("while 12 + a - 3.4 / 0. * 4"); let code = String::from("a = 4; while(a != 5) {a = a+1;};");
let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace); let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);
for token in m.iter_mut() { /* for token in m.iter_mut() {
println!("{:?}", token); println!("{:?}", token);
} } */
if !m.is_empty() {
println!("Error");
}
let mut grammar = grammer(); let mut grammar = grammer();
grammar.gen_follow(NoneTerminals::P); grammar.gen_follow();
println!("first: {:?}", grammar.first); println!("first: {:?}", grammar.first);
println!("follow: {:?}", grammar.follow); println!("follow: {:?}", grammar.follow);
let conflict = grammar.gen_parse_table();
println!("conflict: {conflict}");
println!("prase table: {:?}", grammar.parse_table);
println!(
"parsed: {}",
grammar.create_checker(&mut m.iter_mut()).parse()
)
} }

View file

@ -1,4 +1,4 @@
use std::marker::PhantomData; use std::{iter::Peekable, marker::PhantomData};
#[macro_export] #[macro_export]
macro_rules! scanner { macro_rules! scanner {
@ -73,4 +73,10 @@ impl<'a, T: MatchNext<T> + PartialEq> Iterator for ScannerIter<'a, T> {
return None; return None;
} }
} }
/// Reports bounds derived from the length of the code still held by the scanner.
///
/// The lower bound is `0` when no code is left and `1` otherwise; the upper
/// bound is the length of the remaining code.
fn size_hint(&self) -> (usize, Option<usize>) {
    let remaining = self.0.code.len();
    let lower = if remaining == 0 { 0 } else { 1 };
    (lower, Some(remaining))
}
} }