working ll parser

This commit is contained in:
jusax23 2024-10-27 16:37:00 +01:00
parent db87495a63
commit 9112736652
Signed by: jusax23
GPG key ID: 499E2AA870C1CD41
4 changed files with 460 additions and 59 deletions

View file

@ -289,7 +289,7 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> LLGramm
&self, &self,
none_terminal: &N, none_terminal: &N,
terminal: &Option<T>, terminal: &Option<T>,
) -> Option<&Vec<Sentential<T, N>>> { ) -> Option<(usize, &Vec<Sentential<T, N>>)> {
assert!( assert!(
self.parse_table.is_some(), self.parse_table.is_some(),
"Please call gen_parse_table before this!" "Please call gen_parse_table before this!"
@ -298,12 +298,17 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> LLGramm
.as_ref() .as_ref()
.unwrap() .unwrap()
.get(&(none_terminal.clone(), terminal.clone())) .get(&(none_terminal.clone(), terminal.clone()))
.and_then(|f| self.rules.get(none_terminal).and_then(|rule| rule.get(*f))) .and_then(|f| {
self.rules
.get(none_terminal)
.and_then(|rule| rule.get(*f))
.map(|rule| (*f, rule))
})
} }
pub fn create_checker<'a, S: Into<T> + PartialEq<T> + Clone>( pub fn parser<'a, S: Into<T> + PartialEq<T> + Clone>(
&'a self, &'a self,
iter: &'a mut dyn Iterator<Item = S>, iter: &'a mut dyn Iterator<Item = Result<S, String>>,
) -> LLTabelParser<N, T, S> { ) -> LLTabelParser<N, T, S> {
assert!( assert!(
self.parse_table.is_some(), self.parse_table.is_some(),
@ -324,7 +329,7 @@ pub struct LLTabelParser<
S: Into<T> + PartialEq<T> + Clone, S: Into<T> + PartialEq<T> + Clone,
> { > {
grammar: &'a LLGrammar<N, T>, grammar: &'a LLGrammar<N, T>,
input: &'a mut dyn Iterator<Item = S>, input: &'a mut dyn Iterator<Item = Result<S, String>>,
} }
impl< impl<
@ -334,49 +339,122 @@ impl<
S: Into<T> + PartialEq<T> + Clone + Debug, S: Into<T> + PartialEq<T> + Clone + Debug,
> LLTabelParser<'a, N, T, S> > LLTabelParser<'a, N, T, S>
{ {
pub fn parse(&mut self) -> bool { pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> {
let mut stack: Vec<Sentential<Option<T>, N>> = vec![ // stack of table driven parser
Sentential::Terminal(None), // content of the vec:
Sentential::NoneTerminal(self.grammar.start.clone()), // - first element: all of them combined represent the complete stack, of the parser.
]; // - second element: the rule that has to be able to derive the code defined by its inner childs and the unparsed code from the accompanying first element.
let mut next = self.input.next(); let mut stack: Vec<(Vec<Sentential<T, N>>, ParseTree<N, S>)> = vec![(
vec![Sentential::NoneTerminal(self.grammar.start.clone())],
ParseTree::new(None),
)];
let mut next = match self.input.next() {
Some(Ok(d)) => Some(d),
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
None => None,
};
loop { loop {
if next.is_none() { // look at current state
println!("EOF"); let mut state = stack.pop();
return self.input.size_hint().0 == 0; match state.as_mut() {
} // processing inner state, of tracked rules
let state = stack.pop(); Some((inner_stack, rule)) => {
match state { let inner_state = inner_stack.pop();
Some(Sentential::Terminal(t)) => match (next, t) { match inner_state {
(Some(a), Some(b)) if a == b => { // match terminal, check if equal
next = self.input.next(); Some(Sentential::Terminal(terminal)) => match (next, terminal) {
// actual vs. expected input
(Some(inn), expect) if inn == expect => {
next = match self.input.next() {
Some(Ok(n)) => Some(n),
Some(Err(err)) => {
return Err(format!("Invalid token: {}", err))
}
None => None,
};
rule.childs.push(NodeChild::Data(inn));
stack.push(state.unwrap());
}
(a, b) => {
return Err(format!("found: {:?} expected: {:?}", a, b));
}
},
// take next none terminal and apply rule from parse table.
Some(Sentential::NoneTerminal(none_term)) => {
// load rule
let Some((id, new_rule)) = self
.grammar
.parse_table(&none_term, &next.as_ref().map(|f| f.clone().into()))
else {
// no rule
return Err(format!(
"Unexpected token: {}",
next.map(|f| format!("{f:?}"))
.unwrap_or("end of file".to_string())
));
};
// reverse the rule: a Vec is used as the stack and is popped from the end, so the first symbol has to be stored last
let new_rule_rev =
new_rule.iter().rev().map(|f| f.clone()).collect::<Vec<_>>();
// memorize current state/rule for later
stack.push(state.unwrap());
// process next rule
stack.push((
new_rule_rev,
ParseTree {
rule: Some((none_term, id)),
childs: Vec::new(),
},
));
}
// inner state is empty: current rule is finished
None => {
// if stack is empty, this is the initial state: finish or err
let Some(last) = stack.last_mut() else {
// ok: input has ended
if next.is_none() {
return Ok(state.unwrap().1);
}
// still code left, but not expected
return Err(format!("Expected end of file."));
};
last.1.childs.push(NodeChild::Child(state.unwrap().1));
}
} }
(None, None) => {
next = self.input.next();
}
(a, b) => {
println!("not matching terminals: {a:?}, {b:?}");
return false;
}
},
Some(Sentential::NoneTerminal(nt)) => {
let Some(a) = self
.grammar
.parse_table(&nt, &next.as_ref().map(|f| f.clone().into()))
else {
println!("no parse table entry: {nt:?} next: {next:?}");
return false;
};
stack.extend(a.iter().rev().map(|f| match f {
Sentential::Terminal(t) => Sentential::Terminal(Some(t.clone())),
Sentential::NoneTerminal(nt) => Sentential::NoneTerminal(nt.clone()),
}));
} }
// should not be possible, because every other path pushes to the stack back or returns
None => { None => {
println!("EOS"); return Err(format!("Err: EOS"));
return false;
} }
} }
} }
} }
} }
//
/// A node of the tree built by the table-driven parser.
///
/// `rule` holds the non-terminal that was expanded together with the index of
/// the production chosen for it; it is `None` for a node created without a
/// rule (the parser does this for its root node).
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ParseTree<N, S> {
    pub rule: Option<(N, usize)>,
    pub childs: Vec<NodeChild<N, S>>,
}

impl<N, S> ParseTree<N, S> {
    /// Creates a node for `rule` that has no children yet.
    pub fn new(rule: Option<(N, usize)>) -> Self {
        let childs = Vec::new();
        ParseTree { rule, childs }
    }
}

/// One entry in the child list of a [`ParseTree`].
#[derive(Debug, Clone)]
pub enum NodeChild<N, S> {
    /// A nested sub-tree.
    Child(ParseTree<N, S>),
    /// A single input value of type `S`, stored as-is.
    Data(S),
}

310
src/log.rs Normal file
View file

@ -0,0 +1,310 @@
// Snapshot of a parse result written out as a Rust expression: `a` is bound to
// an `Ok(ParseTree { .. })` literal and never used afterwards.
// Every node pairs a `(non-terminal, rule index)` with its `childs`, which are
// either nested `Child` sub-trees or `Data` tokens.
// NOTE(review): this looks like pasted `{:?}` output of the parser for the
// sample program in `main` (three `;`-separated statements) — confirm.
// NOTE(review): the file contains nothing but this function, so none of the
// bare names (`ParseTree`, `Child`, `Data`, `P`, `Ident`, ...) are imported
// here; presumably kept for reference rather than compiled — confirm.
fn test() {
let a = Ok(ParseTree {
rule: None,
childs: [Child(ParseTree {
rule: Some((P, 0)),
childs: [
// 1st statement — tokens: Ident("a") Assign Int(4)
Child(ParseTree {
rule: Some((L, 2)),
childs: [
Data(Ident("a")),
Child(ParseTree {
rule: Some((FI, 0)),
childs: [],
}),
Child(ParseTree {
rule: Some((Li, 0)),
childs: [
Data(Assign),
Child(ParseTree {
rule: Some((E, 0)),
childs: [
Child(ParseTree {
rule: Some((T, 0)),
childs: [
Child(ParseTree {
rule: Some((F, 1)),
childs: [Data(Int(4))],
}),
Child(ParseTree {
rule: Some((Ti, 2)),
childs: [],
}),
],
}),
Child(ParseTree {
rule: Some((Ei, 4)),
childs: [],
}),
],
}),
],
}),
],
}),
Data(Semicolon),
Child(ParseTree {
rule: Some((P, 0)),
childs: [
// 2nd statement — While, condition (Ident("a") Neq Int(5)), then a nested P between LQBrace and RQBrace
Child(ParseTree {
rule: Some((L, 0)),
childs: [
Data(While),
Child(ParseTree {
rule: Some((E, 0)),
childs: [
Child(ParseTree {
rule: Some((T, 0)),
childs: [
Child(ParseTree {
rule: Some((F, 3)),
childs: [
Data(Ident("a")),
Child(ParseTree {
rule: Some((FI, 0)),
childs: [],
}),
],
}),
Child(ParseTree {
rule: Some((Ti, 2)),
childs: [],
}),
],
}),
Child(ParseTree {
rule: Some((Ei, 1)),
childs: [
Data(Neq),
Child(ParseTree {
rule: Some((T, 0)),
childs: [
Child(ParseTree {
rule: Some((F, 1)),
childs: [Data(Int(5))],
}),
Child(ParseTree {
rule: Some((Ti, 2)),
childs: [],
}),
],
}),
Child(ParseTree {
rule: Some((Ei, 4)),
childs: [],
}),
],
}),
],
}),
Data(LQBrace),
Child(ParseTree {
rule: Some((P, 0)),
childs: [
Child(ParseTree {
rule: Some((L, 2)),
childs: [
Data(Ident("a")),
Child(ParseTree {
rule: Some((FI, 0)),
childs: [],
}),
Child(ParseTree {
rule: Some((Li, 0)),
childs: [
Data(Assign),
Child(ParseTree {
rule: Some((E, 0)),
childs: [
Child(ParseTree {
rule: Some((T, 0)),
childs: [
Child(ParseTree {
rule: Some((F, 3)),
childs: [
Data(Ident("a")),
Child(ParseTree {
rule: Some((
FI, 0,
)),
childs: [],
}),
],
}),
Child(ParseTree {
rule: Some((Ti, 2)),
childs: [],
}),
],
}),
Child(ParseTree {
rule: Some((Ei, 2)),
childs: [
Data(Add),
Child(ParseTree {
rule: Some((T, 0)),
childs: [
Child(ParseTree {
rule: Some((
F, 1,
)),
childs: [Data(
Int(1),
)],
}),
Child(ParseTree {
rule: Some((
Ti, 2,
)),
childs: [],
}),
],
}),
Child(ParseTree {
rule: Some((Ei, 4)),
childs: [],
}),
],
}),
],
}),
],
}),
],
}),
Data(Semicolon),
Child(ParseTree {
rule: Some((P, 1)),
childs: [],
}),
],
}),
Data(RQBrace),
],
}),
Data(Semicolon),
Child(ParseTree {
rule: Some((P, 0)),
childs: [
// 3rd statement — If, condition (Ident("a") Eq Int(5)), then a nested P between LQBrace and RQBrace
Child(ParseTree {
rule: Some((L, 1)),
childs: [
Data(If),
Child(ParseTree {
rule: Some((E, 0)),
childs: [
Child(ParseTree {
rule: Some((T, 0)),
childs: [
Child(ParseTree {
rule: Some((F, 3)),
childs: [
Data(Ident("a")),
Child(ParseTree {
rule: Some((FI, 0)),
childs: [],
}),
],
}),
Child(ParseTree {
rule: Some((Ti, 2)),
childs: [],
}),
],
}),
Child(ParseTree {
rule: Some((Ei, 0)),
childs: [
Data(Eq),
Child(ParseTree {
rule: Some((T, 0)),
childs: [
Child(ParseTree {
rule: Some((F, 1)),
childs: [Data(Int(5))],
}),
Child(ParseTree {
rule: Some((Ti, 2)),
childs: [],
}),
],
}),
Child(ParseTree {
rule: Some((Ei, 4)),
childs: [],
}),
],
}),
],
}),
Data(LQBrace),
Child(ParseTree {
rule: Some((P, 0)),
childs: [
Child(ParseTree {
rule: Some((L, 2)),
childs: [
Data(Ident("a")),
Child(ParseTree {
rule: Some((FI, 0)),
childs: [],
}),
Child(ParseTree {
rule: Some((Li, 0)),
childs: [
Data(Assign),
Child(ParseTree {
rule: Some((E, 0)),
childs: [
Child(ParseTree {
rule: Some((T, 0)),
childs: [
Child(ParseTree {
rule: Some((
F, 1,
)),
childs: [Data(
Int(4),
)],
}),
Child(ParseTree {
rule: Some((
Ti, 2,
)),
childs: [],
}),
],
}),
Child(ParseTree {
rule: Some((Ei, 4)),
childs: [],
}),
],
}),
],
}),
],
}),
Data(Semicolon),
Child(ParseTree {
rule: Some((P, 1)),
childs: [],
}),
],
}),
Data(RQBrace),
],
}),
Data(Semicolon),
// (P, 1) with no children closes the statement list
Child(ParseTree {
rule: Some((P, 1)),
childs: [],
}),
],
}),
],
}),
],
})],
});
}

View file

@ -34,7 +34,7 @@ double_enum!(
scanner!( scanner!(
Tokens, Tokens,
r"^\s|\t|\n" : |_,_| { r"^\s|\t|\n|\r" : |_,_| {
Some(WhiteSpace) Some(WhiteSpace)
} }
r"^;" : |_,_| { r"^;" : |_,_| {
@ -64,6 +64,9 @@ scanner!(
r"^while" : |_,_| { r"^while" : |_,_| {
Some(While) Some(While)
} }
r"^if" : |_,_| {
Some(If)
}
r"^\(" : |_,_| { r"^\(" : |_,_| {
Some(LBrace) Some(LBrace)
} }
@ -98,8 +101,10 @@ scanner!(
#[derive(Debug, PartialEq, Eq, Hash, Clone)] #[derive(Debug, PartialEq, Eq, Hash, Clone)]
enum NoneTerminals { enum NoneTerminals {
P, // Program, ; separated P, // Program, ; separated
E, // Expression one line L,
Li,
E, // Expression
Ei, // Expression extended additive Ei, // Expression extended additive
T, // Term, only containing Factors T, // Term, only containing Factors
Ti, // Term extend multiplicative Ti, // Term extend multiplicative
@ -124,11 +129,14 @@ fn grammer() -> LLGrammar<NoneTerminals, BareTokens> {
use NoneTerminals::*; use NoneTerminals::*;
ll_grammar![ ll_grammar![
start: P; start: P;
P -> E,Semicolon,P; P -> L,Semicolon,P;
P -> ; P -> ;
L -> While,E,LQBrace,P,RQBrace;
L -> If,E,LQBrace,P,RQBrace;
L -> Ident,FI,Li;
Li -> Assign,E;
Li -> ;
E -> T,Ei; E -> T,Ei;
E -> While,LBrace,E,RBrace,LQBrace,P,RQBrace;
Ei -> Assign,T,Ei;
Ei -> Eq,T,Ei; Ei -> Eq,T,Ei;
Ei -> Neq,T,Ei; Ei -> Neq,T,Ei;
Ei -> Add,T,Ei; Ei -> Add,T,Ei;
@ -150,11 +158,8 @@ fn grammer() -> LLGrammar<NoneTerminals, BareTokens> {
} }
fn main() { fn main() {
let code = String::from("a = 4; while(a != 5) {a = a+1;};"); let code = String::from("a = 4;while a != 5 { a = a+1; }; if a == 5 { a = 4; };");
let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace); let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);
/* for token in m.iter_mut() {
println!("{:?}", token);
} */
let mut grammar = grammer(); let mut grammar = grammer();
grammar.gen_follow(); grammar.gen_follow();
@ -163,8 +168,9 @@ fn main() {
let conflict = grammar.gen_parse_table(); let conflict = grammar.gen_parse_table();
println!("conflict: {conflict}"); println!("conflict: {conflict}");
println!("prase table: {:?}", grammar.parse_table); println!("prase table: {:?}", grammar.parse_table);
println!("parse\n\n");
println!( println!(
"parsed: {}", "parsed: {:?}",
grammar.create_checker(&mut m.iter_mut()).parse() grammar.parser(&mut m.iter_mut()).parse()
) )
} }

View file

@ -1,4 +1,4 @@
use std::{iter::Peekable, marker::PhantomData}; use std::marker::PhantomData;
#[macro_export] #[macro_export]
macro_rules! scanner { macro_rules! scanner {
@ -33,6 +33,7 @@ pub struct Scanner<T: MatchNext<T> + PartialEq> {
_a: PhantomData<T>, _a: PhantomData<T>,
} }
#[allow(dead_code)]
impl<T: MatchNext<T> + PartialEq> Scanner<T> { impl<T: MatchNext<T> + PartialEq> Scanner<T> {
pub fn new(code: String) -> Self { pub fn new(code: String) -> Self {
Self { Self {
@ -55,8 +56,8 @@ impl<T: MatchNext<T> + PartialEq> Scanner<T> {
pub struct ScannerIter<'a, T: MatchNext<T> + PartialEq>(&'a mut Scanner<T>); pub struct ScannerIter<'a, T: MatchNext<T> + PartialEq>(&'a mut Scanner<T>);
impl<'a, T: MatchNext<T> + PartialEq> Iterator for ScannerIter<'a, T> { impl<'a, T: MatchNext<T> + PartialEq + std::fmt::Debug> Iterator for ScannerIter<'a, T> {
type Item = T; type Item = Result<T, String>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
if self.0.code.is_empty() { if self.0.code.is_empty() {
@ -68,9 +69,15 @@ impl<'a, T: MatchNext<T> + PartialEq> Iterator for ScannerIter<'a, T> {
if self.0.skip.contains(&token) { if self.0.skip.contains(&token) {
continue; continue;
} }
return Some(token); return Some(Ok(token));
} }
return None; return self
.0
.code
.split(" ")
.collect::<Vec<_>>()
.first()
.map(|f| Err(f.to_string()));
} }
} }
fn size_hint(&self) -> (usize, Option<usize>) { fn size_hint(&self) -> (usize, Option<usize>) {