working ll parser

This commit is contained in:
jusax23 2024-10-27 20:45:43 +01:00
parent db87495a63
commit 5fafa6f342
Signed by: jusax23
GPG key ID: 499E2AA870C1CD41
3 changed files with 148 additions and 61 deletions

View file

@ -289,7 +289,7 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> LLGramm
&self,
none_terminal: &N,
terminal: &Option<T>,
) -> Option<&Vec<Sentential<T, N>>> {
) -> Option<(usize, &Vec<Sentential<T, N>>)> {
assert!(
self.parse_table.is_some(),
"Please call gen_parse_table before this!"
@ -298,12 +298,17 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> LLGramm
.as_ref()
.unwrap()
.get(&(none_terminal.clone(), terminal.clone()))
.and_then(|f| self.rules.get(none_terminal).and_then(|rule| rule.get(*f)))
.and_then(|f| {
self.rules
.get(none_terminal)
.and_then(|rule| rule.get(*f))
.map(|rule| (*f, rule))
})
}
pub fn create_checker<'a, S: Into<T> + PartialEq<T> + Clone>(
pub fn parser<'a, S: Into<T> + PartialEq<T> + Clone>(
&'a self,
iter: &'a mut dyn Iterator<Item = S>,
iter: &'a mut dyn Iterator<Item = Result<S, String>>,
) -> LLTabelParser<N, T, S> {
assert!(
self.parse_table.is_some(),
@ -324,7 +329,7 @@ pub struct LLTabelParser<
S: Into<T> + PartialEq<T> + Clone,
> {
grammar: &'a LLGrammar<N, T>,
input: &'a mut dyn Iterator<Item = S>,
input: &'a mut dyn Iterator<Item = Result<S, String>>,
}
impl<
@ -334,49 +339,121 @@ impl<
S: Into<T> + PartialEq<T> + Clone + Debug,
> LLTabelParser<'a, N, T, S>
{
pub fn parse(&mut self) -> bool {
let mut stack: Vec<Sentential<Option<T>, N>> = vec![
Sentential::Terminal(None),
Sentential::NoneTerminal(self.grammar.start.clone()),
];
let mut next = self.input.next();
pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> {
// stack of table driven parser
// content of the vec:
// - first element: the symbols still to be processed for this rule; all first elements combined represent the complete stack of the parser.
// - second element: the parse tree node of this rule; the rule has to be able to derive the code covered by its inner children plus the still unparsed code matched by the accompanying first element.
let mut stack: Vec<(Vec<Sentential<T, N>>, ParseTree<N, S>)> = vec![(
vec![Sentential::NoneTerminal(self.grammar.start.clone())],
ParseTree::new(None),
)];
let mut next = match self.input.next() {
Some(Ok(d)) => Some(d),
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
None => None,
};
loop {
if next.is_none() {
println!("EOF");
return self.input.size_hint().0 == 0;
}
let state = stack.pop();
match state {
Some(Sentential::Terminal(t)) => match (next, t) {
(Some(a), Some(b)) if a == b => {
next = self.input.next();
// look at current state
let mut state = stack.pop();
match state.as_mut() {
// processing inner state, of tracked rules
Some((inner_stack, rule)) => {
let inner_state = inner_stack.pop();
match inner_state {
// match terminal, check if equal
Some(Sentential::Terminal(terminal)) => match (next, terminal) {
// actual vs. expected input
(Some(inn), expect) if inn == expect => {
next = match self.input.next() {
Some(Ok(n)) => Some(n),
Some(Err(err)) => {
return Err(format!("Invalid token: {}", err))
}
None => None,
};
rule.childs.push(NodeChild::Data(inn));
stack.push(state.unwrap());
}
(a, b) => {
return Err(format!("found: {:?} expected: {:?}", a, b));
}
},
// take next none terminal and apply rule from parse table.
Some(Sentential::NoneTerminal(none_term)) => {
// load rule
let Some((id, new_rule)) = self
.grammar
.parse_table(&none_term, &next.as_ref().map(|f| f.clone().into()))
else {
// no rule
return Err(format!(
"Unexpected token: {}",
next.map(|f| format!("{f:?}"))
.unwrap_or("end of file".to_string())
));
};
// reverse the rule: the vec is used as a stack (popped from the end), so the first symbol has to be stored last
let new_rule_rev =
new_rule.iter().rev().map(|f| f.clone()).collect::<Vec<_>>();
// memorize current state/rule for later
stack.push(state.unwrap());
// process next rule
stack.push((
new_rule_rev,
ParseTree {
rule: Some((none_term, id)),
childs: Vec::new(),
},
));
}
// inner state is empty: current rule is finished
None => {
// if stack is empty, this is the initial state: finish or err
let Some(last) = stack.last_mut() else {
// ok: input has ended
if next.is_none() {
return Ok(state.unwrap().1);
}
// there is still code left, but none was expected
return Err(format!("Expected end of file."));
};
last.1.childs.push(NodeChild::Child(state.unwrap().1));
}
}
(None, None) => {
next = self.input.next();
}
(a, b) => {
println!("not matching terminals: {a:?}, {b:?}");
return false;
}
},
Some(Sentential::NoneTerminal(nt)) => {
let Some(a) = self
.grammar
.parse_table(&nt, &next.as_ref().map(|f| f.clone().into()))
else {
println!("no parse table entry: {nt:?} next: {next:?}");
return false;
};
stack.extend(a.iter().rev().map(|f| match f {
Sentential::Terminal(t) => Sentential::Terminal(Some(t.clone())),
Sentential::NoneTerminal(nt) => Sentential::NoneTerminal(nt.clone()),
}));
}
// should not be possible, because every other path either pushes back to the stack or returns
None => {
println!("EOS");
return false;
return Err(format!("Err: EOS"));
}
}
}
}
}
//
/// One entry in the child list of a [`ParseTree`].
///
/// A child is either a nested subtree or a single piece of data of type `S`.
#[derive(Debug, Clone)]
pub enum NodeChild<N, S> {
    /// A nested subtree.
    Child(ParseTree<N, S>),
    /// A leaf carrying one value of type `S`.
    Data(S),
}

/// A node of a parse tree.
///
/// `N` is the type used to label a node's rule and `S` is the type of the
/// data stored in the leaves.
#[derive(Debug, Clone)]
// NOTE(review): dead_code is silenced for this type; confirm it is still
// needed once the tree's fields are read by a consumer.
#[allow(dead_code)]
pub struct ParseTree<N, S> {
    /// The rule this node stands for, or `None` when the node is not tied to
    /// a rule. The `usize` is presumably the index of the chosen alternative
    /// among the rules of `N` — TODO confirm against the grammar.
    pub rule: Option<(N, usize)>,
    /// The node's children, in the order they were appended.
    pub childs: Vec<NodeChild<N, S>>,
}

impl<N, S> ParseTree<N, S> {
    /// Creates a node for `rule` that has no children yet.
    pub fn new(rule: Option<(N, usize)>) -> Self {
        let childs = Vec::new();
        ParseTree { rule, childs }
    }
}

View file

@ -34,7 +34,7 @@ double_enum!(
scanner!(
Tokens,
r"^\s|\t|\n" : |_,_| {
r"^\s|\t|\n|\r" : |_,_| {
Some(WhiteSpace)
}
r"^;" : |_,_| {
@ -64,6 +64,9 @@ scanner!(
r"^while" : |_,_| {
Some(While)
}
r"^if" : |_,_| {
Some(If)
}
r"^\(" : |_,_| {
Some(LBrace)
}
@ -98,8 +101,10 @@ scanner!(
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
enum NoneTerminals {
P, // Program, ; separated
E, // Expression one line
P, // Program, ; separated
L, // Line of code
Li, // line extended for assignments
E, // Expression
Ei, // Expression extended additive
T, // Term, only containing Factors
Ti, // Term extend multiplicative
@ -124,11 +129,14 @@ fn grammer() -> LLGrammar<NoneTerminals, BareTokens> {
use NoneTerminals::*;
ll_grammar![
start: P;
P -> E,Semicolon,P;
P -> L,Semicolon,P;
P -> ;
L -> While,E,LQBrace,P,RQBrace;
L -> If,E,LQBrace,P,RQBrace;
L -> Ident,FI,Li;
Li -> Assign,E;
Li -> ;
E -> T,Ei;
E -> While,LBrace,E,RBrace,LQBrace,P,RQBrace;
Ei -> Assign,T,Ei;
Ei -> Eq,T,Ei;
Ei -> Neq,T,Ei;
Ei -> Add,T,Ei;
@ -150,11 +158,8 @@ fn grammer() -> LLGrammar<NoneTerminals, BareTokens> {
}
fn main() {
let code = String::from("a = 4; while(a != 5) {a = a+1;};");
let code = String::from("a = 4;while a != 5 { a = a+1; }; if a == 5 { a = 4; };");
let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);
/* for token in m.iter_mut() {
println!("{:?}", token);
} */
let mut grammar = grammer();
grammar.gen_follow();
@ -163,8 +168,6 @@ fn main() {
let conflict = grammar.gen_parse_table();
println!("conflict: {conflict}");
println!("prase table: {:?}", grammar.parse_table);
println!(
"parsed: {}",
grammar.create_checker(&mut m.iter_mut()).parse()
)
println!("parse\n\n");
println!("parsed: {:?}", grammar.parser(&mut m.iter_mut()).parse())
}

View file

@ -1,4 +1,4 @@
use std::{iter::Peekable, marker::PhantomData};
use std::marker::PhantomData;
#[macro_export]
macro_rules! scanner {
@ -33,6 +33,7 @@ pub struct Scanner<T: MatchNext<T> + PartialEq> {
_a: PhantomData<T>,
}
#[allow(dead_code)]
impl<T: MatchNext<T> + PartialEq> Scanner<T> {
pub fn new(code: String) -> Self {
Self {
@ -55,8 +56,8 @@ impl<T: MatchNext<T> + PartialEq> Scanner<T> {
pub struct ScannerIter<'a, T: MatchNext<T> + PartialEq>(&'a mut Scanner<T>);
impl<'a, T: MatchNext<T> + PartialEq> Iterator for ScannerIter<'a, T> {
type Item = T;
impl<'a, T: MatchNext<T> + PartialEq + std::fmt::Debug> Iterator for ScannerIter<'a, T> {
type Item = Result<T, String>;
fn next(&mut self) -> Option<Self::Item> {
if self.0.code.is_empty() {
@ -68,9 +69,15 @@ impl<'a, T: MatchNext<T> + PartialEq> Iterator for ScannerIter<'a, T> {
if self.0.skip.contains(&token) {
continue;
}
return Some(token);
return Some(Ok(token));
}
return None;
return self
.0
.code
.split(" ")
.collect::<Vec<_>>()
.first()
.map(|f| Err(f.to_string()));
}
}
fn size_hint(&self) -> (usize, Option<usize>) {