working ll parser
This commit is contained in:
parent
db87495a63
commit
5fafa6f342
3 changed files with 148 additions and 61 deletions
|
@ -289,7 +289,7 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> LLGramm
|
|||
&self,
|
||||
none_terminal: &N,
|
||||
terminal: &Option<T>,
|
||||
) -> Option<&Vec<Sentential<T, N>>> {
|
||||
) -> Option<(usize, &Vec<Sentential<T, N>>)> {
|
||||
assert!(
|
||||
self.parse_table.is_some(),
|
||||
"Please call gen_parse_table before this!"
|
||||
|
@ -298,12 +298,17 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> LLGramm
|
|||
.as_ref()
|
||||
.unwrap()
|
||||
.get(&(none_terminal.clone(), terminal.clone()))
|
||||
.and_then(|f| self.rules.get(none_terminal).and_then(|rule| rule.get(*f)))
|
||||
.and_then(|f| {
|
||||
self.rules
|
||||
.get(none_terminal)
|
||||
.and_then(|rule| rule.get(*f))
|
||||
.map(|rule| (*f, rule))
|
||||
})
|
||||
}
|
||||
|
||||
pub fn create_checker<'a, S: Into<T> + PartialEq<T> + Clone>(
|
||||
pub fn parser<'a, S: Into<T> + PartialEq<T> + Clone>(
|
||||
&'a self,
|
||||
iter: &'a mut dyn Iterator<Item = S>,
|
||||
iter: &'a mut dyn Iterator<Item = Result<S, String>>,
|
||||
) -> LLTabelParser<N, T, S> {
|
||||
assert!(
|
||||
self.parse_table.is_some(),
|
||||
|
@ -324,7 +329,7 @@ pub struct LLTabelParser<
|
|||
S: Into<T> + PartialEq<T> + Clone,
|
||||
> {
|
||||
grammar: &'a LLGrammar<N, T>,
|
||||
input: &'a mut dyn Iterator<Item = S>,
|
||||
input: &'a mut dyn Iterator<Item = Result<S, String>>,
|
||||
}
|
||||
|
||||
impl<
|
||||
|
@ -334,49 +339,121 @@ impl<
|
|||
S: Into<T> + PartialEq<T> + Clone + Debug,
|
||||
> LLTabelParser<'a, N, T, S>
|
||||
{
|
||||
pub fn parse(&mut self) -> bool {
|
||||
let mut stack: Vec<Sentential<Option<T>, N>> = vec![
|
||||
Sentential::Terminal(None),
|
||||
Sentential::NoneTerminal(self.grammar.start.clone()),
|
||||
];
|
||||
let mut next = self.input.next();
|
||||
pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> {
|
||||
// stack of table driven parser
|
||||
// content of the vec:
|
||||
// - first element: the not-yet-processed symbols of one rule (stored reversed); all of them combined represent the complete stack of the parser.
|
||||
// - second element: the parse tree of that rule; the rule has to be able to derive the code covered by its inner children plus the unparsed code described by the accompanying first element.
|
||||
let mut stack: Vec<(Vec<Sentential<T, N>>, ParseTree<N, S>)> = vec![(
|
||||
vec![Sentential::NoneTerminal(self.grammar.start.clone())],
|
||||
ParseTree::new(None),
|
||||
)];
|
||||
|
||||
let mut next = match self.input.next() {
|
||||
Some(Ok(d)) => Some(d),
|
||||
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
|
||||
None => None,
|
||||
};
|
||||
|
||||
loop {
|
||||
if next.is_none() {
|
||||
println!("EOF");
|
||||
return self.input.size_hint().0 == 0;
|
||||
}
|
||||
let state = stack.pop();
|
||||
match state {
|
||||
Some(Sentential::Terminal(t)) => match (next, t) {
|
||||
(Some(a), Some(b)) if a == b => {
|
||||
next = self.input.next();
|
||||
// look at current state
|
||||
let mut state = stack.pop();
|
||||
match state.as_mut() {
|
||||
// processing inner state, of tracked rules
|
||||
Some((inner_stack, rule)) => {
|
||||
let inner_state = inner_stack.pop();
|
||||
match inner_state {
|
||||
// match terminal, check if equal
|
||||
Some(Sentential::Terminal(terminal)) => match (next, terminal) {
|
||||
// actual vs. expected input
|
||||
(Some(inn), expect) if inn == expect => {
|
||||
next = match self.input.next() {
|
||||
Some(Ok(n)) => Some(n),
|
||||
Some(Err(err)) => {
|
||||
return Err(format!("Invalid token: {}", err))
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
rule.childs.push(NodeChild::Data(inn));
|
||||
stack.push(state.unwrap());
|
||||
}
|
||||
(a, b) => {
|
||||
return Err(format!("found: {:?} expected: {:?}", a, b));
|
||||
}
|
||||
},
|
||||
// take next none terminal and apply rule from parse table.
|
||||
Some(Sentential::NoneTerminal(none_term)) => {
|
||||
// load rule
|
||||
let Some((id, new_rule)) = self
|
||||
.grammar
|
||||
.parse_table(&none_term, &next.as_ref().map(|f| f.clone().into()))
|
||||
else {
|
||||
// no rule
|
||||
return Err(format!(
|
||||
"Unexpected token: {}",
|
||||
next.map(|f| format!("{f:?}"))
|
||||
.unwrap_or("end of file".to_string())
|
||||
));
|
||||
};
|
||||
|
||||
// reverse the rule: the Vec is used as a stack, so the first symbol of the rule has to end up last (on top)
|
||||
let new_rule_rev =
|
||||
new_rule.iter().rev().map(|f| f.clone()).collect::<Vec<_>>();
|
||||
// memorize current state/rule for later
|
||||
stack.push(state.unwrap());
|
||||
// process next rule
|
||||
stack.push((
|
||||
new_rule_rev,
|
||||
ParseTree {
|
||||
rule: Some((none_term, id)),
|
||||
childs: Vec::new(),
|
||||
},
|
||||
));
|
||||
}
|
||||
// inner state is empty: current rule is finished
|
||||
None => {
|
||||
// if stack is empty, this is the initial state: finish or err
|
||||
let Some(last) = stack.last_mut() else {
|
||||
// ok: input has ended
|
||||
if next.is_none() {
|
||||
return Ok(state.unwrap().1);
|
||||
}
|
||||
// there is still input left, but none was expected
|
||||
return Err(format!("Expected end of file."));
|
||||
};
|
||||
last.1.childs.push(NodeChild::Child(state.unwrap().1));
|
||||
}
|
||||
}
|
||||
(None, None) => {
|
||||
next = self.input.next();
|
||||
}
|
||||
(a, b) => {
|
||||
println!("not matching terminals: {a:?}, {b:?}");
|
||||
return false;
|
||||
}
|
||||
},
|
||||
Some(Sentential::NoneTerminal(nt)) => {
|
||||
let Some(a) = self
|
||||
.grammar
|
||||
.parse_table(&nt, &next.as_ref().map(|f| f.clone().into()))
|
||||
else {
|
||||
println!("no parse table entry: {nt:?} next: {next:?}");
|
||||
return false;
|
||||
};
|
||||
stack.extend(a.iter().rev().map(|f| match f {
|
||||
Sentential::Terminal(t) => Sentential::Terminal(Some(t.clone())),
|
||||
Sentential::NoneTerminal(nt) => Sentential::NoneTerminal(nt.clone()),
|
||||
}));
|
||||
}
|
||||
// should not be possible, because every other path either pushes back onto the stack or returns
|
||||
None => {
|
||||
println!("EOS");
|
||||
return false;
|
||||
return Err(format!("Err: EOS"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
/// One child entry of a [`ParseTree`] node.
///
/// The parser appends a `Data` child whenever an input token matches the
/// expected terminal, and a `Child` entry whenever a nested rule has been
/// processed completely.
#[derive(Debug, Clone)]
|
||||
pub enum NodeChild<N, S> {
|
||||
/// Sub-tree produced by a nested rule application.
Child(ParseTree<N, S>),
|
||||
/// An input token that was matched against a terminal.
Data(S),
|
||||
}
|
||||
|
||||
/// Node of the tree built by the table-driven parser.
///
/// `N` is the non-terminal type and `S` the type of the input tokens that are
/// stored as leaf data.
#[derive(Debug, Clone)]
|
||||
#[allow(dead_code)]
|
||||
pub struct ParseTree<N, S> {
|
||||
/// The rule this node was derived with: the non-terminal and the index of
/// the applied production as returned by the parse table lookup.
/// `None` for the root node the parser starts with.
pub rule: Option<(N, usize)>,
|
||||
/// Children in the order they were pushed by the parser.
pub childs: Vec<NodeChild<N, S>>,
|
||||
}
|
||||
|
||||
impl<N, S> ParseTree<N, S> {
|
||||
/// Creates a node for `rule` with an empty child list.
///
/// Pass `None` for a node that is not associated with any rule (the parser
/// does this for its initial root node).
pub fn new(rule: Option<(N, usize)>) -> Self {
|
||||
Self {
|
||||
rule,
|
||||
// children are added later while parsing
childs: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
31
src/main.rs
31
src/main.rs
|
@ -34,7 +34,7 @@ double_enum!(
|
|||
|
||||
scanner!(
|
||||
Tokens,
|
||||
r"^\s|\t|\n" : |_,_| {
|
||||
r"^\s|\t|\n|\r" : |_,_| {
|
||||
Some(WhiteSpace)
|
||||
}
|
||||
r"^;" : |_,_| {
|
||||
|
@ -64,6 +64,9 @@ scanner!(
|
|||
r"^while" : |_,_| {
|
||||
Some(While)
|
||||
}
|
||||
r"^if" : |_,_| {
|
||||
Some(If)
|
||||
}
|
||||
r"^\(" : |_,_| {
|
||||
Some(LBrace)
|
||||
}
|
||||
|
@ -98,8 +101,10 @@ scanner!(
|
|||
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
|
||||
enum NoneTerminals {
|
||||
P, // Program, ; separated
|
||||
E, // Expression one line
|
||||
P, // Program, ; separated
|
||||
L, // Line of code
|
||||
Li, // line extended for assignments
|
||||
E, // Expression
|
||||
Ei, // Expression extended additive
|
||||
T, // Term, only containing Factors
|
||||
Ti, // Term extend multiplicative
|
||||
|
@ -124,11 +129,14 @@ fn grammer() -> LLGrammar<NoneTerminals, BareTokens> {
|
|||
use NoneTerminals::*;
|
||||
ll_grammar![
|
||||
start: P;
|
||||
P -> E,Semicolon,P;
|
||||
P -> L,Semicolon,P;
|
||||
P -> ;
|
||||
L -> While,E,LQBrace,P,RQBrace;
|
||||
L -> If,E,LQBrace,P,RQBrace;
|
||||
L -> Ident,FI,Li;
|
||||
Li -> Assign,E;
|
||||
Li -> ;
|
||||
E -> T,Ei;
|
||||
E -> While,LBrace,E,RBrace,LQBrace,P,RQBrace;
|
||||
Ei -> Assign,T,Ei;
|
||||
Ei -> Eq,T,Ei;
|
||||
Ei -> Neq,T,Ei;
|
||||
Ei -> Add,T,Ei;
|
||||
|
@ -150,11 +158,8 @@ fn grammer() -> LLGrammar<NoneTerminals, BareTokens> {
|
|||
}
|
||||
|
||||
fn main() {
|
||||
let code = String::from("a = 4; while(a != 5) {a = a+1;};");
|
||||
let code = String::from("a = 4;while a != 5 { a = a+1; }; if a == 5 { a = 4; };");
|
||||
let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);
|
||||
/* for token in m.iter_mut() {
|
||||
println!("{:?}", token);
|
||||
} */
|
||||
|
||||
let mut grammar = grammer();
|
||||
grammar.gen_follow();
|
||||
|
@ -163,8 +168,6 @@ fn main() {
|
|||
let conflict = grammar.gen_parse_table();
|
||||
println!("conflict: {conflict}");
|
||||
println!("prase table: {:?}", grammar.parse_table);
|
||||
println!(
|
||||
"parsed: {}",
|
||||
grammar.create_checker(&mut m.iter_mut()).parse()
|
||||
)
|
||||
println!("parse\n\n");
|
||||
println!("parsed: {:?}", grammar.parser(&mut m.iter_mut()).parse())
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use std::{iter::Peekable, marker::PhantomData};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! scanner {
|
||||
|
@ -33,6 +33,7 @@ pub struct Scanner<T: MatchNext<T> + PartialEq> {
|
|||
_a: PhantomData<T>,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
impl<T: MatchNext<T> + PartialEq> Scanner<T> {
|
||||
pub fn new(code: String) -> Self {
|
||||
Self {
|
||||
|
@ -55,8 +56,8 @@ impl<T: MatchNext<T> + PartialEq> Scanner<T> {
|
|||
|
||||
pub struct ScannerIter<'a, T: MatchNext<T> + PartialEq>(&'a mut Scanner<T>);
|
||||
|
||||
impl<'a, T: MatchNext<T> + PartialEq> Iterator for ScannerIter<'a, T> {
|
||||
type Item = T;
|
||||
impl<'a, T: MatchNext<T> + PartialEq + std::fmt::Debug> Iterator for ScannerIter<'a, T> {
|
||||
type Item = Result<T, String>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.0.code.is_empty() {
|
||||
|
@ -68,9 +69,15 @@ impl<'a, T: MatchNext<T> + PartialEq> Iterator for ScannerIter<'a, T> {
|
|||
if self.0.skip.contains(&token) {
|
||||
continue;
|
||||
}
|
||||
return Some(token);
|
||||
return Some(Ok(token));
|
||||
}
|
||||
return None;
|
||||
return self
|
||||
.0
|
||||
.code
|
||||
.split(" ")
|
||||
.collect::<Vec<_>>()
|
||||
.first()
|
||||
.map(|f| Err(f.to_string()));
|
||||
}
|
||||
}
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
|
|
Loading…
Reference in a new issue