working ll parser
This commit is contained in:
parent
db87495a63
commit
5fafa6f342
3 changed files with 148 additions and 61 deletions
|
@ -289,7 +289,7 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> LLGramm
|
||||||
&self,
|
&self,
|
||||||
none_terminal: &N,
|
none_terminal: &N,
|
||||||
terminal: &Option<T>,
|
terminal: &Option<T>,
|
||||||
) -> Option<&Vec<Sentential<T, N>>> {
|
) -> Option<(usize, &Vec<Sentential<T, N>>)> {
|
||||||
assert!(
|
assert!(
|
||||||
self.parse_table.is_some(),
|
self.parse_table.is_some(),
|
||||||
"Please call gen_parse_table before this!"
|
"Please call gen_parse_table before this!"
|
||||||
|
@ -298,12 +298,17 @@ impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> LLGramm
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.get(&(none_terminal.clone(), terminal.clone()))
|
.get(&(none_terminal.clone(), terminal.clone()))
|
||||||
.and_then(|f| self.rules.get(none_terminal).and_then(|rule| rule.get(*f)))
|
.and_then(|f| {
|
||||||
|
self.rules
|
||||||
|
.get(none_terminal)
|
||||||
|
.and_then(|rule| rule.get(*f))
|
||||||
|
.map(|rule| (*f, rule))
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn create_checker<'a, S: Into<T> + PartialEq<T> + Clone>(
|
pub fn parser<'a, S: Into<T> + PartialEq<T> + Clone>(
|
||||||
&'a self,
|
&'a self,
|
||||||
iter: &'a mut dyn Iterator<Item = S>,
|
iter: &'a mut dyn Iterator<Item = Result<S, String>>,
|
||||||
) -> LLTabelParser<N, T, S> {
|
) -> LLTabelParser<N, T, S> {
|
||||||
assert!(
|
assert!(
|
||||||
self.parse_table.is_some(),
|
self.parse_table.is_some(),
|
||||||
|
@ -324,7 +329,7 @@ pub struct LLTabelParser<
|
||||||
S: Into<T> + PartialEq<T> + Clone,
|
S: Into<T> + PartialEq<T> + Clone,
|
||||||
> {
|
> {
|
||||||
grammar: &'a LLGrammar<N, T>,
|
grammar: &'a LLGrammar<N, T>,
|
||||||
input: &'a mut dyn Iterator<Item = S>,
|
input: &'a mut dyn Iterator<Item = Result<S, String>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<
|
impl<
|
||||||
|
@ -334,49 +339,121 @@ impl<
|
||||||
S: Into<T> + PartialEq<T> + Clone + Debug,
|
S: Into<T> + PartialEq<T> + Clone + Debug,
|
||||||
> LLTabelParser<'a, N, T, S>
|
> LLTabelParser<'a, N, T, S>
|
||||||
{
|
{
|
||||||
pub fn parse(&mut self) -> bool {
|
pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> {
|
||||||
let mut stack: Vec<Sentential<Option<T>, N>> = vec![
|
// stack of table driven parser
|
||||||
Sentential::Terminal(None),
|
// content of the vec:
|
||||||
Sentential::NoneTerminal(self.grammar.start.clone()),
|
// - first element: all of them combined represent the complete stack of the parser.
|
||||||
];
|
// - second element: the rule that has to be able to derive the code made up of its inner children plus the unparsed code from the accompanying first element.
|
||||||
let mut next = self.input.next();
|
let mut stack: Vec<(Vec<Sentential<T, N>>, ParseTree<N, S>)> = vec![(
|
||||||
|
vec![Sentential::NoneTerminal(self.grammar.start.clone())],
|
||||||
|
ParseTree::new(None),
|
||||||
|
)];
|
||||||
|
|
||||||
|
let mut next = match self.input.next() {
|
||||||
|
Some(Ok(d)) => Some(d),
|
||||||
|
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
|
||||||
|
None => None,
|
||||||
|
};
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
if next.is_none() {
|
// look at current state
|
||||||
println!("EOF");
|
let mut state = stack.pop();
|
||||||
return self.input.size_hint().0 == 0;
|
match state.as_mut() {
|
||||||
}
|
// processing inner state, of tracked rules
|
||||||
let state = stack.pop();
|
Some((inner_stack, rule)) => {
|
||||||
match state {
|
let inner_state = inner_stack.pop();
|
||||||
Some(Sentential::Terminal(t)) => match (next, t) {
|
match inner_state {
|
||||||
(Some(a), Some(b)) if a == b => {
|
// match terminal, check if equal
|
||||||
next = self.input.next();
|
Some(Sentential::Terminal(terminal)) => match (next, terminal) {
|
||||||
|
// actual vs. expected input
|
||||||
|
(Some(inn), expect) if inn == expect => {
|
||||||
|
next = match self.input.next() {
|
||||||
|
Some(Ok(n)) => Some(n),
|
||||||
|
Some(Err(err)) => {
|
||||||
|
return Err(format!("Invalid token: {}", err))
|
||||||
|
}
|
||||||
|
None => None,
|
||||||
|
};
|
||||||
|
rule.childs.push(NodeChild::Data(inn));
|
||||||
|
stack.push(state.unwrap());
|
||||||
|
}
|
||||||
|
(a, b) => {
|
||||||
|
return Err(format!("found: {:?} expected: {:?}", a, b));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
// take next none terminal and apply rule from parse table.
|
||||||
|
Some(Sentential::NoneTerminal(none_term)) => {
|
||||||
|
// load rule
|
||||||
|
let Some((id, new_rule)) = self
|
||||||
|
.grammar
|
||||||
|
.parse_table(&none_term, &next.as_ref().map(|f| f.clone().into()))
|
||||||
|
else {
|
||||||
|
// no rule
|
||||||
|
return Err(format!(
|
||||||
|
"Unexpected token: {}",
|
||||||
|
next.map(|f| format!("{f:?}"))
|
||||||
|
.unwrap_or("end of file".to_string())
|
||||||
|
));
|
||||||
|
};
|
||||||
|
|
||||||
|
// reverse the rule: the vec is used as a stack, so its symbols must be stored in reverse order
|
||||||
|
let new_rule_rev =
|
||||||
|
new_rule.iter().rev().map(|f| f.clone()).collect::<Vec<_>>();
|
||||||
|
// memorize current state/rule for later
|
||||||
|
stack.push(state.unwrap());
|
||||||
|
// process next rule
|
||||||
|
stack.push((
|
||||||
|
new_rule_rev,
|
||||||
|
ParseTree {
|
||||||
|
rule: Some((none_term, id)),
|
||||||
|
childs: Vec::new(),
|
||||||
|
},
|
||||||
|
));
|
||||||
|
}
|
||||||
|
// inner state is empty: current rule is finished
|
||||||
|
None => {
|
||||||
|
// if stack is empty, this is the initial state: finish or err
|
||||||
|
let Some(last) = stack.last_mut() else {
|
||||||
|
// ok: input has ended
|
||||||
|
if next.is_none() {
|
||||||
|
return Ok(state.unwrap().1);
|
||||||
|
}
|
||||||
|
// still code left, but not expected
|
||||||
|
return Err(format!("Expected end of file."));
|
||||||
|
};
|
||||||
|
last.1.childs.push(NodeChild::Child(state.unwrap().1));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
(None, None) => {
|
|
||||||
next = self.input.next();
|
|
||||||
}
|
|
||||||
(a, b) => {
|
|
||||||
println!("not matching terminals: {a:?}, {b:?}");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
},
|
|
||||||
Some(Sentential::NoneTerminal(nt)) => {
|
|
||||||
let Some(a) = self
|
|
||||||
.grammar
|
|
||||||
.parse_table(&nt, &next.as_ref().map(|f| f.clone().into()))
|
|
||||||
else {
|
|
||||||
println!("no parse table entry: {nt:?} next: {next:?}");
|
|
||||||
return false;
|
|
||||||
};
|
|
||||||
stack.extend(a.iter().rev().map(|f| match f {
|
|
||||||
Sentential::Terminal(t) => Sentential::Terminal(Some(t.clone())),
|
|
||||||
Sentential::NoneTerminal(nt) => Sentential::NoneTerminal(nt.clone()),
|
|
||||||
}));
|
|
||||||
}
|
}
|
||||||
|
// should not be possible, because every other path either pushes back to the stack or returns
|
||||||
None => {
|
None => {
|
||||||
println!("EOS");
|
return Err(format!("Err: EOS"));
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum NodeChild<N, S> {
|
||||||
|
Child(ParseTree<N, S>),
|
||||||
|
Data(S),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub struct ParseTree<N, S> {
|
||||||
|
pub rule: Option<(N, usize)>,
|
||||||
|
pub childs: Vec<NodeChild<N, S>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<N, S> ParseTree<N, S> {
|
||||||
|
pub fn new(rule: Option<(N, usize)>) -> Self {
|
||||||
|
Self {
|
||||||
|
rule,
|
||||||
|
childs: Vec::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
31
src/main.rs
31
src/main.rs
|
@ -34,7 +34,7 @@ double_enum!(
|
||||||
|
|
||||||
scanner!(
|
scanner!(
|
||||||
Tokens,
|
Tokens,
|
||||||
r"^\s|\t|\n" : |_,_| {
|
r"^\s|\t|\n|\r" : |_,_| {
|
||||||
Some(WhiteSpace)
|
Some(WhiteSpace)
|
||||||
}
|
}
|
||||||
r"^;" : |_,_| {
|
r"^;" : |_,_| {
|
||||||
|
@ -64,6 +64,9 @@ scanner!(
|
||||||
r"^while" : |_,_| {
|
r"^while" : |_,_| {
|
||||||
Some(While)
|
Some(While)
|
||||||
}
|
}
|
||||||
|
r"^if" : |_,_| {
|
||||||
|
Some(If)
|
||||||
|
}
|
||||||
r"^\(" : |_,_| {
|
r"^\(" : |_,_| {
|
||||||
Some(LBrace)
|
Some(LBrace)
|
||||||
}
|
}
|
||||||
|
@ -98,8 +101,10 @@ scanner!(
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
|
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
|
||||||
enum NoneTerminals {
|
enum NoneTerminals {
|
||||||
P, // Program, ; separated
|
P, // Program, ; separated
|
||||||
E, // Expression one line
|
L, // Line of code
|
||||||
|
Li, // line extended for assignments
|
||||||
|
E, // Expression
|
||||||
Ei, // Expression extended additive
|
Ei, // Expression extended additive
|
||||||
T, // Term, only containing Factors
|
T, // Term, only containing Factors
|
||||||
Ti, // Term extend multiplicative
|
Ti, // Term extend multiplicative
|
||||||
|
@ -124,11 +129,14 @@ fn grammer() -> LLGrammar<NoneTerminals, BareTokens> {
|
||||||
use NoneTerminals::*;
|
use NoneTerminals::*;
|
||||||
ll_grammar![
|
ll_grammar![
|
||||||
start: P;
|
start: P;
|
||||||
P -> E,Semicolon,P;
|
P -> L,Semicolon,P;
|
||||||
P -> ;
|
P -> ;
|
||||||
|
L -> While,E,LQBrace,P,RQBrace;
|
||||||
|
L -> If,E,LQBrace,P,RQBrace;
|
||||||
|
L -> Ident,FI,Li;
|
||||||
|
Li -> Assign,E;
|
||||||
|
Li -> ;
|
||||||
E -> T,Ei;
|
E -> T,Ei;
|
||||||
E -> While,LBrace,E,RBrace,LQBrace,P,RQBrace;
|
|
||||||
Ei -> Assign,T,Ei;
|
|
||||||
Ei -> Eq,T,Ei;
|
Ei -> Eq,T,Ei;
|
||||||
Ei -> Neq,T,Ei;
|
Ei -> Neq,T,Ei;
|
||||||
Ei -> Add,T,Ei;
|
Ei -> Add,T,Ei;
|
||||||
|
@ -150,11 +158,8 @@ fn grammer() -> LLGrammar<NoneTerminals, BareTokens> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let code = String::from("a = 4; while(a != 5) {a = a+1;};");
|
let code = String::from("a = 4;while a != 5 { a = a+1; }; if a == 5 { a = 4; };");
|
||||||
let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);
|
let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);
|
||||||
/* for token in m.iter_mut() {
|
|
||||||
println!("{:?}", token);
|
|
||||||
} */
|
|
||||||
|
|
||||||
let mut grammar = grammer();
|
let mut grammar = grammer();
|
||||||
grammar.gen_follow();
|
grammar.gen_follow();
|
||||||
|
@ -163,8 +168,6 @@ fn main() {
|
||||||
let conflict = grammar.gen_parse_table();
|
let conflict = grammar.gen_parse_table();
|
||||||
println!("conflict: {conflict}");
|
println!("conflict: {conflict}");
|
||||||
println!("prase table: {:?}", grammar.parse_table);
|
println!("prase table: {:?}", grammar.parse_table);
|
||||||
println!(
|
println!("parse\n\n");
|
||||||
"parsed: {}",
|
println!("parsed: {:?}", grammar.parser(&mut m.iter_mut()).parse())
|
||||||
grammar.create_checker(&mut m.iter_mut()).parse()
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
use std::{iter::Peekable, marker::PhantomData};
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
#[macro_export]
|
#[macro_export]
|
||||||
macro_rules! scanner {
|
macro_rules! scanner {
|
||||||
|
@ -33,6 +33,7 @@ pub struct Scanner<T: MatchNext<T> + PartialEq> {
|
||||||
_a: PhantomData<T>,
|
_a: PhantomData<T>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
impl<T: MatchNext<T> + PartialEq> Scanner<T> {
|
impl<T: MatchNext<T> + PartialEq> Scanner<T> {
|
||||||
pub fn new(code: String) -> Self {
|
pub fn new(code: String) -> Self {
|
||||||
Self {
|
Self {
|
||||||
|
@ -55,8 +56,8 @@ impl<T: MatchNext<T> + PartialEq> Scanner<T> {
|
||||||
|
|
||||||
pub struct ScannerIter<'a, T: MatchNext<T> + PartialEq>(&'a mut Scanner<T>);
|
pub struct ScannerIter<'a, T: MatchNext<T> + PartialEq>(&'a mut Scanner<T>);
|
||||||
|
|
||||||
impl<'a, T: MatchNext<T> + PartialEq> Iterator for ScannerIter<'a, T> {
|
impl<'a, T: MatchNext<T> + PartialEq + std::fmt::Debug> Iterator for ScannerIter<'a, T> {
|
||||||
type Item = T;
|
type Item = Result<T, String>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
if self.0.code.is_empty() {
|
if self.0.code.is_empty() {
|
||||||
|
@ -68,9 +69,15 @@ impl<'a, T: MatchNext<T> + PartialEq> Iterator for ScannerIter<'a, T> {
|
||||||
if self.0.skip.contains(&token) {
|
if self.0.skip.contains(&token) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
return Some(token);
|
return Some(Ok(token));
|
||||||
}
|
}
|
||||||
return None;
|
return self
|
||||||
|
.0
|
||||||
|
.code
|
||||||
|
.split(" ")
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.first()
|
||||||
|
.map(|f| Err(f.to_string()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||||
|
|
Loading…
Reference in a new issue