lr0 automaton, lib

This commit is contained in:
jusax23 2024-11-05 16:12:47 +01:00
parent 686e43448a
commit 46ca5ecc50
Signed by: jusax23
GPG key ID: 499E2AA870C1CD41
10 changed files with 764 additions and 519 deletions

View file

@ -2,6 +2,14 @@
name = "rcompiler"
version = "0.1.0"
edition = "2021"
default-run = "main"
[dependencies]
regex = "1.11.1"
[[bin]]
name = "main"
path = "src/main.rs"
[[bin]]
name = "book"

82
src/bin/book.rs Normal file
View file

@ -0,0 +1,82 @@
use ll_grammar::Skippable;
use rcompiler::prelude::*;
use regex::Match;
use std::collections::HashMap;
// Token definitions: `double_enum!` generates two enums, `BareTokens`
// (payload-free, used inside grammar rules) and `Tokens` (with payloads,
// produced by the scanner), plus conversions/comparisons between them.
double_enum!(
    BareTokens, Tokens {
        WhiteSpace,
        Assign,
        Add,
        LBrace,
        RBrace,
        Ident(String),
    }
);
// Scanner rules: each regex is tried against the remaining input and the
// closure builds the matched token. Every pattern must be anchored with `^`
// so it only matches at the current scan position.
token_scanner!(
    Tokens,
    // FIX: was r"^\s|\t|\n|\r" — alternation binds looser than `^`, so `\t`,
    // `\n` and `\r` were unanchored and could match anywhere in the input.
    // `\s` already includes tab/newline/carriage-return, so a single
    // anchored `\s` covers all intended cases.
    r"^\s" : |_,_| {
        Some(WhiteSpace)
    }
    r"^\+" : |_,_| {
        Some(Add)
    }
    r"^=" : |_,_| {
        Some(Assign)
    }
    r"^\(" : |_,_| {
        Some(LBrace)
    }
    r"^\)" : |_,_| {
        Some(RBrace)
    }
    // Identifier: letter followed by word characters.
    r"^[a-zA-Z](\w)*" : |_, m: Match<'_>| {
        Some(Ident(String::from(m.as_str())))
    }
);
/// Non-terminals of the demo grammar built by [`grammer`]:
/// `S` is the start symbol; `E` and `P` appear in its productions
/// (presumably expression and primary — see the rules in `grammer`).
#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
enum NoneTerminals {
    S,
    E,
    P,
}
// Allow writing a non-terminal directly in grammar rules; note that in
// `Sentential<N, T>` the FIRST parameter is the non-terminal type.
impl<N> From<NoneTerminals> for Sentential<NoneTerminals, N> {
    fn from(value: NoneTerminals) -> Self {
        Sentential::NoneTerminal(value)
    }
}
// Allow writing a terminal directly in grammar rules; the generic here is
// the non-terminal slot of `Sentential` (named `T` — confusing, but it fills
// the `N` position of `Sentential<N, T>`).
impl<T> From<BareTokens> for Sentential<T, BareTokens> {
    fn from(value: BareTokens) -> Self {
        Sentential::Terminal(value)
    }
}
/// Build the demo grammar (NOTE(review): the name is a typo of "grammar",
/// kept because `main` calls it by this name):
///
///     S -> Ident = E
///     E -> E + P | P
///     P -> Ident | ( E ) | Ident ( E )
///
/// `E -> E, Add, P` is left-recursive, so this grammar targets the LR(0)
/// construction exercised in `main`, not an LL parser.
fn grammer() -> Grammar<NoneTerminals, BareTokens> {
    use BareTokens::*;
    use NoneTerminals::*;
    cfg_grammar![
        start: S;
        S -> Ident, Assign, E;
        E -> E, Add, P;
        E -> P;
        P -> Ident;
        P -> LBrace, E, RBrace;
        P -> Ident, LBrace, E, RBrace;
    ]
}
/// Demo entry point: builds the grammar, generates and prints its
/// FIRST/FOLLOW sets, then generates and prints the LR(0) automaton.
/// The commented-out lines show the scanner usage this example came from.
fn main() {
    //let code = String::from("a = b()+c+(d+e())");
    //let mut m = Scanner::<Tokens>::new(code).with_skipping(Tokens::WhiteSpace);
    let mut grammar = grammer();
    grammar.gen_follow();
    println!("first: {:?}", grammar.first);
    println!("follow: {:?}", grammar.follow);
    grammar.gen_lr0_automaton();
    println!("automaton: {:?}", grammar.lr0_automaton);
}

261
src/cfg/ll_grammar.rs Normal file
View file

@ -0,0 +1,261 @@
use std::{collections::HashMap, fmt::Debug, hash::Hash};
use super::{Grammar, Sentential};
impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar<N, T> {
    /// Build the LL(1) parse table from the FIRST and FOLLOW sets
    /// (generating FOLLOW first if needed).
    ///
    /// Returns `true` if a table cell was assigned twice — i.e. the grammar
    /// is not LL(1). Returns `false` without rebuilding if the table already
    /// exists.
    pub fn gen_ll_parse_table(&mut self) -> bool {
        if self.follow.is_none() {
            self.gen_follow();
        }
        if self.ll_parse_table.is_some() {
            return false;
        }
        let mut conflict = false;
        let mut parse_table: HashMap<(N, Option<T>), usize> = HashMap::new();
        for (from, to) in self.rules.iter() {
            for (id, to) in to.iter().enumerate() {
                // rule is A -> al
                // terminal == None means epsilon
                for terminal in self.first(to) {
                    match terminal {
                        // let a be in First(al) -> add to T[A,a] = A->al (using the index of al)
                        Some(terminal) => {
                            conflict |= parse_table
                                .insert((from.clone(), Some(terminal.clone())), id)
                                .is_some();
                        }
                        // if first contains epsilon then
                        // let b be in Follow(A) -> add to T[A,b] = A->al (using the index of al)
                        None => {
                            for terminal in self.follow(from).iter() {
                                conflict |= parse_table
                                    .insert((from.clone(), terminal.clone()), id)
                                    .is_some()
                            }
                        }
                    }
                }
            }
        }
        self.ll_parse_table = Some(parse_table);
        conflict
    }
    /// get parse_table rule
    /// None means error.
    ///
    /// Looks up which production of `none_terminal` applies for lookahead
    /// `terminal` (`None` = end of input) and returns the production index
    /// together with the production itself.
    ///
    /// # Panics
    /// Panics if the parse table has not been generated yet.
    pub fn ll_parse_table(
        &self,
        none_terminal: &N,
        terminal: &Option<T>,
    ) -> Option<(usize, &Vec<Sentential<N, T>>)> {
        assert!(
            self.ll_parse_table.is_some(),
            "Please call gen_parse_table before this!"
        );
        self.ll_parse_table
            .as_ref()
            .unwrap()
            .get(&(none_terminal.clone(), terminal.clone()))
            .and_then(|f| {
                self.rules
                    .get(none_terminal)
                    .and_then(|rule| rule.get(*f))
                    .map(|rule| (*f, rule))
            })
    }
    /// Create a table-driven LL parser over a fallible token stream.
    /// `S` is the scanner token type, convertible into and comparable with
    /// the grammar's terminal type `T`.
    ///
    /// # Panics
    /// Panics if the parse table has not been generated yet.
    pub fn ll_parser<'a, S: Into<T> + PartialEq<T> + Clone>(
        &'a self,
        iter: &'a mut dyn Iterator<Item = Result<S, String>>,
    ) -> LLTabelParser<N, T, S> {
        assert!(
            self.ll_parse_table.is_some(),
            "Please call gen_parse_table before this!"
        );
        LLTabelParser {
            input: iter,
            grammar: self,
        }
    }
}
/// Just checks a program. Does not generate output.
///
/// Table-driven LL(1) parser over a fallible token stream.
/// (NOTE(review): "Tabel" is a typo of "Table"; the name is public API and
/// therefore left unchanged.)
pub struct LLTabelParser<
    'a,
    N: PartialEq + Eq + Hash + Clone,
    T: PartialEq + Eq + Hash + Clone,
    S: Into<T> + PartialEq<T> + Clone,
> {
    // Grammar whose LL parse table has already been generated.
    grammar: &'a Grammar<N, T>,
    // Token source; an `Err` item aborts the parse.
    input: &'a mut dyn Iterator<Item = Result<S, String>>,
}
impl<
        'a,
        N: PartialEq + Eq + Hash + Clone + Debug,
        T: PartialEq + Eq + Hash + Clone + Debug,
        S: Into<T> + PartialEq<T> + Clone + Debug,
    > LLTabelParser<'a, N, T, S>
{
    /// Run the table-driven LL(1) parse over the whole input.
    ///
    /// Returns the parse tree on success, or a `String` describing the first
    /// error (scanner failure, token mismatch, missing table entry, or
    /// trailing input).
    pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> {
        // stack of table driven parser
        // content of the vec:
        // - first element: all of them combined represent the complete stack of the parser.
        // - second element: rule has to be able to derive the code defined by its inner childs and the unparsed code from the accompanying first element.
        let mut stack: Vec<(Vec<Sentential<N, T>>, ParseTree<N, S>)> = vec![(
            vec![Sentential::NoneTerminal(self.grammar.start.clone())],
            ParseTree::new(None),
        )];
        // Lookahead: next unconsumed token (None = end of input).
        let mut next = match self.input.next() {
            Some(Ok(d)) => Some(d),
            Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
            None => None,
        };
        loop {
            // look at current state
            let mut state = stack.pop();
            match state.as_mut() {
                // processing inner state, of tracked rules
                Some((inner_stack, rule)) => {
                    let inner_state = inner_stack.pop();
                    match inner_state {
                        // match terminal, check if equal
                        Some(Sentential::Terminal(terminal)) => match (next, terminal) {
                            // actual vs. expected input
                            (Some(inn), expect) if inn == expect => {
                                // consume the token and advance the lookahead
                                next = match self.input.next() {
                                    Some(Ok(n)) => Some(n),
                                    Some(Err(err)) => {
                                        return Err(format!("Invalid token: {}", err))
                                    }
                                    None => None,
                                };
                                rule.childs.push(NodeChild::Data(inn));
                                stack.push(state.unwrap());
                            }
                            (a, b) => {
                                return Err(format!("found: {:?} expected: {:?}", a, b));
                            }
                        },
                        // take next none terminal and apply rule from parse table.
                        Some(Sentential::NoneTerminal(none_term)) => {
                            // load rule
                            let Some((id, new_rule)) = self
                                .grammar
                                .ll_parse_table(&none_term, &next.as_ref().map(|f| f.clone().into()))
                            else {
                                // no rule
                                return Err(format!(
                                    "Unexpected token: {}",
                                    next.map(|f| format!("{f:?}"))
                                        .unwrap_or("end of file".to_string())
                                ));
                            };
                            // reverse rule: because, uses vec as stack, but reversed
                            let new_rule_rev =
                                new_rule.iter().rev().map(|f| f.clone()).collect::<Vec<_>>();
                            // memorize current state/rule for later
                            stack.push(state.unwrap());
                            // process next rule
                            stack.push((
                                new_rule_rev,
                                ParseTree {
                                    rule: Some((none_term, id)),
                                    childs: Vec::new(),
                                },
                            ));
                        }
                        // inner state is empty: current rule is finished
                        None => {
                            // if stack is empty, this is the initial state: finish or err
                            let Some(last) = stack.last_mut() else {
                                // ok: input has ended
                                if next.is_none() {
                                    return Ok(state.unwrap().1);
                                }
                                // still code left, but not expected
                                return Err(format!("Expected end of file."));
                            };
                            // attach the finished subtree to its parent
                            last.1.childs.push(NodeChild::Child(state.unwrap().1));
                        }
                    }
                }
                // should not be possible, because every other path pushes to the stack back or returns
                None => {
                    return Err(format!("Err: EOS"));
                }
            }
        }
    }
}
//
/// Marker trait for non-terminals whose nodes may be removed from the
/// parse tree during cleanup ([`ParseTree::clean`]). Default: keep all.
pub trait Skippable {
    fn skippable(&self) -> bool {
        false
    }
}
/// A child of a [`ParseTree`] node: either a nested subtree (from a
/// non-terminal) or a scanned token (from a terminal).
#[derive(Debug, Clone)]
pub enum NodeChild<N, S> {
    Child(ParseTree<N, S>),
    Data(S),
}
/// A node of the parse tree produced by the LL parser.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ParseTree<N, S> {
    // The non-terminal and production index that created this node;
    // `None` only for the synthetic root.
    pub rule: Option<(N, usize)>,
    // Children in left-to-right parse order.
    pub childs: Vec<NodeChild<N, S>>,
}
impl<N, S> ParseTree<N, S> {
pub fn new(rule: Option<(N, usize)>) -> Self {
Self {
rule,
childs: Vec::new(),
}
}
}
impl<N: Skippable + Debug, S: Debug> ParseTree<N, S> {
    /// cleanup the parse tree
    /// does not work on a subtree
    ///
    /// # Panics
    /// Panics if called on a node that has a rule (i.e. not the root),
    /// because the root is the only node guaranteed not to be removed.
    pub fn clean(self) -> Self {
        self.clean_internal()
            .expect("Clean only works on the main tree.")
    }
    /// internal clean
    /// main node must not have a rule.
    ///
    /// Returns `Err(children)` when this node itself should be dropped
    /// (its rule is skippable, or it ended up with no children); the caller
    /// then splices the returned children into its own child list.
    fn clean_internal(self) -> Result<Self, Vec<NodeChild<N, S>>> {
        // Recursively clean children; a removed child is replaced in place
        // by its own (already cleaned) children.
        let childs = self
            .childs
            .into_iter()
            .flat_map(|elem| match elem {
                NodeChild::Child(parse_tree) => match parse_tree.clean_internal() {
                    Ok(tree) => [NodeChild::Child(tree)].into(),
                    Err(content) => content,
                },
                NodeChild::Data(d) => [NodeChild::Data(d)].into(),
            })
            .collect();
        if let Some((rule, _)) = &self.rule {
            if rule.skippable() {
                return Err(childs);
            }
            if childs.is_empty() {
                return Err(childs);
            }
        }
        Ok(Self {
            rule: self.rule,
            childs,
        })
    }
}

104
src/cfg/lr0.rs Normal file
View file

@ -0,0 +1,104 @@
use std::{
collections::{HashMap, HashSet},
hash::{Hash, Hasher},
rc::{Rc, Weak},
};
use super::{Grammar, Sentential};
/// A state of the LR(0) automaton: a set of LR(0) items
/// `(head, production, dot)` where `dot` is the number of symbols of the
/// production already read.
#[derive(Debug, Eq, PartialEq)]
pub struct LR0State<N: Hash + Eq, T: Hash + Eq>(HashSet<(N, Vec<Sentential<N, T>>, usize)>);
impl<N: Hash + Eq + Clone, T: Hash + Eq + Clone> LR0State<N, T> {
    /// Kernel of the successor state reached by reading `read`: every item
    /// whose dot stands directly before `read`, with the dot advanced by one.
    /// Items that cannot read `read` are dropped.
    pub fn next_kernel(&self, read: &Sentential<N, T>) -> Self {
        let advanced = self
            .0
            .iter()
            .filter(|(_, to, dot)| to.get(*dot) == Some(read))
            .map(|(from, to, dot)| (from.clone(), to.clone(), dot + 1))
            .collect();
        LR0State(advanced)
    }
    /// All symbols standing directly after a dot in some item of this state,
    /// i.e. the symbols on which this state has a transition.
    pub fn readable(&self) -> HashSet<Sentential<N, T>> {
        self.0
            .iter()
            .filter_map(|(_, to, dot)| to.get(*dot).cloned())
            .collect()
    }
}
impl<N: Hash + Eq + Ord, T: Hash + Eq + Ord> Hash for LR0State<N, T> {
fn hash<H: Hasher>(&self, state: &mut H) {
let mut a: Vec<&(N, Vec<Sentential<N, T>>, usize)> = self.0.iter().collect();
a.sort();
for s in a.iter() {
s.hash(state);
}
}
}
impl<N, T> Grammar<N, T>
where
    N: PartialEq + Eq + Hash + Clone + Ord,
    T: PartialEq + Eq + Hash + Clone + Ord,
{
    /// LR(0) closure of `state`: for every item with the dot before a
    /// non-terminal, add that non-terminal's productions as fresh items
    /// (dot at 0), repeating until a fixed point is reached.
    /// (NOTE(review): "clozure" is a typo of "closure"; the name is public,
    /// so it is kept — TODO rename with callers in one change.)
    pub fn lr0_clozure(&self, mut state: LR0State<N, T>) -> LR0State<N, T> {
        loop {
            let mut change = false;
            // Symbols directly after a dot; collected first so the set is
            // not borrowed while we insert into it below.
            let nt = state
                .0
                .iter()
                .filter_map(|(_, to, dot)| to.get(*dot).cloned())
                .collect::<Vec<_>>();
            for n in nt {
                if let Sentential::NoneTerminal(n) = n {
                    if let Some(rule) = self.rules.get(&n) {
                        for to in rule {
                            change |= state.0.insert((n.clone(), to.clone(), 0));
                        }
                    }
                }
            }
            if !change {
                return state;
            }
        }
    }
    /// Build the LR(0) automaton and store it in `self.lr0_automaton`.
    ///
    /// The automaton is an adjacency map: each state (an `Rc` key) maps to
    /// its outgoing edges `(read symbol, Weak target state)`. `Weak` edges
    /// avoid reference cycles between states.
    pub fn gen_lr0_automaton(&mut self) {
        let mut out: HashMap<Rc<LR0State<N, T>>, Vec<(Sentential<N, T>, Weak<LR0State<N, T>>)>> =
            HashMap::new();
        // Start state kernel: every production of the start symbol, dot at 0.
        let mut start_state = LR0State(HashSet::new());
        if let Some(rule) = self.rules.get(&self.start) {
            for to in rule {
                start_state.0.insert((self.start.clone(), to.clone(), 0));
            }
        }
        let rc = Rc::new(self.lr0_clozure(start_state));
        // Worklist of states whose outgoing edges still need to be computed.
        let mut todo = vec![Rc::downgrade(&rc)];
        out.insert(rc, Vec::new());
        while let Some(elem) = todo.pop() {
            if let Some(elem) = elem.upgrade() {
                let mut vec = Vec::new();
                for none_terminal in elem.readable() {
                    let next_state = self.lr0_clozure(elem.next_kernel(&none_terminal));
                    let rc = Rc::new(next_state);
                    if let Some((k, _)) = out.get_key_value(&rc) {
                        // Known state: link to the canonical Rc already in `out`.
                        vec.push((none_terminal, Rc::downgrade(k)));
                    } else {
                        // New state: queue it and register it with no edges yet.
                        todo.push(Rc::downgrade(&rc));
                        vec.push((none_terminal, Rc::downgrade(&rc)));
                        out.insert(rc, Vec::new());
                    }
                }
                out.entry(elem).and_modify(|elem| {
                    elem.extend(vec);
                });
            }
        }
        self.lr0_automaton = Some(out);
    }
}

282
src/cfg/mod.rs Normal file
View file

@ -0,0 +1,282 @@
use std::{
collections::{HashMap, HashSet},
hash::Hash,
rc::{Rc, Weak},
};
use lr0::LR0State;
pub mod ll_grammar;
pub mod lr0;
/// Build a [`Grammar`](crate::cfg::Grammar) from a list of productions.
///
/// Syntax:
/// ```ignore
/// cfg_grammar![
///     start: S;
///     S -> A, B;   // S derives A B
///     A -> ;       // epsilon production (empty right-hand side)
/// ];
/// ```
/// Right-hand-side symbols are converted with `.into()`, so both terminals
/// and non-terminals may be written bare. `HashMap` must be in scope at the
/// call site.
#[macro_export]
macro_rules! cfg_grammar {
    (
        start: $start:ident;
        $(
            $left:ident -> $(
                $right:ident
            ),*
        );* $(;)?
    ) => {
        {
            let mut map = HashMap::new();
            $(
                // entry API: single lookup instead of the previous
                // contains_key + insert + get_mut triple lookup.
                map.entry($left)
                    .or_insert_with(Vec::new)
                    .push(vec![$($right.into()),*]);
            )*
            $crate::cfg::Grammar {
                start: $start,
                rules: map,
                first: None,
                follow: None,
                ll_parse_table: None,
                lr0_automaton: None,
            }
        }
    };
}
/// A symbol of a sentential form: a terminal (`T`) or a non-terminal (`N`).
/// Note the parameter order: the non-terminal type comes first.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Sentential<N, T> {
    Terminal(T),
    NoneTerminal(N),
}
/// Ordering: every terminal sorts before every non-terminal; within one
/// variant the inner values' ordering applies. Kept manually consistent
/// with the `Ord` impl below (needed so LR(0) item sets sort stably).
impl<T: PartialOrd, N: PartialOrd> PartialOrd for Sentential<N, T> {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        match (self, other) {
            (Self::Terminal(_), Self::NoneTerminal(_)) => Some(std::cmp::Ordering::Less),
            (Self::NoneTerminal(_), Self::Terminal(_)) => Some(std::cmp::Ordering::Greater),
            (Self::Terminal(a), Self::Terminal(b)) => a.partial_cmp(b),
            (Self::NoneTerminal(a), Self::NoneTerminal(b)) => a.partial_cmp(b),
        }
    }
}
/// Total order matching the `PartialOrd` impl above. Duplicated rather than
/// delegated because the `PartialOrd` impl only requires `PartialOrd` bounds.
impl<T: Ord, N: Ord> Ord for Sentential<N, T> {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        match (self, other) {
            (Self::Terminal(_), Self::NoneTerminal(_)) => std::cmp::Ordering::Less,
            (Self::NoneTerminal(_), Self::Terminal(_)) => std::cmp::Ordering::Greater,
            (Self::Terminal(a), Self::Terminal(b)) => a.cmp(b),
            (Self::NoneTerminal(a), Self::NoneTerminal(b)) => a.cmp(b),
        }
    }
}
/// A context-free grammar over non-terminals `N` and terminals `T`,
/// together with lazily generated analysis results.
pub struct Grammar<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> {
    /// The start symbol.
    pub start: N,
    /// Productions: maps each non-terminal to the list of its right-hand sides.
    pub rules: HashMap<N, Vec<Vec<Sentential<N, T>>>>,
    /// FIRST sets; none is epsilon
    pub first: Option<HashMap<N, HashSet<Option<T>>>>,
    /// FOLLOW sets; none is $
    pub follow: Option<HashMap<N, HashSet<Option<T>>>>,
    /// When in State N and reading T, then apply the usize'th rule of N.
    /// none is $
    pub ll_parse_table: Option<HashMap<(N, Option<T>), usize>>,
    /// is a lr0 automaton
    /// Graph, defined through this adjacency list.
    /// - key: states
    /// - value: list with read symbol and linked node.
    pub lr0_automaton:
        Option<HashMap<Rc<LR0State<N, T>>, Vec<(Sentential<N, T>, Weak<LR0State<N, T>>)>>>,
}
impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> Grammar<N, T> {
    /// Whether `rule` can directly derive epsilon: terminals never can; a
    /// non-terminal can iff one of its productions is empty.
    pub fn can_produce_epsilon(&self, rule: &Sentential<N, T>) -> bool {
        match rule {
            Sentential::Terminal(_) => false,
            // `nt` is already a reference — no extra borrow needed for `get`.
            Sentential::NoneTerminal(nt) => self
                .rules
                .get(nt)
                .map(|f| f.iter().any(|v| v.is_empty()))
                .unwrap_or(false),
        }
    }
    /// Compute the FIRST sets of all non-terminals by fixed-point iteration
    /// and store them in `self.first`. `None` in a set represents epsilon.
    pub fn gen_first(&mut self) {
        let mut first: HashMap<N, HashSet<Option<T>>> = HashMap::new();
        loop {
            let mut change = false;
            for (from, to) in self.rules.iter() {
                'rule: for to in to.iter() {
                    // for each rule from -> to = X -> Y1...Yk
                    // add First(Yn) to First(X) if Y1...Yn-1 => e // n can be 1, disregarding the if
                    // add e to First(X) if Y1...Yk => e
                    for symbol in to {
                        match symbol {
                            Sentential::Terminal(a) => {
                                first
                                    .entry(from.clone())
                                    .and_modify(|e| {
                                        change |= e.insert(Some(a.clone()));
                                    })
                                    .or_insert_with(|| {
                                        change = true;
                                        HashSet::from([Some(a.clone())])
                                    });
                            }
                            Sentential::NoneTerminal(nt) => {
                                if let Some(set) = first.get(nt).cloned() {
                                    first
                                        .entry(from.clone())
                                        .and_modify(|e| {
                                            for val in set.iter() {
                                                change |= e.insert(val.clone());
                                            }
                                        })
                                        .or_insert_with(|| {
                                            change = true;
                                            set
                                        });
                                }
                            }
                        }
                        // Stop at the first symbol that cannot derive epsilon:
                        // later symbols cannot contribute to First(X).
                        if !self.can_produce_epsilon(symbol) {
                            continue 'rule;
                        }
                    }
                    // Every symbol can vanish, so X derives epsilon: add None.
                    first
                        .entry(from.clone())
                        .and_modify(|e| {
                            change |= e.insert(None);
                        })
                        .or_insert_with(|| {
                            change = true;
                            HashSet::from([None])
                        });
                }
            }
            if !change {
                break;
            }
        }
        self.first = Some(first);
    }
    /// FIRST set of a sentential form; `None` in the result is epsilon.
    ///
    /// Takes a slice instead of `&Vec` — existing `&Vec` callers coerce, so
    /// this is backward compatible and lets callers pass sub-slices cheaply.
    ///
    /// # Panics
    /// Panics if `gen_first` has not been called yet.
    pub fn first(&self, sent: &[Sentential<N, T>]) -> HashSet<Option<T>> {
        assert!(self.first.is_some(), "Please call gen_first before this!");
        let mut out = HashSet::<Option<T>>::new();
        // Y1Y2...Yk = al
        // add First(Yn) to First(al) if Y1...Yn-1 => e // n can be 1, disregarding the if
        // add e to First(al) if Y1...Yk => e
        'rule: {
            for symbol in sent {
                match symbol {
                    Sentential::Terminal(a) => {
                        out.insert(Some(a.clone()));
                    }
                    Sentential::NoneTerminal(nt) => {
                        if let Some(set) = self.first.as_ref().unwrap().get(nt).cloned() {
                            out.extend(set);
                        }
                    }
                }
                if !self.can_produce_epsilon(symbol) {
                    break 'rule;
                }
            }
            out.insert(None);
        }
        out
    }
    /// Compute the FOLLOW sets of all non-terminals by fixed-point iteration
    /// and store them in `self.follow` (generating FIRST first if needed).
    /// `None` in a set represents `$` (end of input).
    pub fn gen_follow(&mut self) {
        if self.first.is_none() {
            self.gen_first();
        }
        let mut follow: HashMap<N, HashSet<Option<T>>> = HashMap::new();
        // $ follows the start symbol.
        follow.insert(self.start.clone(), HashSet::from([None]));
        loop {
            let mut change = false;
            for (from, to) in self.rules.iter() {
                for to in to.iter() {
                    // a
                    // if A -> aBb then add First(b) - {e} to Follow(B)
                    // and if A -> aBb and e in First(b) add Follow(A) to Follow(B)
                    if to.len() >= 2 {
                        for i in 0..(to.len() - 1) {
                            if let Some(Sentential::NoneTerminal(b)) = to.get(i) {
                                // FIRST of the rest of the production; sub-slice
                                // borrow — no per-iteration clone of the tail.
                                let mut set = self.first(&to[i + 1..]);
                                if set.contains(&None) {
                                    if let Some(set) = follow.get(from).cloned() {
                                        follow
                                            .entry(b.clone())
                                            .and_modify(|e| {
                                                for val in set.iter() {
                                                    change |= e.insert(val.clone());
                                                }
                                            })
                                            .or_insert_with(|| {
                                                change = true;
                                                set
                                            });
                                    }
                                }
                                set.remove(&None);
                                follow
                                    .entry(b.clone())
                                    .and_modify(|e| {
                                        for val in set.iter() {
                                            change |= e.insert(val.clone());
                                        }
                                    })
                                    .or_insert_with(|| {
                                        change = true;
                                        set
                                    });
                            }
                        }
                    }
                    // b
                    // and if A -> aB add Follow(A) to Follow(B)
                    if let Some(Sentential::NoneTerminal(b)) = to.last() {
                        if let Some(set) = follow.get(from).cloned() {
                            follow
                                .entry(b.clone())
                                .and_modify(|e| {
                                    for val in set.iter() {
                                        change |= e.insert(val.clone());
                                    }
                                })
                                .or_insert_with(|| {
                                    change = true;
                                    set
                                });
                        }
                    }
                }
            }
            if !change {
                break;
            }
        }
        self.follow = Some(follow);
    }
    /// FOLLOW set of a non-terminal; `None` represents `$`. Returns an empty
    /// set for non-terminals without an entry.
    ///
    /// # Panics
    /// Panics if `gen_follow` has not been called yet.
    pub fn follow(&self, none_terminal: &N) -> HashSet<Option<T>> {
        assert!(self.follow.is_some(), "Please call gen_follow before this!");
        self.follow
            .as_ref()
            .unwrap()
            .get(none_terminal)
            .cloned()
            .unwrap_or_default()
    }
}

View file

@ -13,7 +13,7 @@ macro_rules! double_enum {
( $($args),+ )
)?
),*}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub enum $bare_name {$($variant),*}
impl PartialEq<$name> for $bare_name {

13
src/lib.rs Normal file
View file

@ -0,0 +1,13 @@
//! rcompiler library root: scanner, context-free-grammar tooling (LL table
//! parser, LR(0) automaton) and the helper macros used by the binaries.
pub mod cfg;
pub mod double_enum;
pub mod scanner;
/// Convenience re-exports for downstream binaries.
pub mod prelude {
    pub use crate::cfg::*;
    pub use crate::cfg::ll_grammar::*;
    pub use crate::cfg::lr0::*;
    pub use crate::cfg_grammar;
    pub use crate::double_enum;
    pub use crate::scanner::*;
    pub use crate::token_scanner;
}

View file

@ -1,503 +0,0 @@
use std::{
collections::{HashMap, HashSet},
fmt::Debug,
hash::Hash,
};
#[macro_export]
macro_rules! ll_grammar {
(
start: $start:ident;
$(
$left:ident -> $(
$right:ident
),*
);* $(;)?
) => {
{
let mut map = HashMap::new();
$({
if !map.contains_key(&$left) {
map.insert($left, Vec::new());
}
map.get_mut(&$left).unwrap().push(vec![$($right.into()),*]);
})*
$crate::ll_grammar::LLGrammar {
start: $start,
rules: map,
first: None,
follow: None,
parse_table: None,
}
}
};
}
#[derive(Debug, Clone)]
pub enum Sentential<T, N> {
Terminal(T),
NoneTerminal(N),
}
pub struct LLGrammar<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> {
pub start: N,
pub rules: HashMap<N, Vec<Vec<Sentential<T, N>>>>,
/// none is epsilon
pub first: Option<HashMap<N, HashSet<Option<T>>>>,
/// none is $
pub follow: Option<HashMap<N, HashSet<Option<T>>>>,
// When in State N and reading T, then apply the usize'th rule of N.
/// none is $
pub parse_table: Option<HashMap<(N, Option<T>), usize>>,
}
impl<N: PartialEq + Eq + Hash + Clone, T: PartialEq + Eq + Hash + Clone> LLGrammar<N, T> {
pub fn can_produce_epsilon(&self, rule: &Sentential<T, N>) -> bool {
match rule {
Sentential::Terminal(_) => false,
Sentential::NoneTerminal(nt) => self
.rules
.get(&nt)
.map(|f| f.iter().any(|v| v.is_empty()))
.unwrap_or(false),
}
}
pub fn gen_first(&mut self) {
let mut first: HashMap<N, HashSet<Option<T>>> = HashMap::new();
loop {
let mut change = false;
for (from, to) in self.rules.iter() {
'rule: for to in to.iter() {
// for each rule from -> to = X -> Y1...Yk
// add First(Yn) to First(X) if Y1...Yn-1 => e // n can be 1, disregarding the if
// add e to First(X) if Y1...Yk => e
for symbol in to {
match symbol {
Sentential::Terminal(a) => {
first
.entry(from.clone())
.and_modify(|e| {
change |= e.insert(Some(a.clone()));
})
.or_insert_with(|| {
change = true;
HashSet::from([Some(a.clone())])
});
}
Sentential::NoneTerminal(nt) => {
if let Some(set) = first.get(nt).cloned() {
first
.entry(from.clone())
.and_modify(|e| {
for val in set.iter() {
change |= e.insert(val.clone());
}
})
.or_insert_with(|| {
change = true;
set
});
}
}
}
if !self.can_produce_epsilon(symbol) {
continue 'rule;
}
}
first
.entry(from.clone())
.and_modify(|e| {
change |= e.insert(None);
})
.or_insert_with(|| {
change = true;
HashSet::from([None])
});
}
}
if !change {
break;
}
}
self.first = Some(first);
}
pub fn first(&self, sent: &Vec<Sentential<T, N>>) -> HashSet<Option<T>> {
assert!(self.first.is_some(), "Please call gen_first before this!");
let mut out = HashSet::<Option<T>>::new();
// Y1Y2...Yk = al
// add First(Yn) to First(al) if Y1...Yn-1 => e // n can be 1, disregarding the if
// add e to First(al) if Y1...Yk => e
'rule: {
for symbol in sent {
match symbol {
Sentential::Terminal(a) => {
out.insert(Some(a.clone()));
}
Sentential::NoneTerminal(nt) => {
if let Some(set) = self.first.as_ref().unwrap().get(nt).cloned() {
out.extend(set);
}
}
}
if !self.can_produce_epsilon(symbol) {
break 'rule;
}
}
out.insert(None);
}
out
}
pub fn gen_follow(&mut self) {
if self.first == None {
self.gen_first();
}
let mut follow: HashMap<N, HashSet<Option<T>>> = HashMap::new();
follow.insert(self.start.clone(), HashSet::from([None]));
loop {
let mut change = false;
for (from, to) in self.rules.iter() {
for to in to.iter() {
// a
// if A -> aBb then add First(b) - {e} to Follow(B)
// and if A -> aBb and e in First(b) add Follow(A) to Follow(B)
if to.len() >= 2 {
for i in 0..(to.len() - 1) {
let slice = to[i + 1..].iter().map(|f| f.clone()).collect::<Vec<_>>();
match to.get(i) {
Some(Sentential::NoneTerminal(b)) => {
let mut set = self.first(&slice);
if set.contains(&None) {
if let Some(set) = follow.get(from).cloned() {
follow
.entry(b.clone())
.and_modify(|e| {
for val in set.iter() {
change |= e.insert(val.clone());
}
})
.or_insert_with(|| {
change = true;
set
});
}
}
set.remove(&None);
follow
.entry(b.clone())
.and_modify(|e| {
for val in set.iter() {
change |= e.insert(val.clone());
}
})
.or_insert_with(|| {
change = true;
set
});
}
_ => (),
}
}
}
// b
// and if A -> aB add Follow(A) to Follow(B)
match to.last() {
Some(Sentential::NoneTerminal(b)) => {
if let Some(set) = follow.get(from).cloned() {
follow
.entry(b.clone())
.and_modify(|e| {
for val in set.iter() {
change |= e.insert(val.clone());
}
})
.or_insert_with(|| {
change = true;
set
});
}
}
_ => (),
}
}
}
if !change {
break;
}
}
self.follow = Some(follow);
}
pub fn follow(&self, none_termianl: &N) -> HashSet<Option<T>> {
assert!(self.follow.is_some(), "Please call gen_follow before this!");
self.follow
.as_ref()
.unwrap()
.get(&none_termianl)
.cloned()
.unwrap_or(HashSet::new())
}
pub fn gen_parse_table(&mut self) -> bool {
if self.follow.is_none() {
self.gen_follow();
}
if self.parse_table.is_some() {
return false;
}
let mut conflict = false;
let mut parse_table: HashMap<(N, Option<T>), usize> = HashMap::new();
for (from, to) in self.rules.iter() {
for (id, to) in to.iter().enumerate() {
// rule is A -> al
// terminal == None means epsilon
for terminal in self.first(to) {
match terminal {
// let a be in First(al) -> add to T[A,a] = A->al (using the index of al)
Some(terminal) => {
conflict |= parse_table
.insert((from.clone(), Some(terminal.clone())), id)
.is_some();
}
// if first contains epsilon then
// let b be in Follow(A) -> add to T[A,b] = A->al (using the index of al)
None => {
for terminal in self.follow(from).iter() {
conflict |= parse_table
.insert((from.clone(), terminal.clone()), id)
.is_some()
}
}
}
}
}
}
self.parse_table = Some(parse_table);
conflict
}
/// get parse_table rule
/// None means error.
pub fn parse_table(
&self,
none_terminal: &N,
terminal: &Option<T>,
) -> Option<(usize, &Vec<Sentential<T, N>>)> {
assert!(
self.parse_table.is_some(),
"Please call gen_parse_table before this!"
);
self.parse_table
.as_ref()
.unwrap()
.get(&(none_terminal.clone(), terminal.clone()))
.and_then(|f| {
self.rules
.get(none_terminal)
.and_then(|rule| rule.get(*f))
.map(|rule| (*f, rule))
})
}
pub fn parser<'a, S: Into<T> + PartialEq<T> + Clone>(
&'a self,
iter: &'a mut dyn Iterator<Item = Result<S, String>>,
) -> LLTabelParser<N, T, S> {
assert!(
self.parse_table.is_some(),
"Please call gen_parse_table before this!"
);
LLTabelParser {
input: iter,
grammar: self,
}
}
}
/// Just checks a program. Does not generates output.
pub struct LLTabelParser<
'a,
N: PartialEq + Eq + Hash + Clone,
T: PartialEq + Eq + Hash + Clone,
S: Into<T> + PartialEq<T> + Clone,
> {
grammar: &'a LLGrammar<N, T>,
input: &'a mut dyn Iterator<Item = Result<S, String>>,
}
impl<
'a,
N: PartialEq + Eq + Hash + Clone + Debug,
T: PartialEq + Eq + Hash + Clone + Debug,
S: Into<T> + PartialEq<T> + Clone + Debug,
> LLTabelParser<'a, N, T, S>
{
pub fn parse(&mut self) -> Result<ParseTree<N, S>, String> {
// stack of table driven parser
// content of the vec:
// - first element: all of them combined represent the complete stack, of the parser.
// - secount element: rule has to able to derive the code defined, by its inner childs and the unparsed code from the accompanying first element.
let mut stack: Vec<(Vec<Sentential<T, N>>, ParseTree<N, S>)> = vec![(
vec![Sentential::NoneTerminal(self.grammar.start.clone())],
ParseTree::new(None),
)];
let mut next = match self.input.next() {
Some(Ok(d)) => Some(d),
Some(Err(err)) => return Err(format!("Invalid token: {}", err)),
None => None,
};
loop {
// look at current state
let mut state = stack.pop();
match state.as_mut() {
// processing inner state, of tracked rules
Some((inner_stack, rule)) => {
let inner_state = inner_stack.pop();
match inner_state {
// match terminal, check if equal
Some(Sentential::Terminal(terminal)) => match (next, terminal) {
// actual vs. expected input
(Some(inn), expect) if inn == expect => {
next = match self.input.next() {
Some(Ok(n)) => Some(n),
Some(Err(err)) => {
return Err(format!("Invalid token: {}", err))
}
None => None,
};
rule.childs.push(NodeChild::Data(inn));
stack.push(state.unwrap());
}
(a, b) => {
return Err(format!("found: {:?} expected: {:?}", a, b));
}
},
// take next none terminal and apply rule from parse table.
Some(Sentential::NoneTerminal(none_term)) => {
// load rule
let Some((id, new_rule)) = self
.grammar
.parse_table(&none_term, &next.as_ref().map(|f| f.clone().into()))
else {
// no rule
return Err(format!(
"Unexpected token: {}",
next.map(|f| format!("{f:?}"))
.unwrap_or("end of file".to_string())
));
};
// reverse rule: because, uses vec as stack, but reversed
let new_rule_rev =
new_rule.iter().rev().map(|f| f.clone()).collect::<Vec<_>>();
// memorize current state/rule for later
stack.push(state.unwrap());
// process next rule
stack.push((
new_rule_rev,
ParseTree {
rule: Some((none_term, id)),
childs: Vec::new(),
},
));
}
// inner state is empty: current rule is finished
None => {
// if stack is empty, this is the initial state: finish or err
let Some(last) = stack.last_mut() else {
// ok: input has ended
if next.is_none() {
return Ok(state.unwrap().1);
}
// still code left, but not excepted
return Err(format!("Expected end of file."));
};
last.1.childs.push(NodeChild::Child(state.unwrap().1));
}
}
}
// should not be possible, because every other path pushes to the stack back or returns
None => {
return Err(format!("Err: EOS"));
}
}
}
}
}
//
pub trait Skippable {
fn skippable(&self) -> bool {
false
}
}
#[derive(Debug, Clone)]
pub enum NodeChild<N, S> {
Child(ParseTree<N, S>),
Data(S),
}
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ParseTree<N, S> {
pub rule: Option<(N, usize)>,
pub childs: Vec<NodeChild<N, S>>,
}
impl<N, S> ParseTree<N, S> {
pub fn new(rule: Option<(N, usize)>) -> Self {
Self {
rule,
childs: Vec::new(),
}
}
}
impl<N: Skippable + Debug, S: Debug> ParseTree<N, S> {
/// cleanup the parse tree
/// does not work on a subtree
pub fn clean(self) -> Self {
self.clean_internal()
.expect("Clean only works on the main tree.")
}
/// internal clean
/// main node must not have a rule.
fn clean_internal(self) -> Result<Self, Vec<NodeChild<N, S>>> {
let childs = self
.childs
.into_iter()
.flat_map(|elem| match elem {
NodeChild::Child(parse_tree) => match parse_tree.clean_internal() {
Ok(tree) => [NodeChild::Child(tree)].into(),
Err(content) => content,
},
NodeChild::Data(d) => [NodeChild::Data(d)].into(),
})
.collect();
if let Some((rule, _)) = &self.rule {
if rule.skippable() {
return Err(childs);
}
if childs.is_empty() {
return Err(childs);
}
}
Ok(Self {
rule: self.rule,
childs,
})
}
}

View file

@ -1,9 +1,6 @@
mod double_enum;
mod ll_grammar;
mod scanner;
use ll_grammar::{LLGrammar, Sentential, Skippable};
use regex::{Match, Regex};
use scanner::Scanner;
use ll_grammar::Skippable;
use rcompiler::prelude::*;
use regex::Match;
use std::collections::HashMap;
double_enum!(
@ -34,7 +31,7 @@ double_enum!(
}
);
scanner!(
token_scanner!(
Tokens,
r"^\s|\t|\n|\r" : |_,_| {
Some(WhiteSpace)
@ -134,22 +131,22 @@ impl Skippable for NoneTerminals {
}
}
impl<T> From<NoneTerminals> for Sentential<T, NoneTerminals> {
impl<N> From<NoneTerminals> for Sentential<NoneTerminals, N> {
fn from(value: NoneTerminals) -> Self {
Sentential::NoneTerminal(value)
}
}
impl<N> From<BareTokens> for Sentential<BareTokens, N> {
impl<T> From<BareTokens> for Sentential<T, BareTokens> {
fn from(value: BareTokens) -> Self {
Sentential::Terminal(value)
}
}
fn grammer() -> LLGrammar<NoneTerminals, BareTokens> {
fn grammer() -> Grammar<NoneTerminals, BareTokens> {
use BareTokens::*;
use NoneTerminals::*;
ll_grammar![
cfg_grammar![
start: P;
P -> L,P;
P -> ;
@ -201,14 +198,14 @@ fn main() {
grammar.gen_follow();
println!("first: {:?}", grammar.first);
println!("follow: {:?}", grammar.follow);
let conflict = grammar.gen_parse_table();
let conflict = grammar.gen_ll_parse_table();
println!("conflict: {conflict}");
println!("prase table: {:?}", grammar.parse_table);
println!("prase table: {:?}", grammar.ll_parse_table);
println!("parse\n\n");
println!(
"parsed: {:?}",
grammar
.parser(&mut m.iter_mut())
.ll_parser(&mut m.iter_mut())
.parse()
.map(|tree| tree.clean())
)

View file

@ -1,13 +1,14 @@
use std::marker::PhantomData;
#[macro_export]
macro_rules! scanner {
macro_rules! token_scanner {
($name:ident,$(
$regex:tt : $code:expr
)*) => {
impl $crate::scanner::MatchNext<$name> for $name {
fn match_next(code: &String) -> Option<(Self, usize)> {
use $name::*;
use regex::Regex;
$(
if let Some(capture) = Regex::new($regex).unwrap().captures(&code) {
if let Some(main_capture) = capture.get(0) {