added almost the full grammar, added extra parsers and lexers for

testing
This commit is contained in:
2025-10-25 11:39:07 +02:00
parent 4c08803a54
commit a0a6a15be3
17 changed files with 369 additions and 118 deletions

View File

@@ -5,10 +5,21 @@ fn main() {
CTLexerBuilder::new() CTLexerBuilder::new()
.lrpar_config(|ctp| { .lrpar_config(|ctp| {
ctp.yacckind(YaccKind::Grmtools) ctp.yacckind(YaccKind::Grmtools)
.grammar_in_src_dir("grammar.y") .grammar_in_src_dir("parsers/fudge.y")
.unwrap() .unwrap()
}) })
.lexer_in_src_dir("grammar.l") .lexer_in_src_dir("lexers/fudge.l")
.unwrap()
.build()
.unwrap();
CTLexerBuilder::new()
.lrpar_config(|ctp| {
ctp.yacckind(YaccKind::Grmtools)
.grammar_in_src_dir("parsers/expr_only.y")
.unwrap()
})
.lexer_in_src_dir("lexers/expr_only.l")
.unwrap() .unwrap()
.build() .build()
.unwrap(); .unwrap();

14
src/ast/errors.rs Normal file
View File

@@ -0,0 +1,14 @@
use std::fmt::Display;
/// Error reporting that a token other than one of the accepted ones was found.
///
/// The `Display` output has the form
/// `line <line> column <column_start>:` followed on the next line by
/// `Expected one of <expected, comma separated> but got <actual>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExpectedTokenError {
    /// Line number printed in the message.
    line: usize,
    /// Column number printed in the message.
    column_start: usize,
    /// Name of the token that was actually encountered.
    actual: &'static str,
    /// Names of the tokens that would have been accepted.
    expected: Vec<&'static str>,
}

impl Display for ExpectedTokenError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "line {} column {}:\nExpected one of {} but got {}",
            self.line,
            self.column_start,
            self.expected.join(", "),
            self.actual
        )
    }
}

// Implementing `Error` (which needs `Debug` + `Display`) lets this type be
// propagated with `?` into a `Box<dyn std::error::Error>`.
impl std::error::Error for ExpectedTokenError {}

View File

@@ -0,0 +1,19 @@
/// Parses `s` as an `i64`.
///
/// When the text is not a valid `i64`, a diagnostic is printed to stderr and
/// `Err(())` is returned.
pub fn parse_int(s: &str) -> Result<i64, ()> {
    s.parse::<i64>().map_err(|_| {
        eprintln!("{} cannot be represented as a i64", s);
    })
}
/// Parses `s` as an `f64`.
///
/// When the text is not a valid `f64`, a diagnostic is printed to stderr and
/// `Err(())` is returned.
pub fn parse_float(s: &str) -> Result<f64, ()> {
    s.parse::<f64>().map_err(|_| {
        eprintln!("{} cannot be represented as a f64", s);
    })
}

View File

@@ -1,3 +1,12 @@
pub mod literal_parsers;
mod errors;
/// Root node of the AST: the definitions of a source file plus its main
/// expression.
#[derive(Debug, Clone, PartialEq)]
pub struct Program {
/// Definitions that precede the main definition (empty when there are none).
pub definition_list: Vec<Definition>,
/// Expression on the right-hand side of the main definition.
pub main_def: Expression,
}
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Literal { pub enum Literal {
Int(i64), Int(i64),
@@ -19,22 +28,22 @@ pub enum Expression {
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Definition { pub enum Definition {
Binding { Binding {
identifier_info: TypedIdentifier, name: String,
value: Expression, value: Expression,
}, },
StructDef(StructDefinition),
EnumDef(EnumDefinition),
} }
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub struct TypedIdentifier { pub struct TypedIdentifier {
identifier_uuid: usize, pub id: String,
pub type_arg: TypeArg,
id: String,
type_arg: TypeArg,
} }
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum TypeArg { pub enum TypeArg {
Rank0(AST_TypeId), Rank0(TypeId),
RankN { RankN {
from: Box<TypeArg>, from: Box<TypeArg>,
to: Box<TypeArg>, to: Box<TypeArg>,
@@ -42,8 +51,35 @@ pub enum TypeArg {
} }
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub struct AST_TypeId { pub struct TypeId {
type_uuid: usize, pub string_repr: String,
}
string_repr: String,
/// A named struct type definition: a name paired with a struct literal body.
#[derive(Debug, Clone, PartialEq)]
pub struct StructDefinition {
/// Name given to the struct type.
pub name: String,
/// The struct literal describing the members.
pub body: StructDefLiteral,
}
/// Body of a struct definition: the list of its typed members.
#[derive(Debug, Clone, PartialEq)]
pub struct StructDefLiteral {
/// Members of the struct, each an identifier together with its type.
pub members: Vec<TypedIdentifier>,
}
/// A named enum type definition.
///
/// NOTE(review): unlike `StructDefinition`, this stores its members directly
/// instead of wrapping an `EnumdefLiteral`, and the members are
/// `TypedIdentifier`s rather than `OptionallyTypedIdentifier`s — confirm this
/// asymmetry is intended.
#[derive(Debug, Clone, PartialEq)]
pub struct EnumDefinition {
/// Name given to the enum type.
pub name: String,
/// Type arguments attached to the definition.
pub type_args: Vec<TypeArg>,
/// Members of the enum.
pub members: Vec<TypedIdentifier>,
}
/// Body of an enum definition: a list of identifiers whose type is optional.
///
/// NOTE(review): the casing differs from `StructDefLiteral` (`Enumdef` vs
/// `StructDef`) — consider aligning the names.
#[derive(Debug, Clone, PartialEq)]
pub struct EnumdefLiteral {
/// Entries of the enum body.
pub body: Vec<OptionallyTypedIdentifier>
}
#[derive(Debug, Clone, PartialEq)]
pub struct OptionallyTypedIdentifier {
pub name: String,
pub type_arg: Option<TypeArg>,
} }

View File

@@ -13,6 +13,6 @@ pub Atom: Literal = {
"(" <f: Atom> ")" => f, "(" <f: Atom> ")" => f,
}; };
pub Int: i64 = <s:r"[0-9]|[1-9][0-9]*"> => s.parse::<i64>().unwrap(); pub Int: i64 = <s:r"[0-9]|[1-9][0-9]*"> => s.clone().parse::<i64>().unwrap();
pub Float: f64 = <s:r"[+-]?([0-9]*[.][0-9]+|[0-9]*([.][0-9]+)?[Ee][+-]?[1-9][0-9]*)"> => s.parse::<f64>().unwrap(); pub Float: f64 = <s:r"[+-]?([0-9]*[.][0-9]+|[0-9]*([.][0-9]+)?[Ee][+-]?[1-9][0-9]*)"> => s.clone().parse::<f64>().unwrap();
// pub Identifier: &'input str = <s:r"[_a-zA-Z][_a-zA-Z0-9]*"> => s; // pub Identifier: &'input str = <s:r"[_a-zA-Z][_a-zA-Z0-9]*"> => s;

View File

@@ -1,10 +1,20 @@
Expr := Identifier
| Expr Expr
| "&" Identifier "." Expr
| "(" Expr ")"
Declaration := Identifier ":=" Expr ";"
Abstraction := "&" Identifier "." Expr ";" <structdef> ::= 'Type' <ident> <typearg>* ':=' 'Struct' '{' <typedident>
('&' <typedident>)*
'}'
Application := Identifier "<|" Expr <enumdef> ::= 'type' <ident> <typearg>* ':=' 'Enum' '{' <ident> (':' <typearg>)?
('|' <ident> (':' <typearg>)? )*
'}'
<typedident> ::= <ident>':' <typearg>
Type Pair T := Struct { left: T
& right: T }
Type Quartet T := Struct { left: T & middle: T & right: T }
Type Optional T := Enum { Some: T
| None }

View File

@@ -1,9 +0,0 @@
%%
[1-9][0-9]* "LITINT"
[-+]?([0-9]*[.][0-9]+|[0-9]*([.][0-9]+)?[Ee][-+]?[1-9][0-9]*) "LITFLOAT"
[_a-zA-Z][_0-9a-zA-Z]* "IDENT"
\+ "ADD"
\* "MUL"
\( "("
\) ")"
[\t ]+ ;

View File

@@ -1,52 +0,0 @@
%start Expr
%%
Expr -> Result<Expression, ()>:
      Expr "ADD" MulExpr {
        // Left-recursive rule, so chained additions group to the left.
        let lhs = Box::new($1?);
        let rhs = Box::new($3?);
        Ok(Expression::Add(lhs, rhs))
      }
    | MulExpr {
        // A lone multiplicative expression is passed through unchanged.
        $1
      }
    ;
MulExpr -> Result<Expression, ()>:
      MulExpr "MUL" ConstExpr {
        // Left-recursive rule, so chained products group to the left.
        let lhs = Box::new($1?);
        let rhs = Box::new($3?);
        Ok(Expression::Mul(lhs, rhs))
      }
    | ConstExpr {
        // A lone atom is passed through unchanged.
        $1
      }
    ;
ConstExpr -> Result<Expression, ()>:
      "(" Expr ")" {
        // Parentheses only group; the inner expression is the result.
        $2
      }
    | "LITINT" {
        // Convert the matched source text into an integer literal.
        let tok = $1.map_err(|_| ())?;
        let text = $lexer.span_str(tok.span());
        Ok(Expression::Lit(Literal::Int(parse_int(text)?)))
      }
    | "LITFLOAT" {
        // Convert the matched source text into a float literal.
        let tok = $1.map_err(|_| ())?;
        let text = $lexer.span_str(tok.span());
        Ok(Expression::Lit(Literal::Float(parse_float(text)?)))
      }
    | "IDENT" {
        // An identifier keeps its source text as an owned string.
        let tok = $1.map_err(|_| ())?;
        let text = $lexer.span_str(tok.span());
        Ok(Expression::Ident(text.to_string()))
      }
    ;
%%
// Any functions here are in scope for all the grammar actions above.
/// Parses `s` as an `i64`, printing a diagnostic to stderr and returning
/// `Err(())` when the text is not a valid `i64`.
fn parse_int(s: &str) -> Result<i64, ()> {
    if let Ok(val) = s.parse::<i64>() {
        return Ok(val);
    }
    eprintln!("{} cannot be represented as a i64", s);
    Err(())
}
/// Parses `s` as an `f64`, printing a diagnostic to stderr and returning
/// `Err(())` when the text is not a valid `f64`.
fn parse_float(s: &str) -> Result<f64, ()> {
    if let Ok(val) = s.parse::<f64>() {
        return Ok(val);
    }
    eprintln!("{} cannot be represented as a f64", s);
    Err(())
}
use crate::ast::{Literal, Expression};

32
src/lexers/expr_only.l Normal file
View File

@@ -0,0 +1,32 @@
%%
true "bool_true"
false "bool_false"
Struct "struct"
Enum "enum"
if "if"
then "then"
else "else"
let "let"
Type "type"
@main "main"
[_a-zA-Z][_0-9a-zA-Z]* "ident"
[1-9][0-9]* "lit_int"
[-+]?([0-9]*[.][0-9]+|[0-9]*([.][0-9]+)?[Ee][-+]?[1-9][0-9]*) "lit_float"
\+ "op_add"
\* "op_mul"
[-] "op_sub"
[/] "op_div"
[%] "op_mod"
:= "assign"
= "eq"
: "colon"
; "semicolon"
\( "("
\) ")"
\{ "{"
\} "}"
[\t\n\r ]+ ;

32
src/lexers/fudge.l Normal file
View File

@@ -0,0 +1,32 @@
%%
true "bool_true"
false "bool_false"
Struct "struct"
Enum "enum"
if "if"
then "then"
else "else"
let "let"
Type "type"
@main "main"
[_a-zA-Z][_0-9a-zA-Z]* "ident"
[1-9][0-9]* "lit_int"
[-+]?([0-9]*[.][0-9]+|[0-9]*([.][0-9]+)?[Ee][-+]?[1-9][0-9]*) "lit_float"
\+ "op_add"
\* "op_mul"
[-] "op_sub"
[/] "op_div"
[%] "op_mod"
:= "assign"
= "eq"
: "colon"
; "semicolon"
\( "("
\) ")"
\{ "{"
\} "}"
[\t\n\r ]+ ;

View File

@@ -2,7 +2,7 @@ use crate::errors::CLIArgumentError;
use lrlex::lrlex_mod; use lrlex::lrlex_mod;
use lrpar::lrpar_mod; use lrpar::lrpar_mod;
use std::error::Error; use std::error::Error;
use std::fmt::{Display, format}; use std::fmt::Display;
use std::io; use std::io;
use std::io::ErrorKind::InvalidInput; use std::io::ErrorKind::InvalidInput;
@@ -11,8 +11,8 @@ mod errors;
mod optimising; mod optimising;
mod tests; mod tests;
lrlex_mod!("grammar.l"); lrlex_mod!("lexers/fudge.l");
lrpar_mod!("grammar.y"); lrpar_mod!("parsers/fudge.y");
fn main() -> Result<(), Box<dyn Error>> { fn main() -> Result<(), Box<dyn Error>> {
let src_path = std::env::args() let src_path = std::env::args()
@@ -20,13 +20,25 @@ fn main() -> Result<(), Box<dyn Error>> {
.ok_or(Box::new(CLIArgumentError("Source File Not Provided")))?; .ok_or(Box::new(CLIArgumentError("Source File Not Provided")))?;
let src_string = std::fs::read_to_string(&src_path)?; let src_string = std::fs::read_to_string(&src_path)?;
let lexerdef = grammar_l::lexerdef(); let lexer_def = fudge_l::lexerdef();
let lexer = lexerdef.lexer((src_string.as_str())); let lexer = lexer_def.lexer(src_string.as_str());
let (res, errs) = grammar_y::parse(&lexer); let (res, errs) = fudge_y::parse(&lexer);
if let Some(Ok(res)) = res { match (res, errs) {
(Some(Ok(res)), _) => {
println!("Parsed succesfully:");
println!("{:#?}", res); println!("{:#?}", res);
},
(Some(Err(e)), _) => {
println!("Parsing failed:");
println!("{:#?}", e);
} }
(_, vec) => {
println!("LexParseErrors encountered:");
println!("{:#?}", vec);
}
}
Ok(()) Ok(())
} }

41
src/parsers/expr_only.y Normal file
View File

@@ -0,0 +1,41 @@
%start Expr
%%
Expr -> Result<Expression, ()>:
      Expr "op_add" MulExpr {
        // Additive operators are left-recursive, so chains group to the left.
        let lhs = Box::new($1?);
        let rhs = Box::new($3?);
        Ok(Expression::Add(lhs, rhs))
      }
    | Expr "op_sub" MulExpr {
        let lhs = Box::new($1?);
        let rhs = Box::new($3?);
        Ok(Expression::Sub(lhs, rhs))
      }
    | MulExpr {
        // A lone multiplicative expression is passed through unchanged.
        $1
      }
    ;
MulExpr -> Result<Expression, ()>:
      MulExpr "op_mul" ConstExpr {
        // Multiplicative operators sit one level below the additive ones and
        // are left-recursive as well.
        let lhs = Box::new($1?);
        let rhs = Box::new($3?);
        Ok(Expression::Mul(lhs, rhs))
      }
    | MulExpr "op_div" ConstExpr {
        let lhs = Box::new($1?);
        let rhs = Box::new($3?);
        Ok(Expression::Div(lhs, rhs))
      }
    | MulExpr "op_mod" ConstExpr {
        let lhs = Box::new($1?);
        let rhs = Box::new($3?);
        Ok(Expression::Mod(lhs, rhs))
      }
    | ConstExpr {
        // A lone atom is passed through unchanged.
        $1
      }
    ;
ConstExpr -> Result<Expression, ()>:
      "(" Expr ")" {
        // Parentheses only group; the inner expression is the result.
        $2
      }
    | "lit_int" {
        // Convert the matched source text into an integer literal.
        let tok = $1.map_err(|_| ())?;
        let text = $lexer.span_str(tok.span());
        Ok(Expression::Lit(Literal::Int(parse_int(text)?)))
      }
    | "lit_float" {
        // Convert the matched source text into a float literal.
        let tok = $1.map_err(|_| ())?;
        let text = $lexer.span_str(tok.span());
        Ok(Expression::Lit(Literal::Float(parse_float(text)?)))
      }
    | "ident" {
        // An identifier keeps its source text as an owned string.
        let tok = $1.map_err(|_| ())?;
        let text = $lexer.span_str(tok.span());
        Ok(Expression::Ident(text.to_string()))
      }
    ;
%%
// Any functions here are in scope for all the grammar actions above.
use crate::ast::{Literal, Expression, Definition, StructDefinition, Program, TypedIdentifier, TypeArg,StructDefLiteral, TypeId};
use crate::ast::literal_parsers::*;

123
src/parsers/fudge.y Normal file
View File

@@ -0,0 +1,123 @@
%start program
%%
program -> Result<Program, ()>:
      definitionlist maindef {
        // A program is a list of definitions followed by the main definition.
        let definition_list = $1?;
        let main_def = $2?;
        Ok(Program { definition_list, main_def })
      }
    | maindef {
        // Without leading definitions the list is simply empty.
        let main_def = $1?;
        Ok(Program { definition_list: Vec::new(), main_def })
      }
    ;
maindef -> Result<Expression, ()>:
      "main" "assign" Expr {
        // The main definition evaluates to its right-hand expression.
        $3
      }
    ;
definitionlist -> Result<Vec<Definition>, ()>:
      definition {
        // Base case: a list holding exactly one definition.
        Ok(vec![$1?])
      }
    | definitionlist definition {
        // Left recursion lets the LR parser reduce after every definition, so
        // the parse stack stays shallow and the list grows with a plain push
        // instead of re-copying the tail at every level.
        // Definitions are still collected in source order.
        // TODO: add parser tests covering lists of several definitions.
        let mut defs = $1?;
        defs.push($2?);
        Ok(defs)
      }
    ;
definition -> Result<Definition, ()>:
      "let" "ident" "assign" Expr {
        // Value binding: the name is the source text of the ident token.
        let name_tok = $2.map_err(|_| ())?;
        let name = $lexer.span_str(name_tok.span()).to_string();
        let value = $4?;
        Ok(Definition::Binding { name, value })
      }
    | "type" "ident" "assign" structdefliteral {
        // Struct type definition: a name paired with a struct literal body.
        let name_tok = $2.map_err(|_| ())?;
        let name = $lexer.span_str(name_tok.span()).to_string();
        let body = $4?;
        Ok(Definition::StructDef(StructDefinition { name, body }))
      }
    ;
Expr -> Result<Expression, ()>:
      Expr "op_add" MulExpr {
        // Additive operators are left-recursive, so chains group to the left.
        let lhs = Box::new($1?);
        let rhs = Box::new($3?);
        Ok(Expression::Add(lhs, rhs))
      }
    | Expr "op_sub" MulExpr {
        let lhs = Box::new($1?);
        let rhs = Box::new($3?);
        Ok(Expression::Sub(lhs, rhs))
      }
    | MulExpr {
        // A lone multiplicative expression is passed through unchanged.
        $1
      }
    ;
MulExpr -> Result<Expression, ()>:
      MulExpr "op_mul" ConstExpr {
        // Multiplicative operators sit one level below the additive ones and
        // are left-recursive as well.
        let lhs = Box::new($1?);
        let rhs = Box::new($3?);
        Ok(Expression::Mul(lhs, rhs))
      }
    | MulExpr "op_div" ConstExpr {
        let lhs = Box::new($1?);
        let rhs = Box::new($3?);
        Ok(Expression::Div(lhs, rhs))
      }
    | MulExpr "op_mod" ConstExpr {
        let lhs = Box::new($1?);
        let rhs = Box::new($3?);
        Ok(Expression::Mod(lhs, rhs))
      }
    | ConstExpr {
        // A lone atom is passed through unchanged.
        $1
      }
    ;
ConstExpr -> Result<Expression, ()>:
      "(" Expr ")" {
        // Parentheses only group; the inner expression is the result.
        $2
      }
    | "lit_int" {
        // Convert the matched source text into an integer literal.
        let tok = $1.map_err(|_| ())?;
        let text = $lexer.span_str(tok.span());
        Ok(Expression::Lit(Literal::Int(parse_int(text)?)))
      }
    | "lit_float" {
        // Convert the matched source text into a float literal.
        let tok = $1.map_err(|_| ())?;
        let text = $lexer.span_str(tok.span());
        Ok(Expression::Lit(Literal::Float(parse_float(text)?)))
      }
    | "ident" {
        // An identifier keeps its source text as an owned string.
        let tok = $1.map_err(|_| ())?;
        let text = $lexer.span_str(tok.span());
        Ok(Expression::Ident(text.to_string()))
      }
    ;
structdefliteral -> Result<StructDefLiteral, ()>:
      "struct" "{" structdefmemberlist "}" {
        // The literal is the struct keyword followed by a braced member list.
        let members = $3?;
        Ok(StructDefLiteral { members })
      }
    ;
structdefmemberlist -> Result<Vec<TypedIdentifier>, ()>:
      structdefmember {
        // NOTE(review): only a single member is accepted here. Confirm
        // whether a separator-delimited list of members is still to be added.
        let member = $1?;
        Ok(vec![member])
      }
    ;
structdefmember -> Result<TypedIdentifier, ()>:
      "ident" "colon" typearg {
        // A member is an identifier, a colon and the type of the member.
        let name_tok = $1.map_err(|_| ())?;
        let id = $lexer.span_str(name_tok.span()).to_string();
        let type_arg = $3?;
        Ok(TypedIdentifier { id, type_arg })
      }
    ;
typearg -> Result<TypeArg, ()>:
      "ident" {
        // A bare identifier names a rank-0 type; its source text is stored.
        let type_tok = $1.map_err(|_| ())?;
        let string_repr = $lexer.span_str(type_tok.span()).to_string();
        Ok(TypeArg::Rank0(TypeId { string_repr }))
      }
    ;
%%
// Any functions here are in scope for all the grammar actions above.
use crate::ast::{Literal, Expression, Definition, StructDefinition, Program, TypedIdentifier, TypeArg,StructDefLiteral, TypeId};
use crate::ast::literal_parsers::*;

View File

@@ -1,16 +0,0 @@
//
//
// #[test]
// fn identifiers() {
// use crate::expressions::IdentifierParser as IdentParser;
// assert!(IdentParser::new().parse("a").is_ok());
// assert!(IdentParser::new().parse("_").is_ok());
// assert!(IdentParser::new().parse("a_a_a_").is_ok());
// assert!(IdentParser::new().parse("_0").is_ok());
// assert!(IdentParser::new().parse("_a").is_ok());
// assert!(IdentParser::new().parse("__").is_ok());
//
// assert!(IdentParser::new().parse("0").is_err());
// assert!(IdentParser::new().parse("0123456").is_err());
// assert!(IdentParser::new().parse("0aaaa").is_err());
// }

View File

@@ -1,10 +1,12 @@
use lrlex::lrlex_mod; use lrlex::lrlex_mod;
use lrpar::lrpar_mod; use lrpar::lrpar_mod;
use crate::ast::{Expression, Literal}; use crate::ast::{Expression, Literal, Program};
lrlex_mod!("grammar.l"); lrlex_mod!("lexers/fudge.l");
lrpar_mod!("grammar.y"); lrpar_mod!("parsers/fudge.y");
lrlex_mod!("lexers/expr_only.l");
lrpar_mod!("parsers/expr_only.y");
macro_rules! test_literal_list { macro_rules! test_literal_list {
@@ -19,10 +21,10 @@ macro_rules! test_literal_list {
}; };
} }
fn parse_str(input: &str) -> Result<Expression, ()> { fn parse_expr(input: &str) -> Result<Expression, ()> {
let lexerdef = grammar_l::lexerdef(); let lexerdef = expr_only_l::lexerdef();
let lexer = lexerdef.lexer(&input); let lexer = lexerdef.lexer(&input);
let (res, errs) = grammar_y::parse(&lexer); let (res, errs) = expr_only_y::parse(&lexer);
if let Some(parsed_res) = res { if let Some(parsed_res) = res {
parsed_res parsed_res
} else { } else {
@@ -30,13 +32,9 @@ fn parse_str(input: &str) -> Result<Expression, ()> {
} }
} }
#[test] #[test]
fn test_int_literal() { fn test_int_literal() {
let lexer = grammar_l::lexerdef();
let valid_ints = vec!["1", "1", "100000000000000000", "1234567890", "1234567890"];
let invalid_ints = vec!["01", "AAAAAAAAAAAAAAA", "-1"];
let matches_parsed_int = |s: &str| match parse_str(s) { let matches_parsed_int = |s: &str| match parse_expr(s) {
Ok(i) => matches!(i, Expression::Lit(Literal::Int(_))), Ok(i) => matches!(i, Expression::Lit(Literal::Int(_))),
Err(_) => false, Err(_) => false,
}; };

View File

@@ -1,2 +1 @@
mod expressions;
mod literals; mod literals;

View File

@@ -0,0 +1 @@
13 / 14 % 17 *