diff --git a/build.rs b/build.rs index 346dd2f..a827baf 100644 --- a/build.rs +++ b/build.rs @@ -5,10 +5,21 @@ fn main() { CTLexerBuilder::new() .lrpar_config(|ctp| { ctp.yacckind(YaccKind::Grmtools) - .grammar_in_src_dir("grammar.y") + .grammar_in_src_dir("parsers/fudge.y") .unwrap() }) - .lexer_in_src_dir("grammar.l") + .lexer_in_src_dir("lexers/fudge.l") + .unwrap() + .build() + .unwrap(); + + CTLexerBuilder::new() + .lrpar_config(|ctp| { + ctp.yacckind(YaccKind::Grmtools) + .grammar_in_src_dir("parsers/expr_only.y") + .unwrap() + }) + .lexer_in_src_dir("lexers/expr_only.l") .unwrap() .build() .unwrap(); diff --git a/src/ast/errors.rs b/src/ast/errors.rs new file mode 100644 index 0000000..7e47ffc --- /dev/null +++ b/src/ast/errors.rs @@ -0,0 +1,14 @@ +use std::fmt::Display; + +pub struct ExpectedTokenError { + line: usize, + column_start: usize, + actual: &'static str, + expected: Vec<&'static str>, +} + +impl Display for ExpectedTokenError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "line {} column {}:\nExpected one of {} but got {}", self.line, self.column_start, self.expected.join(", "), self.actual) + } +} \ No newline at end of file diff --git a/src/ast/literal_parsers.rs b/src/ast/literal_parsers.rs new file mode 100644 index 0000000..4dc0943 --- /dev/null +++ b/src/ast/literal_parsers.rs @@ -0,0 +1,19 @@ +pub fn parse_int(s: &str) -> Result { + match s.parse::() { + Ok(val) => Ok(val), + Err(_) => { + eprintln!("{} cannot be represented as a i64", s); + Err(()) + } + } +} + +pub fn parse_float(s: &str) -> Result { + match s.parse::() { + Ok(val) => Ok(val), + Err(_) => { + eprintln!("{} cannot be represented as a f64", s); + Err(()) + } + } +} \ No newline at end of file diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 7e84e3f..354e0f1 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1,3 +1,12 @@ +pub mod literal_parsers; +mod errors; + +#[derive(Debug, Clone, PartialEq)] +pub struct Program { + pub definition_list: Vec, + pub main_def: Expression, +} + #[derive(Debug, Clone, PartialEq)] pub enum Literal { Int(i64), @@ -19,22 +28,22 @@ pub enum Expression { #[derive(Debug, Clone, PartialEq)] pub enum Definition { Binding { - identifier_info: TypedIdentifier, + name: String, value: Expression, }, + StructDef(StructDefinition), + EnumDef(EnumDefinition), } #[derive(Debug, Clone, PartialEq)] pub struct TypedIdentifier { - identifier_uuid: usize, - - id: String, - type_arg: TypeArg, + pub id: String, + pub type_arg: TypeArg, } #[derive(Debug, Clone, PartialEq)] pub enum TypeArg { - Rank0(AST_TypeId), + Rank0(TypeId), RankN { from: Box, to: Box, @@ -42,8 +51,35 @@ pub enum TypeArg { } #[derive(Debug, Clone, PartialEq)] -pub struct AST_TypeId { - type_uuid: usize, - - string_repr: String, +pub struct TypeId { + pub string_repr: String, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct StructDefinition { + pub name: String, + pub body: StructDefLiteral, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct StructDefLiteral { + pub members: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct EnumDefinition { + pub name: String, + pub type_args: Vec, + pub members: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct EnumdefLiteral { + pub body: Vec +} + +#[derive(Debug, Clone, PartialEq)] +pub struct OptionallyTypedIdentifier { + pub name: String, + pub type_arg: Option, } diff --git a/src/expressions.lalrpop b/src/expressions.lalrpop index d506f20..64f777f 100644 --- a/src/expressions.lalrpop +++ b/src/expressions.lalrpop @@ -13,6 +13,6 @@ pub Atom: Literal = { "(" ")" => f, }; -pub Int: i64 = => s.parse::().unwrap(); -pub Float: f64 = => s.parse::().unwrap(); +pub Int: i64 = => s.clone().parse::().unwrap(); +pub Float: f64 = => s.clone().parse::().unwrap(); // pub Identifier: &'input str = => s; diff --git a/src/grammar.ebnf b/src/grammar.ebnf index a1113c7..41dfffb 100644 --- a/src/grammar.ebnf +++ b/src/grammar.ebnf @@ -1,10 +1,20 @@ -Expr := Identifier -| Expr Expr -| "&" Identifier "." Expr -| "(" Expr ")" -Declaration := Identifier ":=" Expr ";" -Abstraction := "&" Identifier "." Expr ";" + ::= 'Type' * ':=' 'Struct' '{' + ('&' )* + '}' -Application := Identifier "<|" Expr \ No newline at end of file + ::= 'type' * ':=' 'Enum' '{' (':' )? + ('|' (':' >typearg>)? )* + '}' + + ::= ':' + + +Type Pair T := Struct { left: T + & right: T } + +Type Quartet T := Struct { left: T & middle: T & right: T } + +Type Optional T := Enum { Some: T + | None } \ No newline at end of file diff --git a/src/grammar.l b/src/grammar.l deleted file mode 100644 index fbe8faa..0000000 --- a/src/grammar.l +++ /dev/null @@ -1,9 +0,0 @@ -%% -[1-9][0-9]* "LITINT" -[-+]?([0-9]*[.][0-9]+|[0-9]*([.][0-9]+)?[Ee][-+]?[1-9][0-9]*) "LITFLOAT" -[_a-zA-Z][_0-9a-zA-Z]* "IDENT" -\+ "ADD" -\* "MUL" -\( "(" -\) ")" -[\t ]+ ; diff --git a/src/grammar.y b/src/grammar.y deleted file mode 100644 index 11d1953..0000000 --- a/src/grammar.y +++ /dev/null @@ -1,52 +0,0 @@ -%start Expr -%% -Expr -> Result: -Expr "ADD" MulExpr { Ok(Expression::Add(Box::new( $1? ), Box::new( $3? ))) } -| MulExpr { $1 } -; - -MulExpr -> Result: - MulExpr "MUL" ConstExpr { Ok( Expression::Mul(Box::new($1?), Box::new($3?)) ) } -| ConstExpr { $1 } -; - -ConstExpr -> Result: - '(' Expr ')' { $2 } -| 'LITINT' { - let v = $1.map_err( | _| ())?; - Ok(Expression::Lit(Literal::Int(parse_int( $lexer.span_str(v.span()))?))) -} -| "LITFLOAT" { - let v = $1.map_err( | _| ())?; - Ok(Expression::Lit(Literal::Float(parse_float( $lexer.span_str(v.span()))?))) -} -| "IDENT" { -let v = $1.map_err( | _| ())?; - Ok(Expression::Ident(String::from( $lexer.span_str(v.span())))) -} -; -%% -// Any functions here are in scope for all the grammar actions above. - -fn parse_int(s: &str) -> Result { - match s.parse::() { - Ok(val) => Ok(val), - Err(_) => { - eprintln!("{} cannot be represented as a i64", s); - Err(()) - } - } -} - -fn parse_float(s: &str) -> Result { - match s.parse::() { - Ok(val) => Ok(val), - Err(_) => { - eprintln!("{} cannot be represented as a f64", s); - Err(()) - } - } -} - - -use crate::ast::{Literal, Expression}; \ No newline at end of file diff --git a/src/lexers/expr_only.l b/src/lexers/expr_only.l new file mode 100644 index 0000000..416e1d5 --- /dev/null +++ b/src/lexers/expr_only.l @@ -0,0 +1,32 @@ +%% +[_a-zA-Z][_0-9a-zA-Z]* "ident" +[1-9][0-9]* "lit_int" +[-+]?([0-9]*[.][0-9]+|[0-9]*([.][0-9]+)?[Ee][-+]?[1-9][0-9]*) "lit_float" + +\+ "op_add" +\* "op_mul" +[-] "op_sub" +[/] "op_div" +[%] "op_mod" += "eq" + +\( "(" +\) ")" +\{ "{" +\} "}" + +[\t ]+ ; + +true "bool_true" +false "bool_false" +Struct "struct" +Enum "enum" +if "if" +then "then" +else "else" +: "colon" +; "semicolon" +let "let" +:= "assign" +Type "type" +@main "main" diff --git a/src/lexers/fudge.l b/src/lexers/fudge.l new file mode 100644 index 0000000..416e1d5 --- /dev/null +++ b/src/lexers/fudge.l @@ -0,0 +1,32 @@ +%% +[_a-zA-Z][_0-9a-zA-Z]* "ident" +[1-9][0-9]* "lit_int" +[-+]?([0-9]*[.][0-9]+|[0-9]*([.][0-9]+)?[Ee][-+]?[1-9][0-9]*) "lit_float" + +\+ "op_add" +\* "op_mul" +[-] "op_sub" +[/] "op_div" +[%] "op_mod" += "eq" + +\( "(" +\) ")" +\{ "{" +\} "}" + +[\t ]+ ; + +true "bool_true" +false "bool_false" +Struct "struct" +Enum "enum" +if "if" +then "then" +else "else" +: "colon" +; "semicolon" +let "let" +:= "assign" +Type "type" +@main "main" diff --git a/src/main.rs b/src/main.rs index c6e49fa..399d0f8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,7 @@ use crate::errors::CLIArgumentError; use lrlex::lrlex_mod; use lrpar::lrpar_mod; use std::error::Error; -use std::fmt::{Display, format}; +use std::fmt::Display; use std::io; use std::io::ErrorKind::InvalidInput; @@ -11,8 +11,8 @@ mod errors; mod optimising; mod tests; -lrlex_mod!("grammar.l"); -lrpar_mod!("grammar.y"); +lrlex_mod!("lexers/fudge.l"); +lrpar_mod!("parsers/fudge.y"); fn main() -> Result<(), Box> { let src_path = std::env::args() @@ -20,13 +20,25 @@ fn main() -> Result<(), Box> { .ok_or(Box::new(CLIArgumentError("Source File Not Provided")))?; let src_string = std::fs::read_to_string(&src_path)?; - let lexerdef = grammar_l::lexerdef(); + let lexer_def = fudge_l::lexerdef(); - let lexer = lexerdef.lexer((src_string.as_str())); - let (res, errs) = grammar_y::parse(&lexer); - if let Some(Ok(res)) = res { - println!("{:#?}", res); + let lexer = lexer_def.lexer(src_string.as_str()); + let (res, errs) = fudge_y::parse(&lexer); + match (res, errs) { + (Some(Ok(res)), _) => { + println!("Parsed succesfully:"); + println!("{:#?}", res); + }, + (Some(Err(e)), _) => { + println!("Parsing failed:"); + println!("{:#?}", e); + } + (_, vec) => { + println!("LexParseErrors encountered:"); + println!("{:#?}", vec); + } } + Ok(()) } diff --git a/src/parsers/expr_only.y b/src/parsers/expr_only.y new file mode 100644 index 0000000..721bdbf --- /dev/null +++ b/src/parsers/expr_only.y @@ -0,0 +1,41 @@ +%start Expr + +%% + +Expr -> Result: + Expr "op_add" MulExpr { Ok(Expression::Add(Box::new( $1? ), Box::new( $3? ))) } +| Expr "op_sub" MulExpr { Ok(Expression::Sub(Box::new( $1? ), Box::new( $3? ))) } +| MulExpr { $1 } +; + +MulExpr -> Result: + MulExpr "op_mul" ConstExpr { Ok( Expression::Mul(Box::new($1?), Box::new($3?)) ) } +| MulExpr "op_div" ConstExpr { Ok( Expression::Div(Box::new($1?), Box::new($3?)) ) } +| MulExpr "op_mod" ConstExpr { Ok( Expression::Mod(Box::new($1?), Box::new($3?)) ) } +| ConstExpr { $1 } +; + +ConstExpr -> Result: + '(' Expr ')' { $2 } +| 'lit_int' { + let v = $1.map_err( | _| ())?; + Ok(Expression::Lit(Literal::Int(parse_int( $lexer.span_str(v.span()))?))) +} +| "lit_float" { + let v = $1.map_err( | _| ())?; + Ok(Expression::Lit(Literal::Float(parse_float( $lexer.span_str(v.span()))?))) +} +| "ident" { +let v = $1.map_err( | _| ())?; + Ok(Expression::Ident(String::from( $lexer.span_str(v.span())))) +} +; + +%% +// Any functions here are in scope for all the grammar actions above. +use crate::ast::{Literal, Expression, Definition, StructDefinition, Program, TypedIdentifier, TypeArg,StructDefLiteral, TypeId}; +use crate::ast::literal_parsers::*; + + + + diff --git a/src/parsers/fudge.y b/src/parsers/fudge.y new file mode 100644 index 0000000..da4a5a3 --- /dev/null +++ b/src/parsers/fudge.y @@ -0,0 +1,123 @@ +%start program + +%% + +program -> Result: + definitionlist maindef { + Ok(Program { + definition_list: $1?, + main_def: $2? + }) + +} +| maindef { + Ok(Program { + definition_list: vec![], + main_def: $1? + }) +} +; + +maindef -> Result: + "main" "assign" Expr {$3} +; + +definitionlist -> Result, ()>: + definition {Ok(vec![$1?])} +| definition definitionlist { + // TO DO: Test the fuck out of this + let mut first = vec![$1?]; + first.extend($2?); + Ok(first) +} +; + +definition -> Result: + "let" "ident" "assign" Expr { + let v = $2.map_err(|_| ())?; + Ok(Definition::Binding { + name: String::from($lexer.span_str(v.span())), + value: $4? + }) +} +| "type" "ident" "assign" structdefliteral { + let v = $2.map_err( | _| ())?; + Ok(Definition::StructDef( + StructDefinition { + name: String::from($lexer.span_str(v.span())), + body: $4? + }) + ) +} +; + +Expr -> Result: + Expr "op_add" MulExpr { Ok(Expression::Add(Box::new( $1? ), Box::new( $3? ))) } +| Expr "op_sub" MulExpr { Ok(Expression::Sub(Box::new( $1? ), Box::new( $3? ))) } +| MulExpr { $1 } +; + +MulExpr -> Result: + MulExpr "op_mul" ConstExpr { Ok( Expression::Mul(Box::new($1?), Box::new($3?)) ) } +| MulExpr "op_div" ConstExpr { Ok( Expression::Div(Box::new($1?), Box::new($3?)) ) } +| MulExpr "op_mod" ConstExpr { Ok( Expression::Mod(Box::new($1?), Box::new($3?)) ) } +| ConstExpr { $1 } +; + +ConstExpr -> Result: + '(' Expr ')' { $2 } +| 'lit_int' { + let v = $1.map_err( | _| ())?; + Ok(Expression::Lit(Literal::Int(parse_int( $lexer.span_str(v.span()))?))) +} +| "lit_float" { + let v = $1.map_err( | _| ())?; + Ok(Expression::Lit(Literal::Float(parse_float( $lexer.span_str(v.span()))?))) +} +| "ident" { +let v = $1.map_err( | _| ())?; + Ok(Expression::Ident(String::from( $lexer.span_str(v.span())))) +} +; + + +structdefliteral -> Result: + "struct" "{" structdefmemberlist "}" { + Ok(StructDefLiteral { + members: $3?, + }) +} +; + +structdefmemberlist -> Result, ()>: +structdefmember {Ok(vec![$1?])} +; + +structdefmember -> Result: +"ident" "colon" typearg { + let v = $1.map_err(|_| ())?; + Ok(TypedIdentifier { + id: String::from($lexer.span_str(v.span())), + type_arg: $3? + }) +} +; + +typearg -> Result: + "ident" { + let v = $1.map_err(|_| ())?; + Ok(TypeArg::Rank0( + TypeId { + string_repr: String::from($lexer.span_str(v.span())), + }) +) +} +; +%% +// Any functions here are in scope for all the grammar actions above. +use crate::ast::{Literal, Expression, Definition, StructDefinition, Program, TypedIdentifier, TypeArg,StructDefLiteral, TypeId}; +use crate::ast::literal_parsers::*; + + + + diff --git a/src/tests/expressions.rs b/src/tests/expressions.rs deleted file mode 100644 index c1f6766..0000000 --- a/src/tests/expressions.rs +++ /dev/null @@ -1,16 +0,0 @@ -// -// -// #[test] -// fn identifiers() { -// use crate::expressions::IdentifierParser as IdentParser; -// assert!(IdentParser::new().parse("a").is_ok()); -// assert!(IdentParser::new().parse("_").is_ok()); -// assert!(IdentParser::new().parse("a_a_a_").is_ok()); -// assert!(IdentParser::new().parse("_0").is_ok()); -// assert!(IdentParser::new().parse("_a").is_ok()); -// assert!(IdentParser::new().parse("__").is_ok()); -// -// assert!(IdentParser::new().parse("0").is_err()); -// assert!(IdentParser::new().parse("0123456").is_err()); -// assert!(IdentParser::new().parse("0aaaa").is_err()); -// } diff --git a/src/tests/literals.rs b/src/tests/literals.rs index 5c82fa9..9fa2e50 100644 --- a/src/tests/literals.rs +++ b/src/tests/literals.rs @@ -1,10 +1,12 @@ use lrlex::lrlex_mod; use lrpar::lrpar_mod; -use crate::ast::{Expression, Literal}; +use crate::ast::{Expression, Literal, Program}; -lrlex_mod!("grammar.l"); -lrpar_mod!("grammar.y"); +lrlex_mod!("lexers/fudge.l"); +lrpar_mod!("parsers/fudge.y"); +lrlex_mod!("lexers/expr_only.l"); +lrpar_mod!("parsers/expr_only.y"); macro_rules! test_literal_list { @@ -19,10 +21,10 @@ macro_rules! test_literal_list { }; } -fn parse_str(input: &str) -> Result { - let lexerdef = grammar_l::lexerdef(); +fn parse_expr(input: &str) -> Result { + let lexerdef = expr_only_l::lexerdef(); let lexer = lexerdef.lexer(&input); - let (res, errs) = grammar_y::parse(&lexer); + let (res, errs) = expr_only_y::parse(&lexer); if let Some(parsed_res) = res { parsed_res } else { @@ -30,13 +32,9 @@ fn parse_str(input: &str) -> Result { } } #[test] - fn test_int_literal() { - let lexer = grammar_l::lexerdef(); - let valid_ints = vec!["1", "1", "100000000000000000", "1234567890", "1234567890"]; - let invalid_ints = vec!["01", "AAAAAAAAAAAAAAA", "-1"]; - let matches_parsed_int = |s: &str| match parse_str(s) { + let matches_parsed_int = |s: &str| match parse_expr(s) { Ok(i) => matches!(i, Expression::Lit(Literal::Int(_))), Err(_) => false, }; diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 861a239..0077ddd 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1,2 +1 @@ -mod expressions; mod literals; diff --git a/test.txt b/test.txt index e69de29..62d19a3 100644 --- a/test.txt +++ b/test.txt @@ -0,0 +1 @@ +13 / 14 % 17 * \ No newline at end of file