diff --git a/.gitignore b/.gitignore index a5ff07f..0dc7ebb 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ # already existing elements were commented out #/target +/tmp diff --git a/program.trv b/program.trv index 28e349b..fd640f1 100644 --- a/program.trv +++ b/program.trv @@ -2,7 +2,7 @@ func is_greater_than_44(params : Integer) -> Integer { let x : Integer = params; - if x > 44 is True then { + if x > 44 then { 1 } else { @@ -10,11 +10,10 @@ func is_greater_than_44(params : Integer) -> Integer { } } - func otherFunction() -> Boolean { let number : Integer = 20; let is_greater : Integer = is_greater_than_44(number); - if is_greater == 0 is True then { + if is_greater == 0 then { print("Didnt work the first time!"); } while is_greater == 0 do { diff --git a/simple.trv b/simple.trv new file mode 100644 index 0000000..ecb9a22 --- /dev/null +++ b/simple.trv @@ -0,0 +1,13 @@ +func main() -> Integer { + let num : Integer = 0; + + while num < 10 do { + num = 11; + } + if num > 10 { + num = 11; + } else { + num = 11; + } + return num; +} diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index 3d24724..53d523d 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -1,5 +1,3 @@ -use std::{any::Any, string}; - use crate::lexer::token::{Token, TokenType}; @@ -42,43 +40,46 @@ impl Lexer { tokens.push(self.assign_or_equals()); } ':' => { - tokens.push(self.simple_token(TokenType::Colon)); + tokens.push(self.simple_token(ch.to_string(),TokenType::Colon)); } '+' => { - tokens.push(self.simple_token(TokenType::Plus)); + tokens.push(self.simple_token(ch.to_string(),TokenType::Plus)); } '-' => { tokens.push(self.minus_or_arrow()); } '*' => { - tokens.push(self.simple_token(TokenType::Multiply)); + tokens.push(self.simple_token(ch.to_string(),TokenType::Multiply)); + } + '/' => { + tokens.push(self.simple_token(ch.to_string(),TokenType::Division)); } '{' => { - tokens.push(self.simple_token(TokenType::LeftBrace)); + tokens.push(self.simple_token(ch.to_string(),TokenType::LeftBrace)); } '}' => { - tokens.push(self.simple_token(TokenType::RightBrace)); + tokens.push(self.simple_token(ch.to_string(),TokenType::RightBrace)); } '(' => { - tokens.push(self.simple_token(TokenType::LeftParen)); + tokens.push(self.simple_token(ch.to_string(),TokenType::LeftParen)); } ')' => { - tokens.push(self.simple_token(TokenType::RightParen)); + tokens.push(self.simple_token(ch.to_string(),TokenType::RightParen)); } '>' =>{ - tokens.push(self.simple_token(TokenType::GreaterThan)); + tokens.push(self.simple_token(ch.to_string(),TokenType::GreaterThan)); } '<' =>{ - tokens.push(self.simple_token(TokenType::LessThan)); + tokens.push(self.simple_token(ch.to_string(),TokenType::LessThan)); } ';' =>{ - tokens.push(self.simple_token(TokenType::Semicolon)); + tokens.push(self.simple_token(ch.to_string(),TokenType::Semicolon)); } '"' => { tokens.push(self.read_string_literal()); } ',' => { - tokens.push(self.simple_token(TokenType::Comma)); + tokens.push(self.simple_token(ch.to_string(),TokenType::Comma)); } '#' => { self.read_comment(); @@ -86,6 +87,7 @@ impl Lexer { _ => panic!("Suuuper wrongdog in here, unexpected char '{}' at {}:{}", ch, self.line, self.column), } } + tokens.push(self.simple_token("EOF".to_string(),TokenType::EOF)); tokens } fn current_char(&self) -> Option { @@ -103,28 +105,29 @@ impl Lexer { self.column = 1; } - fn simple_token(&mut self, token_type: TokenType) -> Token { + fn simple_token(&mut self,value: String, token_type: TokenType) -> Token { let start_col_num = self.column; self.advance(); - Token::new(token_type, self.line, start_col_num) + Token::new(value, token_type, self.line, start_col_num) } fn assign_or_equals(&mut self) -> Token{ let original_col = self.column; self.advance(); if self.current_char().unwrap() == '=' { - Token::new(TokenType::Equals, self.line, original_col) + Token::new("=".to_string(),TokenType::Equals, self.line, original_col) } else { - Token::new(TokenType::Assign, self.line, original_col) + Token::new("==".to_string(),TokenType::Assign, self.line, original_col) } } fn minus_or_arrow(&mut self) -> Token { let original_col = self.column; self.advance(); if self.current_char().unwrap() == '>' { - Token::new(TokenType::Arrow, self.line, original_col) + self.advance(); + Token::new("->".to_string(),TokenType::Arrow, self.line, original_col) } else { - Token::new(TokenType::Minus, self.line, original_col) + Token::new("-".to_string(),TokenType::Minus, self.line, original_col) } } fn read_comment(&mut self) { @@ -156,7 +159,7 @@ impl Lexer { } } let num = num_string.parse::().unwrap(); - Token::new(TokenType::IntegerLiteral(num), self.line, start_col_num) + Token::new(num_string, TokenType::IntegerLiteral, self.line, start_col_num) } fn read_string_literal(&mut self) -> Token{ @@ -177,7 +180,7 @@ impl Lexer { } } } - Token::new(TokenType::StringLiteral(the_litteral), self.line, start_col_num) + Token::new(the_litteral.clone(), TokenType::StringLiteral, self.line, start_col_num) } fn read_identifier(&mut self, first_ch: char) -> Token { @@ -200,22 +203,22 @@ impl Lexer { } fn give_keyword_or_literal_token(&mut self, name: &str, line: usize, col: usize) -> Token{ match name { - "let" => Token::new(TokenType::Let, line, col), - "func" => Token::new(TokenType::Func, line, col), - "if" => Token::new(TokenType::If, line, col), - "then" => Token::new(TokenType::Then, line, col), - "else" => Token::new(TokenType::Else, line, col), - "not" => Token::new(TokenType::Not, line, col), - "while" => Token::new(TokenType::While, line, col), - "print" => Token::new(TokenType::Print, line, col), - "do" => Token::new(TokenType::Do, line, col), - "is" => Token::new(TokenType::Is, line, col), - "Integer"=> Token::new(TokenType::Integer, line, col), - "Boolean"=> Token::new(TokenType::Boolean, line, col), - "True" => Token::new(TokenType::True, line, col), - "False" => Token::new(TokenType::False, line, col), - "Eof" => Token::new(TokenType::Eof, line, col), - _ => Token::new(TokenType::Identifier(name.to_string()), line, col), + "let" => Token::new("let".to_string(), TokenType::Let, line, col), + "func" => Token::new("func".to_string(), TokenType::Func, line, col), + "if" => Token::new("if".to_string(), TokenType::If, line, col), + "then" => Token::new("then".to_string(), TokenType::Then, line, col), + "else" => Token::new("else".to_string(), TokenType::Else, line, col), + "not" => Token::new("not".to_string(), TokenType::Not, line, col), + "while" => Token::new("while".to_string(), TokenType::While, line, col), + "print" => Token::new("print".to_string(), TokenType::Print, line, col), + "do" => Token::new("do".to_string(), TokenType::Do, line, col), + "is" => Token::new("is".to_string(), TokenType::Is, line, col), + "Integer"=> Token::new("Integer".to_string(), TokenType::Integer, line, col), + "Boolean"=> Token::new("Boolean".to_string(), TokenType::Boolean, line, col), + "return"=> Token::new("Return".to_string(), TokenType::Return, line, col), + "True" => Token::new("True".to_string(), TokenType::BooleanLiteral, line, col), + "False" => Token::new("False".to_string(), TokenType::BooleanLiteral, line, col), + _ => Token::new(name.to_string(), TokenType::Identifier, line, col), } } } @@ -248,9 +251,10 @@ mod tests{ let actual_token_vec: Vec = lex.tokenize(); let expected: Vec = vec![ - Token::new(TokenType::Identifier("abc_def".to_string()), 1, 1), - Token::new(TokenType::Assign, 1, 9), - Token::new(TokenType::IntegerLiteral(2), 1, 11), + Token::new("abc_def".to_string(), TokenType::Identifier, 1, 1), + Token::new("=".to_string(), TokenType::Assign, 1, 9), + Token::new(2.to_string(), TokenType::IntegerLiteral, 1, 11), + Token::new("EOF".to_string(), TokenType::EOF, 1, 12), ]; assert_eq!(actual_token_vec, expected); @@ -259,14 +263,16 @@ mod tests{ fn reading_comments_tokenize_lexer_line_col_are_correct(){ let mut lex: Lexer = Lexer::new("#abc_def = 2\n".to_string()); lex.tokenize(); - assert_eq!((lex.line, lex.column), (2,1)); + assert_eq!((lex.line, lex.column), (2,2)); } #[test] - fn reading_comments_tokenize_returns_empty_vector(){ + fn reading_comments_tokenize_returns_eof_vector(){ let mut lex: Lexer = Lexer::new("#abc_def = 2\n".to_string()); let actual_token_vec: Vec = lex.tokenize(); - let expected: Vec = vec![]; + let expected: Vec = vec![ + Token::new("EOF".to_string(), TokenType::EOF, 2, 1) + ]; assert_eq!(actual_token_vec, expected); } @@ -276,9 +282,10 @@ mod tests{ let actual_token_vec: Vec = lex.tokenize(); let expected: Vec = vec![ - Token::new(TokenType::StringLiteral("\"test\"".to_string()), 1, 1) + Token::new("\"test\"".to_string(), TokenType::StringLiteral, 1, 1), + Token::new("EOF".to_string(), TokenType::EOF, 1, 7) ]; assert_eq!(actual_token_vec, expected); } -} \ No newline at end of file +} diff --git a/src/lexer/token.rs b/src/lexer/token.rs index d3c7e0a..b9d04f9 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -1,3 +1,5 @@ +use std::fmt; + #[derive(Debug, Clone, PartialEq)] pub enum TokenType { // Keywords @@ -17,13 +19,12 @@ pub enum TokenType { Boolean, // Literals - True, - False, - IntegerLiteral(i64), - StringLiteral(String), + BooleanLiteral, + IntegerLiteral, + StringLiteral, // Identifiers - Identifier(String), + Identifier, // Operators Colon, // : @@ -35,6 +36,7 @@ pub enum TokenType { Plus, // + Minus, // - Multiply, // * + Division, // Punctuation LeftParen, // ( @@ -43,27 +45,91 @@ pub enum TokenType { RightBrace, // } Comma, // , Semicolon, // ; - - // Special - Eof, + //special + Return, + EOF, // End of file } #[derive(Debug, Clone)] pub struct Token { + pub value: String, pub token_type: TokenType, pub line: usize, pub column: usize, } impl Token { - pub fn new(token_type: TokenType, line: usize, column: usize) -> Self { + pub fn new(value: String,token_type: TokenType, line: usize, column: usize) -> Self { Token { + value:value, token_type, line, column, } } } +impl fmt::Display for TokenType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + // Keywords + TokenType::Func => write!(f, "func"), + TokenType::Let => write!(f, "let"), + TokenType::If => write!(f, "if"), + TokenType::Then => write!(f, "then"), + TokenType::Else => write!(f, "else"), + TokenType::Not => write!(f, "not"), + TokenType::While => write!(f, "while"), + TokenType::Print => write!(f, "print"), + TokenType::Do => write!(f, "do"), + TokenType::Is => write!(f, "is"), + + // Types + TokenType::Integer => write!(f, "Integer"), + TokenType::Boolean => write!(f, "Boolean"), + + // Literals + TokenType::BooleanLiteral => write!(f, "BooleanLiteral"), + TokenType::IntegerLiteral => write!(f, "IntegerLiteral"), + TokenType::StringLiteral => write!(f, "StringLiteral"), + + // Identifiers + TokenType::Identifier => write!(f, "Identifier"), + + // Operators + TokenType::Colon => write!(f, ":"), + TokenType::Arrow => write!(f, "->"), + TokenType::Assign => write!(f, "="), + TokenType::GreaterThan => write!(f, ">"), + TokenType::LessThan => write!(f, "<"), + TokenType::Equals => write!(f, "=="), + TokenType::Plus => write!(f, "+"), + TokenType::Minus => write!(f, "-"), + TokenType::Multiply => write!(f, "*"), + TokenType::Division => write!(f, "/"), + + + // Punctuation + TokenType::LeftParen => write!(f, "("), + TokenType::RightParen => write!(f, ")"), + TokenType::LeftBrace => write!(f, "{{"), + TokenType::RightBrace => write!(f, "}}"), + TokenType::Comma => write!(f, ","), + TokenType::Semicolon => write!(f, ";"), + TokenType::EOF => write!(f, "EOF"), + TokenType::Return => write!(f, "Return"), + } + } +} + +impl fmt::Display for Token { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{} at {}:{}", + self.token_type, self.line, self.column + ) + } +} impl PartialEq for Token { fn eq(&self, other: &Self) -> bool { self.token_type == other.token_type && diff --git a/src/main.rs b/src/main.rs index ef90e80..972f265 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,34 +1,31 @@ +mod parser; mod lexer; +mod semantic; use std::env; use std::fs; - +use lexer::token::Token; use lexer::lexer::Lexer; // adjust if needed +use parser::parser::AST; +use parser::parser::Parser; fn main() { let args: Vec = env::args().collect(); - if args.len() < 3 { + if args.len() < 2 { eprintln!("Usage: triviC "); std::process::exit(1); } - let filename = &args[2]; + let filename = &args[1]; let source = fs::read_to_string(filename) .expect("Failed to read file"); - let mut lexer = Lexer::new(source); - let _tokens = lexer.tokenize(); + let mut lexer: Lexer = Lexer::new(source); + let _tokens: Vec = lexer.tokenize(); + let mut parser: Parser = Parser::new(_tokens); + let _ast: AST = parser.parse_program(); - println!("Lexing completed successfully."); + println!("Lexing and parsing completed successfully."); } - -#[cfg(test)] -mod tests{ - use pretty_assertions::{assert_eq}; - #[test] - fn zero_eq_zero(){ - assert_eq!(0,0); - } -} \ No newline at end of file diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..67c567f --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1 @@ +pub mod parser; diff --git a/src/parser/parser.rs b/src/parser/parser.rs new file mode 100644 index 0000000..5717655 --- /dev/null +++ b/src/parser/parser.rs @@ -0,0 +1,452 @@ +use crate::lexer::token::{Token, TokenType}; +#[derive(Debug, Clone, PartialEq)] +pub enum Stmt { + Let(String, Type, Expr), + AssignStatement(String, Expr), + ExprStatement(Expr), + If { + condition: Expr, + block: Block, + option: Option, + }, + While { + expr: Expr, + block: Block, + }, + Print(Expr), + Return(Expr) +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Function { + pub name: String, + pub params: Vec, + pub return_type: Type, + pub body: Block, +} +#[derive(Debug, Clone, PartialEq)] +pub struct Block { + pub statements: Vec, +} +#[derive(Debug, Clone, PartialEq)] +pub struct Param { + pub name: String, + pub param_type: Type, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Type { + Integer, + Boolean, +} +#[derive(Debug, Clone, PartialEq)] +pub enum Expr { + IntegerLiteral(i64), + BooleanLiteral(bool), + StringLiteral(String), + Identifier(String), + BinaryOp(Box, BinOp, Box), + UnaryOp(UnOp, Box), + Call(Vec), //I do not understand what this one is, but the expert recommended it +} + +#[derive(Debug, Clone, PartialEq)] +pub enum BinOp { + Add, + Sub, + Mul, + Equals, + NotEquals, + GreaterThan, + LessThan, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum UnOp { + Not, +} +pub type AST = Vec; +pub struct Parser { + tokens: Vec, + position: usize, +} + +impl Parser { + pub fn new(token_vector: Vec) -> Self { + Self { + tokens: token_vector, + position: 0, + } + } + pub fn parse_program(&mut self) -> AST { + let mut ast: AST = Vec::new(); + while !self.match_token(TokenType::EOF) { + match self.current().token_type { + TokenType::Func => { + ast.push(self.parse_func()); + } + _ => panic!( + "Wrong token {} at {}:{}", + self.current(), + self.current().line, + self.current().column, + ), + } + } + ast + } + //Todo: Implement the following funcs/helper funcs-> + fn parse_func(&mut self) -> Function { + let _ = self.expect(TokenType::Func); + + let name = self.expect(TokenType::Identifier).unwrap().value; + let _ = self.expect(TokenType::LeftParen); + + let mut params = Vec::new(); + while !self.match_token(TokenType::RightParen) { + let name = self.expect(TokenType::Identifier).unwrap().value; + let _ = self.expect(TokenType::Colon); + + let typevalue = match self.current().token_type { + TokenType::Integer => { + let _ = self.expect(TokenType::Integer); + Type::Integer + } + TokenType::Boolean => { + let _ = self.expect(TokenType::Boolean); + Type::Boolean + } + _ => panic!("Unknown type for parameter"), + }; + params.push(Param { + name, + param_type: typevalue, + }); + if !self.match_token(TokenType::RightParen) { + let _ = self.expect(TokenType::Comma); + } + } + + let _ = self.expect(TokenType::RightParen); + + let _ = self.expect(TokenType::Arrow); + + let return_type = match self.current().token_type { + TokenType::Integer => { + let _ = self.expect(TokenType::Integer); + Type::Integer + } + TokenType::Boolean => { + let _ = self.expect(TokenType::Boolean); + Type::Boolean + } + _ => panic!( + "Unknown return type for function: {}", + self.current().token_type + ), + }; + + let _ = self.expect(TokenType::LeftBrace); + let body: Block = self.parse_block(); + let _ = self.expect(TokenType::RightBrace); + Function { + name, + params, + return_type, + body, + } + } + + fn parse_block(&mut self) -> Block { + let mut statements: Vec = Vec::new(); + + while !self.match_token(TokenType::EOF) && !self.match_token(TokenType::RightBrace) { + if self.match_token(TokenType::Let) { + statements.push(self.parse_let()); //dingdong test commit after revert; + } else if self.match_token(TokenType::If) { + statements.push(self.parse_if()); + } else if self.match_token(TokenType::While) { + statements.push(self.parse_while()); + } else if self.match_token(TokenType::Identifier) && self.peek(TokenType::Assign) + { + statements.push(self.parse_assignment()); + } else if self.match_token(TokenType::Return) { + statements.push(self.parse_return()); + } + else { + let expression = self.parse_expression(); + statements.push(Stmt::ExprStatement(expression)); + } + } + Block { statements } + } + fn parse_return(&mut self) -> Stmt{ + self.consume(); + let expr = self.parse_expression(); + let _ = self.expect(TokenType::Semicolon); + Stmt::Return(expr) + } + + fn parse_assignment(&mut self) -> Stmt{ + let var_name = self.expect(TokenType::Identifier).unwrap().value; + let _ = self.expect(TokenType::Assign); + let expr = self.parse_expression(); + let _ = self.expect(TokenType::Semicolon); + Stmt::AssignStatement(var_name, expr) + } + + fn parse_while(&mut self) -> Stmt{ + self.consume(); + let expr = self.parse_expression(); + let _ = self.expect(TokenType::Do); + let _ = self.expect(TokenType::LeftBrace); + let block = self.parse_block(); + let _ = self.expect(TokenType::RightBrace); + Stmt::While { expr, block } + } + + fn parse_if(&mut self) -> Stmt{ + self.consume(); + let condition = self.parse_expression(); + let _ = self.expect(TokenType::Then); + let _ = self.expect(TokenType::LeftBrace); + let block = self.parse_block(); + let _ = self.expect(TokenType::RightBrace); + let option = match self.current().token_type { + TokenType::Else => { + self.consume(); + let _ = self.expect(TokenType::LeftBrace); + Some(self.parse_block()) + } + _ => None + }; + let _ = self.expect(TokenType::RightBrace); + Stmt::If { condition , block, option } + } + + fn parse_let(&mut self) -> Stmt { + self.consume(); + let var_name = self.expect(TokenType::Identifier).unwrap().value; + let _ = self.expect(TokenType::Colon); + let type_of_var = match self.current().token_type { + TokenType::Integer => Type::Integer, + TokenType::Boolean => Type::Boolean, + _ => panic!( + "Expected type, got something else at {}:{}", + self.current().line, + self.current().column + ), + }; + self.consume(); + let _ = self.expect(TokenType::Assign); + let expr = self.parse_expression(); + let _ = self.expect(TokenType::Semicolon); + Stmt::Let(var_name, type_of_var, expr) + } + + fn token_to_binop(&self, tt: &TokenType) -> BinOp { + match tt { + TokenType::Plus => BinOp::Add, + TokenType::Minus => BinOp::Sub, + TokenType::Multiply => BinOp::Mul, + TokenType::Equals => BinOp::Equals, + TokenType::GreaterThan => BinOp::GreaterThan, + TokenType::LessThan => BinOp::LessThan, + _ => panic!("Not a binary operator"), + } + } + + fn parse_expression(&mut self) -> Expr { + let tok = self.consume(); + match tok.token_type { + TokenType::IntegerLiteral | TokenType::Identifier => { + if self.match_any(&[ + TokenType::Minus, + TokenType::Plus, + TokenType::Multiply, + TokenType::Division, + TokenType::GreaterThan, + TokenType::LessThan, + TokenType::Equals, + ]) { + let op_token = self.consume(); + let op = self.token_to_binop(&op_token.token_type); + let right = self.parse_expression(); + Expr::BinaryOp( + match tok.token_type { + TokenType::IntegerLiteral => { + Box::new(Expr::IntegerLiteral(tok.value.parse::().unwrap())) + } + TokenType::Identifier => Box::new(Expr::Identifier(tok.value)), + _ => panic!("SOMETHING IS WRONGDOG"), + }, + op, + Box::new(right), + ) + }else { + match tok.token_type{ + TokenType::IntegerLiteral => Expr::IntegerLiteral(tok.value.parse::().unwrap()), + TokenType::Identifier => Expr::Identifier(tok.value), + _ => panic!("tokentype wrong, should be integer literal or identifyer") + } + } + } + TokenType::BooleanLiteral => { + if self.match_token(TokenType::Equals) { + let op_token = self.consume(); + let op = self.token_to_binop(&op_token.token_type); + let right = self.parse_expression(); + Expr::BinaryOp( + Box::new(Expr::BooleanLiteral(tok.value.parse::().unwrap())), + op, + Box::new(right), + ) + } else if self.match_token(TokenType::Not) { + let _ = self.expect(TokenType::Equals); + let op = BinOp::NotEquals; + let right = self.parse_expression(); + Expr::BinaryOp( + Box::new(Expr::BooleanLiteral(tok.value.parse::().unwrap())), + op, + Box::new(right), + ) + } else { + Expr::BooleanLiteral(tok.value.parse::().unwrap()) + } + } + TokenType::StringLiteral => { + if self.match_token(TokenType::Equals) { + let op_token = self.consume(); + let op = self.token_to_binop(&op_token.token_type); + let right = self.expect(TokenType::StringLiteral).unwrap(); + Expr::BinaryOp( + Box::new(Expr::StringLiteral(tok.value.parse::().unwrap())), + op, + Box::new(Expr::StringLiteral(right.value.parse::().unwrap())), + ) + } else if self.match_token(TokenType::Not) { + let _ = self.expect(TokenType::Equals); + let op = BinOp::NotEquals; + let right = self.expect(TokenType::StringLiteral).unwrap(); + Expr::BinaryOp( + Box::new(Expr::StringLiteral(tok.value.parse::().unwrap())), + op, + Box::new(Expr::StringLiteral(right.value.parse::().unwrap())), + ) + } else { + Expr::StringLiteral(tok.value.clone()) + } + } + TokenType::Not => { + let exprs = self.parse_expression(); + Expr::UnaryOp(UnOp::Not, Box::new(exprs)) + } + _ => panic!("Unexpected token {:?} in expression", tok.token_type), + } + } + + //Here im making some helper functions i reckon mate + fn peek(&self, token_type: TokenType) -> bool { + if self.position + 1 < self.tokens.len() { + self.tokens.get(self.position + 1).unwrap().token_type == token_type + } else { + false + } + } + + fn current(&self) -> &Token { + self.tokens.get(self.position).unwrap() + } + + fn advance(&mut self) { + self.position += 1; + } + fn consume(&mut self) -> Token { + let token = self.current().clone(); + self.advance(); + token + } + fn match_token(&self, expected: TokenType) -> bool { + self.current().token_type == expected + } + fn match_any(&self, types: &[TokenType]) -> bool { + types.contains(&self.current().token_type) + } + fn expect(&mut self, expected: TokenType) -> Result { + let tok = self.current(); + if tok.token_type == expected { + Ok(self.consume()) + } else { + Err(format!( + "Expected {:?} at {}:{}, found {:?}", + expected, tok.line, tok.column, tok.token_type + )) + } + } +} + +mod tests { + + use crate::parser::parser::Parser; + use crate::{ + lexer::{ + lexer::Lexer, + token::{Token, TokenType}, + }, + parser::parser::{AST, BinOp, Block, Expr, Function, Type}, + }; + + #[test] + fn test_parser_parses_correct_ast() { + use crate::parser::parser::Stmt; + use std::fs; + let source = fs::read_to_string("simple.trv").expect("Failed to read file"); + let mut lexer = Lexer::new(source); + let tokens = lexer.tokenize(); + let mut parser = Parser::new(tokens); + let actual = parser.parse_program(); + + let expected: AST = vec![Function { + name: "main".to_string(), + params: vec![], + return_type: Type::Integer, + body: Block { + statements: vec![ + Stmt::Let("num".to_string(), Type::Integer, Expr::IntegerLiteral(0)), + Stmt::While{ + expr: Expr::BinaryOp( + Box::new(Expr::Identifier("num".to_string())), + BinOp::LessThan, + Box::new(Expr::IntegerLiteral(10)) + ), + block: Block{ + statements: vec![ + Stmt::AssignStatement("num".to_string(), Expr::IntegerLiteral(11)) + ] + } + }, + Stmt::If { + condition: Expr::BinaryOp( + Box::new(Expr::Identifier("num".to_string())), + BinOp::GreaterThan, + Box::new(Expr::IntegerLiteral(10)) + ), + block: Block{ + statements: vec![ + Stmt::AssignStatement("num".to_string(), Expr::IntegerLiteral(11)) + ] + }, + option: Some(Block{ + statements: vec![ + Stmt:: AssignStatement("num".to_string(), Expr::IntegerLiteral(11)) + ] + }) + }, + Stmt::Return(Expr::Identifier("num".to_string())) + ], + }, + }]; + + assert_eq!(actual, expected); + } +} diff --git a/src/semantic/mod.rs b/src/semantic/mod.rs index 3533058..eed35c2 100644 --- a/src/semantic/mod.rs +++ b/src/semantic/mod.rs @@ -1,2 +1 @@ -pub mod Type; -pub mod SymbolTable; \ No newline at end of file +pub mod symbol_table; \ No newline at end of file diff --git a/src/semantic/symbol_table.rs b/src/semantic/symbol_table.rs index 815f864..3871dc5 100644 --- a/src/semantic/symbol_table.rs +++ b/src/semantic/symbol_table.rs @@ -1,11 +1,73 @@ use std::collections::HashMap; + +#[derive(Debug, Clone, PartialEq)] +pub enum Type { + Integer, + Boolean, + String, + Void, //mayhaps not needed dunno, only if we allow functiions that dont return anything + Function { + params: Vec, + return_type: Box, + }, +} + pub struct Symbol { + name: String, symbol_type: Type, scope_level: usize, //perchance we need to add some more info, for functions (return types, param names) } +impl Symbol { + pub fn new(_name: String, s_type: Type, scope_lvl: usize ) -> Self{ + Self{ + name: _name, + symbol_type: s_type, + scope_level: scope_lvl, + } + } +} + pub struct SymbolTable { - scopes: Vec>, - //key = name i figured, so no "name" property in Symbol struct -} \ No newline at end of file + scopes: Vec>, +} +impl SymbolTable { + pub fn new() -> Self { + Self{ + scopes: Vec::new(), + } + } + pub fn enter_scope(&mut self) { + self.scopes.push(HashMap::new()); + } + pub fn exit_scope(&mut self) { + if self.scopes.len() > 1 { + self.scopes.pop(); + } + } + pub fn insert(&mut self, symbol: Symbol) -> Result<(), String> { + let current = self.scopes.last_mut().unwrap(); + + if current.contains_key(&symbol.name) { + return Err(format!("Symbol '{}' already declared in this scope", symbol.name)); + } + + current.insert(symbol.name.clone(), symbol); + Ok(()) + } + pub fn lookup(&self, name: &str) -> Option<&Symbol> { + for scope in self.scopes.iter().rev() { + if let Some(symbol) = scope.get(name) { + return Some(symbol); + } + } + None + } + pub fn lookup_current(&self, name: &str) -> Option<&Symbol> { + self.scopes.last()?.get(name) + } + + + +} diff --git a/src/semantic/types.rs b/src/semantic/types.rs deleted file mode 100644 index da7232b..0000000 --- a/src/semantic/types.rs +++ /dev/null @@ -1,5 +0,0 @@ -#[derive(Debug, Clone, PartialEq)] -pub enum Type { - Integer, - Boolean, -}