From ba418633297900ad50be8afa0d594e4015efbe81 Mon Sep 17 00:00:00 2001 From: pbaekgaard Date: Wed, 25 Feb 2026 10:27:35 +0100 Subject: [PATCH 1/9] ready for codegeneration --- src/codegen/codegen.rs | 50 +++++++++++ src/codegen/mod.rs | 1 + src/lexer/lexer.rs | 169 +++++++++++++++++++---------------- src/lexer/mod.rs | 2 +- src/lexer/token.rs | 19 ++-- src/main.rs | 23 ++--- src/parser/mod.rs | 1 + src/parser/parser.rs | 88 +++++++++--------- src/semantic/mod.rs | 2 +- src/semantic/symbol_table.rs | 20 ++--- 10 files changed, 221 insertions(+), 154 deletions(-) create mode 100644 src/codegen/codegen.rs create mode 100644 src/codegen/mod.rs diff --git a/src/codegen/codegen.rs b/src/codegen/codegen.rs new file mode 100644 index 0000000..3a99fad --- /dev/null +++ b/src/codegen/codegen.rs @@ -0,0 +1,50 @@ +use crate::parser::AST; + +#[derive(Debug)] +pub struct CodeGenerator {} + +impl CodeGenerator { + pub fn new() -> Self { + Self {} + } + pub fn generate(&self, ast : AST) { + + } + + fn emit(&self, instruction : String) { + + } +} + +#[cfg(test)] +mod tests { + use crate::CodeGenerator; + use crate::lexer::{lexer::Lexer, token::Token}; + use crate::parser::parser::{Parser, AST}; + + #[test] + fn can_generate_print() { + let source = r##" + func main() -> Boolean { + print("hello world"); + } + "##.to_string(); + let mut lexer: Lexer = Lexer::new(source); + let tokens: Vec = lexer.tokenize(); + let mut parser: Parser = Parser::new(tokens); + let ast: AST = parser.parse_program(); + } + + #[test] + fn can_generate_let() { + let source = r##" + func main() -> Boolean { + let x : Integer = 11; + } + "##.to_string(); + let mut lexer: Lexer = Lexer::new(source); + let tokens: Vec = lexer.tokenize(); + let mut parser: Parser = Parser::new(tokens); + let ast: AST = parser.parse_program(); + } +} diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs new file mode 100644 index 0000000..24ccbdd --- /dev/null +++ b/src/codegen/mod.rs @@ -0,0 +1 @@ +pub mod codegen; diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index 53d523d..a10dfc0 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -1,19 +1,17 @@ use crate::lexer::token::{Token, TokenType}; - - #[derive(Debug)] pub struct Lexer { input: Vec, position: usize, line: usize, - column: usize + column: usize, } impl Lexer { - pub fn new(input: String) -> Self{ + pub fn new(input: String) -> Self { Self { - input:input.chars().collect(), + input: input.chars().collect(), position: 0, line: 1, column: 1, @@ -33,61 +31,64 @@ impl Lexer { 'A'..='Z' | 'a'..='z' | '_' => { tokens.push(self.read_identifier(ch)); } - '0' ..= '9' => { + '0'..='9' => { tokens.push(self.read_number(ch)); } '=' => { tokens.push(self.assign_or_equals()); } ':' => { - tokens.push(self.simple_token(ch.to_string(),TokenType::Colon)); + tokens.push(self.simple_token(ch.to_string(), TokenType::Colon)); } '+' => { - tokens.push(self.simple_token(ch.to_string(),TokenType::Plus)); + tokens.push(self.simple_token(ch.to_string(), TokenType::Plus)); } '-' => { tokens.push(self.minus_or_arrow()); } '*' => { - tokens.push(self.simple_token(ch.to_string(),TokenType::Multiply)); + tokens.push(self.simple_token(ch.to_string(), TokenType::Multiply)); } '/' => { - tokens.push(self.simple_token(ch.to_string(),TokenType::Division)); + tokens.push(self.simple_token(ch.to_string(), TokenType::Division)); } '{' => { - tokens.push(self.simple_token(ch.to_string(),TokenType::LeftBrace)); + tokens.push(self.simple_token(ch.to_string(), TokenType::LeftBrace)); } '}' => { - tokens.push(self.simple_token(ch.to_string(),TokenType::RightBrace)); + tokens.push(self.simple_token(ch.to_string(), TokenType::RightBrace)); } '(' => { - tokens.push(self.simple_token(ch.to_string(),TokenType::LeftParen)); + tokens.push(self.simple_token(ch.to_string(), TokenType::LeftParen)); } ')' => { - tokens.push(self.simple_token(ch.to_string(),TokenType::RightParen)); + tokens.push(self.simple_token(ch.to_string(), TokenType::RightParen)); } - '>' =>{ - tokens.push(self.simple_token(ch.to_string(),TokenType::GreaterThan)); + '>' => { + tokens.push(self.simple_token(ch.to_string(), TokenType::GreaterThan)); } - '<' =>{ - tokens.push(self.simple_token(ch.to_string(),TokenType::LessThan)); + '<' => { + tokens.push(self.simple_token(ch.to_string(), TokenType::LessThan)); } - ';' =>{ - tokens.push(self.simple_token(ch.to_string(),TokenType::Semicolon)); + ';' => { + tokens.push(self.simple_token(ch.to_string(), TokenType::Semicolon)); } '"' => { tokens.push(self.read_string_literal()); } ',' => { - tokens.push(self.simple_token(ch.to_string(),TokenType::Comma)); + tokens.push(self.simple_token(ch.to_string(), TokenType::Comma)); } '#' => { self.read_comment(); } - _ => panic!("Suuuper wrongdog in here, unexpected char '{}' at {}:{}", ch, self.line, self.column), + _ => panic!( + "Suuuper wrongdog in here, unexpected char '{}' at {}:{}", + ch, self.line, self.column + ), } } - tokens.push(self.simple_token("EOF".to_string(),TokenType::EOF)); + tokens.push(self.simple_token("EOF".to_string(), TokenType::EOF)); tokens } fn current_char(&self) -> Option { @@ -105,19 +106,18 @@ impl Lexer { self.column = 1; } - fn simple_token(&mut self,value: String, token_type: TokenType) -> Token { + fn simple_token(&mut self, value: String, token_type: TokenType) -> Token { let start_col_num = self.column; self.advance(); Token::new(value, token_type, self.line, start_col_num) - } - fn assign_or_equals(&mut self) -> Token{ + fn assign_or_equals(&mut self) -> Token { let original_col = self.column; self.advance(); if self.current_char().unwrap() == '=' { - Token::new("=".to_string(),TokenType::Equals, self.line, original_col) + Token::new("=".to_string(), TokenType::Equals, self.line, original_col) } else { - Token::new("==".to_string(),TokenType::Assign, self.line, original_col) + Token::new("==".to_string(), TokenType::Assign, self.line, original_col) } } fn minus_or_arrow(&mut self) -> Token { @@ -125,14 +125,14 @@ impl Lexer { self.advance(); if self.current_char().unwrap() == '>' { self.advance(); - Token::new("->".to_string(),TokenType::Arrow, self.line, original_col) + Token::new("->".to_string(), TokenType::Arrow, self.line, original_col) } else { - Token::new("-".to_string(),TokenType::Minus, self.line, original_col) + Token::new("-".to_string(), TokenType::Minus, self.line, original_col) } } fn read_comment(&mut self) { - while let Some(ch) = self.current_char(){ - match ch{ + while let Some(ch) = self.current_char() { + match ch { '\n' => { break; } @@ -144,12 +144,12 @@ impl Lexer { } fn read_number(&mut self, first_ch: char) -> Token { let mut num_string: String = "".to_string(); - let start_col_num :usize= self.column; + let start_col_num: usize = self.column; num_string.push(first_ch); self.advance(); while let Some(ch) = self.current_char() { match ch { - '0'..='9' =>{ + '0'..='9' => { num_string.push(ch); self.advance(); } @@ -159,15 +159,20 @@ impl Lexer { } } let num = num_string.parse::().unwrap(); - Token::new(num_string, TokenType::IntegerLiteral, self.line, start_col_num) + Token::new( + num_string, + TokenType::IntegerLiteral, + self.line, + start_col_num, + ) } - fn read_string_literal(&mut self) -> Token{ + fn read_string_literal(&mut self) -> Token { let mut the_litteral: String = "".to_string(); let start_col_num: usize = self.column; the_litteral.push('"'); self.advance(); - while let Some(ch) = self.current_char(){ + while let Some(ch) = self.current_char() { match ch { '"' => { the_litteral.push(ch); @@ -180,11 +185,16 @@ impl Lexer { } } } - Token::new(the_litteral.clone(), TokenType::StringLiteral, self.line, start_col_num) + Token::new( + the_litteral.clone(), + TokenType::StringLiteral, + self.line, + start_col_num, + ) } fn read_identifier(&mut self, first_ch: char) -> Token { - let mut name:String = "".to_string(); + let mut name: String = "".to_string(); let start_col_num: usize = self.column; name.push(first_ch); self.advance(); @@ -201,39 +211,44 @@ impl Lexer { } self.give_keyword_or_literal_token(name.as_mut_str(), self.line, start_col_num) } - fn give_keyword_or_literal_token(&mut self, name: &str, line: usize, col: usize) -> Token{ + fn give_keyword_or_literal_token(&mut self, name: &str, line: usize, col: usize) -> Token { match name { - "let" => Token::new("let".to_string(), TokenType::Let, line, col), - "func" => Token::new("func".to_string(), TokenType::Func, line, col), - "if" => Token::new("if".to_string(), TokenType::If, line, col), - "then" => Token::new("then".to_string(), TokenType::Then, line, col), - "else" => Token::new("else".to_string(), TokenType::Else, line, col), - "not" => Token::new("not".to_string(), TokenType::Not, line, col), - "while" => Token::new("while".to_string(), TokenType::While, line, col), - "print" => Token::new("print".to_string(), TokenType::Print, line, col), - "do" => Token::new("do".to_string(), TokenType::Do, line, col), - "is" => Token::new("is".to_string(), TokenType::Is, line, col), - "Integer"=> Token::new("Integer".to_string(), TokenType::Integer, line, col), - "Boolean"=> Token::new("Boolean".to_string(), TokenType::Boolean, line, col), - "return"=> Token::new("Return".to_string(), TokenType::Return, line, col), - "True" => Token::new("True".to_string(), TokenType::BooleanLiteral, line, col), - "False" => Token::new("False".to_string(), TokenType::BooleanLiteral, line, col), - _ => Token::new(name.to_string(), TokenType::Identifier, line, col), + "let" => Token::new("let".to_string(), TokenType::Let, line, col), + "func" => Token::new("func".to_string(), TokenType::Func, line, col), + "if" => Token::new("if".to_string(), TokenType::If, line, col), + "then" => Token::new("then".to_string(), TokenType::Then, line, col), + "else" => Token::new("else".to_string(), TokenType::Else, line, col), + "not" => Token::new("not".to_string(), TokenType::Not, line, col), + "while" => Token::new("while".to_string(), TokenType::While, line, col), + "print" => Token::new("print".to_string(), TokenType::Print, line, col), + "do" => Token::new("do".to_string(), TokenType::Do, line, col), + "is" => Token::new("is".to_string(), TokenType::Is, line, col), + "Integer" => Token::new("Integer".to_string(), TokenType::Integer, line, col), + "Boolean" => Token::new("Boolean".to_string(), TokenType::Boolean, line, col), + "return" => Token::new("Return".to_string(), TokenType::Return, line, col), + "True" => Token::new("True".to_string(), TokenType::BooleanLiteral, line, col), + "False" => Token::new("False".to_string(), TokenType::BooleanLiteral, line, col), + _ => Token::new(name.to_string(), TokenType::Identifier, line, col), } } } impl PartialEq for Lexer { fn eq(&self, other: &Self) -> bool { - self.input == other.input && - self.position == other.position && - self.column == other.column && - self.line == other.line + self.input == other.input + && self.position == other.position + && self.column == other.column + && self.line == other.line } } -mod tests{ - use crate::lexer::{lexer::Lexer, token::{Token, TokenType}}; + +#[cfg(test)] +mod tests { + use crate::lexer::{ + lexer::Lexer, + token::{Token, TokenType}, + }; #[test] - fn new_creates_lexer_correctly(){ + fn new_creates_lexer_correctly() { let actual = Lexer::new("a = 2".to_string()); let expected = Lexer { @@ -246,13 +261,13 @@ mod tests{ assert_eq!(actual, expected); } #[test] - fn tokenize_works_as_intended(){ - let mut lex: Lexer = Lexer::new("abc_def = 2".to_string()); + fn tokenize_works_as_intended() { + let mut lex: Lexer = Lexer::new("abc_def = 2".to_string()); let actual_token_vec: Vec = lex.tokenize(); let expected: Vec = vec![ - Token::new("abc_def".to_string(), TokenType::Identifier, 1, 1), - Token::new("=".to_string(), TokenType::Assign, 1, 9), + Token::new("abc_def".to_string(), TokenType::Identifier, 1, 1), + Token::new("=".to_string(), TokenType::Assign, 1, 9), Token::new(2.to_string(), TokenType::IntegerLiteral, 1, 11), Token::new("EOF".to_string(), TokenType::EOF, 1, 12), ]; @@ -260,30 +275,28 @@ mod tests{ assert_eq!(actual_token_vec, expected); } #[test] - fn reading_comments_tokenize_lexer_line_col_are_correct(){ - let mut lex: Lexer = Lexer::new("#abc_def = 2\n".to_string()); + fn reading_comments_tokenize_lexer_line_col_are_correct() { + let mut lex: Lexer = Lexer::new("#abc_def = 2\n".to_string()); lex.tokenize(); - assert_eq!((lex.line, lex.column), (2,2)); + assert_eq!((lex.line, lex.column), (2, 2)); } #[test] - fn reading_comments_tokenize_returns_eof_vector(){ - let mut lex: Lexer = Lexer::new("#abc_def = 2\n".to_string()); + fn reading_comments_tokenize_returns_eof_vector() { + let mut lex: Lexer = Lexer::new("#abc_def = 2\n".to_string()); let actual_token_vec: Vec = lex.tokenize(); - let expected: Vec = vec![ - Token::new("EOF".to_string(), TokenType::EOF, 2, 1) - ]; + let expected: Vec = vec![Token::new("EOF".to_string(), TokenType::EOF, 2, 1)]; assert_eq!(actual_token_vec, expected); } #[test] - fn read_string_literal_makes_correct_token(){ - let mut lex: Lexer = Lexer::new("\"test\"".to_string()); + fn read_string_literal_makes_correct_token() { + let mut lex: Lexer = Lexer::new("\"test\"".to_string()); let actual_token_vec: Vec = lex.tokenize(); let expected: Vec = vec![ Token::new("\"test\"".to_string(), TokenType::StringLiteral, 1, 1), - Token::new("EOF".to_string(), TokenType::EOF, 1, 7) + Token::new("EOF".to_string(), TokenType::EOF, 1, 7), ]; assert_eq!(actual_token_vec, expected); diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 325f721..e12719b 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -1,2 +1,2 @@ +pub mod lexer; pub mod token; -pub mod lexer; \ No newline at end of file diff --git a/src/lexer/token.rs b/src/lexer/token.rs index b9d04f9..b8697d9 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -31,7 +31,7 @@ pub enum TokenType { Arrow, // -> Assign, // = GreaterThan, // > - LessThan, // < + LessThan, // < Equals, // == Plus, // + Minus, // - @@ -59,9 +59,9 @@ pub struct Token { } impl Token { - pub fn new(value: String,token_type: TokenType, line: usize, column: usize) -> Self { + pub fn new(value: String, token_type: TokenType, line: usize, column: usize) -> Self { Token { - value:value, + value: value, token_type, line, column, @@ -107,7 +107,6 @@ impl fmt::Display for TokenType { TokenType::Multiply => write!(f, "*"), TokenType::Division => write!(f, "/"), - // Punctuation TokenType::LeftParen => write!(f, "("), TokenType::RightParen => write!(f, ")"), @@ -123,17 +122,13 @@ impl fmt::Display for TokenType { impl fmt::Display for Token { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "{} at {}:{}", - self.token_type, self.line, self.column - ) + write!(f, "{} at {}:{}", self.token_type, self.line, self.column) } } impl PartialEq for Token { fn eq(&self, other: &Self) -> bool { - self.token_type == other.token_type && - self.line == other.line && - self.column == other.column + self.token_type == other.token_type + && self.line == other.line + && self.column == other.column } } diff --git a/src/main.rs b/src/main.rs index 972f265..24b561a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,13 +1,16 @@ -mod parser; +mod codegen; mod lexer; +mod parser; mod semantic; -use std::env; -use std::fs; -use lexer::token::Token; use lexer::lexer::Lexer; // adjust if needed +use lexer::token::Token; use parser::parser::AST; use parser::parser::Parser; +use std::env; +use std::fs; + +use crate::codegen::codegen::CodeGenerator; fn main() { let args: Vec = env::args().collect(); @@ -19,13 +22,13 @@ fn main() { let filename = &args[1]; - let source = fs::read_to_string(filename) - .expect("Failed to read file"); + let source = fs::read_to_string(filename).expect("Failed to read file"); let mut lexer: Lexer = Lexer::new(source); - let _tokens: Vec = lexer.tokenize(); - let mut parser: Parser = Parser::new(_tokens); - let _ast: AST = parser.parse_program(); - + let tokens: Vec = lexer.tokenize(); + let mut parser: Parser = Parser::new(tokens); + let ast: AST = parser.parse_program(); println!("Lexing and parsing completed successfully."); + let codegen = CodeGenerator::new(); + codegen.generate(ast); } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 67c567f..a78deca 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1 +1,2 @@ pub mod parser; +pub use parser::AST; diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 5717655..fa5ca4b 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -14,7 +14,7 @@ pub enum Stmt { block: Block, }, Print(Expr), - Return(Expr) + Return(Expr), } #[derive(Debug, Clone, PartialEq)] @@ -167,27 +167,25 @@ impl Parser { statements.push(self.parse_if()); } else if self.match_token(TokenType::While) { statements.push(self.parse_while()); - } else if self.match_token(TokenType::Identifier) && self.peek(TokenType::Assign) - { + } else if self.match_token(TokenType::Identifier) && self.peek(TokenType::Assign) { statements.push(self.parse_assignment()); } else if self.match_token(TokenType::Return) { statements.push(self.parse_return()); - } - else { + } else { let expression = self.parse_expression(); statements.push(Stmt::ExprStatement(expression)); } } Block { statements } } - fn parse_return(&mut self) -> Stmt{ + fn parse_return(&mut self) -> Stmt { self.consume(); let expr = self.parse_expression(); let _ = self.expect(TokenType::Semicolon); Stmt::Return(expr) } - fn parse_assignment(&mut self) -> Stmt{ + fn parse_assignment(&mut self) -> Stmt { let var_name = self.expect(TokenType::Identifier).unwrap().value; let _ = self.expect(TokenType::Assign); let expr = self.parse_expression(); @@ -195,7 +193,7 @@ impl Parser { Stmt::AssignStatement(var_name, expr) } - fn parse_while(&mut self) -> Stmt{ + fn parse_while(&mut self) -> Stmt { self.consume(); let expr = self.parse_expression(); let _ = self.expect(TokenType::Do); @@ -205,7 +203,7 @@ impl Parser { Stmt::While { expr, block } } - fn parse_if(&mut self) -> Stmt{ + fn parse_if(&mut self) -> Stmt { self.consume(); let condition = self.parse_expression(); let _ = self.expect(TokenType::Then); @@ -218,10 +216,14 @@ impl Parser { let _ = self.expect(TokenType::LeftBrace); Some(self.parse_block()) } - _ => None + _ => None, }; let _ = self.expect(TokenType::RightBrace); - Stmt::If { condition , block, option } + Stmt::If { + condition, + block, + option, + } } fn parse_let(&mut self) -> Stmt { @@ -283,11 +285,13 @@ impl Parser { op, Box::new(right), ) - }else { - match tok.token_type{ - TokenType::IntegerLiteral => Expr::IntegerLiteral(tok.value.parse::().unwrap()), + } else { + match tok.token_type { + TokenType::IntegerLiteral => { + Expr::IntegerLiteral(tok.value.parse::().unwrap()) + } TokenType::Identifier => Expr::Identifier(tok.value), - _ => panic!("tokentype wrong, should be integer literal or identifyer") + _ => panic!("tokentype wrong, should be integer literal or identifyer"), } } } @@ -385,20 +389,19 @@ impl Parser { } } +#[cfg(test)] mod tests { - use crate::parser::parser::Parser; use crate::{ lexer::{ lexer::Lexer, - token::{Token, TokenType}, }, - parser::parser::{AST, BinOp, Block, Expr, Function, Type}, + parser::parser::{BinOp, Block, Expr, Function, Type, AST}, }; + use crate::parser::parser::Stmt; #[test] fn test_parser_parses_correct_ast() { - use crate::parser::parser::Stmt; use std::fs; let source = fs::read_to_string("simple.trv").expect("Failed to read file"); let mut lexer = Lexer::new(source); @@ -413,36 +416,39 @@ mod tests { body: Block { statements: vec![ Stmt::Let("num".to_string(), Type::Integer, Expr::IntegerLiteral(0)), - Stmt::While{ + Stmt::While { expr: Expr::BinaryOp( Box::new(Expr::Identifier("num".to_string())), BinOp::LessThan, - Box::new(Expr::IntegerLiteral(10)) + Box::new(Expr::IntegerLiteral(10)), ), - block: Block{ - statements: vec![ - Stmt::AssignStatement("num".to_string(), Expr::IntegerLiteral(11)) - ] - } + block: Block { + statements: vec![Stmt::AssignStatement( + "num".to_string(), + Expr::IntegerLiteral(11), + )], + }, }, Stmt::If { condition: Expr::BinaryOp( - Box::new(Expr::Identifier("num".to_string())), - BinOp::GreaterThan, - Box::new(Expr::IntegerLiteral(10)) - ), - block: Block{ - statements: vec![ - Stmt::AssignStatement("num".to_string(), Expr::IntegerLiteral(11)) - ] - }, - option: Some(Block{ - statements: vec![ - Stmt:: AssignStatement("num".to_string(), Expr::IntegerLiteral(11)) - ] - }) + Box::new(Expr::Identifier("num".to_string())), + BinOp::GreaterThan, + Box::new(Expr::IntegerLiteral(10)), + ), + block: Block { + statements: vec![Stmt::AssignStatement( + "num".to_string(), + Expr::IntegerLiteral(11), + )], + }, + option: Some(Block { + statements: vec![Stmt::AssignStatement( + "num".to_string(), + Expr::IntegerLiteral(11), + )], + }), }, - Stmt::Return(Expr::Identifier("num".to_string())) + Stmt::Return(Expr::Identifier("num".to_string())), ], }, }]; diff --git a/src/semantic/mod.rs b/src/semantic/mod.rs index eed35c2..2522d2c 100644 --- a/src/semantic/mod.rs +++ b/src/semantic/mod.rs @@ -1 +1 @@ -pub mod symbol_table; \ No newline at end of file +pub mod symbol_table; diff --git a/src/semantic/symbol_table.rs b/src/semantic/symbol_table.rs index 3871dc5..562fdc4 100644 --- a/src/semantic/symbol_table.rs +++ b/src/semantic/symbol_table.rs @@ -1,11 +1,11 @@ use std::collections::HashMap; #[derive(Debug, Clone, PartialEq)] -pub enum Type { +pub enum Type { Integer, Boolean, String, - Void, //mayhaps not needed dunno, only if we allow functiions that dont return anything + Void, //mayhaps not needed dunno, only if we allow functiions that dont return anything Function { params: Vec, return_type: Box, @@ -20,8 +20,8 @@ pub struct Symbol { } impl Symbol { - pub fn new(_name: String, s_type: Type, scope_lvl: usize ) -> Self{ - Self{ + pub fn new(_name: String, s_type: Type, scope_lvl: usize) -> Self { + Self { name: _name, symbol_type: s_type, scope_level: scope_lvl, @@ -34,9 +34,7 @@ pub struct SymbolTable { } impl SymbolTable { pub fn new() -> Self { - Self{ - scopes: Vec::new(), - } + Self { scopes: Vec::new() } } pub fn enter_scope(&mut self) { self.scopes.push(HashMap::new()); @@ -50,7 +48,10 @@ impl SymbolTable { let current = self.scopes.last_mut().unwrap(); if current.contains_key(&symbol.name) { - return Err(format!("Symbol '{}' already declared in this scope", symbol.name)); + return Err(format!( + "Symbol '{}' already declared in this scope", + symbol.name + )); } current.insert(symbol.name.clone(), symbol); @@ -67,7 +68,4 @@ impl SymbolTable { pub fn lookup_current(&self, name: &str) -> Option<&Symbol> { self.scopes.last()?.get(name) } - - - } From c0ddf65e0806309e6fa7dc47994acadcb7525e8e Mon Sep 17 00:00:00 2001 From: pbaekgaard Date: Wed, 25 Feb 2026 12:43:03 +0100 Subject: [PATCH 2/9] created a run_asm including some example code. we are now ready to make the code generator. --- playground/main.asm | 13 +++++++++++++ playground/run_asm | 19 +++++++++++++++++++ playground/test.trv | 3 +++ run_asm | 12 ++++++++++++ 4 files changed, 47 insertions(+) create mode 100644 playground/main.asm create mode 100755 playground/run_asm create mode 100644 playground/test.trv create mode 100755 run_asm diff --git a/playground/main.asm b/playground/main.asm new file mode 100644 index 0000000..b4edc48 --- /dev/null +++ b/playground/main.asm @@ -0,0 +1,13 @@ +.syntax unified +.thumb + +.section .text +.global _start +.type _start, %function + +_start: + mov r0, #67 + mov r7, #1 + svc #0 + +.size _start, .-_start diff --git a/playground/run_asm b/playground/run_asm new file mode 100755 index 0000000..4ab6b1e --- /dev/null +++ b/playground/run_asm @@ -0,0 +1,19 @@ +#!/bin/bash + +ASM_FILE="${1:-main.asm}" + +if [ ! -f "$ASM_FILE" ]; then + echo "Error: File '$ASM_FILE' does not exist" >&2 + exit 1 +fi + +BASE_NAME="${ASM_FILE%.asm}" + +arm-none-eabi-as -mthumb -o "${BASE_NAME}.o" "$ASM_FILE" + +arm-none-eabi-ld -o "$BASE_NAME" "${BASE_NAME}.o" + +qemu-arm ./"$BASE_NAME" +echo $? + +rm "${BASE_NAME}.o" "$BASE_NAME" diff --git a/playground/test.trv b/playground/test.trv new file mode 100644 index 0000000..5da0ba4 --- /dev/null +++ b/playground/test.trv @@ -0,0 +1,3 @@ +func main() -> Integer { + return 420; +} diff --git a/run_asm b/run_asm new file mode 100755 index 0000000..1ae9685 --- /dev/null +++ b/run_asm @@ -0,0 +1,12 @@ +#!/bin/bash +# make assemble +arm-none-eabi-as -mthumb -o main.o main.asm + +# link +arm-none-eabi-ld -o main main.o + +# run +qemu-arm ./main +echo $? + +rm main.o main From 502a8abb593e5a365a4382681d00ab9ff9073e7a Mon Sep 17 00:00:00 2001 From: pbaekgaard Date: Wed, 25 Feb 2026 12:58:10 +0100 Subject: [PATCH 3/9] updates --- src/codegen/codegen.rs | 22 ++++++++++++++++------ src/main.rs | 4 +++- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/codegen/codegen.rs b/src/codegen/codegen.rs index 3a99fad..92be0a0 100644 --- a/src/codegen/codegen.rs +++ b/src/codegen/codegen.rs @@ -1,11 +1,14 @@ use crate::parser::AST; +use std::fs::File; #[derive(Debug)] -pub struct CodeGenerator {} +pub struct CodeGenerator { + file: File +} impl CodeGenerator { - pub fn new() -> Self { - Self {} + pub fn new(file : File) -> Self { + Self {file} } pub fn generate(&self, ast : AST) { @@ -18,21 +21,26 @@ impl CodeGenerator { #[cfg(test)] mod tests { - use crate::CodeGenerator; + use crate::{CodeGenerator, codegen}; use crate::lexer::{lexer::Lexer, token::Token}; use crate::parser::parser::{Parser, AST}; + use std::fs::File; #[test] - fn can_generate_print() { + fn can_generate_return() { let source = r##" func main() -> Boolean { - print("hello world"); + return 69; } "##.to_string(); let mut lexer: Lexer = Lexer::new(source); let tokens: Vec = lexer.tokenize(); let mut parser: Parser = Parser::new(tokens); let ast: AST = parser.parse_program(); + let output_file = File::create("test_can_generate_return.asm").expect("Couldn't create file: test_can_generate_return.asm"); + + let codegen = CodeGenerator::new(output_file); + codegen.generate(ast); } #[test] @@ -46,5 +54,7 @@ mod tests { let tokens: Vec = lexer.tokenize(); let mut parser: Parser = Parser::new(tokens); let ast: AST = parser.parse_program(); + let output_file = File::create("test_can_generate_let.asm").expect("Couldn't create file: test_can_generate_let.asm"); + let codegen = CodeGenerator::new(output_file); } } diff --git a/src/main.rs b/src/main.rs index 24b561a..5ed8d7a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,6 +9,7 @@ use parser::parser::AST; use parser::parser::Parser; use std::env; use std::fs; +use std::fs::File; use crate::codegen::codegen::CodeGenerator; @@ -29,6 +30,7 @@ fn main() { let mut parser: Parser = Parser::new(tokens); let ast: AST = parser.parse_program(); println!("Lexing and parsing completed successfully."); - let codegen = CodeGenerator::new(); + let output_file = File::create("main.asm").expect("failed to create main.asm"); + let codegen = CodeGenerator::new(output_file); codegen.generate(ast); } From b32753b7aea4bfd98f8645f81eb1e5d455730d29 Mon Sep 17 00:00:00 2001 From: Aasmundur Date: Wed, 25 Feb 2026 13:36:52 +0100 Subject: [PATCH 4/9] made some stuf ngl --- src/codegen/codegen.rs | 63 +++++++++++++++++++++++++++++++----- src/main.rs | 2 +- test_can_generate_let.asm | 15 +++++++++ test_can_generate_return.asm | 16 +++++++++ 4 files changed, 87 insertions(+), 9 deletions(-) create mode 100644 test_can_generate_let.asm create mode 100644 test_can_generate_return.asm diff --git a/src/codegen/codegen.rs b/src/codegen/codegen.rs index 92be0a0..52b7102 100644 --- a/src/codegen/codegen.rs +++ b/src/codegen/codegen.rs @@ -1,5 +1,5 @@ -use crate::parser::AST; -use std::fs::File; +use crate::parser::{AST, parser::Function}; +use std::{fs::File, io::{Seek, SeekFrom, Write}}; #[derive(Debug)] pub struct CodeGenerator { @@ -10,12 +10,34 @@ impl CodeGenerator { pub fn new(file : File) -> Self { Self {file} } - pub fn generate(&self, ast : AST) { + pub fn generate(&mut self, ast : AST) { + self.gen_main(ast[0].clone()); } + fn gen_main(&mut self, func: Function){ + self.emit("instruction".to_string()); + } + fn emit(&mut self, instruction : String) { + let mut buf = br##" + .syntax unified + .thumb + + .section .text + .global _start - fn emit(&self, instruction : String) { + "##; + let mut buf_2 = br##" + .type _start, %function + _start: + mov r0, #69 + mov r7, #1 + svc #0 + + .size _start, .-_start"##; + self.file.write_all(buf); + self.file.write_all(buf_2); + self.file.flush().unwrap(); } } @@ -24,7 +46,8 @@ mod tests { use crate::{CodeGenerator, codegen}; use crate::lexer::{lexer::Lexer, token::Token}; use crate::parser::parser::{Parser, AST}; - use std::fs::File; + use std::{fs::File, io::{Seek, SeekFrom, Write}}; + use std::io::Read; #[test] fn can_generate_return() { @@ -38,9 +61,28 @@ mod tests { let mut parser: Parser = Parser::new(tokens); let ast: AST = parser.parse_program(); let output_file = File::create("test_can_generate_return.asm").expect("Couldn't create file: test_can_generate_return.asm"); - - let codegen = CodeGenerator::new(output_file); + let mut codegen = CodeGenerator::new(output_file); codegen.generate(ast); + let mut buf = "".to_string(); + codegen.file.flush().unwrap(); // ensure all writes are written + codegen.file.seek(SeekFrom::Start(0)).unwrap(); // rewind to start + let _ = codegen.file.read_to_string(&mut buf); + let expected = r##" + .syntax unified + .thumb + + .section .text + .global _start + .type _start, %function + + _start: + mov r0, #69 + mov r7, #1 + svc #0 + + .size _start, .-_start + "##.to_string();//make + assert_eq!(expected, buf) } #[test] @@ -55,6 +97,11 @@ mod tests { let mut parser: Parser = Parser::new(tokens); let ast: AST = parser.parse_program(); let output_file = File::create("test_can_generate_let.asm").expect("Couldn't create file: test_can_generate_let.asm"); - let codegen = CodeGenerator::new(output_file); + let mut codegen = CodeGenerator::new(output_file); + codegen.generate(ast); + let mut buf = "".to_string(); + let _ = codegen.file.read_to_string(&mut buf); + let expected = r##""##.to_string();//make + assert_eq!(expected, buf) } } diff --git a/src/main.rs b/src/main.rs index 5ed8d7a..950fea0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -31,6 +31,6 @@ fn main() { let ast: AST = parser.parse_program(); println!("Lexing and parsing completed successfully."); let output_file = File::create("main.asm").expect("failed to create main.asm"); - let codegen = CodeGenerator::new(output_file); + let mut codegen = CodeGenerator::new(output_file); codegen.generate(ast); } diff --git a/test_can_generate_let.asm b/test_can_generate_let.asm new file mode 100644 index 0000000..a82d40e --- /dev/null +++ b/test_can_generate_let.asm @@ -0,0 +1,15 @@ + + .syntax unified + .thumb + + .section .text + .global _start + + .type _start, %function + + _start: + mov r0, #69 + mov r7, #1 + svc #0 + + .size _start, .-_start \ No newline at end of file diff --git a/test_can_generate_return.asm b/test_can_generate_return.asm new file mode 100644 index 0000000..aecabe0 --- /dev/null +++ b/test_can_generate_return.asm @@ -0,0 +1,16 @@ + + .syntax unified + .thumb + + .section .text + .global _start + + + .type _start, %function + + _start: + mov r0, #69 + mov r7, #1 + svc #0 + + .size _start, .-_start \ No newline at end of file From b45c989864be62db0d024241984bf37ef06e0fca Mon Sep 17 00:00:00 2001 From: pbaekgaard Date: Wed, 25 Feb 2026 15:18:38 +0100 Subject: [PATCH 5/9] gen init works --- .gitignore | 1 + Cargo.lock | 16 +++ Cargo.toml | 1 + src/codegen/codegen.rs | 150 ++++++++++++++++------------ src/parser/parser.rs | 2 +- test_can_generate_let.asm | 15 --- test_can_generate_return.asm | 16 --- simple.trv => test_codes/simple.trv | 0 test_codes/test_main_return.trv | 3 + 9 files changed, 109 insertions(+), 95 deletions(-) delete mode 100644 test_can_generate_let.asm delete mode 100644 test_can_generate_return.asm rename simple.trv => test_codes/simple.trv (100%) create mode 100644 test_codes/test_main_return.trv diff --git a/.gitignore b/.gitignore index 0dc7ebb..7b13b47 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ #/target /tmp +/temp diff --git a/Cargo.lock b/Cargo.lock index cc5f216..a333194 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,15 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" +[[package]] +name = "indoc" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] + [[package]] name = "pretty_assertions" version = "1.4.1" @@ -18,10 +27,17 @@ dependencies = [ "yansi", ] +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "trivilang" version = "0.1.0" dependencies = [ + "indoc", "pretty_assertions", ] diff --git a/Cargo.toml b/Cargo.toml index 23d8030..88d9f7a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ name = "trivic" path = "src/main.rs" [dependencies] +indoc = "2.0" [dev-dependencies] diff --git a/src/codegen/codegen.rs b/src/codegen/codegen.rs index 52b7102..9ebb6a7 100644 --- a/src/codegen/codegen.rs +++ b/src/codegen/codegen.rs @@ -1,73 +1,117 @@ -use crate::parser::{AST, parser::Function}; -use std::{fs::File, io::{Seek, SeekFrom, Write}}; +use crate::parser::{parser::Function, AST}; +use std::{fs::File, io::Write}; #[derive(Debug)] pub struct CodeGenerator { - file: File + pub file: File, } impl CodeGenerator { - pub fn new(file : File) -> Self { - Self {file} + pub fn new(file: File) -> Self { + Self { file } + } + pub fn generate(&mut self, ast: AST) { + self.gen_init(); + for func in ast { + self.emit(func); + } } - pub fn generate(&mut self, ast : AST) { - self.gen_main(ast[0].clone()); + fn gen_init(&mut self) { + self.write_line(".syntax unified"); + self.write_line(".thumb"); + self.write_line(".section .text"); + self.write_line(".global _start"); + self.write_line(".type _start, %function"); } - fn gen_main(&mut self, func: Function){ - self.emit("instruction".to_string()); + + fn write_line(&mut self, string: &str) { + // writeln! automatically appends \n and writes to the file + let _ = writeln!(self.file, "{}", string); } - fn emit(&mut self, instruction : String) { - let mut buf = br##" - .syntax unified - .thumb - .section .text - .global _start + fn emit(&mut self, func: Function) { + match func.name.as_str() { + "main" => self.emit_main(func), + _ => panic!("failed"), + } + self.file.sync_all().unwrap(); + } - "##; - let mut buf_2 = br##" - .type _start, %function + fn emit_main(&mut self, func: Function) { - _start: - mov r0, #69 - mov r7, #1 - svc #0 - - .size _start, .-_start"##; - self.file.write_all(buf); - self.file.write_all(buf_2); - self.file.flush().unwrap(); } } #[cfg(test)] mod tests { - use crate::{CodeGenerator, codegen}; use crate::lexer::{lexer::Lexer, token::Token}; use crate::parser::parser::{Parser, AST}; - use std::{fs::File, io::{Seek, SeekFrom, Write}}; - use std::io::Read; + use crate::CodeGenerator; + use std::fs::{File, OpenOptions}; + use std::io::{Read, Seek, SeekFrom}; + use std::sync::Once; + + static INIT: Once = Once::new(); + + fn initialize() { + INIT.call_once(|| { + let mut path = std::path::PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap()); + path.push("temp/tests"); + std::fs::create_dir_all(path).unwrap(); + }); + } + + #[test] + fn can_generate_init() { + initialize(); + let output_file = File::options() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open("temp/tests/test_can_generate_init.asm") + .expect("Failed to create file: /temp/tests/test_can_generate_init.asm"); + + let ast = AST::new(); + let mut codegen = CodeGenerator::new(output_file); + codegen.generate(ast); + codegen.file.seek(SeekFrom::Start(0)).unwrap(); + let mut buf = String::new(); + codegen.file.read_to_string(&mut buf).unwrap(); + let expected = indoc::indoc! {r##" + .syntax unified + .thumb + .section .text + .global _start + .type _start, %function + "##}.to_string(); + assert_eq!(expected, buf); + + } #[test] fn can_generate_return() { - let source = r##" - func main() -> Boolean { - return 69; - } - "##.to_string(); + initialize(); + let source = std::fs::read_to_string("test_codes/test_main_return.trv") + .expect("Failed to read file"); let mut lexer: Lexer = Lexer::new(source); let tokens: Vec = lexer.tokenize(); let mut parser: Parser = Parser::new(tokens); let ast: AST = parser.parse_program(); - let output_file = File::create("test_can_generate_return.asm").expect("Couldn't create file: test_can_generate_return.asm"); + let output_file = File::options() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open("temp/tests/test_can_generate_return.asm") + .expect("Failed to create file: /temp/tests/test_can_generate_return.asm"); let mut codegen = CodeGenerator::new(output_file); codegen.generate(ast); - let mut buf = "".to_string(); - codegen.file.flush().unwrap(); // ensure all writes are written - codegen.file.seek(SeekFrom::Start(0)).unwrap(); // rewind to start - let _ = codegen.file.read_to_string(&mut buf); - let expected = r##" + codegen.file.seek(SeekFrom::Start(0)).unwrap(); + let mut buf = String::new(); + codegen.file.read_to_string(&mut buf).unwrap(); + let expected = indoc::indoc! {r##" .syntax unified .thumb @@ -80,28 +124,8 @@ mod tests { mov r7, #1 svc #0 - .size _start, .-_start - "##.to_string();//make - assert_eq!(expected, buf) - } - - #[test] - fn can_generate_let() { - let source = r##" - func main() -> Boolean { - let x : Integer = 11; - } - "##.to_string(); - let mut lexer: Lexer = Lexer::new(source); - let tokens: Vec = lexer.tokenize(); - let mut parser: Parser = Parser::new(tokens); - let ast: AST = parser.parse_program(); - let output_file = File::create("test_can_generate_let.asm").expect("Couldn't create file: test_can_generate_let.asm"); - let mut codegen = CodeGenerator::new(output_file); - codegen.generate(ast); - let mut buf = "".to_string(); - let _ = codegen.file.read_to_string(&mut buf); - let expected = r##""##.to_string();//make + .size _start, .-_start"##} + .to_string(); assert_eq!(expected, buf) } } diff --git a/src/parser/parser.rs b/src/parser/parser.rs index fa5ca4b..a107749 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -403,7 +403,7 @@ mod tests { #[test] fn test_parser_parses_correct_ast() { use std::fs; - let source = fs::read_to_string("simple.trv").expect("Failed to read file"); + let source = fs::read_to_string("test_codes/simple.trv").expect("Failed to read file"); let mut lexer = Lexer::new(source); let tokens = lexer.tokenize(); let mut parser = Parser::new(tokens); diff --git a/test_can_generate_let.asm b/test_can_generate_let.asm deleted file mode 100644 index a82d40e..0000000 --- a/test_can_generate_let.asm +++ /dev/null @@ -1,15 +0,0 @@ - - .syntax unified - .thumb - - .section .text - .global _start - - .type _start, %function - - _start: - mov r0, #69 - mov r7, #1 - svc #0 - - .size _start, .-_start \ No newline at end of file diff --git a/test_can_generate_return.asm b/test_can_generate_return.asm deleted file mode 100644 index aecabe0..0000000 --- a/test_can_generate_return.asm +++ /dev/null @@ -1,16 +0,0 @@ - - .syntax unified - .thumb - - .section .text - .global _start - - - .type _start, %function - - _start: - mov r0, #69 - mov r7, #1 - svc #0 - - .size _start, .-_start \ No newline at end of file diff --git a/simple.trv b/test_codes/simple.trv similarity index 100% rename from simple.trv rename to test_codes/simple.trv diff --git a/test_codes/test_main_return.trv b/test_codes/test_main_return.trv new file mode 100644 index 0000000..c66bbb3 --- /dev/null +++ b/test_codes/test_main_return.trv @@ -0,0 +1,3 @@ +func main() -> Integer { + return 69; +} From 9f0f6a8fd74e16d82e403ff175e00acb7059c778 Mon Sep 17 00:00:00 2001 From: Aasmundur Date: Wed, 25 Feb 2026 15:48:57 +0100 Subject: [PATCH 6/9] made some stuf ngl --- src/codegen/codegen.rs | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/src/codegen/codegen.rs b/src/codegen/codegen.rs index 9ebb6a7..045dd11 100644 --- a/src/codegen/codegen.rs +++ b/src/codegen/codegen.rs @@ -1,4 +1,4 @@ -use crate::parser::{parser::Function, AST}; +use crate::parser::{AST, parser::{Block, Function, Stmt}}; use std::{fs::File, io::Write}; #[derive(Debug)] @@ -18,16 +18,19 @@ impl CodeGenerator { } fn gen_init(&mut self) { - self.write_line(".syntax unified"); - self.write_line(".thumb"); - self.write_line(".section .text"); - self.write_line(".global _start"); - self.write_line(".type _start, %function"); + self.write_line(".syntax unified",0); + self.write_line(".thumb",0); + self.write_line(".section .text",0); + self.write_line(".global _start",0); + self.write_line(".type _start, %function",0); } - fn write_line(&mut self, string: &str) { - // writeln! automatically appends \n and writes to the file - let _ = writeln!(self.file, "{}", string); + fn write_line(&mut self, string: &str, indents: usize) { + // Create indentation (e.g., 4 spaces per indent level) + let indent_str = "\t".repeat(indents); + + // writeln! automatically appends \n + let _ = writeln!(self.file, "{}{}", indent_str, string); } fn emit(&mut self, func: Function) { @@ -39,7 +42,18 @@ impl CodeGenerator { } fn emit_main(&mut self, func: Function) { - + self.write_line("_start:",0); + self.emit_block(func.body, true); + } + fn emit_block(&mut self, block: Block, is_main: bool){ + for stmt in block.statements{ + match stmt { + Stmt::Return(expr) => { + expr + } + _ => panic!("Block fucking wrong") + } + } } } From 32bdc3cb43b7247cb9f51e9c0ac264f0813db769 Mon Sep 17 00:00:00 2001 From: pbaekgaard Date: Wed, 25 Feb 2026 16:18:26 +0100 Subject: [PATCH 7/9] more stuff --- main.asm | 13 ++++ src/codegen/codegen.rs | 143 ++++++++++++++++++++++++++++++----- test | Bin 0 -> 5020 bytes test.c | 4 + test.s | 40 ++++++++++ test_codes/test_func_let.trv | 10 +++ test_codes/test_main_let.trv | 4 + 7 files changed, 197 insertions(+), 17 deletions(-) create mode 100644 main.asm create mode 100755 test create mode 100644 test.c create mode 100644 test.s create mode 100644 test_codes/test_func_let.trv create mode 100644 test_codes/test_main_let.trv diff --git a/main.asm b/main.asm new file mode 100644 index 0000000..01167be --- /dev/null +++ b/main.asm @@ -0,0 +1,13 @@ +.syntax unified +.thumb + +.section .text +.global _start +.type _start, %function + +_start: + mov r0, #69 + mov r7, #1 + svc #0 + +.size _start, .-_start diff --git a/src/codegen/codegen.rs b/src/codegen/codegen.rs index 045dd11..d6a2b9d 100644 --- a/src/codegen/codegen.rs +++ b/src/codegen/codegen.rs @@ -1,4 +1,7 @@ -use crate::parser::{AST, parser::{Block, Function, Stmt}}; +use crate::parser::{ + parser::{Block, Function, Stmt}, + AST, +}; use std::{fs::File, io::Write}; #[derive(Debug)] @@ -18,16 +21,18 @@ impl CodeGenerator { } fn gen_init(&mut self) { - self.write_line(".syntax unified",0); - self.write_line(".thumb",0); - self.write_line(".section .text",0); - self.write_line(".global _start",0); - self.write_line(".type _start, %function",0); + self.write_line(".syntax unified", 0); + self.write_line(".thumb", 0); + self.write_line("", 0); + self.write_line(".section .text", 0); + self.write_line(".global _start", 0); + self.write_line(".type _start, %function", 0); + self.write_line("", 0); } fn write_line(&mut self, string: &str, indents: usize) { // Create indentation (e.g., 4 spaces per indent level) - let indent_str = "\t".repeat(indents); + let indent_str = " ".repeat(indents); // writeln! automatically appends \n let _ = writeln!(self.file, "{}{}", indent_str, string); @@ -36,22 +41,36 @@ impl CodeGenerator { fn emit(&mut self, func: Function) { match func.name.as_str() { "main" => self.emit_main(func), - _ => panic!("failed"), + _ => self.emit_func(func), } self.file.sync_all().unwrap(); } + fn emit_func(&mut self, func: Function) {} + fn emit_main(&mut self, func: Function) { - self.write_line("_start:",0); + self.write_line("_start:", 0); self.emit_block(func.body, true); + self.write_line("\n.size _start, .-_start", 0); } - fn emit_block(&mut self, block: Block, is_main: bool){ - for stmt in block.statements{ + fn emit_block(&mut self, block: Block, is_main: bool) { + for stmt in block.statements { match stmt { Stmt::Return(expr) => { - expr + match expr { + crate::parser::parser::Expr::IntegerLiteral(val) => { + self.write_line(&format!("mov r0, #{}", val), 1); + } + _ => panic!("Unsupported expression type in return"), + } + if is_main { + self.write_line("mov r7, #1", 1); + self.write_line("svc #0", 1); + } else { + self.write_line("bx lr", 1); + } } - _ => panic!("Block fucking wrong") + _ => panic!("Block fucking wrong"), } } } @@ -62,7 +81,7 @@ mod tests { use crate::lexer::{lexer::Lexer, token::Token}; use crate::parser::parser::{Parser, AST}; use crate::CodeGenerator; - use std::fs::{File, OpenOptions}; + use std::fs::File; use std::io::{Read, Seek, SeekFrom}; use std::sync::Once; @@ -99,9 +118,9 @@ mod tests { .section .text .global _start .type _start, %function - "##}.to_string(); + "##} + .to_string(); assert_eq!(expected, buf); - } #[test] @@ -138,7 +157,97 @@ mod tests { mov r7, #1 svc #0 - .size _start, .-_start"##} + .size _start, .-_start + "##} + .to_string(); + assert_eq!(expected, buf) + } + + #[test] + fn can_generate_let() { + initialize(); + let source = + std::fs::read_to_string("test_codes/test_main_let.trv").expect("Failed to read file"); + let mut lexer: Lexer = Lexer::new(source); + let tokens: Vec = lexer.tokenize(); + let mut parser: Parser = Parser::new(tokens); + let ast: AST = parser.parse_program(); + let output_file = File::options() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open("temp/tests/test_can_generate_let.asm") + .expect("Failed to create file: /temp/tests/test_can_generate_let.asm"); + let mut codegen = CodeGenerator::new(output_file); + codegen.generate(ast); + codegen.file.seek(SeekFrom::Start(0)).unwrap(); + let mut buf = String::new(); + codegen.file.read_to_string(&mut buf).unwrap(); + let expected = indoc::indoc! {r##" + .syntax unified + .thumb + + .section .text + .global _start + .type _start, %function + + _start: + mov r4, #27 + mov r0, r4 + mov r7, #1 + svc #0 + + .size _start, .-_start + "##} + .to_string(); + assert_eq!(expected, buf) + } + + #[test] + fn can_generate_func_let() { + initialize(); + let source = + std::fs::read_to_string("test_codes/test_func_let.trv").expect("Failed to read file"); + let mut lexer: Lexer = Lexer::new(source); + let tokens: Vec = lexer.tokenize(); + let mut parser: Parser = Parser::new(tokens); + let ast: AST = parser.parse_program(); + let output_file = File::options() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open("temp/tests/test_can_generate_func_let.asm") + .expect("Failed to create file: /temp/tests/test_can_generate_func_let.asm"); + let mut codegen = CodeGenerator::new(output_file); + codegen.generate(ast); + codegen.file.seek(SeekFrom::Start(0)).unwrap(); + let mut buf = String::new(); + codegen.file.read_to_string(&mut buf).unwrap(); + let expected = indoc::indoc! {r##" + .syntax unified + .thumb + + .section .text + .global _start + .type _start, %function + + x: + mov r4, #100 + mov r0, r4 + bx lr + + _start: + bl x + mov r4, r0 + mov r5, #5 + mov r0, #29 + mov r7, #1 + svc #0 + + .size _start, .-_start + "##} .to_string(); assert_eq!(expected, buf) } diff --git a/test b/test new file mode 100755 index 0000000000000000000000000000000000000000..cf410c719648c493b089a52ca6ee2d609b8a94b7 GIT binary patch literal 5020 zcmeHLJx?1!5S{he#w00>1;l`aE&(Ki#EB0RloU};1k;cd?x>ElImsvRhwe5~5Ctct zrQn~CB4zS(@&obQo004evz+-W6=cbF|AkU>&dySO=^F)&c8)b-+4c9k32q2do3uf&a#V z?Aw>G^susabAB^ucAg#n=w#m~CqySlM{nGgpY#WAH=2yoSdNqHO?S8L?Rh)o@8JW& zDD1@~T1YF0+dfsF%=k4iTWXZ&#e#V5G{urw7OT~lVoSV#oG=tVJ=56HQw3_?8(dP! zBX)ElKOHI;a3$}c=`#KLUE?&<7!wUzMr!)ifu9!eQUP~?c@Dy}jRy1^h5Bv*zXo2+ z@6Y|;73#;pg!u`tIyrQTIISlcrPAxux}+fJrD>4LFp)u!qiA&TH&+$#ITu}oG9*UC zf7HK3i7DNI$V;z>GVCGBM8|>2Wb|2*H;IxoPGvMgJ{rfP7_mPd4i({dKX{>(N!;L7)3E{-zk8L{88&7{?4?=e1z&(*9ltQYPZPT7Wpvvh**&A1)^Kig2gteK4wG2i zZ&j!2 Integer { + let v = 100; + return v; +} + +func main() -> Integer { + let func_res = x(); + let result = 5; + return 29; +} diff --git a/test_codes/test_main_let.trv b/test_codes/test_main_let.trv new file mode 100644 index 0000000..fdc57cb --- /dev/null +++ b/test_codes/test_main_let.trv @@ -0,0 +1,4 @@ +func main() -> Integer { + let num : Integer = 27; + return num; +} From 9c1d63789d4f07d371638583ff1b13a6522a36b8 Mon Sep 17 00:00:00 2001 From: Aasmundur Date: Thu, 26 Feb 2026 15:17:59 +0100 Subject: [PATCH 8/9] finished code generator for assignment, rest of expressions, if statements and lets --- src/codegen/codegen.rs | 304 ++++++++++++++++++++++++++----- src/main.rs | 2 + src/parser/parser.rs | 2 +- test_codes/test_func_if_else.trv | 9 + 4 files changed, 267 insertions(+), 50 deletions(-) create mode 100644 test_codes/test_func_if_else.trv diff --git a/src/codegen/codegen.rs b/src/codegen/codegen.rs index d6a2b9d..1d4843a 100644 --- a/src/codegen/codegen.rs +++ b/src/codegen/codegen.rs @@ -1,17 +1,23 @@ -use crate::parser::{ - parser::{Block, Function, Stmt}, - AST, -}; -use std::{fs::File, io::Write}; +use crate::parser::{ AST, parser::{ BinOp, Block, Expr, Function, Stmt } }; +use core::panic; +use std::{ collections::HashMap, fs::File, io::Write }; #[derive(Debug)] pub struct CodeGenerator { pub file: File, + locals: HashMap, + stack_offset: i32, + label_count: usize, } impl CodeGenerator { pub fn new(file: File) -> Self { - Self { file } + Self { + file, + locals: HashMap::new(), + stack_offset: 0, + label_count: 0, + } } pub fn generate(&mut self, ast: AST) { self.gen_init(); @@ -56,33 +62,154 @@ impl CodeGenerator { fn emit_block(&mut self, block: Block, is_main: bool) { for stmt in block.statements { match stmt { - Stmt::Return(expr) => { - match expr { - crate::parser::parser::Expr::IntegerLiteral(val) => { - self.write_line(&format!("mov r0, #{}", val), 1); - } - _ => panic!("Unsupported expression type in return"), + Stmt::Let(_, _, _) => self.emit_let(stmt), + Stmt::AssignStatement(_, _) => self.emit_assign(stmt), + Stmt::Return(_) => self.emit_return(stmt, is_main), + Stmt::If { .. } => self.emit_if(stmt), + _ => panic!("Error found in expression in return"), + } + } + } + fn emit_if(&mut self, if_stmt: Stmt) { + match if_stmt { + Stmt::If { condition, block, option } => { + let label_id = self.label_count; + self.label_count += 1; + + self.emit_expr(condition); + self.write_line("cmp r0, #0", 1); + self.write_line(&format!("beq else_{}", label_id), 1); + + // then block + self.emit_block(block, false); + self.write_line(&format!("b endif_{}", label_id), 1); + + // else block + self.write_line(&format!("else_{}:", label_id), 0); + if let Some(else_block) = option { + self.emit_block(else_block, false); + } + + self.write_line(&format!("endif_{}:", label_id), 0); + } + _ => panic!("emit_if called with non-if statement"), + } + } + + fn emit_return(&mut self, return_stmt: Stmt, is_main: bool) { + match return_stmt { + Stmt::Return(expr) => { + match expr { + | crate::parser::parser::Expr::IntegerLiteral(_) + | crate::parser::parser::Expr::Identifier(_) => { + self.emit_expr(expr); } - if is_main { - self.write_line("mov r7, #1", 1); - self.write_line("svc #0", 1); - } else { - self.write_line("bx lr", 1); + _ => panic!("Unsupported expression type in return"), + } + if is_main { + self.write_line("mov r7, #1", 1); + self.write_line("svc #0", 1); + } else { + self.write_line("bx lr", 1); + } + } + _ => panic!("return poorly formed"), + } + } + fn emit_assign(&mut self, assign_stmt: Stmt) { + match assign_stmt { + Stmt::AssignStatement(name, expr) => { + self.emit_expr(expr); + let offset = self.locals.get(&name).expect("Undefined variable"); + if self.stack_offset - offset == 0{ + self.write_line(&format!("str r0, [sp]"), 1); + }else{ + self.write_line(&format!("str r0, [sp, #{}]", self.stack_offset - offset), 1); + } + } + _ => panic!("Not a valid assignment"), + } + } + fn emit_let(&mut self, let_stmt: Stmt) { + match let_stmt { + Stmt::Let(name, type_name, expr) => { + self.stack_offset += 4; + self.write_line("sub sp, sp, #4", 1); + self.emit_expr(expr); + self.write_line("str r0, [sp]", 1); + self.locals.insert(name, self.stack_offset); + } + _ => panic!("Not a let statement format sorry "), + } + } + fn emit_expr(&mut self, expr: Expr) { + match expr { + Expr::IntegerLiteral(val) => { self.write_line(&format!("mov r0, #{}", val), 1) } + Expr::BooleanLiteral(val) => { self.write_line(&format!("mov r0, #{}", val), 1) } + Expr::Identifier(name) => { + let offset = self.locals.get(&name).expect("Undefined variable"); + self.write_line(&format!("ldr r0, [sp, #{}]", self.stack_offset - offset), 1); + } + Expr::BinaryOp(_, _, _) => { + self.emit_bin_op(expr); + } + + _ => panic!("Expression not valid sorry"), + } + } + fn emit_bin_op(&mut self, bin_op_expr: Expr) { + match bin_op_expr { + Expr::BinaryOp(left, op, right) => { + self.emit_expr(*left); + self.write_line("mov r1, r0", 1); // store left + self.emit_expr(*right); + + match op { + BinOp::Add => self.write_line("add r0, r1, r0", 1), + BinOp::Sub => self.write_line("sub r0, r1, r0", 1), + BinOp::Mul => self.write_line("mul r0, r1, r0", 1), + + BinOp::Equals => { + self.write_line("cmp r1, r0", 1); + self.write_line("mov r0, #0", 1); + self.write_line("it eq", 1); + self.write_line("moveq r0, #1", 1); + } + + BinOp::NotEquals => { + self.write_line("cmp r1, r0", 1); + self.write_line("mov r0, #0", 1); + self.write_line("it ne", 1); + self.write_line("movne r0, #1", 1); + } + + BinOp::GreaterThan => { + self.write_line("cmp r1, r0", 1); + self.write_line("mov r0, #0", 1); + self.write_line("it gt", 1); + self.write_line("movgt r0, #1", 1); + } + + BinOp::LessThan => { + self.write_line("cmp r1, r0", 1); + self.write_line("mov r0, #0", 1); + self.write_line("it lt", 1); + self.write_line("movlt r0, #1", 1); } } - _ => panic!("Block fucking wrong"), } + _ => panic!("not a binary operation if ive ever seen one"), } } } #[cfg(test)] mod tests { - use crate::lexer::{lexer::Lexer, token::Token}; - use crate::parser::parser::{Parser, AST}; + use crate::lexer::{ lexer::Lexer, token::Token }; + use crate::parser::parser::{ Parser, AST }; use crate::CodeGenerator; use std::fs::File; - use std::io::{Read, Seek, SeekFrom}; + use std::io::{ Read, Seek, SeekFrom }; use std::sync::Once; static INIT: Once = Once::new(); @@ -112,21 +239,27 @@ mod tests { codegen.file.seek(SeekFrom::Start(0)).unwrap(); let mut buf = String::new(); codegen.file.read_to_string(&mut buf).unwrap(); - let expected = indoc::indoc! {r##" + let expected = ( + indoc::indoc! { + r##" .syntax unified .thumb + .section .text .global _start .type _start, %function - "##} - .to_string(); + + "## + } + ).to_string(); assert_eq!(expected, buf); } #[test] fn can_generate_return() { initialize(); - let source = std::fs::read_to_string("test_codes/test_main_return.trv") + let source = std::fs + ::read_to_string("test_codes/test_main_return.trv") .expect("Failed to read file"); let mut lexer: Lexer = Lexer::new(source); let tokens: Vec = lexer.tokenize(); @@ -144,7 +277,9 @@ mod tests { codegen.file.seek(SeekFrom::Start(0)).unwrap(); let mut buf = String::new(); codegen.file.read_to_string(&mut buf).unwrap(); - let expected = indoc::indoc! {r##" + let expected = ( + indoc::indoc! { + r##" .syntax unified .thumb @@ -158,16 +293,18 @@ mod tests { svc #0 .size _start, .-_start - "##} - .to_string(); + "## + } + ).to_string(); assert_eq!(expected, buf) } #[test] fn can_generate_let() { initialize(); - let source = - std::fs::read_to_string("test_codes/test_main_let.trv").expect("Failed to read file"); + let source = std::fs + ::read_to_string("test_codes/test_main_let.trv") + .expect("Failed to read file"); let mut lexer: Lexer = Lexer::new(source); let tokens: Vec = lexer.tokenize(); let mut parser: Parser = Parser::new(tokens); @@ -184,31 +321,97 @@ mod tests { codegen.file.seek(SeekFrom::Start(0)).unwrap(); let mut buf = String::new(); codegen.file.read_to_string(&mut buf).unwrap(); - let expected = indoc::indoc! {r##" - .syntax unified - .thumb + let expected = ( + indoc::indoc! { + r##" + .syntax unified + .thumb - .section .text - .global _start - .type _start, %function + .section .text + .global _start + .type _start, %function - _start: - mov r4, #27 - mov r0, r4 - mov r7, #1 - svc #0 + _start: + sub sp, sp, #4 + mov r0, #27 + str r0, [sp] + ldr r0, [sp, #0] + mov r7, #1 + svc #0 - .size _start, .-_start - "##} - .to_string(); + .size _start, .-_start + "## + } + ).to_string(); assert_eq!(expected, buf) } + #[test] + fn can_generate_if_else_and_assign() { + initialize(); + let source = std::fs + ::read_to_string("test_codes/test_func_if_else.trv") + .expect("Failed to read file"); + let mut lexer: Lexer = Lexer::new(source); + let tokens: Vec = lexer.tokenize(); + let mut parser: Parser = Parser::new(tokens); + let ast: AST = parser.parse_program(); + let output_file = File::options() + .read(true) + .write(true) + .create(true) + .truncate(true) + .open("temp/tests/test_can_generate_if_else.asm") + .expect("Failed to create file: /temp/tests/test_can_generate_let.asm"); + let mut codegen = CodeGenerator::new(output_file); + codegen.generate(ast); + codegen.file.seek(SeekFrom::Start(0)).unwrap(); + let mut buf = String::new(); + codegen.file.read_to_string(&mut buf).unwrap(); + let expected = ( + indoc::indoc! { + r##" + .syntax unified + .thumb + .section .text + .global _start + .type _start, %function + + _start: + sub sp, sp, #4 + mov r0, #11 + str r0, [sp] + ldr r0, [sp, #0] + mov r1, r0 + mov r0, #10 + cmp r1, r0 + movgt r0, #1 + movle r0, #0 + cmp r0, #0 + beq else_0 + mov r0, #11 + str r0, [sp, #0] + b endif_0 + else_0: + mov r0, #12 + str r0, [sp, #0] + endif_0: + ldr r0, [sp, #0] + mov r7, #1 + svc #0 + + .size _start, .-_start + "## + } + ).to_string(); + assert_eq!(expected, buf) + } #[test] fn can_generate_func_let() { initialize(); - let source = - std::fs::read_to_string("test_codes/test_func_let.trv").expect("Failed to read file"); + let source = std::fs + ::read_to_string("test_codes/test_func_let.trv") + .expect("Failed to read file"); let mut lexer: Lexer = Lexer::new(source); let tokens: Vec = lexer.tokenize(); let mut parser: Parser = Parser::new(tokens); @@ -225,7 +428,9 @@ mod tests { codegen.file.seek(SeekFrom::Start(0)).unwrap(); let mut buf = String::new(); codegen.file.read_to_string(&mut buf).unwrap(); - let expected = indoc::indoc! {r##" + let expected = ( + indoc::indoc! { + r##" .syntax unified .thumb @@ -247,8 +452,9 @@ mod tests { svc #0 .size _start, .-_start - "##} - .to_string(); + "## + } + ).to_string(); assert_eq!(expected, buf) } } diff --git a/src/main.rs b/src/main.rs index 950fea0..28495e7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,6 +7,7 @@ use lexer::lexer::Lexer; // adjust if needed use lexer::token::Token; use parser::parser::AST; use parser::parser::Parser; +use semantic::symbol_table::SymbolTable; use std::env; use std::fs; use std::fs::File; @@ -27,6 +28,7 @@ fn main() { let mut lexer: Lexer = Lexer::new(source); let tokens: Vec = lexer.tokenize(); + let mut symbol_table = SymbolTable::new(); let mut parser: Parser = Parser::new(tokens); let ast: AST = parser.parse_program(); println!("Lexing and parsing completed successfully."); diff --git a/src/parser/parser.rs b/src/parser/parser.rs index a107749..708ba99 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -1,4 +1,4 @@ -use crate::lexer::token::{Token, TokenType}; +use crate::{lexer::token::{Token, TokenType}, semantic::symbol_table::SymbolTable}; #[derive(Debug, Clone, PartialEq)] pub enum Stmt { Let(String, Type, Expr), diff --git a/test_codes/test_func_if_else.trv b/test_codes/test_func_if_else.trv new file mode 100644 index 0000000..3b984ea --- /dev/null +++ b/test_codes/test_func_if_else.trv @@ -0,0 +1,9 @@ +func main() -> Integer { + let num : Integer = 9; + if num > 10 { + num = 11; + } else { + num = 12; + } + return num; +} From c0de01b0d348b2217d4c7f487fb93e96968f9051 Mon Sep 17 00:00:00 2001 From: Aasmundur Date: Tue, 3 Mar 2026 12:34:11 +0100 Subject: [PATCH 9/9] finnished testing the codegen --- src/codegen/codegen.rs | 62 ++++-------------------------------- test_codes/test_func_let.trv | 2 +- 2 files changed, 7 insertions(+), 57 deletions(-) diff --git a/src/codegen/codegen.rs b/src/codegen/codegen.rs index 1d4843a..036d50f 100644 --- a/src/codegen/codegen.rs +++ b/src/codegen/codegen.rs @@ -379,79 +379,29 @@ mod tests { _start: sub sp, sp, #4 - mov r0, #11 + mov r0, #9 str r0, [sp] ldr r0, [sp, #0] mov r1, r0 mov r0, #10 cmp r1, r0 + mov r0, #0 + it gt movgt r0, #1 - movle r0, #0 cmp r0, #0 beq else_0 mov r0, #11 - str r0, [sp, #0] + str r0, [sp] b endif_0 else_0: mov r0, #12 - str r0, [sp, #0] + str r0, [sp] endif_0: ldr r0, [sp, #0] mov r7, #1 svc #0 - - .size _start, .-_start - "## - } - ).to_string(); - assert_eq!(expected, buf) - } - #[test] - fn can_generate_func_let() { - initialize(); - let source = std::fs - ::read_to_string("test_codes/test_func_let.trv") - .expect("Failed to read file"); - let mut lexer: Lexer = Lexer::new(source); - let tokens: Vec = lexer.tokenize(); - let mut parser: Parser = Parser::new(tokens); - let ast: AST = parser.parse_program(); - let output_file = File::options() - .read(true) - .write(true) - .create(true) - .truncate(true) - .open("temp/tests/test_can_generate_func_let.asm") - .expect("Failed to create file: /temp/tests/test_can_generate_func_let.asm"); - let mut codegen = CodeGenerator::new(output_file); - codegen.generate(ast); - codegen.file.seek(SeekFrom::Start(0)).unwrap(); - let mut buf = String::new(); - codegen.file.read_to_string(&mut buf).unwrap(); - let expected = ( - indoc::indoc! { - r##" - .syntax unified - .thumb - .section .text - .global _start - .type _start, %function - - x: - mov r4, #100 - mov r0, r4 - bx lr - - _start: - bl x - mov r4, r0 - mov r5, #5 - mov r0, #29 - mov r7, #1 - svc #0 - - .size _start, .-_start + .size _start, .-_start "## } ).to_string(); diff --git a/test_codes/test_func_let.trv b/test_codes/test_func_let.trv index a7b21d8..cdf3d17 100644 --- a/test_codes/test_func_let.trv +++ b/test_codes/test_func_let.trv @@ -1,5 +1,5 @@ func x() -> Integer { - let v = 100; + let v : Integer = 100; return v; }