From acd7f482abce43ff69f342c05cb56923c76ac960 Mon Sep 17 00:00:00 2001 From: Alexander Hansen Date: Sun, 9 Mar 2025 04:16:35 +0100 Subject: [PATCH 1/9] compiler: support .data segment with strings - Add tables/data to represent our .data segment - Add compiler/main/analyzer/data to extract string literals - Emit .data section with symbols and data for the strings - Adjust examples/assert_examples.sh to not check for newline at the end of file. Since struct StringConst does not support this right now. --- compiler/main/CMakeLists.txt | 3 + compiler/main/analyzer/data/data_analyzer.c | 30 +++++ compiler/main/analyzer/data/data_analyzer.h | 10 ++ compiler/main/avr_code_gen/cg_avr.c | 5 + compiler/main/compiler.c | 2 + compiler/main/derefll/derefll.c | 7 +- compiler/main/gen_tac/gen_tac.h | 1 + compiler/main/gen_tac/gen_tac_const_data.c | 17 +++ compiler/main/gen_tac/gen_tac_const_value.c | 12 ++ compiler/main/gen_tac/gen_tac_term.c | 4 +- .../type_contains/tc_type_contains.c | 5 + compiler/main/x86_code_gen/cg_x86.c | 2 + .../main/x86_code_gen/cg_x86_basic_block.c | 26 ++++- .../main/x86_code_gen/cg_x86_basic_block.h | 3 +- .../x86_code_gen/cg_x86_single_function.c | 5 +- .../main/x86_code_gen/cg_x86_single_tac.c | 1 + .../x86_code_gen/compile_ir/compile_tac.h | 1 + .../compile_ir/compile_tac_const_data.c | 30 +++++ examples/assert_examples.sh | 2 +- stdlib/syscalls.dg | 6 + tables/CMakeLists.txt | 2 + tables/data/data.c | 103 ++++++++++++++++++ tables/data/data.h | 30 +++++ tables/symtable/symtable.c | 2 + tables/symtable/symtable.h | 4 + tac/_struct.h | 2 + tac/tac.c | 37 +++++-- tac/tac.h | 4 + tac/tac_ctor.c | 8 ++ tac/tac_ctor.h | 1 + tac/tac_str.c | 4 + 31 files changed, 347 insertions(+), 22 deletions(-) create mode 100644 compiler/main/analyzer/data/data_analyzer.c create mode 100644 compiler/main/analyzer/data/data_analyzer.h create mode 100644 compiler/main/gen_tac/gen_tac_const_data.c create mode 100644 compiler/main/gen_tac/gen_tac_const_value.c create 
mode 100644 compiler/main/x86_code_gen/compile_ir/compile_tac_const_data.c create mode 100644 tables/data/data.c create mode 100644 tables/data/data.h diff --git a/compiler/main/CMakeLists.txt b/compiler/main/CMakeLists.txt index 84def0938..fc64c4a26 100644 --- a/compiler/main/CMakeLists.txt +++ b/compiler/main/CMakeLists.txt @@ -11,6 +11,7 @@ add_library("sd-base" analyzer/fn/fn_analyzer.c analyzer/halts/halt_analyzer.c analyzer/lv/lv_analyzer.c + analyzer/data/data_analyzer.c #liveness liveness/liveness.c @@ -22,6 +23,7 @@ add_library("sd-base" gen_tac/gen_tac_assignstmt.c gen_tac/gen_tac_call.c gen_tac/gen_tac_constvalue.c + gen_tac/gen_tac_const_data.c gen_tac/gen_tac_expr.c gen_tac/gen_tac_forstmt.c gen_tac/gen_tac_ifstmt.c @@ -67,6 +69,7 @@ add_library("sd-base" x86_code_gen/compile_ir/compile_tac_call.c x86_code_gen/compile_ir/compile_tac_icall.c x86_code_gen/compile_ir/compile_tac_const_value.c + x86_code_gen/compile_ir/compile_tac_const_data.c x86_code_gen/compile_ir/compile_tac_copy.c x86_code_gen/compile_ir/compile_tac_goto.c x86_code_gen/compile_ir/compile_tac_if_goto.c diff --git a/compiler/main/analyzer/data/data_analyzer.c b/compiler/main/analyzer/data/data_analyzer.c new file mode 100644 index 000000000..7bb07c054 --- /dev/null +++ b/compiler/main/analyzer/data/data_analyzer.c @@ -0,0 +1,30 @@ +#include +#include +#include + +#include "ast/ast.h" + +#include "tables/symtable/symtable.h" +#include "tables/data/data.h" + +#include "data_analyzer.h" + +#include "ast/visitor/visitor.h" + +static void data_visitor(void* node, enum NODE_TYPE type, void* arg); + +void analyze_data(struct ST* st, struct AST* ast) { + + visit_ast(ast, data_visitor, st->data); +} + +static void data_visitor(void* node, enum NODE_TYPE type, void* arg) { + + struct DataTable* data = (struct DataTable*)arg; + + if (type != NODE_STRINGCONST) { return; } + + struct StringConst* sc = (struct StringConst*)node; + + data_insert(data, sc->value); +} diff --git 
a/compiler/main/analyzer/data/data_analyzer.h b/compiler/main/analyzer/data/data_analyzer.h new file mode 100644 index 000000000..19174363e --- /dev/null +++ b/compiler/main/analyzer/data/data_analyzer.h @@ -0,0 +1,10 @@ +#pragma once + +// This analyzer module visits the AST +// to look for StringConst instances +// and adds them to DataTable + +struct ST; +struct AST; + +void analyze_data(struct ST* st, struct AST* ast); diff --git a/compiler/main/avr_code_gen/cg_avr.c b/compiler/main/avr_code_gen/cg_avr.c index bda34828f..e804f5e4f 100644 --- a/compiler/main/avr_code_gen/cg_avr.c +++ b/compiler/main/avr_code_gen/cg_avr.c @@ -1,6 +1,7 @@ #include #include #include +#include #include "tables/symtable/symtable.h" @@ -91,6 +92,10 @@ bool compile_and_write_avr(struct AST* ast, struct Ctx* ctx) { { emit_defs(fout); + + //TODO: figure out how to support something like .data on AVR. + assert(data_count(ctx_tables(ctx)->data) == 0); + ibu_write(ibu, fout); } diff --git a/compiler/main/compiler.c b/compiler/main/compiler.c index d13564d85..d24012bf1 100644 --- a/compiler/main/compiler.c +++ b/compiler/main/compiler.c @@ -25,6 +25,7 @@ #include "analyzer/dead/dead_analyzer.h" #include "analyzer/halts/halt_analyzer.h" #include "analyzer/annotation/annotation_analyzer.h" +#include "analyzer/data/data_analyzer.h" bool compile(struct Flags* flags) { @@ -114,6 +115,7 @@ bool compile(struct Flags* flags) { analyze_dead_code(ctx_tables(ctx), ast); analyze_termination(ctx_tables(ctx)); analyze_annotations(ctx_tables(ctx), ast); + analyze_data(ctx_tables(ctx), ast); bool success; if (flags_x86(flags)) { diff --git a/compiler/main/derefll/derefll.c b/compiler/main/derefll/derefll.c index d1224bb20..3d6fd9a5c 100644 --- a/compiler/main/derefll/derefll.c +++ b/compiler/main/derefll/derefll.c @@ -96,9 +96,10 @@ struct DerefLL* derefll_ctor_simplevar(struct SimpleVar* sv, struct Ctx* ctx) { struct LVSTLine* line = lvst_get(lvst, sv->name); // In case of array type, we need an additional 
deref, - // to get the pointer out of the stackframe. - // Deref is implicit for array type - if (line->type->array_type) { + // to get the pointer out of the stackframe, in case indices are present. + // That Deref is implicit for array type + + if (line->type->array_type && sv->count_indices) { derefll_append(res, derefll_deref()); } diff --git a/compiler/main/gen_tac/gen_tac.h b/compiler/main/gen_tac/gen_tac.h index 3b0147f9d..0044db484 100644 --- a/compiler/main/gen_tac/gen_tac.h +++ b/compiler/main/gen_tac/gen_tac.h @@ -51,6 +51,7 @@ bool tac_term(struct TACBuffer* buffer, struct Term* t, struct Ctx* ctx); bool tac_term_addr(struct TACBuffer* buffer, struct Term* t, struct Ctx* ctx); void tac_constvalue(struct TACBuffer* buffer, struct ConstValue* c); +void tac_const_data(struct TACBuffer* buffer, struct StringConst* c); //---- int int_value_from_const(struct ConstValue* cv); diff --git a/compiler/main/gen_tac/gen_tac_const_data.c b/compiler/main/gen_tac/gen_tac_const_data.c new file mode 100644 index 000000000..752d81800 --- /dev/null +++ b/compiler/main/gen_tac/gen_tac_const_data.c @@ -0,0 +1,17 @@ +#include + +#include "tac/tac.h" +#include "tac/tacbuffer.h" +#include "gen_tac.h" + +void tac_const_data(struct TACBuffer* buffer, struct StringConst* str) { + + //struct DataTable* table = ctx_tables(ctx)->data; + //const uint32_t offset = data_offset(str->value); + //TODO: find the offset in data table + const uint32_t offset = 0; + + tacbuffer_append( + buffer, + makeTACConstData(make_temp(), offset)); +} diff --git a/compiler/main/gen_tac/gen_tac_const_value.c b/compiler/main/gen_tac/gen_tac_const_value.c new file mode 100644 index 000000000..1ae689808 --- /dev/null +++ b/compiler/main/gen_tac/gen_tac_const_value.c @@ -0,0 +1,12 @@ +#include + +#include "tac/tac.h" +#include "tac/tacbuffer.h" +#include "gen_tac.h" + +void tac_constvalue(struct TACBuffer* buffer, struct ConstValue* c) { + + tacbuffer_append( + buffer, + makeTACConst(make_temp(), 
int_value_from_const(c))); +} diff --git a/compiler/main/gen_tac/gen_tac_term.c b/compiler/main/gen_tac/gen_tac_term.c index 2e4249834..280c85986 100644 --- a/compiler/main/gen_tac/gen_tac_term.c +++ b/compiler/main/gen_tac/gen_tac_term.c @@ -11,8 +11,8 @@ bool tac_term(struct TACBuffer* buffer, struct Term* t, struct Ctx* ctx) { case 5: tac_expr(buffer, t->ptr.m5, ctx); break; case 6: tac_variable(buffer, t->ptr.m6, ctx); break; case 8: - fprintf(stderr, "string const currently unsupported\n"); - return false; + tac_const_data(buffer, t->ptr.m8); + break; case 11: fprintf(stderr, "Fatal Error. Lambdas should not exist at this stage.\n"); return false; diff --git a/compiler/main/typechecker/type_contains/tc_type_contains.c b/compiler/main/typechecker/type_contains/tc_type_contains.c index 9c0ec9a19..e9ce95fd6 100644 --- a/compiler/main/typechecker/type_contains/tc_type_contains.c +++ b/compiler/main/typechecker/type_contains/tc_type_contains.c @@ -68,6 +68,11 @@ static bool tc_pointer_type_contains(struct PointerType* expect, struct Type* ac // integers can be used as pointers if (actual->pointer_type == NULL) { + + if (actual->array_type != NULL) { + return eq_type(expect->element_type, actual->array_type->element_type); + } + return is_integer_type(actual); } diff --git a/compiler/main/x86_code_gen/cg_x86.c b/compiler/main/x86_code_gen/cg_x86.c index 0584a933e..4d4855646 100644 --- a/compiler/main/x86_code_gen/cg_x86.c +++ b/compiler/main/x86_code_gen/cg_x86.c @@ -82,6 +82,8 @@ bool compile_and_write_x86(struct AST* ast, struct Ctx* ctx) { return false; } + data_write_data_segment(ctx_tables(ctx)->data, fout); + fprintf(fout, "section .text\n"); fprintf(fout, "global _start\n\n"); diff --git a/compiler/main/x86_code_gen/cg_x86_basic_block.c b/compiler/main/x86_code_gen/cg_x86_basic_block.c index 60accf1a2..2f9ae4439 100644 --- a/compiler/main/x86_code_gen/cg_x86_basic_block.c +++ b/compiler/main/x86_code_gen/cg_x86_basic_block.c @@ -16,13 +16,15 @@ #include 
"x86_code_gen/compile_ir/compile_tac.h" #include "cg_x86_basic_block.h" -void emit_asm_x86_basic_block(struct BasicBlock* block, struct Ctx* ctx, struct IBuffer* ibu, struct RAT* rat, char* current_function_name) { +bool emit_asm_x86_basic_block(struct BasicBlock* block, struct Ctx* ctx, struct IBuffer* ibu, struct RAT* rat, char* current_function_name) { + + bool success = false; if (block == NULL) { - return; + return true; } if (block->visited_emit_asm) { - return; + return true; } block->visited_emit_asm = true; @@ -30,13 +32,25 @@ void emit_asm_x86_basic_block(struct BasicBlock* block, struct Ctx* ctx, struct for (size_t i = 0; i < tacbuffer_count(block->buffer); i++) { struct TAC* t = tacbuffer_get(block->buffer, i); - emit_asm_x86_single_tac(rat, t, ctx, ibu, current_function_name); + success = emit_asm_x86_single_tac(rat, t, ctx, ibu, current_function_name); + + if (!success) { + return false; + } } //false/default branch gets emitted first, //because there is no label for it in a lot of cases //this way we can avoid an extra jump that's really //not necessary. 
- emit_asm_x86_basic_block(block->branch_2, ctx, ibu, rat, current_function_name); - emit_asm_x86_basic_block(block->branch_1, ctx, ibu, rat, current_function_name); + success = emit_asm_x86_basic_block(block->branch_2, ctx, ibu, rat, current_function_name); + if (!success) { + return false; + } + success = emit_asm_x86_basic_block(block->branch_1, ctx, ibu, rat, current_function_name); + if (!success) { + return false; + } + + return true; } diff --git a/compiler/main/x86_code_gen/cg_x86_basic_block.h b/compiler/main/x86_code_gen/cg_x86_basic_block.h index 140fbc234..2fb9f22fa 100644 --- a/compiler/main/x86_code_gen/cg_x86_basic_block.h +++ b/compiler/main/x86_code_gen/cg_x86_basic_block.h @@ -8,4 +8,5 @@ struct IBuffer; -void emit_asm_x86_basic_block(struct BasicBlock* block, struct Ctx* ctx, struct IBuffer* ibu, struct RAT* rat, char* current_function_name); +// @returns false on error +bool emit_asm_x86_basic_block(struct BasicBlock* block, struct Ctx* ctx, struct IBuffer* ibu, struct RAT* rat, char* current_function_name); diff --git a/compiler/main/x86_code_gen/cg_x86_single_function.c b/compiler/main/x86_code_gen/cg_x86_single_function.c index b70796aff..67f482241 100644 --- a/compiler/main/x86_code_gen/cg_x86_single_function.c +++ b/compiler/main/x86_code_gen/cg_x86_single_function.c @@ -69,7 +69,10 @@ void compile_and_write_x86_single_function(struct Method* m, struct Ctx* ctx, st rat_print(rat); } - emit_asm_x86_basic_block(root, ctx, ibu, rat, current_function_name); + bool success = emit_asm_x86_basic_block(root, ctx, ibu, rat, current_function_name); + + // TODO: propagate this error + assert(success); //delete the basic block graph for (int i = 0; i < nblocks; i++) { diff --git a/compiler/main/x86_code_gen/cg_x86_single_tac.c b/compiler/main/x86_code_gen/cg_x86_single_tac.c index 51a03b055..400a471ff 100644 --- a/compiler/main/x86_code_gen/cg_x86_single_tac.c +++ b/compiler/main/x86_code_gen/cg_x86_single_tac.c @@ -36,6 +36,7 @@ bool 
emit_asm_x86_single_tac(struct RAT* rat, struct TAC* tac, struct Ctx* ctx, case TAC_COPY: compile_tac_copy_x86(rat, tac, ibu); break; case TAC_CONST_VALUE: compile_tac_const_value_x86(rat, tac, ibu); break; + case TAC_CONST_DATA: compile_tac_const_data_x86(rat, tac, ctx, ibu); break; case TAC_CALL: compile_tac_call_x86(rat, tac, ibu, ctx, current_function_name); break; case TAC_ICALL: compile_tac_icall_x86(rat, tac, ibu, ctx, current_function_name); break; case TAC_PARAM: compile_tac_param_x86(rat, tac, ibu); break; diff --git a/compiler/main/x86_code_gen/compile_ir/compile_tac.h b/compiler/main/x86_code_gen/compile_ir/compile_tac.h index 1f7534f17..45b8c15ff 100644 --- a/compiler/main/x86_code_gen/compile_ir/compile_tac.h +++ b/compiler/main/x86_code_gen/compile_ir/compile_tac.h @@ -11,6 +11,7 @@ struct ST; void compile_tac_return_x86(struct RAT* rat, struct TAC* tac, struct Ctx* ctx, struct IBuffer* ibu); void compile_tac_const_value_x86(struct RAT* rat, struct TAC* tac, struct IBuffer* ibu); +void compile_tac_const_data_x86(struct RAT* rat, struct TAC* tac, struct Ctx* ctx, struct IBuffer* ibu); void compile_tac_copy_x86(struct RAT* rat, struct TAC* tac, struct IBuffer* ibu); void compile_tac_load_local_addr_x86(struct RAT* rat, struct TAC* tac, struct Ctx* ctx, struct IBuffer* ibu); diff --git a/compiler/main/x86_code_gen/compile_ir/compile_tac_const_data.c b/compiler/main/x86_code_gen/compile_ir/compile_tac_const_data.c new file mode 100644 index 000000000..828de317d --- /dev/null +++ b/compiler/main/x86_code_gen/compile_ir/compile_tac_const_data.c @@ -0,0 +1,30 @@ +#include +#include +#include +#include + +#include "rat/rat.h" + +#include "tables/symtable/symtable.h" +#include "tac/tac.h" +#include "x86_code_gen/compile_ir/compile_tac.h" + +void compile_tac_const_data_x86(struct RAT* rat, struct TAC* tac, struct Ctx* ctx, struct IBuffer* ibu) { + + const int reg = rat_get_register(rat, tac_dest(tac)); + + const uint64_t offset = tac_const_value(tac); + + 
struct DataTable* data_table = ctx_tables(ctx)->data; + + char* symbol = data_symbol(data_table, offset); + + assert(symbol != NULL); + + char* c; + asprintf(&c, "TAC_CONST_DATA %s", symbol); + + mov_const_symbol(reg, symbol, c); + + free(c); +} diff --git a/examples/assert_examples.sh b/examples/assert_examples.sh index cac44cae9..80a583de4 100755 --- a/examples/assert_examples.sh +++ b/examples/assert_examples.sh @@ -56,7 +56,7 @@ for test_file in $(find . -name '*.dg'); do /tmp/program > /tmp/program_stdout - diff /tmp/program_stdout "$expected_stdout_file" + diff --ignore-trailing-space /tmp/program_stdout "$expected_stdout_file" stdout_res=$? if [[ stdout_res -eq 0 ]]; then echo "[PASS][STDOUT] $test_file" diff --git a/stdlib/syscalls.dg b/stdlib/syscalls.dg index c48d5af1c..7521a8517 100644 --- a/stdlib/syscalls.dg +++ b/stdlib/syscalls.dg @@ -3,5 +3,11 @@ @syscall fn write(uint fd, *char buf, uint64 count) -> int {} +@syscall +fn open([char] filename, int flags, int mode) -> int {} + +@syscall +fn read(int fd, *char buf, uint64 count) -> int {} + @syscall fn exit(int code) -> int {} diff --git a/tables/CMakeLists.txt b/tables/CMakeLists.txt index 7af145ad1..f5f04076e 100644 --- a/tables/CMakeLists.txt +++ b/tables/CMakeLists.txt @@ -12,6 +12,8 @@ add_library("sd-tables" STATIC stst/stst.c stst/stst_print.c + data/data.c + symtable/symtable.c ) diff --git a/tables/data/data.c b/tables/data/data.c new file mode 100644 index 000000000..cbd6211db --- /dev/null +++ b/tables/data/data.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "data.h" + +struct DataTable { + + struct DataEntry** entries; + size_t count_entries; + size_t capacity; +}; + +struct DataTable* data_ctor() { + + struct DataTable* res = calloc(1, sizeof(struct DataTable)); + res->count_entries = 0; + res->capacity = 10; + + res->entries = calloc(res->capacity, sizeof(struct DataEntry*)); + + return res; +} + +static void data_resize(struct 
DataTable* data) { + + if ((data->count_entries + 1) >= data->capacity) { + data->capacity *= 2; + data->entries = realloc(data->entries, data->capacity); + + assert(data->entries); + } +} + +// @brief Turns any string into something +// that can be used as a label in the assembly. +// The result needs to be freed. +// @param str any string +static char* data_make_symbol(char* str) { + + const size_t len = strlen(str); + char* res = calloc(len + 1, sizeof(char)); + + sprintf(res, "str_"); + int j = strlen(res); + + for (int i = 0; i < len; i++) { + char c = str[i]; + if (isalnum(c)) { + res[j++] = c; + } else { + //TODO: make this bijective to the actual string. + //Currently, this would collapse "ab?" and "ab*" + //to the same symbol + res[j++] = '_'; + } + } + return res; +} + +size_t data_count(struct DataTable* data) { + return data->count_entries; +} + +void data_write_data_segment(struct DataTable* data, FILE* fout) { + + fprintf(fout, "section .data\n"); + + for (size_t i = 0; i < data->count_entries; i++) { + struct DataEntry* e = data->entries[i]; + size_t len = strlen(e->value); + + fprintf(fout, "%s: db \"%s\", %ld\n", e->symbol, e->value, len); + } +} + +void data_insert(struct DataTable* data, char* str) { + + data_resize(data); + + struct DataEntry* entry = malloc(sizeof(struct DataEntry)); + + entry->value = strdup(str); + entry->symbol = data_make_symbol(str); + + data->entries[data->count_entries++] = entry; +} + +char* data_symbol(struct DataTable* data, uint32_t offset) { + + assert(data); + + if (offset >= data->count_entries) { + fprintf(stderr, "%s:%s: could not find symbol for offset %d\n", __FILE__, __func__, offset); + return NULL; + } + + return data->entries[offset]->symbol; +} diff --git a/tables/data/data.h b/tables/data/data.h new file mode 100644 index 000000000..aea434171 --- /dev/null +++ b/tables/data/data.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + +struct DataTable; + +struct DataEntry { + + // needs to be freed + 
char* symbol; + // needs to be freed + char* value; +}; + +struct DataTable* data_ctor(); + +// @returns the count of entries in data table +size_t data_count(struct DataTable* data); + +void data_write_data_segment(struct DataTable* data, FILE* fout); + +// @brief inserts a new entry into data table +// @param str the string to enter +void data_insert(struct DataTable* data, char* str); + +// @brief may return e.g. "str_hello" +// for string "hello" stored in table +// @returns NULL if entry is not found +char* data_symbol(struct DataTable* data, uint32_t offset); diff --git a/tables/symtable/symtable.c b/tables/symtable/symtable.c index 74734b162..99f99fbab 100644 --- a/tables/symtable/symtable.c +++ b/tables/symtable/symtable.c @@ -31,6 +31,8 @@ struct ST* st_ctor() { st->stst = stst_ctor(); st->lvst = lvst_ctor(); + st->data = data_ctor(); + return st; } diff --git a/tables/symtable/symtable.h b/tables/symtable/symtable.h index b1d08b11b..e728b6961 100644 --- a/tables/symtable/symtable.h +++ b/tables/symtable/symtable.h @@ -5,6 +5,7 @@ #include "tables/lvst/lvst.h" #include "tables/sst/sst.h" #include "tables/stst/stst.h" +#include "tables/data/data.h" struct ST { //struct SymTable @@ -18,6 +19,9 @@ struct ST { struct SST* sst; //may be NULL struct STST* stst; //may be NULL + // things which should be in .data segment + struct DataTable* data; + // ----------------------------- //all the type nodes that were additionally diff --git a/tac/_struct.h b/tac/_struct.h index 24a6428d8..c0e0f0fff 100644 --- a/tac/_struct.h +++ b/tac/_struct.h @@ -30,6 +30,8 @@ struct TAC { enum TAC_OP op; + // used for TAC_CONST_VALUE (literal value) + // used for TAC_CONST_DATA (as data table offset) int64_t const_value; // only used for TAC_PARAM. 
diff --git a/tac/tac.c b/tac/tac.c index b91dd97d4..a1c856369 100644 --- a/tac/tac.c +++ b/tac/tac.c @@ -12,6 +12,7 @@ int32_t tac_opt_dest(struct TAC* tac) { switch (tac->kind) { case TAC_CONST_VALUE: + case TAC_CONST_DATA: case TAC_BINARY_OP: case TAC_CALL: case TAC_ICALL: @@ -37,7 +38,7 @@ int32_t tac_opt_dest(struct TAC* tac) { return -1; default: - fprintf(stderr, "%s: unhandled case %d\n", __func__, tac->kind); + fprintf(stderr, "%s:%s: unhandled case %d\n", __FILE__, __func__, tac->kind); return -1; } } @@ -61,9 +62,10 @@ int32_t tac_dest(struct TAC* tac) { case TAC_UNARY_OP: case TAC_COPY: case TAC_CONST_VALUE: + case TAC_CONST_DATA: break; default: - fprintf(stderr, "[TAC] invalid access (kind == %d)\n", tac_kind(tac)); + fprintf(stderr, "[TAC] invalid access (kind == %d):%s\n", tac_kind(tac), __func__); return -1; } return tac->dest; @@ -98,9 +100,10 @@ int64_t tac_const_value(struct TAC* tac) { case TAC_SETUP_STACKFRAME: case TAC_LOAD: case TAC_CONST_VALUE: + case TAC_CONST_DATA: break; default: - fprintf(stderr, "[TAC] invalid access (kind == %d)\n", tac_kind(tac)); + fprintf(stderr, "[TAC] invalid access (kind == %d), %s\n", tac_kind(tac), __func__); assert(false); } return tac->const_value; @@ -163,6 +166,7 @@ int32_t tac_max_temp(struct TAC* tac) { case TAC_LOAD_LOCAL_ADDR: case TAC_LOAD_FUNCTION_PTR: case TAC_CONST_VALUE: + case TAC_CONST_DATA: return tac->dest; case TAC_IF_GOTO: case TAC_STORE_LOCAL: @@ -176,7 +180,7 @@ int32_t tac_max_temp(struct TAC* tac) { return 0; default: - fprintf(stderr, "%s: unexpected case %u\n", __func__, tac->kind); + fprintf(stderr, "%s:%s: unexpected case %u\n", __FILE__, __func__, tac->kind); return -1; } } @@ -210,6 +214,7 @@ bool tac_needs_register(struct TAC* tac) { switch (tac->kind) { case TAC_CONST_VALUE: + case TAC_CONST_DATA: case TAC_LOAD_LOCAL_ADDR: case TAC_LOAD_FUNCTION_PTR: case TAC_COPY: @@ -219,11 +224,25 @@ bool tac_needs_register(struct TAC* tac) { case TAC_CALL: case TAC_ICALL: return true; - break; 
+ + case TAC_GOTO: + case TAC_IF_GOTO: + case TAC_IF_CMP_GOTO: + case TAC_STORE_LOCAL: + case TAC_PARAM: + case TAC_RETURN: + case TAC_SETUP_STACKFRAME: + case TAC_SETUP_SP: + case TAC_NOP: + case TAC_LABEL_INDEXED: + case TAC_LABEL_FUNCTION: + case TAC_STORE: + return false; default: + fprintf(stderr, "[TAC] unhandled case (kind == %d): %s\n", tac_kind(tac), __func__); + assert(false); return false; - break; } } @@ -273,11 +292,12 @@ int tac_mark_used(struct TAC* tac, bool* used_map, size_t map_size) { case TAC_LOAD_FUNCTION_PTR: case TAC_NOP: case TAC_CONST_VALUE: + case TAC_CONST_DATA: case TAC_ICALL: return 0; default: - fprintf(stderr, "%s: unexpected case %u\n", __func__, tac->kind); + fprintf(stderr, "%s:%s: unexpected case %u\n", __FILE__, __func__, tac->kind); return 1; } } @@ -307,6 +327,7 @@ int tac_mark_defines(struct TAC* tac, bool* defines_map, size_t map_size) { case TAC_LOAD_LOCAL_ADDR: case TAC_LOAD_FUNCTION_PTR: case TAC_CONST_VALUE: + case TAC_CONST_DATA: case TAC_ICALL: check_bounds(tac->kind, tac->dest, map_size); defines_map[tac->dest] = true; @@ -314,7 +335,7 @@ int tac_mark_defines(struct TAC* tac, bool* defines_map, size_t map_size) { break; default: - fprintf(stderr, "%s: unexpected case %u\n", __func__, tac->kind); + fprintf(stderr, "%s:%s: unexpected case %u\n", __FILE__, __func__, tac->kind); return 1; } } diff --git a/tac/tac.h b/tac/tac.h index 7b6641bca..cad3593ad 100644 --- a/tac/tac.h +++ b/tac/tac.h @@ -59,6 +59,10 @@ enum TAC_KIND { TAC_CONST_VALUE, + // Load a symbol from .data segment into register. + // The assembler will figure out the rest here. + TAC_CONST_DATA, + TAC_CALL, //call to a label (string) without anything else TAC_ICALL, // call to a temporary 't1 = call t2'. // So we can comput arbitrary address to call. 
diff --git a/tac/tac_ctor.c b/tac/tac_ctor.c index 4b0e948df..c5ef9bd2e 100644 --- a/tac/tac_ctor.c +++ b/tac/tac_ctor.c @@ -137,6 +137,14 @@ struct TAC* makeTACConst16(uint32_t tmp, int value) { return t; } +struct TAC* makeTACConstData(uint32_t tmp, uint32_t value_offset_data_table) { + struct TAC* t = makeTAC(); + t->kind = TAC_CONST_DATA; + t->dest = tmp; + t->const_value = value_offset_data_table; + return t; +} + struct TAC* makeTACBinOp(uint32_t dest, enum TAC_OP op, uint32_t src) { struct TAC* t = makeTAC(); diff --git a/tac/tac_ctor.h b/tac/tac_ctor.h index f2fb7fb93..b740fc923 100644 --- a/tac/tac_ctor.h +++ b/tac/tac_ctor.h @@ -35,6 +35,7 @@ struct TAC* makeTACLoadFunctionPtr(uint32_t tmp, uint32_t sst_index); struct TAC* makeTACConst(uint32_t tmp, int value); struct TAC* makeTACConst16(uint32_t tmp, int value); +struct TAC* makeTACConstData(uint32_t tmp, uint32_t value_offset_data_table); struct TAC* makeTACBinOp(uint32_t dest, enum TAC_OP op, uint32_t src); struct TAC* makeTACUnaryOp(uint32_t dest, uint32_t src, enum TAC_OP op); diff --git a/tac/tac_str.c b/tac/tac_str.c index 7781f3f26..d70edcfbd 100644 --- a/tac/tac_str.c +++ b/tac/tac_str.c @@ -115,6 +115,10 @@ char* tac_tostring(struct TAC* t, struct SST* sst, struct LVST* lvst) { sprintf(buf, "t%d = %ld", dest, const_value); break; + case TAC_CONST_DATA: + sprintf(buf, "t%d = %ld (DATA table offset)", dest, const_value); + break; + case TAC_COPY: sprintf(buf, "t%d = t%lu", dest, arg1); break; From 44745f1a6534badba819265a8a270b0a28f45f13 Mon Sep 17 00:00:00 2001 From: Alexander Hansen Date: Sun, 9 Mar 2025 04:18:59 +0100 Subject: [PATCH 2/9] examples: Hello World example for 'write' syscall --- examples/syscalls/write_string/write_string.dg | 9 +++++++++ examples/syscalls/write_string/write_string.exitcode | 1 + examples/syscalls/write_string/write_string.stdout | 1 + 3 files changed, 11 insertions(+) create mode 100644 examples/syscalls/write_string/write_string.dg create mode 100644 
examples/syscalls/write_string/write_string.exitcode create mode 100644 examples/syscalls/write_string/write_string.stdout diff --git a/examples/syscalls/write_string/write_string.dg b/examples/syscalls/write_string/write_string.dg new file mode 100644 index 000000000..4eb2cad78 --- /dev/null +++ b/examples/syscalls/write_string/write_string.dg @@ -0,0 +1,9 @@ +fn main () ~> int { + + [char] filename = "Hello, World!"; + *char ptr = filename; + + write(1, ptr, 12); + + return 0; +} diff --git a/examples/syscalls/write_string/write_string.exitcode b/examples/syscalls/write_string/write_string.exitcode new file mode 100644 index 000000000..573541ac9 --- /dev/null +++ b/examples/syscalls/write_string/write_string.exitcode @@ -0,0 +1 @@ +0 diff --git a/examples/syscalls/write_string/write_string.stdout b/examples/syscalls/write_string/write_string.stdout new file mode 100644 index 000000000..3fa0d4b98 --- /dev/null +++ b/examples/syscalls/write_string/write_string.stdout @@ -0,0 +1 @@ +Hello, World From 8846cd525216403a868e594d2d2cbe59580e1ed9 Mon Sep 17 00:00:00 2001 From: Alexander Hansen Date: Sun, 9 Mar 2025 04:28:10 +0100 Subject: [PATCH 3/9] tables: free DataTable on st_free --- tables/data/data.c | 13 +++++++++++++ tables/data/data.h | 1 + tables/symtable/symtable.c | 2 ++ 3 files changed, 16 insertions(+) diff --git a/tables/data/data.c b/tables/data/data.c index cbd6211db..9637b0c3f 100644 --- a/tables/data/data.c +++ b/tables/data/data.c @@ -26,6 +26,19 @@ struct DataTable* data_ctor() { return res; } +void data_dtor(struct DataTable* data) { + + for (size_t i = 0; i < data->count_entries; i++) { + struct DataEntry* entry = data->entries[i]; + + free(entry->symbol); + free(entry->value); + free(entry); + } + free(data->entries); + free(data); +} + static void data_resize(struct DataTable* data) { if ((data->count_entries + 1) >= data->capacity) { diff --git a/tables/data/data.h b/tables/data/data.h index aea434171..7b43d003f 100644 --- a/tables/data/data.h 
+++ b/tables/data/data.h @@ -14,6 +14,7 @@ struct DataEntry { }; struct DataTable* data_ctor(); +void data_dtor(struct DataTable* data); // @returns the count of entries in data table size_t data_count(struct DataTable* data); diff --git a/tables/symtable/symtable.c b/tables/symtable/symtable.c index 99f99fbab..7c1d24227 100644 --- a/tables/symtable/symtable.c +++ b/tables/symtable/symtable.c @@ -6,6 +6,7 @@ #include "tables/lvst/lvst.h" #include "tables/sst/sst.h" #include "tables/stst/stst.h" +#include "tables/data/data.h" #include "symtable.h" struct ST* st_ctor() { @@ -41,6 +42,7 @@ void st_free(struct ST* st) { if (st->sst != NULL) { sst_free(st->sst); } if (st->lvst != NULL) { lvst_free(st->lvst); } if (st->stst != NULL) { stst_free(st->stst); } + data_dtor(st->data); for (int i = 0; i < st->inferred_types_count; i++) { free_type(st->inferred_types[i]); From 6065b428e82516a214b7b5638d6a31aba4d7301f Mon Sep 17 00:00:00 2001 From: Alexander Hansen Date: Sun, 9 Mar 2025 04:45:10 +0100 Subject: [PATCH 4/9] tables/data: define injective map string -> symbol Previously, 2 distinct strings could be mapped to the same symbol. This should hopefully not happen now. --- tables/data/data.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tables/data/data.c b/tables/data/data.c index 9637b0c3f..c80863a3f 100644 --- a/tables/data/data.c +++ b/tables/data/data.c @@ -53,25 +53,29 @@ static void data_resize(struct DataTable* data) { // that can be used as a label in the assembly. // The result needs to be freed. 
// @param str any string -static char* data_make_symbol(char* str) { +static char* data_make_symbol(const char* str) { const size_t len = strlen(str); - char* res = calloc(len + 1, sizeof(char)); + + char* res = calloc(len * 3 + 10, sizeof(char)); + if (!res) return NULL; sprintf(res, "str_"); - int j = strlen(res); + size_t j = strlen(res); - for (int i = 0; i < len; i++) { + for (size_t i = 0; i < len; i++) { char c = str[i]; if (isalnum(c)) { res[j++] = c; - } else { - //TODO: make this bijective to the actual string. - //Currently, this would collapse "ab?" and "ab*" - //to the same symbol + } else if (c == '_') { res[j++] = '_'; + } else { + // Add '$' to signal that an escape code follows + sprintf(res + j, "$%02X", (unsigned char)c); + j += 3; } } + return res; } From 99a4f553f1c12073345125b77e8d565cf4c2e41a Mon Sep 17 00:00:00 2001 From: Alexander Hansen Date: Sun, 9 Mar 2025 16:34:55 +0100 Subject: [PATCH 5/9] compiler: data table: correct offset for string Instead of hardcoding (just done for PoC), define a function 'data_string_offset' to return the offset in DataTable for a string. 
--- compiler/main/gen_tac/gen_tac.h | 4 +++- compiler/main/gen_tac/gen_tac_const_data.c | 17 ++++++++++++----- compiler/main/gen_tac/gen_tac_term.c | 4 +--- examples/syscalls/read_file/read_file.dg | 17 +++++++++++++++++ examples/syscalls/write_string/write_string.dg | 11 ++++++++--- .../syscalls/write_string/write_string.stdout | 2 +- tables/data/data.c | 13 +++++++++++++ tables/data/data.h | 5 +++++ 8 files changed, 60 insertions(+), 13 deletions(-) create mode 100644 examples/syscalls/read_file/read_file.dg diff --git a/compiler/main/gen_tac/gen_tac.h b/compiler/main/gen_tac/gen_tac.h index 0044db484..8c289da9b 100644 --- a/compiler/main/gen_tac/gen_tac.h +++ b/compiler/main/gen_tac/gen_tac.h @@ -51,7 +51,9 @@ bool tac_term(struct TACBuffer* buffer, struct Term* t, struct Ctx* ctx); bool tac_term_addr(struct TACBuffer* buffer, struct Term* t, struct Ctx* ctx); void tac_constvalue(struct TACBuffer* buffer, struct ConstValue* c); -void tac_const_data(struct TACBuffer* buffer, struct StringConst* c); + +// @returns false on error +bool tac_const_data(struct TACBuffer* buffer, struct StringConst* c, struct Ctx* ctx); //---- int int_value_from_const(struct ConstValue* cv); diff --git a/compiler/main/gen_tac/gen_tac_const_data.c b/compiler/main/gen_tac/gen_tac_const_data.c index 752d81800..608f2ebac 100644 --- a/compiler/main/gen_tac/gen_tac_const_data.c +++ b/compiler/main/gen_tac/gen_tac_const_data.c @@ -1,17 +1,24 @@ #include +#include "tables/symtable/symtable.h" #include "tac/tac.h" #include "tac/tacbuffer.h" #include "gen_tac.h" -void tac_const_data(struct TACBuffer* buffer, struct StringConst* str) { +#include "tables/data/data.h" - //struct DataTable* table = ctx_tables(ctx)->data; - //const uint32_t offset = data_offset(str->value); - //TODO: find the offset in data table - const uint32_t offset = 0; +bool tac_const_data(struct TACBuffer* buffer, struct StringConst* str, struct Ctx* ctx) { + + const int32_t offset = data_string_offset(ctx_tables(ctx)->data, 
str->value); + + if (offset < 0) { + fprintf(stderr, "%s:%s: could not find offset of '%s' in data table\n", __FILE__, __func__, str->value); + return false; + } tacbuffer_append( buffer, makeTACConstData(make_temp(), offset)); + + return true; } diff --git a/compiler/main/gen_tac/gen_tac_term.c b/compiler/main/gen_tac/gen_tac_term.c index 280c85986..94036ef9f 100644 --- a/compiler/main/gen_tac/gen_tac_term.c +++ b/compiler/main/gen_tac/gen_tac_term.c @@ -10,9 +10,7 @@ bool tac_term(struct TACBuffer* buffer, struct Term* t, struct Ctx* ctx) { case 4: tac_call(buffer, t->ptr.m4, ctx); break; case 5: tac_expr(buffer, t->ptr.m5, ctx); break; case 6: tac_variable(buffer, t->ptr.m6, ctx); break; - case 8: - tac_const_data(buffer, t->ptr.m8); - break; + case 8: return tac_const_data(buffer, t->ptr.m8, ctx); case 11: fprintf(stderr, "Fatal Error. Lambdas should not exist at this stage.\n"); return false; diff --git a/examples/syscalls/read_file/read_file.dg b/examples/syscalls/read_file/read_file.dg new file mode 100644 index 000000000..88847b2e0 --- /dev/null +++ b/examples/syscalls/read_file/read_file.dg @@ -0,0 +1,17 @@ +fn main () ~> int { + + [char] filename = "read_file.dg"; + int fd = open(filename, 0, 0); + + if fd < 0 { + return 1; + } + + char c = ' '; + + read(fd, &c, 1); + + write(1, &c, 1); + + return 0; +} diff --git a/examples/syscalls/write_string/write_string.dg b/examples/syscalls/write_string/write_string.dg index 4eb2cad78..fb0364ad2 100644 --- a/examples/syscalls/write_string/write_string.dg +++ b/examples/syscalls/write_string/write_string.dg @@ -1,9 +1,14 @@ fn main () ~> int { - [char] filename = "Hello, World!"; - *char ptr = filename; + [char] str1 = "Hello, "; + *char ptr = str1; - write(1, ptr, 12); + write(1, ptr, 7); + + [char] str2 = "World!"; + ptr = str2; + + write(1, ptr, 6); return 0; } diff --git a/examples/syscalls/write_string/write_string.stdout b/examples/syscalls/write_string/write_string.stdout index 3fa0d4b98..8ab686eaf 100644 
--- a/examples/syscalls/write_string/write_string.stdout +++ b/examples/syscalls/write_string/write_string.stdout @@ -1 +1 @@ -Hello, World +Hello, World! diff --git a/tables/data/data.c b/tables/data/data.c index c80863a3f..442cf3d3a 100644 --- a/tables/data/data.c +++ b/tables/data/data.c @@ -118,3 +118,16 @@ char* data_symbol(struct DataTable* data, uint32_t offset) { return data->entries[offset]->symbol; } + +int32_t data_string_offset(struct DataTable* data, char* str) { + + for (size_t i = 0; i < data->count_entries; i++) { + struct DataEntry* e = data->entries[i]; + + if (strcmp(str, e->value) == 0) { + return i; + } + } + + return -1; +} diff --git a/tables/data/data.h b/tables/data/data.h index 7b43d003f..46eb5fb09 100644 --- a/tables/data/data.h +++ b/tables/data/data.h @@ -29,3 +29,8 @@ void data_insert(struct DataTable* data, char* str); // for string "hello" stored in table // @returns NULL if entry is not found char* data_symbol(struct DataTable* data, uint32_t offset); + +// @brief returns offset of string in data table +// @param str The string from 'struct StringConst' +// @returns < 0 on error +int32_t data_string_offset(struct DataTable* data, char* str); From 104852d5a01a33b9446581db1a9aa24cd3ec41cb Mon Sep 17 00:00:00 2001 From: Alexander Hansen Date: Sun, 9 Mar 2025 16:45:04 +0100 Subject: [PATCH 6/9] remove unused file gen_tac_const_value.c oopsie, must have accidentally copied that. Unused file, builds without that. 
--- compiler/main/gen_tac/gen_tac_const_value.c | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 compiler/main/gen_tac/gen_tac_const_value.c diff --git a/compiler/main/gen_tac/gen_tac_const_value.c b/compiler/main/gen_tac/gen_tac_const_value.c deleted file mode 100644 index 1ae689808..000000000 --- a/compiler/main/gen_tac/gen_tac_const_value.c +++ /dev/null @@ -1,12 +0,0 @@ -#include - -#include "tac/tac.h" -#include "tac/tacbuffer.h" -#include "gen_tac.h" - -void tac_constvalue(struct TACBuffer* buffer, struct ConstValue* c) { - - tacbuffer_append( - buffer, - makeTACConst(make_temp(), int_value_from_const(c))); -} From d948870f3a55851ed15380d2b4f16422099eedc8 Mon Sep 17 00:00:00 2001 From: Alexander Hansen Date: Sun, 9 Mar 2025 18:11:24 +0100 Subject: [PATCH 7/9] compiler: NULL-terminate strings in .data segment This is so that they can be passed into functions expecting C strings which are NULL-terminated. - fix tac/tac_str.c to print width of load/store. - fix examples/syscalls/read_file/read_file.dg to read from its own source file and print the first few chars. Also add a .stdout file there so it gets picked up by the test suite. 
--- examples/syscalls/read_file/read_file.dg | 25 ++++++++++++++++---- examples/syscalls/read_file/read_file.stdout | 1 + stdlib/syscalls.dg | 6 ++--- tables/data/data.c | 12 ++++++---- tac/tac_str.c | 12 ++++++---- tac/test/test_tac.c | 4 ++-- 6 files changed, 42 insertions(+), 18 deletions(-) create mode 100644 examples/syscalls/read_file/read_file.stdout diff --git a/examples/syscalls/read_file/read_file.dg b/examples/syscalls/read_file/read_file.dg index 88847b2e0..7aab842d3 100644 --- a/examples/syscalls/read_file/read_file.dg +++ b/examples/syscalls/read_file/read_file.dg @@ -1,16 +1,31 @@ fn main () ~> int { - [char] filename = "read_file.dg"; - int fd = open(filename, 0, 0); + // since this is run as part of tests, + // the path needs to be relative from examples/ + [char] filename = "syscalls/read_file/read_file.dg"; + *char path = filename; + int fd = open(path, 0, 1); - if fd < 0 { + if fd < 0 { return 1; } char c = ' '; + *char ptr = &c; - read(fd, &c, 1); - + read(fd, ptr, 1); + write(1, &c, 1); + read(fd, ptr, 1); + write(1, &c, 1); + read(fd, ptr, 1); + write(1, &c, 1); + read(fd, ptr, 1); + write(1, &c, 1); + read(fd, ptr, 1); + write(1, &c, 1); + read(fd, ptr, 1); + write(1, &c, 1); + read(fd, ptr, 1); write(1, &c, 1); return 0; diff --git a/examples/syscalls/read_file/read_file.stdout b/examples/syscalls/read_file/read_file.stdout new file mode 100644 index 000000000..009c88aab --- /dev/null +++ b/examples/syscalls/read_file/read_file.stdout @@ -0,0 +1 @@ +fn main diff --git a/stdlib/syscalls.dg b/stdlib/syscalls.dg index 7521a8517..b89888024 100644 --- a/stdlib/syscalls.dg +++ b/stdlib/syscalls.dg @@ -1,13 +1,13 @@ @syscall -fn write(uint fd, *char buf, uint64 count) -> int {} +fn write(uint fd, *char buf, uint64 count) -> int64 {} @syscall -fn open([char] filename, int flags, int mode) -> int {} +fn open(*char filename, int flags, int mode) -> int {} @syscall -fn read(int fd, *char buf, uint64 count) -> int {} +fn read(int fd, *char buf, uint64 
count) -> int64 {} @syscall fn exit(int code) -> int {} diff --git a/tables/data/data.c b/tables/data/data.c index 442cf3d3a..237b6fdc0 100644 --- a/tables/data/data.c +++ b/tables/data/data.c @@ -83,15 +83,19 @@ size_t data_count(struct DataTable* data) { return data->count_entries; } +static void data_write_data_segment_entry(struct DataEntry* e, FILE* fout) { + + // The ',0' at the end NULL-terminates the string. + + fprintf(fout, "%s: db \"%s\",0\n", e->symbol, e->value); +} + void data_write_data_segment(struct DataTable* data, FILE* fout) { fprintf(fout, "section .data\n"); for (size_t i = 0; i < data->count_entries; i++) { - struct DataEntry* e = data->entries[i]; - size_t len = strlen(e->value); - - fprintf(fout, "%s: db \"%s\", %ld\n", e->symbol, e->value, len); + data_write_data_segment_entry(data->entries[i], fout); } } diff --git a/tac/tac_str.c b/tac/tac_str.c index d70edcfbd..e88f45e5c 100644 --- a/tac/tac_str.c +++ b/tac/tac_str.c @@ -134,12 +134,16 @@ char* tac_tostring(struct TAC* t, struct SST* sst, struct LVST* lvst) { sprintf(buf, "store l%d (%s) = t%lu", dest, name, arg1); } break; - case TAC_LOAD: - sprintf(buf, "t%d = [t%lu]", dest, arg1); + case TAC_LOAD: { + const uint8_t width = tac_load_store_width(t); + sprintf(buf, "t%d = [t%lu] (%d bytes)", dest, arg1, width); break; - case TAC_STORE: - sprintf(buf, "[t%d] = t%lu", dest, arg1); + } + case TAC_STORE: { + const uint8_t width = tac_load_store_width(t); + sprintf(buf, "[t%d] = t%lu (%d bytes)", dest, arg1, width); break; + } case TAC_NOP: sprintf(buf, "%s", "nop"); diff --git a/tac/test/test_tac.c b/tac/test/test_tac.c index fbfb54804..300e441a0 100644 --- a/tac/test/test_tac.c +++ b/tac/test/test_tac.c @@ -145,8 +145,8 @@ void test_tac_tostring() { assert_str(makeTACLabel(1), "L1:"); assert_str(makeTACLoadLocalAddr(1, 0, 8), "load t1 = &l0 (x) (8 bytes)"); assert_str(makeTACStoreLocal(0, 2), "store l0 (x) = t2"); - assert_str(makeTACLoad(2, 1, 2), "t2 = [t1]"); - 
assert_str(makeTACStore(2, 1, 2), "[t2] = t1"); + assert_str(makeTACLoad(2, 1, 2), "t2 = [t1] (2 bytes)"); + assert_str(makeTACStore(2, 1, 2), "[t2] = t1 (2 bytes)"); assert_str(makeTACBinOp(2, TAC_OP_ADD, 1), "t2 += t1"); assert_str(makeTACBinOp(2, TAC_OP_SUB, 1), "t2 -= t1"); From bbee2a546bde56abf939350d425afc8bf0a9f352 Mon Sep 17 00:00:00 2001 From: Alexander Hansen Date: Sun, 9 Mar 2025 19:08:31 +0100 Subject: [PATCH 8/9] docs: document TAC_CONST_DATA --- docs/html/tac.html | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/html/tac.html b/docs/html/tac.html index afbc106b6..d420abbbb 100644 --- a/docs/html/tac.html +++ b/docs/html/tac.html @@ -287,6 +287,14 @@

Other

- - + + TAC_CONST_DATA t1 = 0 + TMP + - + offset into data table + - + - + TAC_NOP nop - From 9c4576402b65f7cdaa3a3f72f43717cae2527c1a Mon Sep 17 00:00:00 2001 From: Alexander Hansen Date: Sun, 9 Mar 2025 19:31:40 +0100 Subject: [PATCH 9/9] lexer: support escaped newline in string literal Propagate the escaped chars into the assembly. NASM supports these when the string appears in backticks [1]. References: [1] https://www.cs.ubbcluj.ro/~vancea/asc/practic/nasm_html/nasmdoc3.html --- examples/assert_examples.sh | 2 +- examples/syscalls/read_file/read_file.dg | 3 ++ .../syscalls/write_string/write_string.dg | 4 +-- tables/data/data.c | 30 ++++++++++++++++++- token/token/token.c | 19 +++++++++--- 5 files changed, 50 insertions(+), 8 deletions(-) diff --git a/examples/assert_examples.sh b/examples/assert_examples.sh index 80a583de4..cac44cae9 100755 --- a/examples/assert_examples.sh +++ b/examples/assert_examples.sh @@ -56,7 +56,7 @@ for test_file in $(find . -name '*.dg'); do /tmp/program > /tmp/program_stdout - diff --ignore-trailing-space /tmp/program_stdout "$expected_stdout_file" + diff /tmp/program_stdout "$expected_stdout_file" stdout_res=$? 
if [[ stdout_res -eq 0 ]]; then echo "[PASS][STDOUT] $test_file" diff --git a/examples/syscalls/read_file/read_file.dg b/examples/syscalls/read_file/read_file.dg index 7aab842d3..51e807389 100644 --- a/examples/syscalls/read_file/read_file.dg +++ b/examples/syscalls/read_file/read_file.dg @@ -28,5 +28,8 @@ fn main () ~> int { read(fd, ptr, 1); write(1, &c, 1); + c = '\n'; + write(1, &c, 1); + return 0; } diff --git a/examples/syscalls/write_string/write_string.dg b/examples/syscalls/write_string/write_string.dg index fb0364ad2..2a55ff5f6 100644 --- a/examples/syscalls/write_string/write_string.dg +++ b/examples/syscalls/write_string/write_string.dg @@ -5,10 +5,10 @@ fn main () ~> int { write(1, ptr, 7); - [char] str2 = "World!"; + [char] str2 = "World!\n"; ptr = str2; - write(1, ptr, 6); + write(1, ptr, 7); return 0; } diff --git a/tables/data/data.c b/tables/data/data.c index 237b6fdc0..e27f3e38c 100644 --- a/tables/data/data.c +++ b/tables/data/data.c @@ -83,11 +83,39 @@ size_t data_count(struct DataTable* data) { return data->count_entries; } +void data_print_escaped_str(FILE* fout, const char* str) { + while (*str) { + switch (*str) { + case '\n': fprintf(fout, "\\n"); break; + case '\t': fprintf(fout, "\\t"); break; + case '\r': fprintf(fout, "\\r"); break; + case '\b': fprintf(fout, "\\b"); break; + case '\f': fprintf(fout, "\\f"); break; + case '\"': fprintf(fout, "\\\""); break; + case '\\': fprintf(fout, "\\\\"); break; + default: + if (isprint((unsigned char)*str)) { + // Printable character -> write as-is + fputc(*str, fout); + } else { + // Non-printable character -> use hex notation + fprintf(fout, "\\x%02X", (unsigned char)*str); + } + break; + } + str++; + } +} + static void data_write_data_segment_entry(struct DataEntry* e, FILE* fout) { // The ',0' at the end NULL-terminates the string. 
- fprintf(fout, "%s: db \"%s\",0\n", e->symbol, e->value); + fprintf(fout, "%s: db ", e->symbol); + fprintf(fout, "`"); + data_print_escaped_str(fout, e->value); + fprintf(fout, "`"); + fprintf(fout, ",0\n"); } void data_write_data_segment(struct DataTable* data, FILE* fout) { diff --git a/token/token/token.c b/token/token/token.c index f7fb1f66a..1c258a587 100644 --- a/token/token/token.c +++ b/token/token/token.c @@ -47,16 +47,27 @@ struct Token* makeTokenStringConst(char* value) { res->kind = STRINGCONST; - res->value_ptr = malloc(sizeof(char) * (strlen(value) + 1)); + res->value_ptr = calloc(sizeof(char) * (strlen(value) + 1), sizeof(char)); if (!res->value_ptr) { free(res); return NULL; } - //extra '"' at start and end - strcpy(res->value_ptr, value + 1); - res->value_ptr[strlen(res->value_ptr) - 1] = '\0'; + // extra '"' at start and end + const size_t len = strlen(value) - 1; + int j = 0; + for (int i = 1; i < len; i++) { + char c = value[i]; + + // unescape '\n' + if (c == '\\' && i + 1 < len && value[i + 1] == 'n') { + res->value_ptr[j++] = '\n'; + i++; + } else { + res->value_ptr[j++] = c; + } + } res->line_num = -1;