diff --git a/.github/workflows/bazel.yml b/.github/workflows/bazel.yml index d6858e99..27a9f619 100644 --- a/.github/workflows/bazel.yml +++ b/.github/workflows/bazel.yml @@ -29,7 +29,8 @@ jobs: x11proto-dev \ libxext-dev \ libxrandr-dev \ - libreadline-dev + libreadline-dev \ + nasm - name: Generate libXpm config.h run: | diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 70a9c194..01aaf9a2 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -34,6 +34,7 @@ "milestone-5/cpp08": "1.1.0", "milestone-5/cpp09": "1.0.0", "milestone-5/inception": "1.0.0", + "pcc/libasm": "0.0.0", "rushes/hotrace": "2.1.0", "rushes/libunit": "1.0.0", "tools/push-swap-visualizer-minecraft": "1.0.0" diff --git a/BUILD.bazel b/BUILD.bazel index dea3d54e..0d2f8fe4 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -11,13 +11,13 @@ alias( actual = "//central/minilibx:mlx", ) -### MILESTONE 0 +### Milestone 0 alias( name = "libft", actual = "//milestone-0/libft:libft", ) -### MILESTONE 1 +### Milestone 1 alias( name = "ft_printf", actual = "//milestone-1/ft_printf:ft_printf", @@ -28,7 +28,7 @@ alias( actual = "//milestone-1/get_next_line:get_next_line", ) -### MILESTONE 2 +### Milestone 2 alias( name = "fdf", actual = "//milestone-2/fdf:fdf", @@ -44,7 +44,7 @@ alias( actual = "//milestone-2/push_swap:push_swap", ) -### MILESTONE 3 +### Milestone 3 alias( name = "minishell", actual = "//milestone-3/minishell:minishell", @@ -55,7 +55,7 @@ alias( actual = "//milestone-3/philosophers:philosophers", ) -### MILESTONE 4 +### Milestone 4 alias( name = "cub3d", actual = "//milestone-4/cub3d:cub3d", @@ -156,7 +156,7 @@ alias( actual = "//milestone-4/cpp04/ex02:abstract", ) -### MILESTONE 5 +### Milestone 5 alias( name = "cpp05-ex00", actual = "//milestone-5/cpp05/ex00:bureaucrat", @@ -231,3 +231,22 @@ alias ( name = "cpp09-ex01", actual = "//milestone-5/cpp09/ex01:RPN" ) + +alias ( + name = "cpp09-ex02", + actual = "//milestone-5/cpp09/ex02:PmergeMe" 
+) + +### PCC + +#### Compilation branch + +alias ( + name = "libasm", + actual = "//pcc/libasm:libasm" +) +alias ( + name = "libasm-test", + actual = "//pcc/libasm:test" +) + diff --git a/external-libs/libXpm/doc/xpm.PS.gz b/external-libs/libXpm/doc/xpm.PS.gz deleted file mode 100644 index 40e25de9..00000000 Binary files a/external-libs/libXpm/doc/xpm.PS.gz and /dev/null differ diff --git a/external-libs/libXpm/test/pixmaps/invalid/CVE-2016-10164-poc.xpm.gz.gz.gz b/external-libs/libXpm/test/pixmaps/invalid/CVE-2016-10164-poc.xpm.gz.gz.gz deleted file mode 100644 index 71d45153..00000000 Binary files a/external-libs/libXpm/test/pixmaps/invalid/CVE-2016-10164-poc.xpm.gz.gz.gz and /dev/null differ diff --git a/flake.nix b/flake.nix index 5965df87..e74dd599 100644 --- a/flake.nix +++ b/flake.nix @@ -7,7 +7,12 @@ c_formatter_42.url = "github:maix-flake/c_formatter_42"; }; - outputs = { nixpkgs, flake-utils, c_formatter_42, ... }: + outputs = { + nixpkgs, + flake-utils, + c_formatter_42, + ... + }: flake-utils.lib.eachDefaultSystem (system: let pkgs = nixpkgs.legacyPackages.${system}; @@ -29,8 +34,8 @@ openssl lld - # Sharp - stdenv.cc.cc.lib + # ASM + nasm # Rust pkg-config @@ -54,9 +59,6 @@ # Node packages in PATH export PATH="$PWD/node_modules/.bin/:$PATH" - # Sharp - export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${pkgs.stdenv.cc.cc.lib}/lib:${pkgs.xorg.libX11.dev}/lib" - # minilibx export X11_LIB_PATH="${combinedX11}" echo "Starting configure script modification..." 
diff --git a/pcc/libasm/.gitignore b/pcc/libasm/.gitignore
new file mode 100644
index 00000000..76e579ae
--- /dev/null
+++ b/pcc/libasm/.gitignore
@@ -0,0 +1,2 @@
+test
+
diff --git a/pcc/libasm/BUILD.bazel b/pcc/libasm/BUILD.bazel
new file mode 100644
index 00000000..8ce4cdf4
--- /dev/null
+++ b/pcc/libasm/BUILD.bazel
@@ -0,0 +1,32 @@
+load("@rules_cc//cc:cc_binary.bzl", "cc_binary")
+load("@rules_cc//cc:cc_library.bzl", "cc_library")
+
+genrule(
+    name = "compile_libasm",
+    srcs = glob(["src/*.s"]),
+    outs = ["libasm.a"],
+    cmd = """
+        OBJS=""
+        for src in $(SRCS); do
+            obj="$(@D)/$$(basename $${src%.s}.o)"
+            nasm -f elf64 -Werror $$src -o $$obj
+            OBJS="$$OBJS $$obj"
+        done
+        ar rcs $@ $$OBJS
+    """,
+    message = "Compiling libasm with nasm...",
+)
+
+cc_library(
+    name = "libasm",
+    srcs = [":compile_libasm"],
+    visibility = ["//visibility:public"],
+)
+
+cc_binary(
+    name = "test",
+    srcs = ["test.c"],
+    deps = [":libasm"],
+    linkopts = ["-Wl,-z,noexecstack"],
+    visibility = ["//visibility:public"],
+)
diff --git a/pcc/libasm/Makefile b/pcc/libasm/Makefile
new file mode 100644
index 00000000..1cc3ad61
--- /dev/null
+++ b/pcc/libasm/Makefile
@@ -0,0 +1,41 @@
+NAME := libasm.a
+AR := ar
+ARFLAGS := rcs
+AS := nasm
+ASFLAGS := -f elf64 -Wall -Werror
+SRC := strlen strcpy strcmp write read strdup
+SRC := $(addsuffix .s,$(addprefix src/ft_,$(SRC)))
+OBJ := $(SRC:.s=.o)
+
+TNAME := test
+TSRC := test.c
+# The assembly sources carry no '.note.GNU-stack' section, which implies an
+# executable stack. '-z noexecstack' overrides that and marks the linked
+# binary as not requiring an executable stack.
+TLDFLAGS := -Wl,-z,noexecstack
+
+.DELETE_ON_ERROR:
+
+all: $(NAME)
+
+$(NAME): $(OBJ)
+	$(AR) $(ARFLAGS) $@ $^
+
+# Objects also depend on the Makefile so that a flag change reassembles them.
+%.o: %.s Makefile
+	$(AS) $(ASFLAGS) $< -o $@
+
+clean:
+	$(RM) $(OBJ)
+
+fclean: clean
+	$(RM) $(NAME) $(TNAME)
+
+re: fclean
+	$(MAKE) all
+
+# $(TNAME) is a real file, so it is deliberately not listed in .PHONY.
+$(TNAME): $(TSRC) $(NAME)
+	$(CC) $^ $(TLDFLAGS) -o $@
+
+.PHONY: all clean fclean re
diff --git a/pcc/libasm/README.txt b/pcc/libasm/README.txt
new file mode 100644
index 00000000..c9ca3740
--- /dev/null
+++ b/pcc/libasm/README.txt
@@ -0,0 +1,74 @@
+libasm
+======
+
+A library of basic functions, written in assembly.
+
+Project specifications
+======================
+
+This project is programmed for baseline AMD64 with the System V AMD64 convention, for the GNU/Linux ABI.
+The Intel syntax is the one used, as required by the subject.
+My implementation is vanilla, as in it is not programmed with AVX or any AMD64 extension in mind.
+It should therefore be portable on AMD64 Linux.
+Assembling with NASM version 2.15.05.
+
+Provided functions
+------------------
+
+The functions provided by this project are the following:
+
+- ft_strlen (strlen.3)
+- ft_strcpy (strcpy.3)
+- ft_strcmp (strcmp.3)
+- ft_write (write.2)
+- ft_read (read.2)
+- ft_strdup (strdup.3, call to malloc allowed)
+
+Appendix
+========
+
+Appendix A
+----------
+
+"Why align the stack?"
+Before executing a 'call' instruction, the stack pointer (rsp) must be 16-byte aligned.
+When a function is entered, the 'call' instruction pushes an 8-byte return address onto the stack, leaving it misaligned.
+To fix this before calling another C function, we must adjust rsp by 8 bytes.
+Pushing a dummy register (or saving a register we need) offsets that 8-byte imbalance.
+Failing to do this causes modern C library functions (which use strict SSE/AVX instructions) to segfault.
+
+Appendix B
+----------
+
+"With Respect to the Procedure Linking Table"
+Modern Linux enforces PIE (Position Independent Executables).
+Memory addresses for external C library functions (like malloc or __errno_location) are randomized at runtime and cannot be reached via a static 32-bit relative jump.
+Appending 'WRT ..plt' (With Respect To the Procedure Linkage Table) forces the assembler to generate an R_X86_64_PLT32 relocation.
+This routes our call through the PLT trampoline, which dynamically resolves the true memory address via the GOT (Global Offset Table) at runtime.
+
+The 'default rel' directive makes NASM use RIP-relative addressing for memory operands by default, which signals that we are purposely writing position-independent code.
+AFAIK, 'WRT ..plt' does route the call correctly, but NASM still emits a warning under '-Wall', and '-Werror' turns that warning into a build failure.
+More recent versions of NASM could silence it with '[warning -reloc-rel-dword]', but that warning class does not exist in 2.15.05, which is the version installed on 42 Lyon's computers.
+
+Appendix C
+----------
+
+Technically errno is a 32-bit integer.
+A 32-bit value can be moved out of a 64-bit register by using only its low 32 bits.
+Each 64-bit register has a 32-bit name for that low half, for example EAX for RAX or ECX for RCX.
+Nothing needs to be zeroed out beforehand in this example: a 32-bit store only writes those low 32 bits.
+
+Resources
+=========
+
+"AMD 64-Bit Technology: The AMD64 x86-64™ Architecture Programmers Overview" by AMD
+https://refspecs.linuxbase.org/x86_64-overview.pdf
+
+"Linux ABI description"
+https://docs.kernel.org/admin-guide/abi.html
+
+"x64 Cheat Sheet"
+https://cs.brown.edu/courses/cs033/docs/guides/x64_cheatsheet.pdf
+
+"x86 calling conventions"
+https://en.wikipedia.org/wiki/X86_calling_conventions
diff --git a/pcc/libasm/src/ft_read.s b/pcc/libasm/src/ft_read.s
new file mode 100644
index 00000000..e7dab82b
--- /dev/null
+++ b/pcc/libasm/src/ft_read.s
@@ -0,0 +1,32 @@
+; input: rdi -> file descriptor (int)
+; input: rsi -> pointer to the buffer that receives the data (void*)
+; input: rdx -> count of bytes of data to read (size_t)
+; output: rax -> bytes read, -1 on error with errno set (ssize_t)
+
+; appendix B
+default rel
+
+section .text
+	global ft_read
+	extern __errno_location
+
+ft_read:
+	mov rax, 0 ; 0 is syscall no. for sys_read on Linux
+	; rdi, rsi and rdx already hold the syscall arguments, we just call
+	syscall
+
+	cmp rax, 0
+	jl .lerror ; a negative result is the negated errno code
+	ret ; after call rax will be nbytes read
+
+.lerror:
+	neg rax ; make the error code positive
+
+	push rax ; save the code and align the stack (appendix A)
+	call __errno_location WRT ..plt ; appendix B
+	pop rcx ; pop back the saved code into rcx
+
+	mov [rax], ecx ; store the code into errno (appendix C)
+
+	mov rax, -1 ; libc errors out with -1
+	ret
diff --git a/pcc/libasm/src/ft_strcmp.s b/pcc/libasm/src/ft_strcmp.s
new file mode 100644
index 00000000..1c5987cf
--- /dev/null
+++ b/pcc/libasm/src/ft_strcmp.s
@@ -0,0 +1,46 @@
+; input: rdi -> s1 pointer of NUL-terminated string
+; input: rsi -> s2 pointer of NUL-terminated string
+; output: rax -> int difference between data in pointers
+
+; Technically strcmp is expected to return:
+; * 0 on s1 == s2
+; * any negative value on s1 < s2
+; * any positive value on s1 > s2
+; But this is an implementation closer to that of the libc. We return the
+; difference between the first different character of each string.
+; i.e. some optimized implementations (like Valgrind's) will return 0, -1 or 1.
+; This is the reason you cannot compare the result of different strcmp
+; implementations as a test, only the sign matters.
+; @see strcmp(3)
+
+section .text
+	global ft_strcmp
+
+ft_strcmp:
+	; xor is a fast way to bzero a reg
+	xor rax, rax
+	xor rcx, rcx
+
+.find_loop:
+	mov al, byte [rdi]
+	mov cl, byte [rsi]
+
+	; early stop when any char diff
+	cmp al, cl
+	jne .done
+
+	; only s1's byte needs the NUL check: the compare above proved both are equal
+	cmp al, 0
+	je .done
+
+	inc rdi
+	inc rsi
+	jmp .find_loop
+
+.done:
+	; because rax and rcx were bzero'd, they hold only the exact positive
+	; unsigned value of the characters, meaning subbing them correctly here
+	; handles neg results as intended
+	sub rax, rcx
+	ret
+
diff --git a/pcc/libasm/src/ft_strcpy.s b/pcc/libasm/src/ft_strcpy.s
new file mode 100644
index 00000000..b1d4f27b
--- /dev/null
+++ b/pcc/libasm/src/ft_strcpy.s
@@ -0,0 +1,27 @@
+; input: rdi -> destination pointer
+; input: rsi -> source pointer to NUL-terminated string
+; output: rax -> original dest pointer
+
+section .text
+	global ft_strcpy
+
+ft_strcpy:
+	mov rax, rdi ; dest ptr in rax
+
+.find_loop:
+	; copy data byte
+	mov cl, byte [rsi]
+	mov byte [rdi], cl
+
+	; check if str end
+	cmp cl, 0
+	je .done
+
+	; move to next dest and src bytes
+	inc rdi
+	inc rsi
+	jmp .find_loop
+
+.done:
+	ret ; rax still has original ptr
+
diff --git a/pcc/libasm/src/ft_strdup.s b/pcc/libasm/src/ft_strdup.s
new file mode 100644
index 00000000..4dc808d4
--- /dev/null
+++ b/pcc/libasm/src/ft_strdup.s
@@ -0,0 +1,32 @@
+; input: rdi -> source pointer to NUL-terminated string
+; output: rax -> pointer to a newly malloc'd copy of the string,
+;                or NULL when malloc fails
+
+; appendix B
+default rel
+
+section .text
+	global ft_strdup
+	extern malloc ; byte size into rdi, align stack
+	extern ft_strlen
+	extern ft_strcpy
+
+ft_strdup:
+	push rdi ; save original ptr, this also aligns the stack (appendix A)
+	call ft_strlen WRT ..plt
+
+	inc rax ; one more byte for the NUL terminator
+	mov rdi, rax ; setup for malloc
+
+	call malloc WRT ..plt
+	cmp rax, 0 ; err handling: return the NULL from malloc as is
+	je .done
+
+	mov rdi, rax ; setup for strcpy: dest is the new buffer
+	mov rsi, [rsp] ; src is the saved ptr, read in place to keep rsp aligned
+	call ft_strcpy WRT ..plt ; rax comes back as dest
+
+.done:
+	; shared exit: drop the saved ptr so rsp is back to its entry value
+	pop rdi
+	ret
diff --git a/pcc/libasm/src/ft_strlen.s b/pcc/libasm/src/ft_strlen.s
new file mode 100644
index 00000000..df54dc06
--- /dev/null
+++ b/pcc/libasm/src/ft_strlen.s
@@ -0,0 +1,21 @@
+; input: rdi -> pointer to NUL-terminated string
+; output: rax -> length (size_t)
+
+section .text
+	global ft_strlen
+
+ft_strlen:
+	mov rax, rdi ; save str ptr in rax
+
+.find_loop:
+	cmp byte [rdi], 0
+	je .done
+	inc rdi
+	jmp .find_loop
+
+.done:
+	; rdi now points to NUL terminator
+	sub rdi, rax ; calc ptrs diff
+	mov rax, rdi ; ret len in rax
+	ret
+
diff --git a/pcc/libasm/src/ft_write.s b/pcc/libasm/src/ft_write.s
new file mode 100644
index 00000000..8d806b00
--- /dev/null
+++ b/pcc/libasm/src/ft_write.s
@@ -0,0 +1,32 @@
+; input: rdi -> file descriptor (int)
+; input: rsi -> pointer to data buffer (const void*)
+; input: rdx -> count of bytes of data to write (size_t)
+; output: rax -> bytes written, -1 on error (ssize_t)
+
+; appendix B
+default rel
+
+section .text
+	global ft_write
+	extern __errno_location
+
+ft_write:
+	mov rax, 1 ; 1 is syscall no.
for sys_write on Linux
+	; all data already in place, we just call
+	syscall
+
+	cmp rax, 0
+	jl .lerror
+	ret ; after call rax will be nbytes written
+
+.lerror:
+	neg rax ; invert code
+
+	push rax ; appendix A
+	call __errno_location WRT ..plt ; appendix B
+	pop rcx ; pop back the saved code into rcx
+
+	mov [rax], ecx ; appendix C
+
+	mov rax, -1
+	ret
diff --git a/pcc/libasm/test.c b/pcc/libasm/test.c
new file mode 100644
index 00000000..ee02c684
--- /dev/null
+++ b/pcc/libasm/test.c
@@ -0,0 +1,131 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+// Number of failed assertions; main() turns it into the exit status.
+static int g_failures = 0;
+
+// Evaluates each operand exactly once so that calls with side effects
+// (ft_write, ft_read) are not repeated when the failure message is printed.
+#define ASSERT_EQ(actual, expected, label)                                     \
+  do {                                                                         \
+    long assert_actual_ = (long)(actual);                                      \
+    long assert_expected_ = (long)(expected);                                  \
+    if (assert_actual_ == assert_expected_) {                                  \
+      printf("\033[32mok\033[0m %s\n", label);                                 \
+    } else {                                                                   \
+      g_failures++;                                                            \
+      printf("\033[31mKO\033[0m %s (line %d): Expected %ld, got %ld\n", label, \
+             __LINE__, assert_expected_, assert_actual_);                      \
+    }                                                                          \
+  } while (0)
+
+extern size_t ft_strlen(const char *s);
+extern char *ft_strcpy(char *dest, const char *src);
+extern int ft_strcmp(const char *s1, const char *s2);
+extern ssize_t ft_write(int fildes, const void *buf, size_t nbyte);
+extern ssize_t ft_read(int fildes, void *buf, size_t nbyte);
+extern char *ft_strdup(const char *s);
+
+void comp_ft_strcmp(const char *s1, const char *s2, const char *label) {
+  int my_res = ft_strcmp(s1, s2);
+  int std_res = strcmp(s1, s2);
+
+  int my_sign = (my_res > 0) - (my_res < 0);
+  int std_sign = (std_res > 0) - (std_res < 0);
+
+  ASSERT_EQ(my_sign, std_sign, label);
+}
+
+int main(void) {
+  {
+    ASSERT_EQ(ft_strlen("hello, world"), 12, "ft_strlen should work");
+    ASSERT_EQ(ft_strlen(""), 0, "ft_strlen should work with empty strings");
+  }
+
+  {
+    char *s = calloc(128, sizeof(const char));
+    const char *o =
+        "Violence is the last refuge of the incompetent — Isaac Asimov";
+    const char *sn = ft_strcpy(s, o);
+
+    ASSERT_EQ(strcmp(sn, o), 0, "ft_strcpy should copy full string");
+    ASSERT_EQ(sn, s, "ft_strcpy should return dest");
+    char dest_buf[10] = "overwrite";
+    ft_strcpy(dest_buf, "");
+    ASSERT_EQ(dest_buf[0], '\0', "ft_strcpy (empty) should place NUL");
+    ASSERT_EQ(dest_buf[1], 'v',
+              "ft_strcpy (empty) should not overwrite past NUL");
+
+    free(s);
+  }
+
+  {
+    comp_ft_strcmp("hello", "hella", "ft_strcmp should handle positive diff");
+    comp_ft_strcmp("hella", "hello", "ft_strcmp should handle negative diff");
+    comp_ft_strcmp("", "", "ft_strcmp should handle equality");
+    comp_ft_strcmp("\xff", "\x01", "ft_strcmp should compare as unsigned char");
+    comp_ft_strcmp("ᓚᘏᗢ", "ᓚᘏᗢ", "ft_strcmp should handle special characters");
+    // well it should really not make any difference for 'special' characters or
+    // not, but I like the cat :-)
+  }
+
+  {
+    int fd = open("/dev/null", O_WRONLY);
+    if (fd != -1) {
+      ASSERT_EQ(ft_write(fd, "Testing ft_write...\n", 20), 20,
+                "ft_write should print exact number of characters");
+      ASSERT_EQ(ft_write(fd, "fail", 0), 0,
+                "ft_write with 0 bytes should return 0");
+      close(fd);
+    } else
+      printf("skip: could not open() to test valid write\n");
+
+    errno = 0;
+    ASSERT_EQ(ft_write(-1, "fail", 4), -1,
+              "ft_write should return -1 on error");
+    ASSERT_EQ(errno, EBADF, "ft_write should set errno on error");
+  }
+
+  {
+    char buf[50];
+    int fd = open("/dev/urandom", O_RDONLY);
+    if (fd != -1) {
+      ssize_t ret = ft_read(fd, buf, 10);
+      ASSERT_EQ(ret, 10, "ft_read should work");
+      ASSERT_EQ(ft_read(fd, buf, 0), 0, "ft_read with 0 bytes should return 0");
+      close(fd);
+    } else
+      printf("skip: could not open() to test valid read\n");
+
+    errno = 0;
+    ssize_t ret2 = ft_read(-1, buf, 10);
+    ASSERT_EQ(ret2, -1, "ft_read should error out properly");
+    ASSERT_EQ(errno, EBADF, "ft_read should set errno on error");
+  }
+
+  {
+    const char *orig1 =
+        "A language that doesn't have everything is actually "
+        "easier to program in than some that do — Dennis Ritchie";
+    char *dup1 = ft_strdup(orig1);
+    ASSERT_EQ(dup1 != orig1, 1,
+              "ft_strdup should allocate a new memory address");
+    ASSERT_EQ(strcmp(dup1, orig1), 0,
+              "ft_strdup should copy the exact contents");
+    free(dup1);
+
+    const char *orig2 = "";
+    char *dup2 = ft_strdup(orig2);
+    ASSERT_EQ(dup2 != orig2, 1,
+              "ft_strdup (empty str) should still allocate new memory");
+    ASSERT_EQ(strcmp(dup2, orig2), 0,
+              "ft_strdup (empty str) should cleanly copy the NUL terminator");
+    free(dup2);
+  }
+
+  // Non-zero when at least one assertion printed KO.
+  return g_failures != 0;
+}
diff --git a/piscine-c/shell00/ex01/testShell00.tar b/piscine-c/shell00/ex01/testShell00.tar
deleted file mode 100644
index 85356ef0..00000000
Binary files a/piscine-c/shell00/ex01/testShell00.tar and /dev/null differ
diff --git a/piscine-c/shell00/ex02/exo2.tar b/piscine-c/shell00/ex02/exo2.tar
deleted file mode 100644
index 8d0adc7a..00000000
Binary files a/piscine-c/shell00/ex02/exo2.tar and /dev/null differ
diff --git a/release-please-config.json b/release-please-config.json
index 0317ef78..549214f7 100644
--- a/release-please-config.json
+++ b/release-please-config.json
@@ -141,6 +141,10 @@
       "release-type": "simple",
       "package-name": "inception"
     },
+    "pcc/libasm": {
+      "release-type": "simple",
+      "package-name": "libasm"
+    },
     "rushes/hotrace": {
       "release-type": "simple",
       "package-name": "hotrace"