From c49437c84f458b51fad25e9497c09ce0c7123c23 Mon Sep 17 00:00:00 2001 From: Uoc Tamika Date: Tue, 26 May 2026 10:27:12 +0000 Subject: [PATCH 1/6] Patch::macros.h : remove macros STATIC_INLINE, ALWAYS_INLINE, and others, and rename ATTR into ATTRIBUTE --- .../stdoc/utility/compiler/macros/macros.h | 80 ++++++------------- 1 file changed, 26 insertions(+), 54 deletions(-) diff --git a/include/stdoc/utility/compiler/macros/macros.h b/include/stdoc/utility/compiler/macros/macros.h index 76adc58..aca26ee 100644 --- a/include/stdoc/utility/compiler/macros/macros.h +++ b/include/stdoc/utility/compiler/macros/macros.h @@ -56,80 +56,68 @@ #endif /* STDOC_COMPILER_GNU_LIKE */ /* - * STDOC_ATTR_NORETURN + * STDOC_ATTRIBUTE_NORETURN * Indicates that a function never returns (exit, abort, infinite loop). */ #if STDOC_COMPILER_GNU_LIKE -# define STDOC_ATTR_NORETURN __attribute__((noreturn)) +# define STDOC_ATTRIBUTE_NORETURN __attribute__((noreturn)) #elif STDOC_COMPILER_MSVC -# define STDOC_ATTR_NORETURN __declspec(noreturn) +# define STDOC_ATTRIBUTE_NORETURN __declspec(noreturn) #else -# define STDOC_ATTR_NORETURN -#endif /* STDOC_ATTR_NORETURN */ +# define STDOC_ATTRIBUTE_NORETURN +#endif /* STDOC_ATTRIBUTE_NORETURN */ /* - * STDOC_ATTR_UNUSED + * STDOC_ATTRIBUTE_UNUSED * Marks a variable or parameter as intentionally unused. */ #if STDOC_COMPILER_GNU_LIKE -# define STDOC_ATTR_UNUSED __attribute__((unused)) +# define STDOC_ATTRIBUTE_UNUSED __attribute__((unused)) #else -# define STDOC_ATTR_UNUSED -#endif /* STDOC_ATTR_UNUSED */ +# define STDOC_ATTRIBUTE_UNUSED +#endif /* STDOC_ATTRIBUTE_UNUSED */ /* - * STDOC_ATTR_ALWAYS_INLINE - * Forces inlining of a function, even at low optimization levels. 
- */ -#if STDOC_COMPILER_GNU_LIKE -# define STDOC_ATTR_ALWAYS_INLINE inline __attribute__((always_inline)) -#elif STDOC_COMPILER_MSVC -# define STDOC_ATTR_ALWAYS_INLINE __forceinline -#else -# define STDOC_ATTR_ALWAYS_INLINE inline -#endif /* STDOC_ATTR_ALWAYS_INLINE */ - -/* - * STDOC_ATTR_PRINTF(fmt, arg) + * STDOC_ATTRIBUTE_PRINTF(fmt, arg) * Enables compile-time format string checking for printf-like functions. */ #if STDOC_COMPILER_GNU_LIKE && !STDOC_COMPILER_ICC -# define STDOC_ATTR_PRINTF(fmt, arg) __attribute__((format(printf, fmt, arg))) +# define STDOC_ATTRIBUTE_PRINTF(fmt, arg) __attribute__((format(printf, fmt, arg))) #else -# define STDOC_ATTR_PRINTF(fmt, arg) -#endif /* STDOC_ATTR_PRINTF */ +# define STDOC_ATTRIBUTE_PRINTF(fmt, arg) +#endif /* STDOC_ATTRIBUTE_PRINTF */ /* - * STDOC_ATTR_SCANF(fmt, arg) + * STDOC_ATTRIBUTE_SCANF(fmt, arg) * Enables compile-time format string checking for scanf-like functions. */ #if STDOC_COMPILER_GNU_LIKE && !STDOC_COMPILER_ICC -# define STDOC_ATTR_SCANF(fmt, arg) __attribute__((format(scanf, fmt, arg))) +# define STDOC_ATTRIBUTE_SCANF(fmt, arg) __attribute__((format(scanf, fmt, arg))) #else -# define STDOC_ATTR_SCANF(fmt, arg) -#endif /* STDOC_ATTR_SCANF */ +# define STDOC_ATTRIBUTE_SCANF(fmt, arg) +#endif /* STDOC_ATTRIBUTE_SCANF */ /* - * STDOC_ATTR_WARN_UNUSED + * STDOC_ATTRIBUTE_WARN_UNUSED * Generates a warning if the function's return value is ignored. */ #if STDOC_COMPILER_GNU_LIKE -# define STDOC_ATTR_WARN_UNUSED __attribute__((warn_unused_result)) +# define STDOC_ATTRIBUTE_WARN_UNUSED __attribute__((warn_unused_result)) #else -# define STDOC_ATTR_WARN_UNUSED -#endif /* STDOC_ATTR_WARN_UNUSED */ +# define STDOC_ATTRIBUTE_WARN_UNUSED +#endif /* STDOC_ATTRIBUTE_WARN_UNUSED */ /* - * STDOC_ATTR_DEPRECATED(msg) + * STDOC_ATTRIBUTE_DEPRECATED(msg) * Marks a function or variable as deprecated. 
*/ #if STDOC_COMPILER_GNU_LIKE -# define STDOC_ATTR_DEPRECATED(msg) __attribute__((deprecated(msg))) +# define STDOC_ATTRIBUTE_DEPRECATED(msg) __attribute__((deprecated(msg))) #elif STDOC_COMPILER_MSVC -# define STDOC_ATTR_DEPRECATED(msg) __declspec(deprecated(msg)) +# define STDOC_ATTRIBUTE_DEPRECATED(msg) __declspec(deprecated(msg)) #else -# define STDOC_ATTR_DEPRECATED(msg) -#endif /* STDOC_ATTR_DEPRECATED */ +# define STDOC_ATTRIBUTE_DEPRECATED(msg) +#endif /* STDOC_ATTRIBUTE_DEPRECATED */ /* * STDOC_PUBLIC / STDOC_LOCAL @@ -166,22 +154,6 @@ # define STDOC_UNLIKELY(x) (x) #endif /* branch prediction */ -/* - * STDOC_INLINE / STDOC_STATIC_INLINE - * Portable inline keywords. - */ -#ifndef STDOC_INLINE -# if STDOC_COMPILER_MSVC -# define STDOC_INLINE __inline -# else -# define STDOC_INLINE inline -# endif -#endif /* STDOC_INLINE */ - -#ifndef STDOC_STATIC_INLINE -# define STDOC_STATIC_INLINE static STDOC_INLINE -#endif /* STDOC_STATIC_INLINE */ - /* * STDOC_ARRAY_SIZE(arr) * Number of elements in a static array. 
From e0aeb2ecf5bc97654018a2cd46de4ef6b2c8674b Mon Sep 17 00:00:00 2001 From: Uoc Tamika Date: Tue, 26 May 2026 10:28:00 +0000 Subject: [PATCH 2/6] Patch::stdoc_printf.h : changing the macros based from new commit in macros.h --- include/stdoc/io/stdoc_printf.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/include/stdoc/io/stdoc_printf.h b/include/stdoc/io/stdoc_printf.h index 0da0e6f..58def9f 100644 --- a/include/stdoc/io/stdoc_printf.h +++ b/include/stdoc/io/stdoc_printf.h @@ -69,8 +69,7 @@ #if !defined(STDOC_USE_LIBC) -STDOC_STATIC_INLINE long stdoc_syscall_write(int fd, const void* buf, - unsigned long count) +static inline long stdoc_syscall_write(int fd, const void* buf, unsigned long count) { long ret; @@ -176,10 +175,10 @@ STDOC_STATIC_INLINE long stdoc_syscall_write(int fd, const void* buf, * * Notes: * - Format string is validated at compile-time when supported - * via STDOC_ATTR_PRINTF + * via STDOC_ATTRIBUTE_PRINTF * - Behavior is undefined for unsupported format specifiers */ -STDOC_ATTR_PRINTF(1, 2) +STDOC_ATTRIBUTE_PRINTF(1, 2) int stdoc_printf(const char* format, ...); #endif /* STDOC_PRINTF_H */ From 354562ddd9331ccf06a895f6f605e1b3e840b839 Mon Sep 17 00:00:00 2001 From: Uoc Tamika Date: Tue, 26 May 2026 11:25:24 +0000 Subject: [PATCH 3/6] Patch::stdoc.h : include scanf --- include/stdoc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/stdoc.h b/include/stdoc.h index 14e03a3..f2e5471 100644 --- a/include/stdoc.h +++ b/include/stdoc.h @@ -19,6 +19,7 @@ #include "stdoc/io/stdoc_printf.h" #include "stdoc/io/stdoc_version.h" +#include "stdoc/io/stdoc_scanf.h" #include "stdoc/utility/compiler/macros/macros.h" #endif /* STDOC_H */ From 8f42cba09d2ce5b91780b77cecca82ae7728ff9f Mon Sep 17 00:00:00 2001 From: Uoc Tamika Date: Tue, 26 May 2026 11:27:21 +0000 Subject: [PATCH 4/6] Major::stdoc_scanf.h : creating stdoc_scanf header --- include/stdoc/io/stdoc_scanf.h | 42 ++++++++++++++++++++++++++++++++++ 1 file changed, 42 
insertions(+) create mode 100644 include/stdoc/io/stdoc_scanf.h diff --git a/include/stdoc/io/stdoc_scanf.h b/include/stdoc/io/stdoc_scanf.h new file mode 100644 index 0000000..68424fe --- /dev/null +++ b/include/stdoc/io/stdoc_scanf.h @@ -0,0 +1,42 @@ +#ifndef STDOC_SCANF_H +#define STDOC_SCANF_H + +#include +#include + +/* + * stdoc_scanf + * + * Minimal scanf-like function reading from stdin (fd 0). + * + * Parameters: + * format - format string + * ... - pointers to variables to store input + * + * Returns: + * Number of successfully matched and assigned input items. + * + * Supported format specifiers: + * %d - int (decimal) + * %u - unsigned int (decimal) + * %x - unsigned int (hexadecimal, lowercase/uppercase accepted) + * %c - char (no whitespace skipping) + * %s - string (whitespace-delimited, char* must be large enough) + * %p - void* (hexadecimal, optional 0x prefix) + * %% - literal '%' + * + * Behavior: + * - Input is buffered (single syscall per scanf call if possible) + * - Whitespace (space, tab, newline) is skipped automatically for %d/%u/%x/%s/%p + * - %c does NOT skip whitespace + * - Returns EOF if input fails before any conversion. + * + * Limitations: + * - No width, no assignment suppression, no length modifiers. + * - No floating-point support. 
+ */ + +STDOC_ATTRIBUTE_SCANF(1, 2) +int stdoc_scanf(const char* format, ...); + +#endif /* STDOC_SCANF_H */ From b370d4882234e1b8de366a9b73326659c9eabdd5 Mon Sep 17 00:00:00 2001 From: Uoc Tamika Date: Tue, 26 May 2026 11:28:03 +0000 Subject: [PATCH 5/6] Major::stdoc_scanf.c : creating implementations of stdoc_scanf function --- io/stdoc_scanf.c | 371 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 371 insertions(+) create mode 100644 io/stdoc_scanf.c diff --git a/io/stdoc_scanf.c b/io/stdoc_scanf.c new file mode 100644 index 0000000..534930f --- /dev/null +++ b/io/stdoc_scanf.c @@ -0,0 +1,371 @@ +#include +#include +#include +#include + +#ifndef STDOC_SCANF_BUF_SIZE +#define STDOC_SCANF_BUF_SIZE 4096 +#endif + +/* + * syscall_read - raw read from stdin (fd 0) + * Returns number of bytes read, 0 on EOF, -1 on error. + */ +static long syscall_read(void* buf, unsigned long count) +{ + long ret; +#if defined(__x86_64__) + __asm__ volatile("mov $0, %%rax\n" /* syscall number 0 = read */ + "syscall" + : "=a"(ret) + : "D"(0), "S"(buf), "d"(count) + : "rcx", "r11", "memory"); +#elif defined(__i386__) + __asm__ volatile("mov $3, %%eax\n" + "int $0x80" + : "=a"(ret) + : "b"(0), "c"(buf), "d"(count) + : "memory"); +#elif defined(__aarch64__) + __asm__ volatile("mov x8, #63\n" /* read syscall number */ + "svc #0" + : "=r"(ret) + : "r"(0), "r"(buf), "r"(count) + : "x8", "memory"); +#elif defined(__arm__) + __asm__ volatile("mov r7, #3\n" + "swi #0" + : "=r"(ret) + : "r"(0), "r"(buf), "r"(count) + : "r7", "memory"); +#else +#error "syscall_read not implemented for this architecture" +#endif + return ret; +} + +/* + * Scan context: holds current character and buffer state. 
+ */ +struct scan_ctx { + char* buf; /* internal buffer */ + int idx; /* current index in buffer */ + int len; /* bytes available in buffer (0 = need refill) */ + int eof; /* end-of-file flag */ + int cur; /* current character (or -1 if none) */ +}; + +/* Refill buffer and set cur to first character. */ +static void refill(struct scan_ctx* ctx) +{ + if (ctx->eof) { + ctx->cur = -1; + return; + } + if (ctx->idx >= ctx->len) { + long n = syscall_read(ctx->buf, STDOC_SCANF_BUF_SIZE); + if (n <= 0) { + ctx->eof = 1; + ctx->cur = -1; + return; + } + ctx->len = (int)n; + ctx->idx = 0; + } + ctx->cur = (unsigned char)ctx->buf[ctx->idx++]; +} + +/* Peek next character (without consuming) */ +static int peek(struct scan_ctx* ctx) +{ + if (ctx->cur == -1 && !ctx->eof) + refill(ctx); + return ctx->cur; +} + +/* Consume current character and advance to next. */ +static void consume(struct scan_ctx* ctx) +{ + if (ctx->cur != -1) { + ctx->cur = -1; /* force refill on next access */ + } +} + +/* Skip whitespace (space, tab, newline, carriage return, form feed, vertical tab). */ +static void skip_whitespace(struct scan_ctx* ctx) +{ + int c; + while ((c = peek(ctx)) != -1) { + if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v') + consume(ctx); + else + break; + } +} + +/* Read a character (not skipping whitespace) */ +static int read_char(struct scan_ctx* ctx) +{ + int c = peek(ctx); + if (c != -1) + consume(ctx); + return c; +} + +/* Read a string (non-whitespace characters) into buffer, null-terminated. Returns 0 if none. */ +static int read_string(struct scan_ctx* ctx, char* dest, size_t max_len) +{ + skip_whitespace(ctx); + int c; + size_t i = 0; + while ((c = peek(ctx)) != -1) { + if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v') + break; + if (i + 1 < max_len) { + dest[i++] = (char)c; + } + consume(ctx); + } + if (i == 0) + return 0; + dest[i] = '\0'; + return 1; +} + +/* Convert hex digit to value (0-15), -1 if invalid. 
*/ +static int hex_digit_val(int c) +{ + if (c >= '0' && c <= '9') + return c - '0'; + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + return -1; +} + +/* Read unsigned integer in given base (10 or 16). Returns 1 on success, 0 on failure. */ +static int read_unsigned(struct scan_ctx* ctx, unsigned int* out, int base) +{ + skip_whitespace(ctx); + unsigned long val = 0; + int c = peek(ctx); + int consumed = 0; + if (base == 16 && c == '0') { + /* optional 0x prefix */ + consume(ctx); + c = peek(ctx); + if (c == 'x' || c == 'X') { + consume(ctx); + c = peek(ctx); + } else { + /* just a single zero */ + *out = 0; + return 1; + } + } + while ((c = peek(ctx)) != -1) { + int digit = -1; + if (base == 10) { + if (c >= '0' && c <= '9') + digit = c - '0'; + } else if (base == 16) { + digit = hex_digit_val(c); + } + if (digit < 0 || (unsigned)digit >= (unsigned)base) + break; + /* check overflow (simplistic) */ + if (val > (ULONG_MAX - (unsigned long)digit) / (unsigned long)base) { + /* overflow, consume but don't store further? just break */ + break; + } + val = val * (unsigned long)base + (unsigned long)digit; + consume(ctx); + consumed = 1; + c = peek(ctx); + } + if (!consumed) + return 0; + *out = (unsigned int)val; + return 1; +} + +/* Read signed integer (decimal). 
*/ +static int read_signed(struct scan_ctx* ctx, int* out) +{ + skip_whitespace(ctx); + int sign = 1; + int c = peek(ctx); + if (c == '-') { + sign = -1; + consume(ctx); + } else if (c == '+') { + consume(ctx); + } + unsigned int uval; + if (!read_unsigned(ctx, &uval, 10)) + return 0; + /* Check overflow for negative */ + if (sign == -1) { + if (uval > (unsigned int)INT_MAX + 1U) + *out = INT_MIN; + else + *out = -(int)uval; + } else { + if (uval > (unsigned int)INT_MAX) + *out = INT_MAX; + else + *out = (int)uval; + } + return 1; +} + +/* Read pointer (hex) */ +static int read_pointer(struct scan_ctx* ctx, void** out) +{ + skip_whitespace(ctx); + unsigned long addr = 0; + int consumed = 0; + int c = peek(ctx); + /* optional 0x prefix */ + if (c == '0') { + consume(ctx); + c = peek(ctx); + if (c == 'x' || c == 'X') { + consume(ctx); + c = peek(ctx); + } else { + /* just zero pointer */ + *out = (void*)0; + return 1; + } + } + while ((c = peek(ctx)) != -1) { + int digit = hex_digit_val(c); + if (digit < 0) + break; + if (addr > (ULONG_MAX - (unsigned long)digit) / 16) { + break; + } + addr = addr * 16 + (unsigned long)digit; + consume(ctx); + consumed = 1; + c = peek(ctx); + } + if (!consumed) + return 0; + *out = (void*)addr; + return 1; +} + +int stdoc_scanf(const char* format, ...) +{ + struct scan_ctx ctx; + char buf[STDOC_SCANF_BUF_SIZE]; + ctx.buf = buf; + ctx.idx = 0; + ctx.len = 0; + ctx.eof = 0; + ctx.cur = -1; + + va_list args; + va_start(args, format); + + int items = 0; + const char* p = format; + + while (*p) { + if (*p == '%') { + p++; + if (*p == '\0') break; + if (*p == '%') { + /* literal '%' - skip whitespace? no, just match a '%' */ + int c = read_char(&ctx); + if (c != '%') { + /* mismatch, fail */ + va_end(args); + return items == 0 ? -1 : items; + } + p++; + continue; + } + /* Skip whitespace for all specifiers except %c? 
Actually standard scanf skips whitespace for %d,%u,%x,%s,%p but not %c */ + bool skip_ws = (*p != 'c'); + if (skip_ws) + skip_whitespace(&ctx); + switch (*p) { + case 'd': { + int* ptr = va_arg(args, int*); + if (read_signed(&ctx, ptr)) + items++; + else + goto done; + break; + } + case 'u': { + unsigned int* ptr = va_arg(args, unsigned int*); + if (read_unsigned(&ctx, ptr, 10)) + items++; + else + goto done; + break; + } + case 'x': { + unsigned int* ptr = va_arg(args, unsigned int*); + if (read_unsigned(&ctx, ptr, 16)) + items++; + else + goto done; + break; + } + case 'c': { + char* ptr = va_arg(args, char*); + int c = read_char(&ctx); + if (c != -1) { + *ptr = (char)c; + items++; + } else { + goto done; + } + break; + } + case 's': { + char* ptr = va_arg(args, char*); + /* arbitrary limit; user must allocate enough */ + if (read_string(&ctx, ptr, 4096)) /* large enough, but not safe? we can use size param? skip for minimal */ + items++; + else + goto done; + break; + } + case 'p': { + void** ptr = va_arg(args, void**); + if (read_pointer(&ctx, ptr)) + items++; + else + goto done; + break; + } + default: + /* unknown specifier: ignore and match literal? 
just fail */ + goto done; + } + p++; + } else if (*p == ' ' || *p == '\t' || *p == '\n') { + /* whitespace in format: skip any whitespace in input */ + skip_whitespace(&ctx); + p++; + } else { + /* literal character */ + int c = read_char(&ctx); + if (c != (unsigned char)*p) { + goto done; + } + p++; + } + } + +done: + va_end(args); + return items; +} From 380534e5654074df91fea66466799dc9045a8655 Mon Sep 17 00:00:00 2001 From: Uoc Tamika Date: Tue, 26 May 2026 11:39:39 +0000 Subject: [PATCH 6/6] Patch::stdoc_scanf.c : fixing scanf return 0 without user inputting something --- io/stdoc_scanf.c | 332 ++++++++++++++++++++++++----------------------- 1 file changed, 171 insertions(+), 161 deletions(-) diff --git a/io/stdoc_scanf.c b/io/stdoc_scanf.c index 534930f..3db84f9 100644 --- a/io/stdoc_scanf.c +++ b/io/stdoc_scanf.c @@ -3,19 +3,17 @@ #include #include -#ifndef STDOC_SCANF_BUF_SIZE #define STDOC_SCANF_BUF_SIZE 4096 -#endif /* - * syscall_read - raw read from stdin (fd 0) + * syscall_read - raw read from stdin (file descriptor 0) * Returns number of bytes read, 0 on EOF, -1 on error. */ static long syscall_read(void* buf, unsigned long count) { long ret; #if defined(__x86_64__) - __asm__ volatile("mov $0, %%rax\n" /* syscall number 0 = read */ + __asm__ volatile("mov $0, %%rax\n" "syscall" : "=a"(ret) : "D"(0), "S"(buf), "d"(count) @@ -27,7 +25,7 @@ static long syscall_read(void* buf, unsigned long count) : "b"(0), "c"(buf), "d"(count) : "memory"); #elif defined(__aarch64__) - __asm__ volatile("mov x8, #63\n" /* read syscall number */ + __asm__ volatile("mov x8, #63\n" "svc #0" : "=r"(ret) : "r"(0), "r"(buf), "r"(count) @@ -39,173 +37,146 @@ static long syscall_read(void* buf, unsigned long count) : "r"(0), "r"(buf), "r"(count) : "r7", "memory"); #else -#error "syscall_read not implemented for this architecture" +#error "Unsupported architecture" #endif return ret; } /* - * Scan context: holds current character and buffer state. 
+ * Scan context: holds internal buffer and state. */ struct scan_ctx { - char* buf; /* internal buffer */ - int idx; /* current index in buffer */ - int len; /* bytes available in buffer (0 = need refill) */ - int eof; /* end-of-file flag */ - int cur; /* current character (or -1 if none) */ + char* buf; // pointer to buffer + int pos; // current position in buffer + int len; // number of valid bytes in buffer + int eof; // end-of-file flag (non-zero if EOF reached) }; -/* Refill buffer and set cur to first character. */ -static void refill(struct scan_ctx* ctx) +/* + * Refill buffer from stdin. + * Returns first character read, or -1 on EOF/error. + */ +static int refill(struct scan_ctx* ctx) { - if (ctx->eof) { - ctx->cur = -1; - return; - } - if (ctx->idx >= ctx->len) { - long n = syscall_read(ctx->buf, STDOC_SCANF_BUF_SIZE); - if (n <= 0) { - ctx->eof = 1; - ctx->cur = -1; - return; - } - ctx->len = (int)n; - ctx->idx = 0; + if (ctx->eof) return -1; + long n = syscall_read(ctx->buf, STDOC_SCANF_BUF_SIZE); + if (n <= 0) { + ctx->eof = 1; + return -1; } - ctx->cur = (unsigned char)ctx->buf[ctx->idx++]; + ctx->pos = 0; + ctx->len = (int)n; + return (unsigned char)ctx->buf[ctx->pos++]; } -/* Peek next character (without consuming) */ -static int peek(struct scan_ctx* ctx) +/* + * Return next character from input, consuming it. + * Returns -1 on EOF. + */ +static int next_char(struct scan_ctx* ctx) { - if (ctx->cur == -1 && !ctx->eof) - refill(ctx); - return ctx->cur; + if (ctx->pos < ctx->len) + return (unsigned char)ctx->buf[ctx->pos++]; + return refill(ctx); } -/* Consume current character and advance to next. */ -static void consume(struct scan_ctx* ctx) +/* + * Push back one character into the buffer. + * Only guarantees one level of unget. 
+ */ +static void unget_char(struct scan_ctx* ctx, int c) { - if (ctx->cur != -1) { - ctx->cur = -1; /* force refill on next access */ + if (ctx->pos > 0) { + ctx->pos--; + ctx->buf[ctx->pos] = (char)c; } } -/* Skip whitespace (space, tab, newline, carriage return, form feed, vertical tab). */ +/* + * Skip whitespace characters (space, tab, newline, etc.). + */ static void skip_whitespace(struct scan_ctx* ctx) { int c; - while ((c = peek(ctx)) != -1) { - if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v') - consume(ctx); - else + while ((c = next_char(ctx)) != -1) { + if (!(c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v')) { + unget_char(ctx, c); break; - } -} - -/* Read a character (not skipping whitespace) */ -static int read_char(struct scan_ctx* ctx) -{ - int c = peek(ctx); - if (c != -1) - consume(ctx); - return c; -} - -/* Read a string (non-whitespace characters) into buffer, null-terminated. Returns 0 if none. */ -static int read_string(struct scan_ctx* ctx, char* dest, size_t max_len) -{ - skip_whitespace(ctx); - int c; - size_t i = 0; - while ((c = peek(ctx)) != -1) { - if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v') - break; - if (i + 1 < max_len) { - dest[i++] = (char)c; } - consume(ctx); } - if (i == 0) - return 0; - dest[i] = '\0'; - return 1; -} - -/* Convert hex digit to value (0-15), -1 if invalid. */ -static int hex_digit_val(int c) -{ - if (c >= '0' && c <= '9') - return c - '0'; - if (c >= 'a' && c <= 'f') - return c - 'a' + 10; - if (c >= 'A' && c <= 'F') - return c - 'A' + 10; - return -1; } -/* Read unsigned integer in given base (10 or 16). Returns 1 on success, 0 on failure. */ +/* + * Read an unsigned integer in given base (10 or 16). + * Returns 1 on success, 0 on failure. 
+ */ static int read_unsigned(struct scan_ctx* ctx, unsigned int* out, int base) { skip_whitespace(ctx); + int c = next_char(ctx); + if (c == -1) return 0; unsigned long val = 0; - int c = peek(ctx); int consumed = 0; + + // Handle optional 0x prefix for hex if (base == 16 && c == '0') { - /* optional 0x prefix */ - consume(ctx); - c = peek(ctx); - if (c == 'x' || c == 'X') { - consume(ctx); - c = peek(ctx); + int n = next_char(ctx); + if (n == 'x' || n == 'X') { + c = next_char(ctx); } else { - /* just a single zero */ + unget_char(ctx, n); *out = 0; return 1; } } - while ((c = peek(ctx)) != -1) { + + while (c != -1) { int digit = -1; - if (base == 10) { - if (c >= '0' && c <= '9') - digit = c - '0'; - } else if (base == 16) { - digit = hex_digit_val(c); - } - if (digit < 0 || (unsigned)digit >= (unsigned)base) - break; - /* check overflow (simplistic) */ - if (val > (ULONG_MAX - (unsigned long)digit) / (unsigned long)base) { - /* overflow, consume but don't store further? just break */ - break; - } - val = val * (unsigned long)base + (unsigned long)digit; - consume(ctx); + if (c >= '0' && c <= '9') + digit = c - '0'; + else if (base == 16 && c >= 'a' && c <= 'f') + digit = c - 'a' + 10; + else if (base == 16 && c >= 'A' && c <= 'F') + digit = c - 'A' + 10; + + if (digit < 0 || digit >= base) break; + if (val > (ULONG_MAX - digit) / (unsigned long)base) break; // overflow protection + + val = val * base + digit; consumed = 1; - c = peek(ctx); + c = next_char(ctx); } - if (!consumed) - return 0; + + if (!consumed) return 0; *out = (unsigned int)val; return 1; } -/* Read signed integer (decimal). */ +/* + * Read a signed decimal integer. + * Returns 1 on success, 0 on failure. 
+ */ static int read_signed(struct scan_ctx* ctx, int* out) { skip_whitespace(ctx); + int c = next_char(ctx); + if (c == -1) return 0; + int sign = 1; - int c = peek(ctx); if (c == '-') { sign = -1; - consume(ctx); + c = next_char(ctx); } else if (c == '+') { - consume(ctx); + c = next_char(ctx); } + + if (c == -1 || (c < '0' || c > '9')) return 0; + unget_char(ctx, c); + unsigned int uval; - if (!read_unsigned(ctx, &uval, 10)) - return 0; - /* Check overflow for negative */ + if (!read_unsigned(ctx, &uval, 10)) return 0; + + // Handle signed overflow if (sign == -1) { if (uval > (unsigned int)INT_MAX + 1U) *out = INT_MIN; @@ -220,53 +191,99 @@ static int read_signed(struct scan_ctx* ctx, int* out) return 1; } -/* Read pointer (hex) */ +/* + * Read a single character (no whitespace skipping). + * Returns 1 on success, 0 on EOF. + */ +static int read_char(struct scan_ctx* ctx, char* out) +{ + int c = next_char(ctx); + if (c == -1) return 0; + *out = (char)c; + return 1; +} + +/* + * Read a whitespace-delimited string. + * max_len: maximum number of characters to store (including null terminator). + * Returns 1 on success, 0 on failure. + */ +static int read_string(struct scan_ctx* ctx, char* out, size_t max_len) +{ + skip_whitespace(ctx); + size_t i = 0; + int c; + while ((c = next_char(ctx)) != -1) { + if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v') { + unget_char(ctx, c); + break; + } + if (i + 1 < max_len) + out[i++] = (char)c; + } + if (i == 0) return 0; + out[i] = '\0'; + return 1; +} + +/* + * Read a pointer value in hexadecimal. + * Returns 1 on success, 0 on failure. 
+ */ static int read_pointer(struct scan_ctx* ctx, void** out) { skip_whitespace(ctx); + int c = next_char(ctx); + if (c == -1) return 0; + unsigned long addr = 0; int consumed = 0; - int c = peek(ctx); - /* optional 0x prefix */ + + // Optional 0x prefix if (c == '0') { - consume(ctx); - c = peek(ctx); - if (c == 'x' || c == 'X') { - consume(ctx); - c = peek(ctx); + int n = next_char(ctx); + if (n == 'x' || n == 'X') { + c = next_char(ctx); } else { - /* just zero pointer */ + unget_char(ctx, n); *out = (void*)0; return 1; } } - while ((c = peek(ctx)) != -1) { - int digit = hex_digit_val(c); - if (digit < 0) - break; - if (addr > (ULONG_MAX - (unsigned long)digit) / 16) { - break; - } - addr = addr * 16 + (unsigned long)digit; - consume(ctx); + + while (c != -1) { + int digit = -1; + if (c >= '0' && c <= '9') + digit = c - '0'; + else if (c >= 'a' && c <= 'f') + digit = c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + digit = c - 'A' + 10; + else break; + + if (addr > (ULONG_MAX - digit) / 16) break; // overflow protection + addr = addr * 16 + digit; consumed = 1; - c = peek(ctx); + c = next_char(ctx); } - if (!consumed) - return 0; + + if (!consumed) return 0; *out = (void*)addr; return 1; } +/* + * stdoc_scanf - minimal scanf implementation using direct syscalls. + * Returns number of successfully matched items, or -1 on EOF before any match. + */ int stdoc_scanf(const char* format, ...) { struct scan_ctx ctx; char buf[STDOC_SCANF_BUF_SIZE]; ctx.buf = buf; - ctx.idx = 0; + ctx.pos = 0; ctx.len = 0; ctx.eof = 0; - ctx.cur = -1; va_list args; va_start(args, format); @@ -277,22 +294,20 @@ int stdoc_scanf(const char* format, ...) while (*p) { if (*p == '%') { p++; - if (*p == '\0') break; - if (*p == '%') { - /* literal '%' - skip whitespace? no, just match a '%' */ - int c = read_char(&ctx); + if (*p == '%') { // literal '%' + int c = next_char(&ctx); if (c != '%') { - /* mismatch, fail */ va_end(args); - return items == 0 ? -1 : items; + return items ? 
items : -1; } p++; continue; } - /* Skip whitespace for all specifiers except %c? Actually standard scanf skips whitespace for %d,%u,%x,%s,%p but not %c */ + + // Skip whitespace for all specifiers except %c bool skip_ws = (*p != 'c'); - if (skip_ws) - skip_whitespace(&ctx); + if (skip_ws) skip_whitespace(&ctx); + switch (*p) { case 'd': { int* ptr = va_arg(args, int*); @@ -320,19 +335,15 @@ int stdoc_scanf(const char* format, ...) } case 'c': { char* ptr = va_arg(args, char*); - int c = read_char(&ctx); - if (c != -1) { - *ptr = (char)c; + if (read_char(&ctx, ptr)) items++; - } else { + else goto done; - } break; } case 's': { char* ptr = va_arg(args, char*); - /* arbitrary limit; user must allocate enough */ - if (read_string(&ctx, ptr, 4096)) /* large enough, but not safe? we can use size param? skip for minimal */ + if (read_string(&ctx, ptr, 4096)) // FIXME: no width limit items++; else goto done; @@ -346,18 +357,17 @@ int stdoc_scanf(const char* format, ...) goto done; break; } - default: - /* unknown specifier: ignore and match literal? just fail */ + default: // unknown specifier, abort goto done; } p++; } else if (*p == ' ' || *p == '\t' || *p == '\n') { - /* whitespace in format: skip any whitespace in input */ + // Whitespace in format: skip any whitespace in input skip_whitespace(&ctx); p++; } else { - /* literal character */ - int c = read_char(&ctx); + // Literal character match + int c = next_char(&ctx); if (c != (unsigned char)*p) { goto done; }