diff --git a/include/stdoc.h b/include/stdoc.h index 14e03a3..f2e5471 100644 --- a/include/stdoc.h +++ b/include/stdoc.h @@ -19,6 +19,7 @@ #include "stdoc/io/stdoc_printf.h" #include "stdoc/io/stdoc_version.h" +#include "stdoc/io/stdoc_scanf.h" #include "stdoc/utility/compiler/macros/macros.h" #endif /* STDOC_H */ diff --git a/include/stdoc/io/stdoc_printf.h b/include/stdoc/io/stdoc_printf.h index 0da0e6f..58def9f 100644 --- a/include/stdoc/io/stdoc_printf.h +++ b/include/stdoc/io/stdoc_printf.h @@ -69,8 +69,7 @@ #if !defined(STDOC_USE_LIBC) -STDOC_STATIC_INLINE long stdoc_syscall_write(int fd, const void* buf, - unsigned long count) +static inline long stdoc_syscall_write(int fd, const void* buf, unsigned long count) { long ret; @@ -176,10 +175,10 @@ STDOC_STATIC_INLINE long stdoc_syscall_write(int fd, const void* buf, * * Notes: * - Format string is validated at compile-time when supported - * via STDOC_ATTR_PRINTF + * via STDOC_ATTRIBUTE_PRINTF * - Behavior is undefined for unsupported format specifiers */ -STDOC_ATTR_PRINTF(1, 2) +STDOC_ATTRIBUTE_PRINTF(1, 2) int stdoc_printf(const char* format, ...); #endif /* STDOC_PRINTF_H */ diff --git a/include/stdoc/io/stdoc_scanf.h b/include/stdoc/io/stdoc_scanf.h new file mode 100644 index 0000000..68424fe --- /dev/null +++ b/include/stdoc/io/stdoc_scanf.h @@ -0,0 +1,42 @@ +#ifndef STDOC_SCANF_H +#define STDOC_SCANF_H + +#include +#include + +/* + * stdoc_scanf + * + * Minimal scanf-like function reading from stdin (fd 0). + * + * Parameters: + * format - format string + * ... - pointers to variables to store input + * + * Returns: + * Number of successfully matched and assigned input items. 
+ * + * Supported format specifiers: + * %d - int (decimal, optional leading '+' or '-') + * %u - unsigned int (decimal) + * %x - unsigned int (hexadecimal, lowercase/uppercase accepted) + * %c - char (no whitespace skipping) + * %s - string (whitespace-delimited; at most 4095 characters are stored + * plus a terminating NUL, so the destination must hold 4096 bytes) + * %p - void* (hexadecimal, optional 0x prefix) + * %% - literal '%' + * + * Behavior: + * - Input is buffered (single syscall per scanf call if possible) + * - Leading whitespace (space, \t, \n, \r, \f, \v) is skipped automatically + * before %d/%u/%x/%s/%p + * - %c does NOT skip whitespace + * - Whitespace in the format skips any amount of input whitespace + * - Any other format character must match the next input character exactly + * - Returns EOF if input fails before any conversion. + * + * Limitations: + * - No width, no assignment suppression, no length modifiers. + * - No floating-point support. + */ + +STDOC_ATTRIBUTE_SCANF(1, 2) +int stdoc_scanf(const char* format, ...); + +#endif /* STDOC_SCANF_H */ diff --git a/include/stdoc/utility/compiler/macros/macros.h b/include/stdoc/utility/compiler/macros/macros.h index 76adc58..aca26ee 100644 --- a/include/stdoc/utility/compiler/macros/macros.h +++ b/include/stdoc/utility/compiler/macros/macros.h @@ -56,80 +56,68 @@ #endif /* STDOC_COMPILER_GNU_LIKE */ /* - * STDOC_ATTR_NORETURN + * STDOC_ATTRIBUTE_NORETURN * Indicates that a function never returns (exit, abort, infinite loop). */ #if STDOC_COMPILER_GNU_LIKE -# define STDOC_ATTR_NORETURN __attribute__((noreturn)) +# define STDOC_ATTRIBUTE_NORETURN __attribute__((noreturn)) #elif STDOC_COMPILER_MSVC -# define STDOC_ATTR_NORETURN __declspec(noreturn) +# define STDOC_ATTRIBUTE_NORETURN __declspec(noreturn) #else -# define STDOC_ATTR_NORETURN -#endif /* STDOC_ATTR_NORETURN */ +# define STDOC_ATTRIBUTE_NORETURN +#endif /* STDOC_ATTRIBUTE_NORETURN */ /* - * STDOC_ATTR_UNUSED + * STDOC_ATTRIBUTE_UNUSED * Marks a variable or parameter as intentionally unused. 
*/ #if STDOC_COMPILER_GNU_LIKE -# define STDOC_ATTR_UNUSED __attribute__((unused)) +# define STDOC_ATTRIBUTE_UNUSED __attribute__((unused)) #else -# define STDOC_ATTR_UNUSED -#endif /* STDOC_ATTR_UNUSED */ +# define STDOC_ATTRIBUTE_UNUSED +#endif /* STDOC_ATTRIBUTE_UNUSED */ /* - * STDOC_ATTR_ALWAYS_INLINE - * Forces inlining of a function, even at low optimization levels. - */ -#if STDOC_COMPILER_GNU_LIKE -# define STDOC_ATTR_ALWAYS_INLINE inline __attribute__((always_inline)) -#elif STDOC_COMPILER_MSVC -# define STDOC_ATTR_ALWAYS_INLINE __forceinline -#else -# define STDOC_ATTR_ALWAYS_INLINE inline -#endif /* STDOC_ATTR_ALWAYS_INLINE */ - -/* - * STDOC_ATTR_PRINTF(fmt, arg) + * STDOC_ATTRIBUTE_PRINTF(fmt, arg) * Enables compile-time format string checking for printf-like functions. */ #if STDOC_COMPILER_GNU_LIKE && !STDOC_COMPILER_ICC -# define STDOC_ATTR_PRINTF(fmt, arg) __attribute__((format(printf, fmt, arg))) +# define STDOC_ATTRIBUTE_PRINTF(fmt, arg) __attribute__((format(printf, fmt, arg))) #else -# define STDOC_ATTR_PRINTF(fmt, arg) -#endif /* STDOC_ATTR_PRINTF */ +# define STDOC_ATTRIBUTE_PRINTF(fmt, arg) +#endif /* STDOC_ATTRIBUTE_PRINTF */ /* - * STDOC_ATTR_SCANF(fmt, arg) + * STDOC_ATTRIBUTE_SCANF(fmt, arg) * Enables compile-time format string checking for scanf-like functions. */ #if STDOC_COMPILER_GNU_LIKE && !STDOC_COMPILER_ICC -# define STDOC_ATTR_SCANF(fmt, arg) __attribute__((format(scanf, fmt, arg))) +# define STDOC_ATTRIBUTE_SCANF(fmt, arg) __attribute__((format(scanf, fmt, arg))) #else -# define STDOC_ATTR_SCANF(fmt, arg) -#endif /* STDOC_ATTR_SCANF */ +# define STDOC_ATTRIBUTE_SCANF(fmt, arg) +#endif /* STDOC_ATTRIBUTE_SCANF */ /* - * STDOC_ATTR_WARN_UNUSED + * STDOC_ATTRIBUTE_WARN_UNUSED * Generates a warning if the function's return value is ignored. 
*/ #if STDOC_COMPILER_GNU_LIKE -# define STDOC_ATTR_WARN_UNUSED __attribute__((warn_unused_result)) +# define STDOC_ATTRIBUTE_WARN_UNUSED __attribute__((warn_unused_result)) #else -# define STDOC_ATTR_WARN_UNUSED -#endif /* STDOC_ATTR_WARN_UNUSED */ +# define STDOC_ATTRIBUTE_WARN_UNUSED +#endif /* STDOC_ATTRIBUTE_WARN_UNUSED */ /* - * STDOC_ATTR_DEPRECATED(msg) + * STDOC_ATTRIBUTE_DEPRECATED(msg) * Marks a function or variable as deprecated. */ #if STDOC_COMPILER_GNU_LIKE -# define STDOC_ATTR_DEPRECATED(msg) __attribute__((deprecated(msg))) +# define STDOC_ATTRIBUTE_DEPRECATED(msg) __attribute__((deprecated(msg))) #elif STDOC_COMPILER_MSVC -# define STDOC_ATTR_DEPRECATED(msg) __declspec(deprecated(msg)) +# define STDOC_ATTRIBUTE_DEPRECATED(msg) __declspec(deprecated(msg)) #else -# define STDOC_ATTR_DEPRECATED(msg) -#endif /* STDOC_ATTR_DEPRECATED */ +# define STDOC_ATTRIBUTE_DEPRECATED(msg) +#endif /* STDOC_ATTRIBUTE_DEPRECATED */ /* * STDOC_PUBLIC / STDOC_LOCAL @@ -166,22 +154,6 @@ # define STDOC_UNLIKELY(x) (x) #endif /* branch prediction */ -/* - * STDOC_INLINE / STDOC_STATIC_INLINE - * Portable inline keywords. - */ -#ifndef STDOC_INLINE -# if STDOC_COMPILER_MSVC -# define STDOC_INLINE __inline -# else -# define STDOC_INLINE inline -# endif -#endif /* STDOC_INLINE */ - -#ifndef STDOC_STATIC_INLINE -# define STDOC_STATIC_INLINE static STDOC_INLINE -#endif /* STDOC_STATIC_INLINE */ - /* * STDOC_ARRAY_SIZE(arr) * Number of elements in a static array. diff --git a/io/stdoc_scanf.c b/io/stdoc_scanf.c new file mode 100644 index 0000000..3db84f9 --- /dev/null +++ b/io/stdoc_scanf.c @@ -0,0 +1,381 @@ +#include +#include +#include +#include + +#define STDOC_SCANF_BUF_SIZE 4096 + +/* + * syscall_read - raw read from stdin (file descriptor 0) + * Returns number of bytes read, 0 on EOF, -1 on error. 
+ */
+static long syscall_read(void* buf, unsigned long count)
+{
+    /*
+     * NOTE: the raw kernel result is passed through unchanged. On Linux a
+     * failed syscall yields a negative errno value rather than exactly -1,
+     * so callers must test for "< 0" (refill() below treats any
+     * non-positive result as end of input).
+     */
+    long ret;
+#if defined(__x86_64__)
+    /* rax = 0 (read); fd/buf/count are pinned to rdi/rsi/rdx by constraints */
+    __asm__ volatile("mov $0, %%rax\n"
+                     "syscall"
+                     : "=a"(ret)
+                     : "D"(0), "S"(buf), "d"(count)
+                     : "rcx", "r11", "memory");
+#elif defined(__i386__)
+    /* eax = 3 (read); fd/buf/count are pinned to ebx/ecx/edx by constraints */
+    __asm__ volatile("mov $3, %%eax\n"
+                     "int $0x80"
+                     : "=a"(ret)
+                     : "b"(0), "c"(buf), "d"(count)
+                     : "memory");
+#elif defined(__aarch64__)
+    /*
+     * A plain "r" constraint lets the compiler choose any register, but the
+     * kernel takes the syscall number in x8, the arguments in x0-x2 and
+     * returns the result in x0. Local register variables pin every operand
+     * to the register the ABI requires.
+     */
+    register long x8 __asm__("x8") = 63; /* read */
+    register long x0 __asm__("x0") = 0;  /* fd 0 on entry, result on exit */
+    register void* x1 __asm__("x1") = buf;
+    register unsigned long x2 __asm__("x2") = count;
+    __asm__ volatile("svc #0"
+                     : "+r"(x0)
+                     : "r"(x8), "r"(x1), "r"(x2)
+                     : "memory");
+    ret = x0;
+#elif defined(__arm__)
+    /* Same reasoning as aarch64: number in r7, arguments in r0-r2, result in r0 */
+    register long r7 __asm__("r7") = 3; /* read */
+    register long r0 __asm__("r0") = 0; /* fd 0 on entry, result on exit */
+    register void* r1 __asm__("r1") = buf;
+    register unsigned long r2 __asm__("r2") = count;
+    __asm__ volatile("swi #0"
+                     : "+r"(r0)
+                     : "r"(r7), "r"(r1), "r"(r2)
+                     : "memory");
+    ret = r0;
+#else
+#error "Unsupported architecture"
+#endif
+    return ret;
+}
+
+/*
+ * Scan context: holds internal buffer and state.
+ */
+struct scan_ctx {
+    char* buf; // pointer to buffer (STDOC_SCANF_BUF_SIZE bytes, see refill)
+    int pos;   // index of the next unread byte in buf
+    int len;   // number of valid bytes in buf
+    int eof;   // non-zero once a read returned no data (EOF or error)
+};
+
+/*
+ * Refill buffer from stdin.
+ *
+ * Overwrites the whole buffer with up to STDOC_SCANF_BUF_SIZE fresh bytes and
+ * consumes the first one.
+ *
+ * Returns the first character read (0..255), or -1 on EOF/error. Once a read
+ * has failed, ctx->eof stays set and no further syscall is attempted.
+ */
+static int refill(struct scan_ctx* ctx)
+{
+    if (ctx->eof) return -1;
+
+    long n = syscall_read(ctx->buf, STDOC_SCANF_BUF_SIZE);
+    if (n <= 0) {
+        // zero means end of input, a negative value is a read error
+        ctx->eof = 1;
+        return -1;
+    }
+
+    ctx->pos = 0;
+    ctx->len = (int)n;
+    return (unsigned char)ctx->buf[ctx->pos++];
+}
+
+/*
+ * Return next character from input, consuming it.
+ * Serves from the buffer when possible and refills it otherwise.
+ * Returns the character as an unsigned char value, or -1 on EOF.
+ */
+static int next_char(struct scan_ctx* ctx)
+{
+    if (ctx->pos < ctx->len)
+        return (unsigned char)ctx->buf[ctx->pos++];
+    return refill(ctx);
+}
+
+/*
+ * Push back one character into the buffer.
+ * Only guarantees one level of unget.
+ *
+ * The EOF sentinel (-1) is ignored: storing it would plant a 0xFF byte in the
+ * buffer and rewind onto it, so a later next_char() would return a character
+ * that was never in the input.
+ */
+static void unget_char(struct scan_ctx* ctx, int c)
+{
+    if (c == -1) return;
+
+    if (ctx->pos > 0) {
+        ctx->pos--;
+        ctx->buf[ctx->pos] = (char)c;
+    }
+}
+
+/*
+ * Skip whitespace characters (space, tab, newline, etc.).
+ * Stops in front of the first non-whitespace character (it is pushed back)
+ * or at EOF.
+ */
+static void skip_whitespace(struct scan_ctx* ctx)
+{
+    int c;
+    while ((c = next_char(ctx)) != -1) {
+        if (!(c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v')) {
+            unget_char(ctx, c);
+            break;
+        }
+    }
+}
+
+/*
+ * Read an unsigned integer in given base (10 or 16).
+ *
+ * Leading whitespace is skipped. For base 16 an optional "0x"/"0X" prefix is
+ * accepted. The whole digit run is consumed; a value that does not fit in
+ * unsigned int is stored as UINT_MAX. The character that ends the number is
+ * left unread so the next directive can see it.
+ *
+ * Returns 1 on success, 0 on failure (no digit found; *out is untouched).
+ */
+static int read_unsigned(struct scan_ctx* ctx, unsigned int* out, int base)
+{
+    skip_whitespace(ctx);
+    int c = next_char(ctx);
+    if (c == -1) return 0;
+
+    unsigned int val = 0;
+    int consumed = 0;
+
+    // Handle optional 0x prefix for hex
+    if (base == 16 && c == '0') {
+        int n = next_char(ctx);
+        if (n == 'x' || n == 'X') {
+            c = next_char(ctx); // first digit after the prefix
+        } else {
+            // No prefix: the '0' is simply the first digit of the number
+            // (e.g. "0ff"), so keep parsing from the character after it.
+            consumed = 1;
+            c = n;
+        }
+    }
+
+    while (c != -1) {
+        int digit = -1;
+        if (c >= '0' && c <= '9')
+            digit = c - '0';
+        else if (base == 16 && c >= 'a' && c <= 'f')
+            digit = c - 'a' + 10;
+        else if (base == 16 && c >= 'A' && c <= 'F')
+            digit = c - 'A' + 10;
+
+        if (digit < 0 || digit >= base) break;
+
+        // Saturate instead of wrapping. Once val is UINT_MAX this test stays
+        // true, so the remaining digits are consumed without changing it.
+        if (val > (UINT_MAX - (unsigned int)digit) / (unsigned int)base)
+            val = UINT_MAX;
+        else
+            val = val * (unsigned int)base + (unsigned int)digit;
+
+        consumed = 1;
+        c = next_char(ctx);
+    }
+
+    // The terminator is not part of the number: hand it back to the input.
+    if (c != -1) unget_char(ctx, c);
+
+    if (!consumed) return 0;
+    *out = val;
+    return 1;
+}
+
+/*
+ * Read a signed decimal integer.
+ * Returns 1 on success, 0 on failure.
+ * Accepts an optional '+' or '-' followed by at least one decimal digit.
+ * Results outside the int range are clamped to INT_MIN / INT_MAX.
+ */
+static int read_signed(struct scan_ctx* ctx, int* out)
+{
+    skip_whitespace(ctx);
+    int c = next_char(ctx);
+    if (c == -1) return 0;
+
+    int sign = 1;
+    if (c == '-') {
+        sign = -1;
+        c = next_char(ctx);
+    } else if (c == '+') {
+        c = next_char(ctx);
+    }
+
+    if (c == -1) return 0;
+
+    // Either the first digit (read_unsigned re-reads it) or an offending
+    // character that must stay available for whatever parses next.
+    unget_char(ctx, c);
+    if (c < '0' || c > '9') return 0;
+
+    unsigned int uval;
+    if (!read_unsigned(ctx, &uval, 10)) return 0;
+
+    // Handle signed overflow. Negating is only safe for magnitudes up to
+    // INT_MAX; anything larger (including exactly INT_MAX + 1) maps to
+    // INT_MIN without ever evaluating -(int)uval on an unrepresentable value.
+    if (sign == -1) {
+        if (uval > (unsigned int)INT_MAX)
+            *out = INT_MIN;
+        else
+            *out = -(int)uval;
+    } else {
+        if (uval > (unsigned int)INT_MAX)
+            *out = INT_MAX;
+        else
+            *out = (int)uval;
+    }
+    return 1;
+}
+
+/*
+ * Read a single character (no whitespace skipping).
+ * Returns 1 on success, 0 on EOF.
+ */
+static int read_char(struct scan_ctx* ctx, char* out)
+{
+    int c = next_char(ctx);
+    if (c == -1) return 0;
+    *out = (char)c;
+    return 1;
+}
+
+/*
+ * Read a whitespace-delimited string.
+ * max_len: maximum number of characters to store (including null terminator).
+ *
+ * Leading whitespace is skipped and the delimiting whitespace character is
+ * left unread. Characters that do not fit in max_len - 1 are still consumed
+ * but dropped, so an over-long word is truncated rather than split.
+ *
+ * Returns 1 on success, 0 on failure (nothing stored; out is untouched).
+ */
+static int read_string(struct scan_ctx* ctx, char* out, size_t max_len)
+{
+    skip_whitespace(ctx);
+    size_t i = 0;
+    int c;
+    while ((c = next_char(ctx)) != -1) {
+        if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v') {
+            unget_char(ctx, c);
+            break;
+        }
+        if (i + 1 < max_len) // always keep one byte for the terminator
+            out[i++] = (char)c;
+    }
+    if (i == 0) return 0;
+    out[i] = '\0';
+    return 1;
+}
+
+/*
+ * Read a pointer value in hexadecimal.
+ * Returns 1 on success, 0 on failure.
+ * Leading whitespace is skipped and an optional "0x"/"0X" prefix is accepted.
+ * The whole hex digit run is consumed; a value that does not fit in
+ * unsigned long is stored as ULONG_MAX. The character that ends the number
+ * is left unread.
+ */
+static int read_pointer(struct scan_ctx* ctx, void** out)
+{
+    skip_whitespace(ctx);
+    int c = next_char(ctx);
+    if (c == -1) return 0;
+
+    unsigned long addr = 0;
+    int consumed = 0;
+
+    // Optional 0x prefix
+    if (c == '0') {
+        int n = next_char(ctx);
+        if (n == 'x' || n == 'X') {
+            c = next_char(ctx); // first digit after the prefix
+        } else {
+            // No prefix: the '0' is the first digit, keep parsing after it
+            consumed = 1;
+            c = n;
+        }
+    }
+
+    while (c != -1) {
+        int digit = -1;
+        if (c >= '0' && c <= '9')
+            digit = c - '0';
+        else if (c >= 'a' && c <= 'f')
+            digit = c - 'a' + 10;
+        else if (c >= 'A' && c <= 'F')
+            digit = c - 'A' + 10;
+        else
+            break;
+
+        // Saturate instead of wrapping; once at ULONG_MAX the test stays true
+        if (addr > (ULONG_MAX - (unsigned long)digit) / 16)
+            addr = ULONG_MAX;
+        else
+            addr = addr * 16 + (unsigned long)digit;
+
+        consumed = 1;
+        c = next_char(ctx);
+    }
+
+    // The terminator is not part of the value: hand it back to the input.
+    if (c != -1) unget_char(ctx, c);
+
+    if (!consumed) return 0;
+    *out = (void*)addr;
+    return 1;
+}
+
+/*
+ * stdoc_scanf - minimal scanf implementation using direct syscalls.
+ *
+ * Walks the format string once:
+ *   - whitespace in the format skips any amount of input whitespace;
+ *   - "%%" and every other ordinary character must match the next input
+ *     character exactly (a mismatching character is left unread);
+ *   - %d %u %x %c %s %p convert input and store through the next pointer
+ *     argument; %s stores at most 4095 characters plus the terminator.
+ * Processing stops at the first directive that fails or at an unknown
+ * conversion specifier.
+ *
+ * The input buffer has static storage: bytes read from stdin but not
+ * consumed by this call stay available to the next call. As a consequence
+ * the function is neither reentrant nor thread-safe.
+ *
+ * Returns number of successfully matched items, or -1 when the input ended
+ * (EOF or read error) before the first conversion could be made.
+ */
+int stdoc_scanf(const char* format, ...)
+{
+    // A per-call buffer would throw away everything the read syscall
+    // delivered beyond what this call consumed (e.g. the "2" of "1 2\n").
+    static char buf[STDOC_SCANF_BUF_SIZE];
+    static struct scan_ctx ctx = { buf, 0, 0, 0 };
+
+    // End-of-input is re-probed on every call; buffered bytes are kept.
+    ctx.eof = 0;
+
+    va_list args;
+    va_start(args, format);
+
+    int items = 0;
+    int failed = 0; // set when a directive could not be satisfied
+    const char* p = format;
+
+    while (*p) {
+        if (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r' || *p == '\f' || *p == '\v') {
+            // Whitespace in format: skip any whitespace in input
+            skip_whitespace(&ctx);
+            p++;
+            continue;
+        }
+
+        if (*p != '%' || p[1] == '%') {
+            // Literal character match ("%%" stands for a single '%')
+            int c = next_char(&ctx);
+            if (c != (unsigned char)*p) {
+                if (c != -1) unget_char(&ctx, c);
+                failed = 1;
+                goto done;
+            }
+            p += (*p == '%') ? 2 : 1;
+            continue;
+        }
+
+        p++; // step over '%' onto the conversion specifier
+
+        // Each reader skips leading whitespace itself where that applies
+        // (everything except %c), so no extra skipping is needed here.
+        int ok;
+        switch (*p) {
+        case 'd':
+            ok = read_signed(&ctx, va_arg(args, int*));
+            break;
+        case 'u':
+            ok = read_unsigned(&ctx, va_arg(args, unsigned int*), 10);
+            break;
+        case 'x':
+            ok = read_unsigned(&ctx, va_arg(args, unsigned int*), 16);
+            break;
+        case 'c':
+            ok = read_char(&ctx, va_arg(args, char*));
+            break;
+        case 's':
+            // FIXME: no width limit; the caller's buffer must hold 4096 bytes
+            ok = read_string(&ctx, va_arg(args, char*), 4096);
+            break;
+        case 'p':
+            ok = read_pointer(&ctx, va_arg(args, void**));
+            break;
+        default: // unknown specifier (or '%' at end of format), abort
+            goto done;
+        }
+
+        if (!ok) {
+            failed = 1;
+            goto done;
+        }
+        items++;
+        p++;
+    }
+
+done:
+    va_end(args);
+
+    // Input failure before the first conversion: a directive failed, nothing
+    // was assigned, and the input is exhausted (read failed, buffer empty).
+    if (failed && items == 0 && ctx.eof && ctx.pos >= ctx.len)
+        return -1;
+    return items;
+}