diff --git a/kernel/Makefile b/kernel/Makefile index fc971d30..6c2b2f1f 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -72,6 +72,7 @@ CXX_SOURCES += $(shell find smp -name '*.cpp' 2>/dev/null | sort) CXX_SOURCES += $(shell find rc -name '*.cpp' 2>/dev/null | sort) CXX_SOURCES += $(shell find fs -name '*.cpp' 2>/dev/null | sort) CXX_SOURCES += $(shell find resource -name '*.cpp' 2>/dev/null | sort) +CXX_SOURCES += $(shell find socket -name '*.cpp' 2>/dev/null | sort) CXX_SOURCES += $(shell find arch/$(ARCH) -name '*.cpp' 2>/dev/null | sort) # Unit test sources (only when STLX_UNIT_TESTS_ENABLED=1) diff --git a/kernel/arch/aarch64/syscall/linux_syscalls.h b/kernel/arch/aarch64/syscall/linux_syscalls.h index 03a29710..ec54ab40 100644 --- a/kernel/arch/aarch64/syscall/linux_syscalls.h +++ b/kernel/arch/aarch64/syscall/linux_syscalls.h @@ -17,6 +17,13 @@ constexpr uint64_t MPROTECT = 226; constexpr uint64_t EXIT = 93; constexpr uint64_t EXIT_GROUP = 94; constexpr uint64_t SET_TID_ADDRESS = 96; +constexpr uint64_t SOCKET = 198; +constexpr uint64_t SOCKETPAIR = 199; +constexpr uint64_t BIND = 200; +constexpr uint64_t LISTEN = 201; +constexpr uint64_t ACCEPT = 202; +constexpr uint64_t CONNECT = 203; +constexpr uint64_t FCNTL = 25; } // namespace syscall::linux_nr diff --git a/kernel/arch/x86_64/syscall/linux_syscalls.h b/kernel/arch/x86_64/syscall/linux_syscalls.h index b8f5fd7b..1c1804ef 100644 --- a/kernel/arch/x86_64/syscall/linux_syscalls.h +++ b/kernel/arch/x86_64/syscall/linux_syscalls.h @@ -19,6 +19,13 @@ constexpr uint64_t EXIT = 60; constexpr uint64_t ARCH_PRCTL = 158; constexpr uint64_t SET_TID_ADDRESS = 218; constexpr uint64_t EXIT_GROUP = 231; +constexpr uint64_t SOCKET = 41; +constexpr uint64_t CONNECT = 42; +constexpr uint64_t ACCEPT = 43; +constexpr uint64_t BIND = 49; +constexpr uint64_t LISTEN = 50; +constexpr uint64_t SOCKETPAIR = 53; +constexpr uint64_t FCNTL = 72; } // namespace syscall::linux_nr diff --git a/kernel/common/string.cpp 
b/kernel/common/string.cpp index ca8a0a9b..7c0302e9 100644 --- a/kernel/common/string.cpp +++ b/kernel/common/string.cpp @@ -1,5 +1,13 @@ #include "string.h" +extern "C" void* memset(void* dest, int c, size_t n) { + auto* d = static_cast(dest); + for (size_t i = 0; i < n; ++i) { + d[i] = static_cast(c); + } + return dest; +} + namespace string { size_t strlen(const char* s) { @@ -20,11 +28,7 @@ void* memcpy(void* dest, const void* src, size_t n) { } void* memset(void* dest, int c, size_t n) { - auto* d = static_cast(dest); - for (size_t i = 0; i < n; ++i) { - d[i] = static_cast(c); - } - return dest; + return ::memset(dest, c, n); } int memcmp(const void* s1, const void* s2, size_t n) { diff --git a/kernel/fs/fs.cpp b/kernel/fs/fs.cpp index 7506f784..81bd6ed5 100644 --- a/kernel/fs/fs.cpp +++ b/kernel/fs/fs.cpp @@ -60,6 +60,7 @@ int32_t node::ioctl(file*, uint32_t, uint64_t) { return ERR_NOSYS; } int32_t node::open(file*, uint32_t) { return OK; } int32_t node::on_close(file*) { return OK; } int32_t node::readlink(char*, size_t, size_t*) { return ERR_NOSYS; } +int32_t node::create_socket(const char*, size_t, void*, node**) { return ERR_NOSYS; } int32_t node::getattr(vattr* attr) { if (!attr) return ERR_INVAL; @@ -245,6 +246,14 @@ __PRIVILEGED_CODE int32_t lookup(const char* path, node** out) { return resolve_path(path, out); } +__PRIVILEGED_CODE int32_t resolve_parent_path( + const char* path, node** out_parent, + const char** out_name, size_t* out_name_len +) { + if (!path || !out_parent || !out_name || !out_name_len) return ERR_INVAL; + return resolve_parent(path, out_parent, out_name, out_name_len); +} + __PRIVILEGED_CODE int32_t mount(const char* source, const char* target, const char* fs_name, uint32_t flags) { diff --git a/kernel/fs/fs.h b/kernel/fs/fs.h index 3d230aff..2dcb24c6 100644 --- a/kernel/fs/fs.h +++ b/kernel/fs/fs.h @@ -76,6 +76,16 @@ ssize_t readdir(file* f, dirent* entries, size_t count); */ __PRIVILEGED_CODE int32_t lookup(const char* path, 
node** out); +/** + * @brief Resolve the parent directory of a path and extract the final component. + * On success, *out_parent has add_ref() called; caller must release. + * @note Privilege: **required** + */ +__PRIVILEGED_CODE int32_t resolve_parent_path( + const char* path, node** out_parent, + const char** out_name, size_t* out_name_len +); + } // namespace fs #endif // STELLUX_FS_FS_H diff --git a/kernel/fs/fstypes.h b/kernel/fs/fstypes.h index d052f011..d6def255 100644 --- a/kernel/fs/fstypes.h +++ b/kernel/fs/fstypes.h @@ -10,7 +10,8 @@ enum class node_type : uint32_t { directory, symlink, char_device, - block_device + block_device, + socket }; constexpr size_t NAME_MAX = 255; @@ -22,7 +23,8 @@ constexpr uint32_t O_WRONLY = 1; constexpr uint32_t O_RDWR = 2; constexpr uint32_t O_CREAT = 0x40; constexpr uint32_t O_TRUNC = 0x200; -constexpr uint32_t O_APPEND = 0x400; +constexpr uint32_t O_APPEND = 0x400; +constexpr uint32_t O_NONBLOCK = 0x800; constexpr uint32_t ACCESS_MODE_MASK = 0x3; diff --git a/kernel/fs/node.h b/kernel/fs/node.h index 969972b9..034e81e1 100644 --- a/kernel/fs/node.h +++ b/kernel/fs/node.h @@ -51,6 +51,9 @@ class node : public rc::ref_counted { // --- Symlink --- virtual int32_t readlink(char* buf, size_t size, size_t* out_len); + // --- Socket node creation (directory nodes may override) --- + virtual int32_t create_socket(const char* name, size_t len, void* impl, node** out); + /** * ref_counted contract. Destroys the node and frees privileged memory. 
* @note Privilege: **required** diff --git a/kernel/fs/ramfs/ramfs.cpp b/kernel/fs/ramfs/ramfs.cpp index bc41887a..d8cef3b0 100644 --- a/kernel/fs/ramfs/ramfs.cpp +++ b/kernel/fs/ramfs/ramfs.cpp @@ -1,4 +1,5 @@ #include "fs/ramfs/ramfs.h" +#include "fs/socket_node.h" #include "fs/fs.h" #include "common/string.h" #include "mm/heap.h" @@ -145,6 +146,36 @@ int32_t dir_node::create(const char* name, size_t len, uint32_t mode, fs::node** return fs::OK; } +int32_t dir_node::create_socket(const char* name, size_t len, void* impl, fs::node** out) { + (void)impl; + if (!name || !out || len == 0) return fs::ERR_INVAL; + if (len > fs::NAME_MAX) return fs::ERR_NAMETOOLONG; + + sync::irq_lock_guard guard(m_lock); + + if (find_child(name, len)) { + return fs::ERR_EXIST; + } + + char name_buf[fs::NAME_MAX + 1]; + string::memcpy(name_buf, name, len); + name_buf[len] = '\0'; + + void* mem = heap::kzalloc(sizeof(fs::socket_node)); + if (!mem) { + return fs::ERR_NOMEM; + } + auto* child = new (mem) fs::socket_node(m_fs, name_buf); + + child->set_parent(this); + m_children.push_back(child); + m_child_count++; + + child->add_ref(); + *out = child; + return fs::OK; +} + int32_t dir_node::mkdir(const char* name, size_t len, uint32_t mode, fs::node** out) { if (!name || !out || len == 0) return fs::ERR_INVAL; if (len > fs::NAME_MAX) return fs::ERR_NAMETOOLONG; diff --git a/kernel/fs/ramfs/ramfs.h b/kernel/fs/ramfs/ramfs.h index 519af653..fe13ed8c 100644 --- a/kernel/fs/ramfs/ramfs.h +++ b/kernel/fs/ramfs/ramfs.h @@ -27,6 +27,7 @@ class dir_node : public fs::node { int32_t rmdir(const char* name, size_t len) override; ssize_t readdir(fs::file* f, fs::dirent* entries, size_t count) override; int32_t getattr(fs::vattr* attr) override; + int32_t create_socket(const char* name, size_t len, void* impl, fs::node** out) override; private: fs::node* find_child(const char* name, size_t len); diff --git a/kernel/fs/socket_node.cpp b/kernel/fs/socket_node.cpp new file mode 100644 index 
00000000..3ba63c84 --- /dev/null +++ b/kernel/fs/socket_node.cpp @@ -0,0 +1,23 @@ +#include "fs/socket_node.h" +#include "fs/fs.h" +#include "socket/listener.h" + +namespace fs { + +socket_node::socket_node(instance* fs, const char* name) + : node(node_type::socket, fs, name) {} + +int32_t socket_node::getattr(vattr* attr) { + if (!attr) { + return fs::ERR_INVAL; + } + attr->type = node_type::socket; + attr->size = 0; + return fs::OK; +} + +void socket_node::set_listener(rc::strong_ref ls) { + m_listener = static_cast&&>(ls); +} + +} // namespace fs diff --git a/kernel/fs/socket_node.h b/kernel/fs/socket_node.h new file mode 100644 index 00000000..d303f313 --- /dev/null +++ b/kernel/fs/socket_node.h @@ -0,0 +1,26 @@ +#ifndef STELLUX_FS_SOCKET_NODE_H +#define STELLUX_FS_SOCKET_NODE_H + +#include "fs/node.h" +#include "rc/strong_ref.h" + +namespace socket { struct listener_state; } + +namespace fs { + +class socket_node : public node { +public: + socket_node(instance* fs, const char* name); + + int32_t getattr(vattr* attr) override; + + socket::listener_state* get_listener() const { return m_listener.ptr(); } + void set_listener(rc::strong_ref ls); + +private: + rc::strong_ref m_listener; +}; + +} // namespace fs + +#endif // STELLUX_FS_SOCKET_NODE_H diff --git a/kernel/resource/handle_table.cpp b/kernel/resource/handle_table.cpp index b921ae09..66c007d2 100644 --- a/kernel/resource/handle_table.cpp +++ b/kernel/resource/handle_table.cpp @@ -15,7 +15,7 @@ __PRIVILEGED_CODE void init_handle_table(handle_table* table) { for (uint32_t i = 0; i < MAX_TASK_HANDLES; i++) { table->entries[i].used = false; table->entries[i].generation = 0; - table->entries[i].reserved = 0; + table->entries[i].flags = 0; table->entries[i].rights = 0; table->entries[i].type = resource_type::UNKNOWN; table->entries[i].obj = nullptr; @@ -52,6 +52,7 @@ __PRIVILEGED_CODE int32_t alloc_handle( resource_add_ref(obj); entry.used = true; entry.generation++; + entry.flags = 0; entry.rights = rights; 
entry.type = type; entry.obj = obj; @@ -70,7 +71,8 @@ __PRIVILEGED_CODE int32_t get_handle_object( handle_table* table, handle_t handle, uint32_t required_rights, - resource_object** out_obj + resource_object** out_obj, + uint32_t* out_flags ) { if (!table || !out_obj) { return HANDLE_ERR_INVAL; @@ -93,6 +95,59 @@ __PRIVILEGED_CODE int32_t get_handle_object( resource_add_ref(entry.obj); *out_obj = entry.obj; + if (out_flags) { + *out_flags = entry.flags; + } + return HANDLE_OK; +} + +/** + * @note Privilege: **required** + */ +__PRIVILEGED_CODE int32_t get_handle_flags( + handle_table* table, + handle_t handle, + uint32_t* out_flags +) { + if (!table || !out_flags) { + return HANDLE_ERR_INVAL; + } + if (handle < 0 || static_cast(handle) >= MAX_TASK_HANDLES) { + return HANDLE_ERR_NOENT; + } + + sync::irq_lock_guard guard(table->lock); + handle_entry& entry = table->entries[static_cast(handle)]; + if (!entry.used) { + return HANDLE_ERR_NOENT; + } + + *out_flags = entry.flags; + return HANDLE_OK; +} + +/** + * @note Privilege: **required** + */ +__PRIVILEGED_CODE int32_t set_handle_flags( + handle_table* table, + handle_t handle, + uint32_t flags +) { + if (!table) { + return HANDLE_ERR_INVAL; + } + if (handle < 0 || static_cast(handle) >= MAX_TASK_HANDLES) { + return HANDLE_ERR_NOENT; + } + + sync::irq_lock_guard guard(table->lock); + handle_entry& entry = table->entries[static_cast(handle)]; + if (!entry.used) { + return HANDLE_ERR_NOENT; + } + + entry.flags = flags; return HANDLE_OK; } @@ -122,7 +177,7 @@ __PRIVILEGED_CODE int32_t remove_handle( entry.rights = 0; entry.type = resource_type::UNKNOWN; entry.obj = nullptr; - entry.reserved = 0; + entry.flags = 0; *out_obj = obj; return HANDLE_OK; diff --git a/kernel/resource/handle_table.h b/kernel/resource/handle_table.h index 7a853da0..c8de9cd8 100644 --- a/kernel/resource/handle_table.h +++ b/kernel/resource/handle_table.h @@ -13,7 +13,7 @@ constexpr uint32_t MAX_TASK_HANDLES = 128; struct handle_entry { bool used; 
uint16_t generation; - uint16_t reserved; + uint32_t flags; uint32_t rights; resource_type type; resource_object* obj; @@ -58,7 +58,28 @@ __PRIVILEGED_CODE int32_t get_handle_object( handle_table* table, handle_t handle, uint32_t required_rights, - resource_object** out_obj + resource_object** out_obj, + uint32_t* out_flags = nullptr +); + +/** + * @brief Get per-handle flags (O_NONBLOCK, etc.). + * @note Privilege: **required** + */ +__PRIVILEGED_CODE int32_t get_handle_flags( + handle_table* table, + handle_t handle, + uint32_t* out_flags +); + +/** + * @brief Set per-handle flags (O_NONBLOCK, etc.). + * @note Privilege: **required** + */ +__PRIVILEGED_CODE int32_t set_handle_flags( + handle_table* table, + handle_t handle, + uint32_t flags ); /** diff --git a/kernel/resource/providers/file_provider.cpp b/kernel/resource/providers/file_provider.cpp index 5956e4cd..d20d22d2 100644 --- a/kernel/resource/providers/file_provider.cpp +++ b/kernel/resource/providers/file_provider.cpp @@ -30,7 +30,8 @@ __PRIVILEGED_CODE static int32_t map_fs_error_to_resource(int32_t fs_err) { } } -__PRIVILEGED_CODE static ssize_t file_read(resource_object* obj, void* kdst, size_t count) { +__PRIVILEGED_CODE static ssize_t file_read(resource_object* obj, void* kdst, size_t count, uint32_t flags) { + (void)flags; if (!obj || !obj->impl || !kdst) { return ERR_INVAL; } @@ -42,7 +43,8 @@ __PRIVILEGED_CODE static ssize_t file_read(resource_object* obj, void* kdst, siz return rc; } -__PRIVILEGED_CODE static ssize_t file_write(resource_object* obj, const void* ksrc, size_t count) { +__PRIVILEGED_CODE static ssize_t file_write(resource_object* obj, const void* ksrc, size_t count, uint32_t flags) { + (void)flags; if (!obj || !obj->impl || !ksrc) { return ERR_INVAL; } diff --git a/kernel/resource/resource.cpp b/kernel/resource/resource.cpp index 4d3ff3f2..dd89c842 100644 --- a/kernel/resource/resource.cpp +++ b/kernel/resource/resource.cpp @@ -122,14 +122,15 @@ __PRIVILEGED_CODE ssize_t read( } 
resource_object* obj = nullptr; - int32_t rc = get_handle_object(&owner->handles, handle, RIGHT_READ, &obj); + uint32_t handle_flags = 0; + int32_t rc = get_handle_object(&owner->handles, handle, RIGHT_READ, &obj, &handle_flags); if (rc != HANDLE_OK) { return (rc == HANDLE_ERR_ACCESS) ? ERR_ACCESS : ERR_BADF; } ssize_t result = ERR_UNSUP; if (obj->ops && obj->ops->read) { - result = obj->ops->read(obj, kdst, count); + result = obj->ops->read(obj, kdst, count, handle_flags); } resource_release(obj); @@ -150,14 +151,15 @@ __PRIVILEGED_CODE ssize_t write( } resource_object* obj = nullptr; - int32_t rc = get_handle_object(&owner->handles, handle, RIGHT_WRITE, &obj); + uint32_t handle_flags = 0; + int32_t rc = get_handle_object(&owner->handles, handle, RIGHT_WRITE, &obj, &handle_flags); if (rc != HANDLE_OK) { return (rc == HANDLE_ERR_ACCESS) ? ERR_ACCESS : ERR_BADF; } ssize_t result = ERR_UNSUP; if (obj->ops && obj->ops->write) { - result = obj->ops->write(obj, ksrc, count); + result = obj->ops->write(obj, ksrc, count, handle_flags); } resource_release(obj); diff --git a/kernel/resource/resource.h b/kernel/resource/resource.h index 09f92034..c618d58a 100644 --- a/kernel/resource/resource.h +++ b/kernel/resource/resource.h @@ -11,8 +11,8 @@ namespace resource { struct resource_object; -using read_fn = ssize_t (*)(resource_object* obj, void* kdst, size_t count); -using write_fn = ssize_t (*)(resource_object* obj, const void* ksrc, size_t count); +using read_fn = ssize_t (*)(resource_object* obj, void* kdst, size_t count, uint32_t flags); +using write_fn = ssize_t (*)(resource_object* obj, const void* ksrc, size_t count, uint32_t flags); using close_fn = void (*)(resource_object* obj); struct resource_ops { @@ -44,6 +44,12 @@ constexpr int32_t ERR_TABLEFULL = -7; constexpr int32_t ERR_UNSUP = -8; constexpr int32_t ERR_NOTDIR = -9; constexpr int32_t ERR_NAMETOOLONG = -10; +constexpr int32_t ERR_PIPE = -11; +constexpr int32_t ERR_NOTCONN = -12; +constexpr int32_t 
ERR_CONNREFUSED = -13; +constexpr int32_t ERR_ADDRINUSE = -14; +constexpr int32_t ERR_ISCONN = -15; +constexpr int32_t ERR_AGAIN = -16; /** * @brief Initialize handle table storage in task. diff --git a/kernel/resource/resource_types.h b/kernel/resource/resource_types.h index 0959d67a..75e684e3 100644 --- a/kernel/resource/resource_types.h +++ b/kernel/resource/resource_types.h @@ -8,6 +8,7 @@ namespace resource { enum class resource_type : uint16_t { UNKNOWN = 0, FILE = 1, + SOCKET = 2, }; using handle_t = int32_t; diff --git a/kernel/socket/listener.cpp b/kernel/socket/listener.cpp new file mode 100644 index 00000000..636b3a7c --- /dev/null +++ b/kernel/socket/listener.cpp @@ -0,0 +1,29 @@ +#include "socket/listener.h" +#include "resource/resource.h" +#include "mm/heap.h" + +namespace socket { + +/** + * @note Privilege: **required** + */ +__PRIVILEGED_CODE void listener_state::ref_destroy(listener_state* self) { + if (!self) { + return; + } + + // No lock needed: refcount is 0, so no other thread has access. + // socket_close already drained the queue and woke waiters before + // dropping its ref. This handles the case where entries remain + // (e.g., socket_node was the last ref holder after unlink). 
+ self->closed = true; + while (pending_conn* pc = self->accept_queue.pop_front()) { + self->pending_count--; + resource::resource_release(pc->server_obj); + heap::kfree(pc); + } + + heap::kfree_delete(self); +} + +} // namespace socket diff --git a/kernel/socket/listener.h b/kernel/socket/listener.h new file mode 100644 index 00000000..b83eff76 --- /dev/null +++ b/kernel/socket/listener.h @@ -0,0 +1,39 @@ +#ifndef STELLUX_SOCKET_LISTENER_H +#define STELLUX_SOCKET_LISTENER_H + +#include "common/types.h" +#include "common/list.h" +#include "rc/ref_counted.h" +#include "sync/spinlock.h" +#include "sync/wait_queue.h" + +namespace resource { struct resource_object; } + +namespace socket { + +constexpr uint32_t DEFAULT_BACKLOG = 16; +constexpr uint32_t MAX_BACKLOG = 32; + +struct pending_conn { + list::node link; + resource::resource_object* server_obj; +}; + +struct listener_state : rc::ref_counted { + sync::spinlock lock; + bool closed; + list::head accept_queue; + sync::wait_queue accept_wq; + uint32_t backlog; + uint32_t pending_count; + + /** + * Drains accept_queue, releases all pending server objects, frees self. 
+ * @note Privilege: **required** + */ + __PRIVILEGED_CODE static void ref_destroy(listener_state* self); +}; + +} // namespace socket + +#endif // STELLUX_SOCKET_LISTENER_H diff --git a/kernel/socket/ring_buffer.cpp b/kernel/socket/ring_buffer.cpp new file mode 100644 index 00000000..bc849358 --- /dev/null +++ b/kernel/socket/ring_buffer.cpp @@ -0,0 +1,188 @@ +#include "socket/ring_buffer.h" +#include "mm/heap.h" +#include "common/string.h" +#include "resource/resource.h" + +namespace socket { + +static inline size_t readable_bytes(const ring_buffer* rb) { + return (rb->head - rb->tail) & (rb->capacity - 1); +} + +static inline size_t writable_bytes(const ring_buffer* rb) { + return rb->capacity - 1 - readable_bytes(rb); +} + +/** + * @note Privilege: **required** + */ +__PRIVILEGED_CODE ring_buffer* ring_buffer_create(size_t capacity) { + if (capacity == 0) { + return nullptr; + } + + size_t cap = 1; + while (cap < capacity + 1) { + cap <<= 1; + } + + auto* rb = static_cast(heap::kzalloc(sizeof(ring_buffer))); + if (!rb) { + return nullptr; + } + + rb->data = static_cast(heap::uzalloc(cap)); + if (!rb->data) { + heap::kfree(rb); + return nullptr; + } + + rb->capacity = cap; + rb->head = 0; + rb->tail = 0; + rb->writer_closed = false; + rb->reader_closed = false; + rb->lock = sync::SPINLOCK_INIT; + rb->read_wq.init(); + rb->write_wq.init(); + + return rb; +} + +/** + * @note Privilege: **required** + */ +__PRIVILEGED_CODE void ring_buffer_destroy(ring_buffer* rb) { + if (!rb) { + return; + } + if (rb->data) { + heap::ufree(rb->data); + rb->data = nullptr; + } + heap::kfree(rb); +} + +/** + * @note Privilege: **required** + */ +__PRIVILEGED_CODE ssize_t ring_buffer_read(ring_buffer* rb, uint8_t* buf, size_t len, bool nonblock) { + if (!rb || !buf || len == 0) { + return resource::ERR_INVAL; + } + + sync::irq_state irq = sync::spin_lock_irqsave(rb->lock); + + if (readable_bytes(rb) == 0 && !rb->writer_closed) { + if (nonblock) { + 
sync::spin_unlock_irqrestore(rb->lock, irq); + return resource::ERR_AGAIN; + } + while (readable_bytes(rb) == 0 && !rb->writer_closed) { + irq = sync::wait(rb->read_wq, rb->lock, irq); + } + } + + size_t avail = readable_bytes(rb); + if (avail == 0) { + sync::spin_unlock_irqrestore(rb->lock, irq); + return 0; // EOF + } + + size_t to_read = avail < len ? avail : len; + size_t tail_idx = rb->tail & (rb->capacity - 1); + size_t first = rb->capacity - tail_idx; + if (first > to_read) { + first = to_read; + } + + string::memcpy(buf, rb->data + tail_idx, first); + if (first < to_read) { + string::memcpy(buf + first, rb->data, to_read - first); + } + + rb->tail += to_read; + + sync::spin_unlock_irqrestore(rb->lock, irq); + sync::wake_one(rb->write_wq); + + return static_cast(to_read); +} + +/** + * @note Privilege: **required** + */ +__PRIVILEGED_CODE ssize_t ring_buffer_write(ring_buffer* rb, const uint8_t* buf, size_t len, bool nonblock) { + if (!rb || !buf || len == 0) { + return resource::ERR_INVAL; + } + + sync::irq_state irq = sync::spin_lock_irqsave(rb->lock); + + if (writable_bytes(rb) == 0 && !rb->reader_closed) { + if (nonblock) { + sync::spin_unlock_irqrestore(rb->lock, irq); + return resource::ERR_AGAIN; + } + while (writable_bytes(rb) == 0 && !rb->reader_closed) { + irq = sync::wait(rb->write_wq, rb->lock, irq); + } + } + + if (rb->reader_closed) { + sync::spin_unlock_irqrestore(rb->lock, irq); + return resource::ERR_PIPE; + } + + size_t space = writable_bytes(rb); + size_t to_write = space < len ? 
space : len; + size_t head_idx = rb->head & (rb->capacity - 1); + size_t first = rb->capacity - head_idx; + if (first > to_write) { + first = to_write; + } + + string::memcpy(rb->data + head_idx, buf, first); + if (first < to_write) { + string::memcpy(rb->data, buf + first, to_write - first); + } + + rb->head += to_write; + + sync::spin_unlock_irqrestore(rb->lock, irq); + sync::wake_one(rb->read_wq); + + return static_cast(to_write); +} + +/** + * @note Privilege: **required** + */ +__PRIVILEGED_CODE void ring_buffer_close_write(ring_buffer* rb) { + if (!rb) { + return; + } + + sync::irq_state irq = sync::spin_lock_irqsave(rb->lock); + rb->writer_closed = true; + sync::spin_unlock_irqrestore(rb->lock, irq); + + sync::wake_all(rb->read_wq); +} + +/** + * @note Privilege: **required** + */ +__PRIVILEGED_CODE void ring_buffer_close_read(ring_buffer* rb) { + if (!rb) { + return; + } + + sync::irq_state irq = sync::spin_lock_irqsave(rb->lock); + rb->reader_closed = true; + sync::spin_unlock_irqrestore(rb->lock, irq); + + sync::wake_all(rb->write_wq); +} + +} // namespace socket diff --git a/kernel/socket/ring_buffer.h b/kernel/socket/ring_buffer.h new file mode 100644 index 00000000..6b8b3760 --- /dev/null +++ b/kernel/socket/ring_buffer.h @@ -0,0 +1,66 @@ +#ifndef STELLUX_SOCKET_RING_BUFFER_H +#define STELLUX_SOCKET_RING_BUFFER_H + +#include "common/types.h" +#include "sync/spinlock.h" +#include "sync/wait_queue.h" + +namespace socket { + +constexpr size_t DEFAULT_CAPACITY = 8192; + +struct ring_buffer { + uint8_t* data; + size_t capacity; + size_t head; // write position + size_t tail; // read position + bool writer_closed; + bool reader_closed; + sync::spinlock lock; + sync::wait_queue read_wq; + sync::wait_queue write_wq; +}; + +/** + * Allocate and initialize a ring buffer. + * Control struct from privileged heap, data from unprivileged heap. + * @return Ring buffer pointer on success, nullptr on allocation failure. 
+ * @note Privilege: **required** + */ +[[nodiscard]] __PRIVILEGED_CODE ring_buffer* ring_buffer_create(size_t capacity); + +/** + * Free a ring buffer and its data. Must only be called when no waiters remain. + * @note Privilege: **required** + */ +__PRIVILEGED_CODE void ring_buffer_destroy(ring_buffer* rb); + +/** + * Read from ring buffer. Blocks when empty unless nonblock is true. + * @return Bytes read (> 0), 0 on EOF, ERR_AGAIN if nonblock and empty, or negative error. + * @note Privilege: **required** + */ +[[nodiscard]] __PRIVILEGED_CODE ssize_t ring_buffer_read(ring_buffer* rb, uint8_t* buf, size_t len, bool nonblock = false); + +/** + * Write to ring buffer. Blocks when full unless nonblock is true. + * @return Bytes written (> 0), ERR_AGAIN if nonblock and full, ERR_PIPE if reader closed. + * @note Privilege: **required** + */ +[[nodiscard]] __PRIVILEGED_CODE ssize_t ring_buffer_write(ring_buffer* rb, const uint8_t* buf, size_t len, bool nonblock = false); + +/** + * Mark the write side as closed. Wakes all blocked readers so they can see EOF. + * @note Privilege: **required** + */ +__PRIVILEGED_CODE void ring_buffer_close_write(ring_buffer* rb); + +/** + * Mark the read side as closed. Wakes all blocked writers so they get ERR_PIPE. 
+ * @note Privilege: **required** + */ +__PRIVILEGED_CODE void ring_buffer_close_read(ring_buffer* rb); + +} // namespace socket + +#endif // STELLUX_SOCKET_RING_BUFFER_H diff --git a/kernel/socket/unix_socket.cpp b/kernel/socket/unix_socket.cpp new file mode 100644 index 00000000..2b8b8c65 --- /dev/null +++ b/kernel/socket/unix_socket.cpp @@ -0,0 +1,214 @@ +#include "socket/unix_socket.h" +#include "mm/heap.h" +#include "sync/spinlock.h" +#include "fs/fstypes.h" + +namespace socket { + +__PRIVILEGED_CODE void unix_channel::ref_destroy(unix_channel* self) { + if (!self) { + return; + } + ring_buffer_destroy(self->buf_a_to_b); + ring_buffer_destroy(self->buf_b_to_a); + heap::kfree_delete(self); +} + +__PRIVILEGED_CODE static ssize_t socket_read( + resource::resource_object* obj, void* kdst, size_t count, uint32_t flags +) { + if (!obj || !obj->impl || !kdst) { + return resource::ERR_INVAL; + } + auto* sock = static_cast(obj->impl); + if (sock->state != SOCK_STATE_CONNECTED) { + return resource::ERR_NOTCONN; + } + ring_buffer* rb = sock->is_side_a + ? sock->channel->buf_b_to_a + : sock->channel->buf_a_to_b; + bool nonblock = (flags & fs::O_NONBLOCK) != 0; + return ring_buffer_read(rb, static_cast(kdst), count, nonblock); +} + +__PRIVILEGED_CODE static ssize_t socket_write( + resource::resource_object* obj, const void* ksrc, size_t count, uint32_t flags +) { + if (!obj || !obj->impl || !ksrc) { + return resource::ERR_INVAL; + } + auto* sock = static_cast(obj->impl); + if (sock->state != SOCK_STATE_CONNECTED) { + return resource::ERR_NOTCONN; + } + ring_buffer* rb = sock->is_side_a + ? 
sock->channel->buf_a_to_b + : sock->channel->buf_b_to_a; + bool nonblock = (flags & fs::O_NONBLOCK) != 0; + return ring_buffer_write(rb, static_cast(ksrc), count, nonblock); +} + +__PRIVILEGED_CODE static void socket_close(resource::resource_object* obj) { + if (!obj || !obj->impl) { + return; + } + auto* sock = static_cast(obj->impl); + + switch (sock->state) { + case SOCK_STATE_UNBOUND: + break; + + case SOCK_STATE_BOUND: + // bound_node strong_ref drops via ~unix_socket + break; + + case SOCK_STATE_LISTENING: { + if (sock->listener) { + sync::irq_state irq = sync::spin_lock_irqsave(sock->listener->lock); + sock->listener->closed = true; + while (pending_conn* pc = sock->listener->accept_queue.pop_front()) { + sock->listener->pending_count--; + resource::resource_release(pc->server_obj); + heap::kfree(pc); + } + sync::spin_unlock_irqrestore(sock->listener->lock, irq); + sync::wake_all(sock->listener->accept_wq); + } + // bound_node and listener strong_refs drop via ~unix_socket + break; + } + + case SOCK_STATE_CONNECTED: { + rc::strong_ref chan = sock->channel; + if (chan) { + if (sock->is_side_a) { + ring_buffer_close_write(chan->buf_a_to_b); + ring_buffer_close_read(chan->buf_b_to_a); + } else { + ring_buffer_close_write(chan->buf_b_to_a); + ring_buffer_close_read(chan->buf_a_to_b); + } + } + break; + } + } + + heap::kfree_delete(sock); + obj->impl = nullptr; +} + +static const resource::resource_ops g_socket_ops = { + socket_read, + socket_write, + socket_close, +}; + +const resource::resource_ops* get_socket_ops() { + return &g_socket_ops; +} + +/** + * @note Privilege: **required** + */ +__PRIVILEGED_CODE int32_t create_unbound_socket( + resource::resource_object** out +) { + if (!out) { + return resource::ERR_INVAL; + } + + auto* sock = heap::kalloc_new(); + if (!sock) { + return resource::ERR_NOMEM; + } + sock->state = SOCK_STATE_UNBOUND; + sock->lock = sync::SPINLOCK_INIT; + sock->is_side_a = false; + + auto* obj = heap::kalloc_new(); + if (!obj) { + 
heap::kfree_delete(sock); + return resource::ERR_NOMEM; + } + obj->type = resource::resource_type::SOCKET; + obj->ops = &g_socket_ops; + obj->impl = sock; + + *out = obj; + return resource::OK; +} + +/** + * @note Privilege: **required** + */ +__PRIVILEGED_CODE int32_t create_socket_pair( + resource::resource_object** out_a, + resource::resource_object** out_b +) { + if (!out_a || !out_b) { + return resource::ERR_INVAL; + } + + auto chan = rc::make_kref(); + if (!chan) { + return resource::ERR_NOMEM; + } + chan->buf_a_to_b = nullptr; + chan->buf_b_to_a = nullptr; + + chan->buf_a_to_b = ring_buffer_create(DEFAULT_CAPACITY); + if (!chan->buf_a_to_b) { + return resource::ERR_NOMEM; + } + + chan->buf_b_to_a = ring_buffer_create(DEFAULT_CAPACITY); + if (!chan->buf_b_to_a) { + return resource::ERR_NOMEM; + } + + auto* sock_a = heap::kalloc_new(); + if (!sock_a) { + return resource::ERR_NOMEM; + } + sock_a->state = SOCK_STATE_CONNECTED; + sock_a->lock = sync::SPINLOCK_INIT; + sock_a->is_side_a = true; + sock_a->channel = chan; + + auto* sock_b = heap::kalloc_new(); + if (!sock_b) { + heap::kfree_delete(sock_a); + return resource::ERR_NOMEM; + } + sock_b->state = SOCK_STATE_CONNECTED; + sock_b->lock = sync::SPINLOCK_INIT; + sock_b->is_side_a = false; + sock_b->channel = static_cast&&>(chan); + + auto* obj_a = heap::kalloc_new(); + if (!obj_a) { + heap::kfree_delete(sock_b); + heap::kfree_delete(sock_a); + return resource::ERR_NOMEM; + } + obj_a->type = resource::resource_type::SOCKET; + obj_a->ops = &g_socket_ops; + obj_a->impl = sock_a; + + auto* obj_b = heap::kalloc_new(); + if (!obj_b) { + heap::kfree_delete(obj_a); + heap::kfree_delete(sock_b); + heap::kfree_delete(sock_a); + return resource::ERR_NOMEM; + } + obj_b->type = resource::resource_type::SOCKET; + obj_b->ops = &g_socket_ops; + obj_b->impl = sock_b; + + *out_a = obj_a; + *out_b = obj_b; + return resource::OK; +} + +} // namespace socket diff --git a/kernel/socket/unix_socket.h b/kernel/socket/unix_socket.h new 
file mode 100644 index 00000000..69adb20f --- /dev/null +++ b/kernel/socket/unix_socket.h @@ -0,0 +1,74 @@ +#ifndef STELLUX_SOCKET_UNIX_SOCKET_H +#define STELLUX_SOCKET_UNIX_SOCKET_H + +#include "common/types.h" +#include "rc/ref_counted.h" +#include "rc/strong_ref.h" +#include "sync/spinlock.h" +#include "socket/ring_buffer.h" +#include "socket/listener.h" +#include "resource/resource.h" +#include "fs/node.h" + +namespace socket { + +constexpr uint32_t SOCK_STATE_UNBOUND = 0; +constexpr uint32_t SOCK_STATE_BOUND = 1; +constexpr uint32_t SOCK_STATE_LISTENING = 2; +constexpr uint32_t SOCK_STATE_CONNECTED = 3; + +constexpr size_t UNIX_PATH_MAX = 108; + +struct unix_channel : rc::ref_counted { + ring_buffer* buf_a_to_b; + ring_buffer* buf_b_to_a; + + /** + * @note Privilege: **required** + */ + __PRIVILEGED_CODE static void ref_destroy(unix_channel* self); +}; + +struct unix_socket { + uint32_t state; + sync::spinlock lock; + + // CONNECTED state + rc::strong_ref channel; + bool is_side_a; + + // BOUND / LISTENING state + char bound_path[UNIX_PATH_MAX]; + rc::strong_ref bound_node; + + // LISTENING state + rc::strong_ref listener; +}; + +/** + * Create a connected socket pair. + * On success, *out_a and *out_b each have refcount 1. + * Caller must install handles and release the creation refs. + * @note Privilege: **required** + */ +__PRIVILEGED_CODE int32_t create_socket_pair( + resource::resource_object** out_a, + resource::resource_object** out_b +); + +/** + * Create an unbound socket. Returns a resource_object with refcount 1. + * @note Privilege: **required** + */ +__PRIVILEGED_CODE int32_t create_unbound_socket( + resource::resource_object** out +); + +/** + * Access the global socket ops table (for connect server-side object creation). 
+ */ +const resource::resource_ops* get_socket_ops(); + +} // namespace socket + +#endif // STELLUX_SOCKET_UNIX_SOCKET_H diff --git a/kernel/syscall/handlers/sys_fd.cpp b/kernel/syscall/handlers/sys_fd.cpp index 01fa3f8b..8a44c297 100644 --- a/kernel/syscall/handlers/sys_fd.cpp +++ b/kernel/syscall/handlers/sys_fd.cpp @@ -31,6 +31,18 @@ inline int64_t map_resource_error(int64_t rc) { return syscall::EBADF; case resource::ERR_UNSUP: return syscall::ENOSYS; + case resource::ERR_PIPE: + return syscall::EPIPE; + case resource::ERR_NOTCONN: + return syscall::ENOTCONN; + case resource::ERR_CONNREFUSED: + return syscall::ECONNREFUSED; + case resource::ERR_ADDRINUSE: + return syscall::EADDRINUSE; + case resource::ERR_ISCONN: + return syscall::EISCONN; + case resource::ERR_AGAIN: + return syscall::EAGAIN; case resource::ERR_IO: default: return syscall::EIO; @@ -218,3 +230,38 @@ DEFINE_SYSCALL1(close, fd) { } return 0; } + +namespace { +constexpr uint64_t F_GETFL = 3; +constexpr uint64_t F_SETFL = 4; +constexpr uint32_t SETFL_MASK = fs::O_NONBLOCK | fs::O_APPEND; +} // anonymous namespace + +DEFINE_SYSCALL3(fcntl, fd, cmd, arg) { + sched::task* task = sched::current(); + if (!task) { + return syscall::EIO; + } + + if (cmd == F_GETFL) { + uint32_t flags = 0; + int32_t rc = resource::get_handle_flags( + &task->handles, static_cast(fd), &flags); + if (rc != resource::HANDLE_OK) { + return syscall::EBADF; + } + return static_cast(flags); + } + + if (cmd == F_SETFL) { + uint32_t flags = static_cast(arg) & SETFL_MASK; + int32_t rc = resource::set_handle_flags( + &task->handles, static_cast(fd), flags); + if (rc != resource::HANDLE_OK) { + return syscall::EBADF; + } + return 0; + } + + return syscall::EINVAL; +} diff --git a/kernel/syscall/handlers/sys_fd.h b/kernel/syscall/handlers/sys_fd.h index 689916a7..74918682 100644 --- a/kernel/syscall/handlers/sys_fd.h +++ b/kernel/syscall/handlers/sys_fd.h @@ -8,5 +8,6 @@ DECLARE_SYSCALL(open); DECLARE_SYSCALL(read); 
DECLARE_SYSCALL(write); DECLARE_SYSCALL(close); +DECLARE_SYSCALL(fcntl); #endif // STELLUX_SYSCALL_HANDLERS_SYS_FD_H diff --git a/kernel/syscall/handlers/sys_socket.cpp b/kernel/syscall/handlers/sys_socket.cpp new file mode 100644 index 00000000..03134204 --- /dev/null +++ b/kernel/syscall/handlers/sys_socket.cpp @@ -0,0 +1,511 @@ +#include "syscall/handlers/sys_socket.h" + +#include "socket/unix_socket.h" +#include "socket/listener.h" +#include "fs/fs.h" +#include "fs/socket_node.h" +#include "resource/resource.h" +#include "sched/sched.h" +#include "sched/task.h" +#include "mm/uaccess.h" +#include "mm/heap.h" +#include "common/string.h" +#include "hw/barrier.h" + +namespace { + +constexpr uint64_t AF_UNIX = 1; +constexpr uint64_t SOCK_STREAM = 1; +constexpr size_t SUN_PATH_OFFSET = 2; + +struct sockaddr_un { + uint16_t sun_family; + char sun_path[socket::UNIX_PATH_MAX]; +}; + +int64_t parse_sockaddr_un(uint64_t addr, uint64_t addrlen, char* kpath_out) { + if (addrlen < sizeof(uint16_t)) { + return syscall::EINVAL; + } + if (addr == 0) { + return syscall::EFAULT; + } + + sockaddr_un kaddr{}; + size_t copy_len = static_cast(addrlen) < sizeof(sockaddr_un) + ? 
static_cast(addrlen) : sizeof(sockaddr_un); + int32_t rc = mm::uaccess::copy_from_user( + &kaddr, reinterpret_cast(addr), copy_len); + if (rc != mm::uaccess::OK) { + return syscall::EFAULT; + } + if (kaddr.sun_family != AF_UNIX) { + return syscall::EINVAL; + } + + size_t path_max = copy_len - SUN_PATH_OFFSET; + if (path_max == 0) { + return syscall::EINVAL; + } + + bool found_null = false; + for (size_t i = 0; i < path_max; i++) { + if (kaddr.sun_path[i] == '\0') { + found_null = true; + break; + } + } + if (!found_null) { + return syscall::EINVAL; + } + if (kaddr.sun_path[0] == '\0') { + return syscall::EINVAL; + } + if (kaddr.sun_path[0] != '/') { + return syscall::EINVAL; + } + + string::memcpy(kpath_out, kaddr.sun_path, socket::UNIX_PATH_MAX); + return 0; +} + +} // anonymous namespace + +DEFINE_SYSCALL3(socket, domain, type, protocol) { + if (domain != AF_UNIX) { + return syscall::EINVAL; + } + if (type != SOCK_STREAM) { + return syscall::EINVAL; + } + if (protocol != 0) { + return syscall::EINVAL; + } + + sched::task* task = sched::current(); + if (!task) { + return syscall::EIO; + } + + resource::resource_object* obj = nullptr; + int32_t rc = socket::create_unbound_socket(&obj); + if (rc != resource::OK) { + return syscall::ENOMEM; + } + + resource::handle_t h = -1; + rc = resource::alloc_handle( + &task->handles, obj, resource::resource_type::SOCKET, + resource::RIGHT_READ | resource::RIGHT_WRITE, &h + ); + if (rc != resource::HANDLE_OK) { + resource::resource_release(obj); + return syscall::EMFILE; + } + resource::resource_release(obj); + return h; +} + +DEFINE_SYSCALL4(socketpair, domain, type, protocol, sv) { + if (domain != AF_UNIX) { + return syscall::EINVAL; + } + if (type != SOCK_STREAM) { + return syscall::EINVAL; + } + if (protocol != 0) { + return syscall::EINVAL; + } + if (sv == 0) { + return syscall::EFAULT; + } + + sched::task* task = sched::current(); + if (!task) { + return syscall::EIO; + } + + resource::resource_object* obj_a = nullptr; + 
resource::resource_object* obj_b = nullptr; + int32_t rc = socket::create_socket_pair(&obj_a, &obj_b); + if (rc != resource::OK) { + return syscall::ENOMEM; + } + + resource::handle_t h0 = -1; + rc = resource::alloc_handle( + &task->handles, obj_a, resource::resource_type::SOCKET, + resource::RIGHT_READ | resource::RIGHT_WRITE, &h0 + ); + if (rc != resource::HANDLE_OK) { + resource::resource_release(obj_a); + resource::resource_release(obj_b); + return syscall::EMFILE; + } + resource::resource_release(obj_a); + + resource::handle_t h1 = -1; + rc = resource::alloc_handle( + &task->handles, obj_b, resource::resource_type::SOCKET, + resource::RIGHT_READ | resource::RIGHT_WRITE, &h1 + ); + if (rc != resource::HANDLE_OK) { + resource::close(task, h0); + resource::resource_release(obj_b); + return syscall::EMFILE; + } + resource::resource_release(obj_b); + + int32_t kbuf[2] = {h0, h1}; + int32_t copy_rc = mm::uaccess::copy_to_user( + reinterpret_cast(sv), kbuf, sizeof(kbuf) + ); + if (copy_rc != mm::uaccess::OK) { + resource::close(task, h1); + resource::close(task, h0); + return syscall::EFAULT; + } + + return 0; +} + +DEFINE_SYSCALL3(bind, fd, addr, addrlen) { + char kpath[socket::UNIX_PATH_MAX]; + int64_t parse_rc = parse_sockaddr_un(addr, addrlen, kpath); + if (parse_rc != 0) { + return parse_rc; + } + + sched::task* task = sched::current(); + if (!task) { + return syscall::EIO; + } + + resource::resource_object* obj = nullptr; + int32_t rc = resource::get_handle_object( + &task->handles, static_cast(fd), + resource::RIGHT_READ, &obj + ); + if (rc != resource::HANDLE_OK) { + return syscall::EBADF; + } + if (obj->type != resource::resource_type::SOCKET || !obj->impl) { + resource::resource_release(obj); + return syscall::EINVAL; + } + auto* sock = static_cast(obj->impl); + if (sock->state != socket::SOCK_STATE_UNBOUND) { + resource::resource_release(obj); + return syscall::EINVAL; + } + + fs::node* parent = nullptr; + const char* name = nullptr; + size_t name_len = 0; 
+ rc = fs::resolve_parent_path(kpath, &parent, &name, &name_len); + if (rc != fs::OK) { + resource::resource_release(obj); + if (rc == fs::ERR_NOENT) return syscall::ENOENT; + if (rc == fs::ERR_NOTDIR) return syscall::ENOTDIR; + return syscall::EINVAL; + } + + fs::node* sock_node = nullptr; + rc = parent->create_socket(name, name_len, nullptr, &sock_node); + if (parent->release()) { + fs::node::ref_destroy(parent); + } + if (rc != fs::OK) { + resource::resource_release(obj); + if (rc == fs::ERR_EXIST) return syscall::EADDRINUSE; + if (rc == fs::ERR_NOMEM) return syscall::ENOMEM; + return syscall::EIO; + } + + string::memcpy(sock->bound_path, kpath, socket::UNIX_PATH_MAX); + sock->bound_node = rc::strong_ref::adopt(sock_node); + sock->state = socket::SOCK_STATE_BOUND; + + resource::resource_release(obj); + return 0; +} + +DEFINE_SYSCALL2(listen, fd, backlog) { + sched::task* task = sched::current(); + if (!task) { + return syscall::EIO; + } + + resource::resource_object* obj = nullptr; + int32_t rc = resource::get_handle_object( + &task->handles, static_cast(fd), + resource::RIGHT_READ, &obj + ); + if (rc != resource::HANDLE_OK) { + return syscall::EBADF; + } + if (obj->type != resource::resource_type::SOCKET || !obj->impl) { + resource::resource_release(obj); + return syscall::EINVAL; + } + auto* sock = static_cast(obj->impl); + if (sock->state != socket::SOCK_STATE_BOUND) { + resource::resource_release(obj); + return syscall::EINVAL; + } + + auto ls = rc::make_kref(); + if (!ls) { + resource::resource_release(obj); + return syscall::ENOMEM; + } + ls->lock = sync::SPINLOCK_INIT; + ls->closed = false; + ls->accept_queue.init(); + ls->accept_wq.init(); + uint32_t bl = static_cast(backlog); + if (bl == 0) bl = socket::DEFAULT_BACKLOG; + if (bl > socket::MAX_BACKLOG) bl = socket::MAX_BACKLOG; + ls->backlog = bl; + ls->pending_count = 0; + + sock->listener = ls; + + if (sock->bound_node) { + auto* sn = static_cast(sock->bound_node.ptr()); + rc::strong_ref ls_copy = ls; 
+ sn->set_listener(static_cast&&>(ls_copy)); + } + + sock->state = socket::SOCK_STATE_LISTENING; + resource::resource_release(obj); + return 0; +} + +DEFINE_SYSCALL3(connect, fd, addr, addrlen) { + char kpath[socket::UNIX_PATH_MAX]; + int64_t parse_rc = parse_sockaddr_un(addr, addrlen, kpath); + if (parse_rc != 0) { + return parse_rc; + } + + sched::task* task = sched::current(); + if (!task) { + return syscall::EIO; + } + + resource::resource_object* client_obj = nullptr; + int32_t rc = resource::get_handle_object( + &task->handles, static_cast(fd), + resource::RIGHT_READ, &client_obj + ); + if (rc != resource::HANDLE_OK) { + return syscall::EBADF; + } + if (client_obj->type != resource::resource_type::SOCKET || !client_obj->impl) { + resource::resource_release(client_obj); + return syscall::EINVAL; + } + auto* client_sock = static_cast(client_obj->impl); + if (client_sock->state == socket::SOCK_STATE_CONNECTED) { + resource::resource_release(client_obj); + return syscall::EISCONN; + } + if (client_sock->state == socket::SOCK_STATE_LISTENING) { + resource::resource_release(client_obj); + return syscall::EINVAL; + } + + fs::node* target_node = nullptr; + rc = fs::lookup(kpath, &target_node); + if (rc != fs::OK) { + resource::resource_release(client_obj); + if (rc == fs::ERR_NOENT) return syscall::ENOENT; + return syscall::ECONNREFUSED; + } + if (target_node->type() != fs::node_type::socket) { + if (target_node->release()) { + fs::node::ref_destroy(target_node); + } + resource::resource_release(client_obj); + return syscall::ECONNREFUSED; + } + + auto* sn = static_cast(target_node); + socket::listener_state* raw_ls = sn->get_listener(); + if (!raw_ls) { + if (target_node->release()) { + fs::node::ref_destroy(target_node); + } + resource::resource_release(client_obj); + return syscall::ECONNREFUSED; + } + + rc::strong_ref ls_ref = + rc::strong_ref::try_from_raw(raw_ls); + if (target_node->release()) { + fs::node::ref_destroy(target_node); + } + if (!ls_ref) { + 
resource::resource_release(client_obj); + return syscall::ECONNREFUSED; + } + + // Allocate everything BEFORE acquiring the listener lock + auto chan = rc::make_kref(); + if (!chan) { + resource::resource_release(client_obj); + return syscall::ENOMEM; + } + chan->buf_a_to_b = nullptr; + chan->buf_b_to_a = nullptr; + + chan->buf_a_to_b = socket::ring_buffer_create(socket::DEFAULT_CAPACITY); + if (!chan->buf_a_to_b) { + resource::resource_release(client_obj); + return syscall::ENOMEM; + } + + chan->buf_b_to_a = socket::ring_buffer_create(socket::DEFAULT_CAPACITY); + if (!chan->buf_b_to_a) { + resource::resource_release(client_obj); + return syscall::ENOMEM; + } + + auto* server_sock = heap::kalloc_new(); + if (!server_sock) { + resource::resource_release(client_obj); + return syscall::ENOMEM; + } + server_sock->state = socket::SOCK_STATE_CONNECTED; + server_sock->lock = sync::SPINLOCK_INIT; + server_sock->is_side_a = true; + server_sock->channel = chan; + + auto* server_obj = heap::kalloc_new(); + if (!server_obj) { + heap::kfree_delete(server_sock); + resource::resource_release(client_obj); + return syscall::ENOMEM; + } + server_obj->type = resource::resource_type::SOCKET; + server_obj->ops = socket::get_socket_ops(); + server_obj->impl = server_sock; + + auto* pc = static_cast( + heap::kzalloc(sizeof(socket::pending_conn))); + if (!pc) { + heap::kfree_delete(server_obj); + heap::kfree_delete(server_sock); + resource::resource_release(client_obj); + return syscall::ENOMEM; + } + + // Lock listener, re-check state, enqueue + sync::irq_state irq = sync::spin_lock_irqsave(ls_ref->lock); + if (ls_ref->closed || ls_ref->pending_count >= ls_ref->backlog) { + sync::spin_unlock_irqrestore(ls_ref->lock, irq); + heap::kfree(pc); + heap::kfree_delete(server_obj); + heap::kfree_delete(server_sock); + resource::resource_release(client_obj); + return syscall::ECONNREFUSED; + } + + // Mutate client socket to CONNECTED (side B) before waking the + // accept thread so that readers 
on AArch64 never see state == + // CONNECTED with a null channel pointer. + client_sock->channel = static_cast&&>(chan); + client_sock->is_side_a = false; + barrier::smp_write(); + client_sock->state = socket::SOCK_STATE_CONNECTED; + + pc->server_obj = server_obj; + ls_ref->accept_queue.push_back(pc); + ls_ref->pending_count++; + sync::spin_unlock_irqrestore(ls_ref->lock, irq); + sync::wake_one(ls_ref->accept_wq); + + resource::resource_release(client_obj); + return 0; +} + +DEFINE_SYSCALL3(accept, fd, addr, addrlen) { + sched::task* task = sched::current(); + if (!task) { + return syscall::EIO; + } + + resource::resource_object* listen_obj = nullptr; + uint32_t handle_flags = 0; + int32_t rc = resource::get_handle_object( + &task->handles, static_cast(fd), + resource::RIGHT_READ, &listen_obj, &handle_flags + ); + if (rc != resource::HANDLE_OK) { + return syscall::EBADF; + } + if (listen_obj->type != resource::resource_type::SOCKET || !listen_obj->impl) { + resource::resource_release(listen_obj); + return syscall::EINVAL; + } + auto* sock = static_cast(listen_obj->impl); + if (sock->state != socket::SOCK_STATE_LISTENING || !sock->listener) { + resource::resource_release(listen_obj); + return syscall::EINVAL; + } + + bool nonblock = (handle_flags & fs::O_NONBLOCK) != 0; + socket::listener_state* ls = sock->listener.ptr(); + + sync::irq_state irq = sync::spin_lock_irqsave(ls->lock); + if (ls->accept_queue.empty()) { + if (ls->closed) { + sync::spin_unlock_irqrestore(ls->lock, irq); + resource::resource_release(listen_obj); + return syscall::EINVAL; + } + if (nonblock) { + sync::spin_unlock_irqrestore(ls->lock, irq); + resource::resource_release(listen_obj); + return syscall::EAGAIN; + } + } + while (ls->accept_queue.empty() && !ls->closed) { + irq = sync::wait(ls->accept_wq, ls->lock, irq); + } + if (ls->accept_queue.empty()) { + sync::spin_unlock_irqrestore(ls->lock, irq); + resource::resource_release(listen_obj); + return syscall::EINVAL; + } + + 
socket::pending_conn* pc = ls->accept_queue.pop_front(); + ls->pending_count--; + sync::spin_unlock_irqrestore(ls->lock, irq); + + resource::resource_object* server_obj = pc->server_obj; + heap::kfree(pc); + + resource::handle_t new_handle = -1; + rc = resource::alloc_handle( + &task->handles, server_obj, resource::resource_type::SOCKET, + resource::RIGHT_READ | resource::RIGHT_WRITE, &new_handle + ); + if (rc != resource::HANDLE_OK) { + resource::resource_release(server_obj); + resource::resource_release(listen_obj); + return syscall::EMFILE; + } + resource::resource_release(server_obj); + resource::resource_release(listen_obj); + + if (addr != 0 && addrlen != 0) { + uint16_t sun_family = static_cast(AF_UNIX); + mm::uaccess::copy_to_user(reinterpret_cast(addr), &sun_family, sizeof(sun_family)); + uint32_t out_len = sizeof(uint16_t); + mm::uaccess::copy_to_user(reinterpret_cast(addrlen), &out_len, sizeof(out_len)); + } + + return new_handle; +} diff --git a/kernel/syscall/handlers/sys_socket.h b/kernel/syscall/handlers/sys_socket.h new file mode 100644 index 00000000..380f9e63 --- /dev/null +++ b/kernel/syscall/handlers/sys_socket.h @@ -0,0 +1,13 @@ +#ifndef STELLUX_SYSCALL_HANDLERS_SYS_SOCKET_H +#define STELLUX_SYSCALL_HANDLERS_SYS_SOCKET_H + +#include "syscall/syscall_table.h" + +DECLARE_SYSCALL(socket); +DECLARE_SYSCALL(socketpair); +DECLARE_SYSCALL(bind); +DECLARE_SYSCALL(listen); +DECLARE_SYSCALL(accept); +DECLARE_SYSCALL(connect); + +#endif // STELLUX_SYSCALL_HANDLERS_SYS_SOCKET_H diff --git a/kernel/syscall/syscall_table.cpp b/kernel/syscall/syscall_table.cpp index cc9eba16..c5feaed3 100644 --- a/kernel/syscall/syscall_table.cpp +++ b/kernel/syscall/syscall_table.cpp @@ -6,6 +6,7 @@ #include "syscall/handlers/sys_io.h" #include "syscall/handlers/sys_fd.h" #include "syscall/handlers/sys_mmap.h" +#include "syscall/handlers/sys_socket.h" namespace syscall { @@ -31,6 +32,14 @@ __PRIVILEGED_CODE void init_syscall_table() { REGISTER_SYSCALL(linux_nr::EXIT_GROUP, 
exit_group); REGISTER_SYSCALL(linux_nr::SET_TID_ADDRESS, set_tid_address); + REGISTER_SYSCALL(linux_nr::SOCKET, socket); + REGISTER_SYSCALL(linux_nr::SOCKETPAIR, socketpair); + REGISTER_SYSCALL(linux_nr::BIND, bind); + REGISTER_SYSCALL(linux_nr::LISTEN, listen); + REGISTER_SYSCALL(linux_nr::ACCEPT, accept); + REGISTER_SYSCALL(linux_nr::CONNECT, connect); + REGISTER_SYSCALL(linux_nr::FCNTL, fcntl); + REGISTER_SYSCALL(SYS_ELEVATE, elevate); register_arch_syscalls(); diff --git a/kernel/syscall/syscall_table.h b/kernel/syscall/syscall_table.h index 0247cde5..cdcee818 100644 --- a/kernel/syscall/syscall_table.h +++ b/kernel/syscall/syscall_table.h @@ -21,6 +21,12 @@ constexpr int64_t EEXIST = -17; constexpr int64_t ENOTTY = -25; constexpr int64_t ENAMETOOLONG = -36; constexpr int64_t ENOSYS = -38; +constexpr int64_t EAGAIN = -11; +constexpr int64_t EPIPE = -32; +constexpr int64_t EADDRINUSE = -98; +constexpr int64_t EISCONN = -106; +constexpr int64_t ENOTCONN = -107; +constexpr int64_t ECONNREFUSED = -111; extern handler_t g_syscall_table[MAX_SYSCALL_NUM]; diff --git a/kernel/tests/socket/socket.test.cpp b/kernel/tests/socket/socket.test.cpp new file mode 100644 index 00000000..032ca577 --- /dev/null +++ b/kernel/tests/socket/socket.test.cpp @@ -0,0 +1,699 @@ +#define STLX_TEST_TIER TIER_SCHED + +#include "stlx_unit_test.h" +#include "socket/unix_socket.h" +#include "socket/ring_buffer.h" +#include "socket/listener.h" +#include "resource/resource.h" +#include "resource/handle_table.h" +#include "sched/sched.h" +#include "sched/task.h" +#include "mm/heap.h" +#include "common/string.h" +#include "fs/fstypes.h" +#include "fs/fs.h" +#include "fs/socket_node.h" + +TEST_SUITE(socket_test); + +// --------------------------------------------------------------------------- +// Ring buffer tests +// --------------------------------------------------------------------------- + +TEST(socket_test, ring_buffer_create_destroy) { + auto* rb = 
socket::ring_buffer_create(socket::DEFAULT_CAPACITY); + ASSERT_NOT_NULL(rb); + ASSERT_NOT_NULL(rb->data); + EXPECT_GT(rb->capacity, socket::DEFAULT_CAPACITY); + EXPECT_EQ(rb->head, 0u); + EXPECT_EQ(rb->tail, 0u); + EXPECT_FALSE(rb->writer_closed); + EXPECT_FALSE(rb->reader_closed); + socket::ring_buffer_destroy(rb); +} + +TEST(socket_test, ring_buffer_write_read_basic) { + auto* rb = socket::ring_buffer_create(64); + ASSERT_NOT_NULL(rb); + + const uint8_t msg[] = "hello ring"; + ssize_t nw = socket::ring_buffer_write(rb, msg, 10); + EXPECT_EQ(nw, static_cast(10)); + + uint8_t buf[32] = {}; + ssize_t nr = socket::ring_buffer_read(rb, buf, sizeof(buf)); + EXPECT_EQ(nr, static_cast(10)); + EXPECT_EQ(string::memcmp(buf, msg, 10), 0); + + socket::ring_buffer_destroy(rb); +} + +TEST(socket_test, ring_buffer_multiple_writes_single_read) { + auto* rb = socket::ring_buffer_create(256); + ASSERT_NOT_NULL(rb); + + socket::ring_buffer_write(rb, reinterpret_cast("aaa"), 3); + socket::ring_buffer_write(rb, reinterpret_cast("bbb"), 3); + socket::ring_buffer_write(rb, reinterpret_cast("ccc"), 3); + + uint8_t buf[32] = {}; + ssize_t nr = socket::ring_buffer_read(rb, buf, sizeof(buf)); + EXPECT_EQ(nr, static_cast(9)); + EXPECT_EQ(string::memcmp(buf, "aaabbbccc", 9), 0); + + socket::ring_buffer_destroy(rb); +} + +TEST(socket_test, ring_buffer_short_read) { + auto* rb = socket::ring_buffer_create(256); + ASSERT_NOT_NULL(rb); + + socket::ring_buffer_write(rb, reinterpret_cast("xyz"), 3); + + uint8_t buf[1] = {}; + ssize_t nr = socket::ring_buffer_read(rb, buf, 1); + EXPECT_EQ(nr, static_cast(1)); + EXPECT_EQ(buf[0], static_cast('x')); + + nr = socket::ring_buffer_read(rb, buf, 1); + EXPECT_EQ(nr, static_cast(1)); + EXPECT_EQ(buf[0], static_cast('y')); + + socket::ring_buffer_destroy(rb); +} + +TEST(socket_test, ring_buffer_eof_after_close_write) { + auto* rb = socket::ring_buffer_create(256); + ASSERT_NOT_NULL(rb); + + socket::ring_buffer_write(rb, reinterpret_cast("ab"), 2); + 
socket::ring_buffer_close_write(rb); + + uint8_t buf[32] = {}; + ssize_t nr = socket::ring_buffer_read(rb, buf, sizeof(buf)); + EXPECT_EQ(nr, static_cast(2)); + + nr = socket::ring_buffer_read(rb, buf, sizeof(buf)); + EXPECT_EQ(nr, static_cast(0)); // EOF + + socket::ring_buffer_destroy(rb); +} + +TEST(socket_test, ring_buffer_epipe_after_close_read) { + auto* rb = socket::ring_buffer_create(256); + ASSERT_NOT_NULL(rb); + + socket::ring_buffer_close_read(rb); + + ssize_t nw = socket::ring_buffer_write(rb, reinterpret_cast("x"), 1); + EXPECT_EQ(nw, static_cast(resource::ERR_PIPE)); + + socket::ring_buffer_destroy(rb); +} + +TEST(socket_test, ring_buffer_nonblock_empty_returns_eagain) { + auto* rb = socket::ring_buffer_create(256); + ASSERT_NOT_NULL(rb); + + uint8_t buf[8] = {}; + ssize_t nr = socket::ring_buffer_read(rb, buf, sizeof(buf), true); + EXPECT_EQ(nr, static_cast(resource::ERR_AGAIN)); + + socket::ring_buffer_destroy(rb); +} + +TEST(socket_test, ring_buffer_nonblock_full_returns_eagain) { + auto* rb = socket::ring_buffer_create(16); + ASSERT_NOT_NULL(rb); + + uint8_t fill[64]; + string::memset(fill, 'A', sizeof(fill)); + + // Fill the buffer + ssize_t nw = socket::ring_buffer_write(rb, fill, sizeof(fill)); + EXPECT_GT(nw, static_cast(0)); + + // Now try non-blocking write when full + nw = socket::ring_buffer_write(rb, fill, 1, true); + EXPECT_EQ(nw, static_cast(resource::ERR_AGAIN)); + + socket::ring_buffer_destroy(rb); +} + +TEST(socket_test, ring_buffer_nonblock_with_data_returns_data) { + auto* rb = socket::ring_buffer_create(256); + ASSERT_NOT_NULL(rb); + + socket::ring_buffer_write(rb, reinterpret_cast("test"), 4); + + uint8_t buf[32] = {}; + ssize_t nr = socket::ring_buffer_read(rb, buf, sizeof(buf), true); + EXPECT_EQ(nr, static_cast(4)); + EXPECT_EQ(string::memcmp(buf, "test", 4), 0); + + socket::ring_buffer_destroy(rb); +} + +TEST(socket_test, ring_buffer_nonblock_eof_returns_zero) { + auto* rb = socket::ring_buffer_create(256); + 
ASSERT_NOT_NULL(rb); + + socket::ring_buffer_close_write(rb); + + uint8_t buf[8] = {}; + ssize_t nr = socket::ring_buffer_read(rb, buf, sizeof(buf), true); + EXPECT_EQ(nr, static_cast(0)); // EOF, not EAGAIN + + socket::ring_buffer_destroy(rb); +} + +TEST(socket_test, ring_buffer_zero_length_returns_inval) { + auto* rb = socket::ring_buffer_create(256); + ASSERT_NOT_NULL(rb); + + uint8_t buf[1] = {}; + EXPECT_EQ(socket::ring_buffer_read(rb, buf, 0), static_cast(resource::ERR_INVAL)); + EXPECT_EQ(socket::ring_buffer_write(rb, buf, 0), static_cast(resource::ERR_INVAL)); + + socket::ring_buffer_destroy(rb); +} + +TEST(socket_test, ring_buffer_null_args_returns_inval) { + auto* rb = socket::ring_buffer_create(256); + ASSERT_NOT_NULL(rb); + + EXPECT_EQ(socket::ring_buffer_read(nullptr, nullptr, 1), static_cast(resource::ERR_INVAL)); + EXPECT_EQ(socket::ring_buffer_read(rb, nullptr, 1), static_cast(resource::ERR_INVAL)); + EXPECT_EQ(socket::ring_buffer_write(nullptr, nullptr, 1), static_cast(resource::ERR_INVAL)); + EXPECT_EQ(socket::ring_buffer_write(rb, nullptr, 1), static_cast(resource::ERR_INVAL)); + + socket::ring_buffer_destroy(rb); +} + +// --------------------------------------------------------------------------- +// Socket pair creation and data flow +// --------------------------------------------------------------------------- + +TEST(socket_test, create_socket_pair_succeeds) { + resource::resource_object* obj_a = nullptr; + resource::resource_object* obj_b = nullptr; + ASSERT_EQ(socket::create_socket_pair(&obj_a, &obj_b), resource::OK); + ASSERT_NOT_NULL(obj_a); + ASSERT_NOT_NULL(obj_b); + EXPECT_EQ(obj_a->type, resource::resource_type::SOCKET); + EXPECT_EQ(obj_b->type, resource::resource_type::SOCKET); + EXPECT_NOT_NULL(obj_a->ops); + EXPECT_NOT_NULL(obj_b->ops); + EXPECT_NOT_NULL(obj_a->impl); + EXPECT_NOT_NULL(obj_b->impl); + + resource::resource_release(obj_a); + resource::resource_release(obj_b); +} + +TEST(socket_test, socketpair_write_read) { + 
sched::task* task = sched::current(); + ASSERT_NOT_NULL(task); + + resource::resource_object* obj_a = nullptr; + resource::resource_object* obj_b = nullptr; + ASSERT_EQ(socket::create_socket_pair(&obj_a, &obj_b), resource::OK); + + resource::handle_t h0 = -1, h1 = -1; + ASSERT_EQ(resource::alloc_handle(&task->handles, obj_a, resource::resource_type::SOCKET, + resource::RIGHT_READ | resource::RIGHT_WRITE, &h0), resource::HANDLE_OK); + resource::resource_release(obj_a); + ASSERT_EQ(resource::alloc_handle(&task->handles, obj_b, resource::resource_type::SOCKET, + resource::RIGHT_READ | resource::RIGHT_WRITE, &h1), resource::HANDLE_OK); + resource::resource_release(obj_b); + + const char* msg = "socket-hello"; + ASSERT_EQ(resource::write(task, h0, msg, 12), static_cast(12)); + + char buf[32] = {}; + ASSERT_EQ(resource::read(task, h1, buf, 32), static_cast(12)); + EXPECT_STREQ(buf, "socket-hello"); + + EXPECT_EQ(resource::close(task, h0), resource::OK); + EXPECT_EQ(resource::close(task, h1), resource::OK); +} + +TEST(socket_test, socketpair_bidirectional) { + sched::task* task = sched::current(); + ASSERT_NOT_NULL(task); + + resource::resource_object* obj_a = nullptr; + resource::resource_object* obj_b = nullptr; + ASSERT_EQ(socket::create_socket_pair(&obj_a, &obj_b), resource::OK); + + resource::handle_t h0 = -1, h1 = -1; + ASSERT_EQ(resource::alloc_handle(&task->handles, obj_a, resource::resource_type::SOCKET, + resource::RIGHT_READ | resource::RIGHT_WRITE, &h0), resource::HANDLE_OK); + resource::resource_release(obj_a); + ASSERT_EQ(resource::alloc_handle(&task->handles, obj_b, resource::resource_type::SOCKET, + resource::RIGHT_READ | resource::RIGHT_WRITE, &h1), resource::HANDLE_OK); + resource::resource_release(obj_b); + + ASSERT_EQ(resource::write(task, h1, "world", 5), static_cast(5)); + + char buf[32] = {}; + ASSERT_EQ(resource::read(task, h0, buf, 32), static_cast(5)); + EXPECT_STREQ(buf, "world"); + + EXPECT_EQ(resource::close(task, h0), resource::OK); + 
EXPECT_EQ(resource::close(task, h1), resource::OK); +} + +TEST(socket_test, socketpair_eof_on_peer_close) { + sched::task* task = sched::current(); + ASSERT_NOT_NULL(task); + + resource::resource_object* obj_a = nullptr; + resource::resource_object* obj_b = nullptr; + ASSERT_EQ(socket::create_socket_pair(&obj_a, &obj_b), resource::OK); + + resource::handle_t h0 = -1, h1 = -1; + ASSERT_EQ(resource::alloc_handle(&task->handles, obj_a, resource::resource_type::SOCKET, + resource::RIGHT_READ | resource::RIGHT_WRITE, &h0), resource::HANDLE_OK); + resource::resource_release(obj_a); + ASSERT_EQ(resource::alloc_handle(&task->handles, obj_b, resource::resource_type::SOCKET, + resource::RIGHT_READ | resource::RIGHT_WRITE, &h1), resource::HANDLE_OK); + resource::resource_release(obj_b); + + EXPECT_EQ(resource::close(task, h0), resource::OK); + + char buf[8] = {}; + ssize_t nr = resource::read(task, h1, buf, sizeof(buf)); + EXPECT_EQ(nr, static_cast(0)); // EOF + + EXPECT_EQ(resource::close(task, h1), resource::OK); +} + +TEST(socket_test, socketpair_epipe_on_peer_close) { + sched::task* task = sched::current(); + ASSERT_NOT_NULL(task); + + resource::resource_object* obj_a = nullptr; + resource::resource_object* obj_b = nullptr; + ASSERT_EQ(socket::create_socket_pair(&obj_a, &obj_b), resource::OK); + + resource::handle_t h0 = -1, h1 = -1; + ASSERT_EQ(resource::alloc_handle(&task->handles, obj_a, resource::resource_type::SOCKET, + resource::RIGHT_READ | resource::RIGHT_WRITE, &h0), resource::HANDLE_OK); + resource::resource_release(obj_a); + ASSERT_EQ(resource::alloc_handle(&task->handles, obj_b, resource::resource_type::SOCKET, + resource::RIGHT_READ | resource::RIGHT_WRITE, &h1), resource::HANDLE_OK); + resource::resource_release(obj_b); + + EXPECT_EQ(resource::close(task, h1), resource::OK); + + ssize_t nw = resource::write(task, h0, "x", 1); + EXPECT_EQ(nw, static_cast(resource::ERR_PIPE)); + + EXPECT_EQ(resource::close(task, h0), resource::OK); +} + +TEST(socket_test, 
socketpair_drain_then_eof) { + sched::task* task = sched::current(); + ASSERT_NOT_NULL(task); + + resource::resource_object* obj_a = nullptr; + resource::resource_object* obj_b = nullptr; + ASSERT_EQ(socket::create_socket_pair(&obj_a, &obj_b), resource::OK); + + resource::handle_t h0 = -1, h1 = -1; + ASSERT_EQ(resource::alloc_handle(&task->handles, obj_a, resource::resource_type::SOCKET, + resource::RIGHT_READ | resource::RIGHT_WRITE, &h0), resource::HANDLE_OK); + resource::resource_release(obj_a); + ASSERT_EQ(resource::alloc_handle(&task->handles, obj_b, resource::resource_type::SOCKET, + resource::RIGHT_READ | resource::RIGHT_WRITE, &h1), resource::HANDLE_OK); + resource::resource_release(obj_b); + + ASSERT_EQ(resource::write(task, h0, "abc", 3), static_cast(3)); + EXPECT_EQ(resource::close(task, h0), resource::OK); + + char buf[8] = {}; + ssize_t nr = resource::read(task, h1, buf, sizeof(buf)); + EXPECT_EQ(nr, static_cast(3)); + EXPECT_EQ(string::memcmp(buf, "abc", 3), 0); + + nr = resource::read(task, h1, buf, sizeof(buf)); + EXPECT_EQ(nr, static_cast(0)); // EOF after drain + + EXPECT_EQ(resource::close(task, h1), resource::OK); +} + +// --------------------------------------------------------------------------- +// Unbound socket tests +// --------------------------------------------------------------------------- + +TEST(socket_test, create_unbound_socket) { + resource::resource_object* obj = nullptr; + ASSERT_EQ(socket::create_unbound_socket(&obj), resource::OK); + ASSERT_NOT_NULL(obj); + EXPECT_EQ(obj->type, resource::resource_type::SOCKET); + + auto* sock = static_cast(obj->impl); + ASSERT_NOT_NULL(sock); + EXPECT_EQ(sock->state, socket::SOCK_STATE_UNBOUND); + + resource::resource_release(obj); +} + +TEST(socket_test, unbound_socket_read_returns_notconn) { + sched::task* task = sched::current(); + ASSERT_NOT_NULL(task); + + resource::resource_object* obj = nullptr; + ASSERT_EQ(socket::create_unbound_socket(&obj), resource::OK); + + resource::handle_t h = 
-1;
+    ASSERT_EQ(resource::alloc_handle(&task->handles, obj, resource::resource_type::SOCKET,
+        resource::RIGHT_READ | resource::RIGHT_WRITE, &h), resource::HANDLE_OK);
+    resource::resource_release(obj);
+
+    char buf[8] = {};
+    ssize_t nr = resource::read(task, h, buf, sizeof(buf));
+    EXPECT_EQ(nr, static_cast(resource::ERR_NOTCONN));
+
+    EXPECT_EQ(resource::close(task, h), resource::OK);
+}
+
+TEST(socket_test, unbound_socket_write_returns_notconn) {
+    sched::task* task = sched::current();
+    ASSERT_NOT_NULL(task);
+
+    resource::resource_object* obj = nullptr;
+    ASSERT_EQ(socket::create_unbound_socket(&obj), resource::OK);
+
+    resource::handle_t h = -1;
+    ASSERT_EQ(resource::alloc_handle(&task->handles, obj, resource::resource_type::SOCKET,
+        resource::RIGHT_READ | resource::RIGHT_WRITE, &h), resource::HANDLE_OK);
+    resource::resource_release(obj);
+
+    ssize_t nw = resource::write(task, h, "x", 1);
+    EXPECT_EQ(nw, static_cast(resource::ERR_NOTCONN));
+
+    EXPECT_EQ(resource::close(task, h), resource::OK);
+}
+
+// ---------------------------------------------------------------------------
+// Handle flags / fcntl tests
+// ---------------------------------------------------------------------------
+
+TEST(socket_test, handle_flags_default_zero) {
+    sched::task* task = sched::current();
+    ASSERT_NOT_NULL(task);
+
+    resource::resource_object* obj = nullptr;
+    ASSERT_EQ(socket::create_unbound_socket(&obj), resource::OK);
+
+    resource::handle_t h = -1;
+    ASSERT_EQ(resource::alloc_handle(&task->handles, obj, resource::resource_type::SOCKET,
+        resource::RIGHT_READ | resource::RIGHT_WRITE, &h), resource::HANDLE_OK);
+    resource::resource_release(obj);
+
+    uint32_t flags = 0xFFFF;  // non-zero sentinel: the 0 checked below must come from the call
+    ASSERT_EQ(resource::get_handle_flags(&task->handles, h, &flags), resource::HANDLE_OK);
+    EXPECT_EQ(flags, 0u);
+
+    EXPECT_EQ(resource::close(task, h), resource::OK);
+}
+
+TEST(socket_test, handle_flags_set_and_get) {
+    sched::task* task = sched::current();
+    ASSERT_NOT_NULL(task);
+
+    resource::resource_object* obj = nullptr;
+    ASSERT_EQ(socket::create_unbound_socket(&obj), resource::OK);
+
+    resource::handle_t h = -1;
+    ASSERT_EQ(resource::alloc_handle(&task->handles, obj, resource::resource_type::SOCKET,
+        resource::RIGHT_READ | resource::RIGHT_WRITE, &h), resource::HANDLE_OK);
+    resource::resource_release(obj);
+
+    ASSERT_EQ(resource::set_handle_flags(&task->handles, h, fs::O_NONBLOCK), resource::HANDLE_OK);
+
+    uint32_t flags = 0;
+    ASSERT_EQ(resource::get_handle_flags(&task->handles, h, &flags), resource::HANDLE_OK);
+    EXPECT_BITS_SET(flags, fs::O_NONBLOCK);
+
+    ASSERT_EQ(resource::set_handle_flags(&task->handles, h, 0), resource::HANDLE_OK);  // clearing is checked as well
+    ASSERT_EQ(resource::get_handle_flags(&task->handles, h, &flags), resource::HANDLE_OK);
+    EXPECT_EQ(flags, 0u);
+
+    EXPECT_EQ(resource::close(task, h), resource::OK);
+}
+
+TEST(socket_test, handle_flags_invalid_handle) {
+    sched::task* task = sched::current();
+    ASSERT_NOT_NULL(task);
+
+    uint32_t flags = 0;
+    EXPECT_EQ(resource::get_handle_flags(&task->handles, -1, &flags), resource::HANDLE_ERR_NOENT);
+    EXPECT_EQ(resource::set_handle_flags(&task->handles, -1, 0), resource::HANDLE_ERR_NOENT);
+    EXPECT_EQ(resource::get_handle_flags(&task->handles, 9999, &flags), resource::HANDLE_ERR_NOENT);
+}
+
+TEST(socket_test, handle_flags_cleared_on_close) {
+    sched::task* task = sched::current();
+    ASSERT_NOT_NULL(task);
+
+    resource::resource_object* obj = nullptr;
+    ASSERT_EQ(socket::create_unbound_socket(&obj), resource::OK);
+
+    resource::handle_t h = -1;
+    ASSERT_EQ(resource::alloc_handle(&task->handles, obj, resource::resource_type::SOCKET,
+        resource::RIGHT_READ | resource::RIGHT_WRITE, &h), resource::HANDLE_OK);
+    resource::resource_release(obj);
+
+    ASSERT_EQ(resource::set_handle_flags(&task->handles, h, fs::O_NONBLOCK), resource::HANDLE_OK);
+    EXPECT_EQ(resource::close(task, h), resource::OK);
+
+    uint32_t flags = 0;
+    EXPECT_EQ(resource::get_handle_flags(&task->handles, h, &flags), resource::HANDLE_ERR_NOENT);
+}
+
+// ---------------------------------------------------------------------------
+// Non-blocking socket read/write via handle flags
+// ---------------------------------------------------------------------------
+
+TEST(socket_test, nonblock_socketpair_read_eagain) {
+    sched::task* task = sched::current();
+    ASSERT_NOT_NULL(task);
+
+    resource::resource_object* obj_a = nullptr;
+    resource::resource_object* obj_b = nullptr;
+    ASSERT_EQ(socket::create_socket_pair(&obj_a, &obj_b), resource::OK);
+
+    resource::handle_t h0 = -1, h1 = -1;
+    ASSERT_EQ(resource::alloc_handle(&task->handles, obj_a, resource::resource_type::SOCKET,
+        resource::RIGHT_READ | resource::RIGHT_WRITE, &h0), resource::HANDLE_OK);
+    resource::resource_release(obj_a);
+    ASSERT_EQ(resource::alloc_handle(&task->handles, obj_b, resource::resource_type::SOCKET,
+        resource::RIGHT_READ | resource::RIGHT_WRITE, &h1), resource::HANDLE_OK);
+    resource::resource_release(obj_b);
+
+    ASSERT_EQ(resource::set_handle_flags(&task->handles, h0, fs::O_NONBLOCK), resource::HANDLE_OK);
+
+    char buf[8] = {};
+    ssize_t nr = resource::read(task, h0, buf, sizeof(buf));  // nothing has been written to the pair
+    EXPECT_EQ(nr, static_cast(resource::ERR_AGAIN));
+
+    EXPECT_EQ(resource::close(task, h0), resource::OK);
+    EXPECT_EQ(resource::close(task, h1), resource::OK);
+}
+
+TEST(socket_test, nonblock_socketpair_read_with_data) {
+    sched::task* task = sched::current();
+    ASSERT_NOT_NULL(task);
+
+    resource::resource_object* obj_a = nullptr;
+    resource::resource_object* obj_b = nullptr;
+    ASSERT_EQ(socket::create_socket_pair(&obj_a, &obj_b), resource::OK);
+
+    resource::handle_t h0 = -1, h1 = -1;
+    ASSERT_EQ(resource::alloc_handle(&task->handles, obj_a, resource::resource_type::SOCKET,
+        resource::RIGHT_READ | resource::RIGHT_WRITE, &h0), resource::HANDLE_OK);
+    resource::resource_release(obj_a);
+    ASSERT_EQ(resource::alloc_handle(&task->handles, obj_b, resource::resource_type::SOCKET,
+        resource::RIGHT_READ | resource::RIGHT_WRITE, &h1), resource::HANDLE_OK);
+    resource::resource_release(obj_b);
+
+    ASSERT_EQ(resource::set_handle_flags(&task->handles, h1, fs::O_NONBLOCK), resource::HANDLE_OK);
+
+    ASSERT_EQ(resource::write(task, h0, "data", 4), static_cast(4));
+
+    char buf[32] = {};
+    ssize_t nr = resource::read(task, h1, buf, sizeof(buf));
+    EXPECT_EQ(nr, static_cast(4));
+    EXPECT_EQ(string::memcmp(buf, "data", 4), 0);
+
+    EXPECT_EQ(resource::close(task, h0), resource::OK);
+    EXPECT_EQ(resource::close(task, h1), resource::OK);
+}
+
+TEST(socket_test, nonblock_socketpair_eof_not_eagain) {
+    sched::task* task = sched::current();
+    ASSERT_NOT_NULL(task);
+
+    resource::resource_object* obj_a = nullptr;
+    resource::resource_object* obj_b = nullptr;
+    ASSERT_EQ(socket::create_socket_pair(&obj_a, &obj_b), resource::OK);
+
+    resource::handle_t h0 = -1, h1 = -1;
+    ASSERT_EQ(resource::alloc_handle(&task->handles, obj_a, resource::resource_type::SOCKET,
+        resource::RIGHT_READ | resource::RIGHT_WRITE, &h0), resource::HANDLE_OK);
+    resource::resource_release(obj_a);
+    ASSERT_EQ(resource::alloc_handle(&task->handles, obj_b, resource::resource_type::SOCKET,
+        resource::RIGHT_READ | resource::RIGHT_WRITE, &h1), resource::HANDLE_OK);
+    resource::resource_release(obj_b);
+
+    ASSERT_EQ(resource::set_handle_flags(&task->handles, h1, fs::O_NONBLOCK), resource::HANDLE_OK);
+    EXPECT_EQ(resource::close(task, h0), resource::OK);
+
+    char buf[8] = {};
+    ssize_t nr = resource::read(task, h1, buf, sizeof(buf));
+    EXPECT_EQ(nr, static_cast(0)); // EOF, not EAGAIN
+
+    EXPECT_EQ(resource::close(task, h1), resource::OK);
+}
+
+// ---------------------------------------------------------------------------
+// Listener state tests
+// ---------------------------------------------------------------------------
+
+TEST(socket_test, listener_state_create_destroy) {
+    auto ls = rc::make_kref();
+    ASSERT_TRUE(static_cast(ls));
+    ls->lock = sync::SPINLOCK_INIT;
+    ls->closed = false;
+    ls->accept_queue.init();
+    ls->accept_wq.init();
+    ls->backlog = 16;
+    ls->pending_count = 0;
+
+    EXPECT_FALSE(ls->closed);
+    EXPECT_TRUE(ls->accept_queue.empty());
+    EXPECT_EQ(ls->pending_count, 0u);
+}
+
+// ---------------------------------------------------------------------------
+// Channel ref counting
+// ---------------------------------------------------------------------------
+
+TEST(socket_test, channel_refcount_after_socketpair) {
+    resource::resource_object* obj_a = nullptr;
+    resource::resource_object* obj_b = nullptr;
+    ASSERT_EQ(socket::create_socket_pair(&obj_a, &obj_b), resource::OK);
+
+    auto* sock_a = static_cast(obj_a->impl);
+    auto* sock_b = static_cast(obj_b->impl);
+    ASSERT_NOT_NULL(sock_a);
+    ASSERT_NOT_NULL(sock_b);
+
+    EXPECT_EQ(sock_a->channel.ptr(), sock_b->channel.ptr());  // both ends must point at one channel
+    EXPECT_EQ(sock_a->channel->ref_count(), 2u);
+
+    resource::resource_release(obj_a);
+    EXPECT_EQ(sock_b->channel->ref_count(), 1u);
+
+    resource::resource_release(obj_b);
+}
+
+// ---------------------------------------------------------------------------
+// Socket type validation
+// ---------------------------------------------------------------------------
+
+TEST(socket_test, socket_handle_has_socket_type) {
+    sched::task* task = sched::current();
+    ASSERT_NOT_NULL(task);
+
+    resource::resource_object* obj = nullptr;
+    ASSERT_EQ(socket::create_unbound_socket(&obj), resource::OK);
+
+    resource::handle_t h = -1;
+    ASSERT_EQ(resource::alloc_handle(&task->handles, obj, resource::resource_type::SOCKET,
+        resource::RIGHT_READ | resource::RIGHT_WRITE, &h), resource::HANDLE_OK);
+    resource::resource_release(obj);
+
+    const resource::handle_entry& entry = task->handles.entries[static_cast(h)];
+    EXPECT_TRUE(entry.used);
+    EXPECT_EQ(entry.type, resource::resource_type::SOCKET);
+
+    EXPECT_EQ(resource::close(task, h), resource::OK);
+}
+
+TEST(socket_test, close_invalid_handle_returns_badf) {
+    sched::task* task = sched::current();
+    ASSERT_NOT_NULL(task);
+
+    EXPECT_EQ(resource::close(task, -1), resource::ERR_BADF);
+    EXPECT_EQ(resource::close(task, 9999), resource::ERR_BADF);
+}
+
+// ---------------------------------------------------------------------------
+// VFS socket node
+// ---------------------------------------------------------------------------
+
+TEST(socket_test, node_type_socket_in_fstypes) {
+    fs::vattr attr;
+    attr.type = fs::node_type::socket;
+    attr.size = 0;
+    EXPECT_EQ(attr.type, fs::node_type::socket);
+}
+
+// ---------------------------------------------------------------------------
+// Double close safety
+// ---------------------------------------------------------------------------
+
+TEST(socket_test, double_close_returns_badf) {
+    sched::task* task = sched::current();
+    ASSERT_NOT_NULL(task);
+
+    resource::resource_object* obj_a = nullptr;
+    resource::resource_object* obj_b = nullptr;
+    ASSERT_EQ(socket::create_socket_pair(&obj_a, &obj_b), resource::OK);
+
+    resource::handle_t h0 = -1, h1 = -1;
+    ASSERT_EQ(resource::alloc_handle(&task->handles, obj_a, resource::resource_type::SOCKET,
+        resource::RIGHT_READ | resource::RIGHT_WRITE, &h0), resource::HANDLE_OK);
+    resource::resource_release(obj_a);
+    ASSERT_EQ(resource::alloc_handle(&task->handles, obj_b, resource::resource_type::SOCKET,
+        resource::RIGHT_READ | resource::RIGHT_WRITE, &h1), resource::HANDLE_OK);
+    resource::resource_release(obj_b);
+
+    EXPECT_EQ(resource::close(task, h0), resource::OK);
+    EXPECT_EQ(resource::close(task, h0), resource::ERR_BADF);
+
+    EXPECT_EQ(resource::close(task, h1), resource::OK);
+    EXPECT_EQ(resource::close(task, h1), resource::ERR_BADF);
+}
+
+// ---------------------------------------------------------------------------
+// get_handle_object with flags output
+// ---------------------------------------------------------------------------
+
+TEST(socket_test, get_handle_object_returns_flags) {
+    sched::task* task = sched::current();
+    ASSERT_NOT_NULL(task);
+
+    resource::resource_object* obj = nullptr;
+    ASSERT_EQ(socket::create_unbound_socket(&obj), resource::OK);
+
+    resource::handle_t h = -1;
+    ASSERT_EQ(resource::alloc_handle(&task->handles, obj, resource::resource_type::SOCKET,
+        resource::RIGHT_READ | resource::RIGHT_WRITE, &h), resource::HANDLE_OK);
+    resource::resource_release(obj);
+
+    ASSERT_EQ(resource::set_handle_flags(&task->handles, h, fs::O_NONBLOCK), resource::HANDLE_OK);
+
+    resource::resource_object* out = nullptr;
+    uint32_t out_flags = 0;
+    ASSERT_EQ(resource::get_handle_object(&task->handles, h, resource::RIGHT_READ, &out, &out_flags),
+        resource::HANDLE_OK);
+    EXPECT_BITS_SET(out_flags, fs::O_NONBLOCK);
+    resource::resource_release(out);
+
+    EXPECT_EQ(resource::close(task, h), resource::OK);
+}
diff --git a/userland/apps/init/src/init.c b/userland/apps/init/src/init.c
index 1faabbb4..e11da9fe 100644
--- a/userland/apps/init/src/init.c
+++ b/userland/apps/init/src/init.c
@@ -4,6 +4,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 
 static int run_vma_syscall_demo(void) {
@@ -122,6 +124,106 @@ static int run_resource_fd_demo(void) {
     return 0;
 }
 
+// Userspace smoke test for AF_UNIX stream socketpair() and fcntl(O_NONBLOCK).
+//
+// Checks, in order: data sent on sv[0] arrives on sv[1], a reply sent on
+// sv[1] arrives on sv[0], read() returns 0 once the peer is closed, and a
+// read() on an empty socket with O_NONBLOCK set fails with EAGAIN. Progress
+// and failures are reported with printf.
+//
+// Returns 0 when every check passes and 1 at the first failure; descriptors
+// that are still open are closed before each return.
+static int run_socketpair_demo(void) {
+    int sv[2];
+    if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) != 0) {
+        printf("socketpair failed: errno=%d (%s)\r\n", errno, strerror(errno));
+        return 1;
+    }
+    printf("socketpair ok: sv[0]=%d sv[1]=%d\r\n", sv[0], sv[1]);
+
+    const char* msg = "hello sockets";
+    ssize_t nw = write(sv[0], msg, strlen(msg));
+    if (nw != (ssize_t)strlen(msg)) {
+        printf("write sv[0] failed: %ld errno=%d\r\n", (long)nw, errno);
+        close(sv[0]); close(sv[1]);
+        return 1;
+    }
+
+    char buf[64] = {};
+    ssize_t nr = read(sv[1], buf, sizeof(buf) - 1);
+    if (nr != (ssize_t)strlen(msg) || memcmp(buf, msg, strlen(msg)) != 0) {
+        printf("read sv[1] failed: got %ld bytes \"%s\"\r\n", (long)nr, buf);
+        close(sv[0]); close(sv[1]);
+        return 1;
+    }
+    printf("socketpair data ok: wrote \"%s\", read \"%s\"\r\n", msg, buf);
+
+    const char* reply = "world";
+    nw = write(sv[1], reply, strlen(reply));
+    // A failed write would leave the blocking read below with nothing to receive.
+    if (nw != (ssize_t)strlen(reply)) {
+        printf("write sv[1] failed: %ld errno=%d\r\n", (long)nw, errno);
+        close(sv[0]); close(sv[1]);
+        return 1;
+    }
+    nr = read(sv[0], buf, sizeof(buf) - 1);
+    if (nr < 0) {
+        printf("read sv[0] reply failed: errno=%d (%s)\r\n", errno, strerror(errno));
+        close(sv[0]); close(sv[1]);
+        return 1;
+    }
+    buf[nr] = '\0';
+    if (nr != (ssize_t)strlen(reply) || memcmp(buf, reply, strlen(reply)) != 0) {
+        printf("bidirectional failed: got \"%s\"\r\n", buf);
+        close(sv[0]); close(sv[1]);
+        return 1;
+    }
+    printf("socketpair bidirectional ok\r\n");
+
+    close(sv[0]);
+    nr = read(sv[1], buf, sizeof(buf));
+    if (nr != 0) {
+        printf("EOF test failed: expected 0, got %ld\r\n", (long)nr);
+        close(sv[1]);
+        return 1;
+    }
+    printf("socketpair EOF ok: close sv[0] -> read sv[1] returns 0\r\n");
+
+    close(sv[1]);
+
+    // non-blocking test
+    if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) != 0) {
+        printf("socketpair (nonblock test) failed\r\n");
+        return 1;
+    }
+    if (fcntl(sv[0], F_SETFL, O_NONBLOCK) != 0) {
+        printf("fcntl F_SETFL O_NONBLOCK failed: errno=%d\r\n", errno);
+        close(sv[0]); close(sv[1]);
+        return 1;
+    }
+    int fl = fcntl(sv[0], F_GETFL, 0);
+    // fcntl() reports failure as -1, whose bits include O_NONBLOCK; reject it first.
+    if (fl < 0 || !(fl & O_NONBLOCK)) {
+        printf("fcntl F_GETFL: O_NONBLOCK not set (flags=0x%x)\r\n", fl);
+        close(sv[0]); close(sv[1]);
+        return 1;
+    }
+    printf("fcntl O_NONBLOCK set ok (flags=0x%x)\r\n", fl);
+
+    errno = 0;
+    nr = read(sv[0], buf, sizeof(buf));
+    if (nr != -1 || errno != EAGAIN) {
+        printf("nonblock read expected EAGAIN, got nr=%ld errno=%d\r\n", (long)nr, errno);
+        close(sv[0]); close(sv[1]);
+        return 1;
+    }
+    printf("nonblock read -> EAGAIN ok\r\n");
+
+    close(sv[0]);
+    close(sv[1]);
+    return 0;
+}
+
 int main(void) {
     printf("hello from userspace!\r\n");
 
@@ -131,5 +233,8 @@ int main(void) {
     int rc_fd = run_resource_fd_demo();
     printf("Resource FD demo %s\r\n", rc_fd == 0 ? "passed" : "failed");
 
-    return (rc_vma == 0 && rc_fd == 0) ? 0 : 1;
+    int rc_sock = run_socketpair_demo();
+    printf("Socketpair demo %s\r\n", rc_sock == 0 ? "passed" : "failed");
+
+    return (rc_vma == 0 && rc_fd == 0 && rc_sock == 0) ? 0 : 1;
 }