diff --git a/.bazelrc b/.bazelrc index 789e024745e..765fb0c477e 100644 --- a/.bazelrc +++ b/.bazelrc @@ -46,8 +46,9 @@ import %workspace%/build/tools/clang_tidy/clang_tidy.bazelrc build --incompatible_remote_local_fallback_for_remote_cache # Use -isystem for cc_library includes attribute – this prevents warnings for misbehaving external -# code. -build:linux --features=external_include_paths --host_features=external_include_paths +# code. Applies to both Linux and macOS via the shared `:unix` config (see the +# `build:linux --config=unix` / `build:macos --config=unix` lines below). +build:unix --features=external_include_paths --host_features=external_include_paths # Forward compatibility with future Bazel versions: # Disable deprecated cfg = "host" Bazel rule setting. Blocked on perfetto. diff --git a/.bazelversion b/.bazelversion index 47da986f86f..44931da2660 100644 --- a/.bazelversion +++ b/.bazelversion @@ -1 +1 @@ -9.1.0 +9.1.1 diff --git a/.clang-tidy b/.clang-tidy index a912a000882..b134895c61a 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,4 +1,7 @@ --- +# This is workerd's base clang-tidy config. The default Bazel clang-tidy config +# is generated by merging this file with dependency configs, such as capnp-cpp's +# `.clang-tidy`, while preserving workerd-specific settings below. # TODO: We currently only enable select clang-tidy checks. While many checks provide little value or # produce false positives, try to incrementally enable most of them. # TODO: these checks are in progress of cleaning up diff --git a/.github/workflows/_bazel.yml b/.github/workflows/_bazel.yml index 1bafc95fc0a..f344db8e113 100644 --- a/.github/workflows/_bazel.yml +++ b/.github/workflows/_bazel.yml @@ -121,13 +121,6 @@ jobs: # Strip comment in front of WORKERS_MIRROR_URL, then substitute secret to use it. 
sed -e '/WORKERS_MIRROR_URL/ { s@# *@@; s@WORKERS_MIRROR_URL@${{ secrets.WORKERS_MIRROR_URL }}@; }' -i.bak build/deps/nodejs.MODULE.bazel fi - - name: Bazel build (Windows workaround) - if: runner.os == 'Windows' - # HACK: Work around Bazel Windows bug: Some targets need to be compiled without symlink - # support. Since we still need symlinks to compile C++ code properly, compile these targets - # separately. - run: | - bazel --nowindows_enable_symlinks build ${{ inputs.extra_bazel_args }} --config=ci --profile build-win-workaround.bazel-profile.gz --remote_cache=https://bazel:${{ secrets.BAZEL_CACHE_KEY }}@bazel-remote-cache.devprod.cloudflare.dev //src/wpt:wpt-all@tsproject //src/node:node@tsproject //src/pyodide:pyodide_static@tsproject - name: Bazel build run: | bazel build --remote_cache=https://bazel:${{ secrets.BAZEL_CACHE_KEY }}@bazel-remote-cache.devprod.cloudflare.dev --config=ci ${{ inputs.extra_bazel_args }} //... diff --git a/AGENTS.md b/AGENTS.md index 68f6dab4245..67562812883 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -300,6 +300,5 @@ See the markdown files in the `docs/` directory for additional information on sp - [development.md](docs/development.md) - Development environment setup and tools - [api-updates.md](docs/api-updates.md) - Guidelines for adding new JavaScript APIs -- [pyodide.md](docs/pyodide.md) - Pyodide package management and updates Some source directories also contain README.md files with more specific information about that component. Proactively look for these when working in unfamiliar areas of the codebase. Proactively suggest updates to the documentation when it is missing or out of date, but do not make edits without confirming accuracy. 
diff --git a/BUILD.bazel b/BUILD.bazel index f9f8c1a3714..0fdbedfc240 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -34,6 +34,17 @@ npm_link_package( package = "@workerd/jsg", ) +genrule( + name = "merged_clang_tidy_config", + srcs = [ + ":.clang-tidy", + "@capnp-cpp//:.clang-tidy", + ], + outs = ["merged.clang-tidy"], + cmd = "$(location //build/tools/clang_tidy:merge_clang_tidy_configs) $@ $(location :.clang-tidy) $(location @capnp-cpp//:.clang-tidy)", + tools = ["//build/tools/clang_tidy:merge_clang_tidy_configs"], +) + # Plugin to generate .js files capnp_es_bins.capnpc_js_binary( name = "capnpc_js_plugin", @@ -142,6 +153,6 @@ selects.config_setting_group( # Clang-tidy config to use label_flag( name = "clang_tidy_config", - build_setting_default = ":.clang-tidy", + build_setting_default = ":merged_clang_tidy_config", visibility = ["//visibility:public"], ) diff --git a/MODULE.bazel b/MODULE.bazel index 5519dd686a9..4b750849ad6 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -15,6 +15,7 @@ archive_override( "//:patches/sqlite/0002-macOS-missing-PATH-fix.patch", "//:patches/sqlite/0003-sqlite-complete-early-exit.patch", "//:patches/sqlite/0004-invalid-wal-on-rollback-fix.patch", + "//:patches/sqlite/0005-authorizer-rename-to-destination-name.patch", ], remote_file_integrity = { "MODULE.bazel": "sha256-+H38CSP6DtMT+YPSy9lplRLcfBApkukY4vdX6lBjDfI=", diff --git a/build/AGENTS.md b/build/AGENTS.md index 198317aaac9..36167090a04 100644 --- a/build/AGENTS.md +++ b/build/AGENTS.md @@ -65,4 +65,6 @@ Lives in `deps/`. Uses jsonc manifests + codegen: - `gen/` — **autogenerated**; do not hand-edit - `*.MODULE.bazel` (e.g., `rust.MODULE.bazel`, `v8.MODULE.bazel`) — included by root `MODULE.bazel` - `workerd-v8/` — separate Bazel module wrapping V8 dependency -- `python/` — Pyodide package lists (versioned `.bzl` files) + +Pyodide package metadata lives in `build/python_metadata.bzl`; the checked-in, pre-filtered +package lock files live in `src/pyodide/python-lock/`. 
diff --git a/build/ci.bazelrc b/build/ci.bazelrc index c2827f0da79..2d17e05081d 100644 --- a/build/ci.bazelrc +++ b/build/ci.bazelrc @@ -9,6 +9,8 @@ build:ci --verbose_failures # closer towards the suggested value of 200. Note the number of maximum build jobs is controlled by # the --local_resources=cpu flag and still limited to the number of cores by default. build:ci --jobs=64 +# attempt to recover from remote cache errors without needing to retry build +build:ci --rewind_lost_inputs # Do not check for changes in external repository files, should speed up bazel invocations after the first one build:ci --noexperimental_check_external_repository_files # Only build runfile trees when needed. Runfile trees are useful for directly invoking bazel-built diff --git a/build/deps/dep_pyodide.bzl b/build/deps/dep_pyodide.bzl index bc727d73fc8..010ca57be69 100644 --- a/build/deps/dep_pyodide.bzl +++ b/build/deps/dep_pyodide.bzl @@ -12,23 +12,51 @@ def _pyodide_core(*, version, sha256, **_kwds): ) return [name] -def _pyodide_packages(*, tag, lockfile_hash, all_wheels_hash, **_kwds): - lock_name = "pyodide-lock_%s.json" % tag - http_file( - name = lock_name, - sha256 = lockfile_hash, - url = "https://github.com/cloudflare/pyodide-build-scripts/releases/download/%s/pyodide-lock.json" % tag, - ) +# Base URL the build downloads the stdlib wheels from. +PYTHON_PACKAGES_URL = "https://pyodide-capnp-bin.edgeworker.net/" + +def _stdlib_wheels_repo_impl(rctx): + # Built-in Python package support has been removed, so workers can no longer + # request arbitrary packages. The checked-in lock files + # (src/pyodide/python-lock/) are pre-filtered to contain exactly the + # packages that are still loaded at runtime (the CPython stdlib modules and + # the shared libraries they depend on). We download just those wheels. They + # are embedded directly into the Pyodide bundle. 
+ lock = json.decode(rctx.read(rctx.attr.lockfile)) + for pkg in lock["packages"].values(): + file_name = pkg["file_name"] + rctx.download( + url = "%spython-package-bucket/%s/%s" % (PYTHON_PACKAGES_URL, rctx.attr.tag, file_name), + output = file_name, + sha256 = pkg["sha256"], + ) + rctx.file("BUILD.bazel", """\ +filegroup( + name = "whls", + srcs = glob(["*"], exclude = ["BUILD.bazel"]), + visibility = ["//visibility:public"], +) - # Use @workerd prefix on build_file so we can use this from edgeworker too - archive_name = "all_pyodide_wheels_%s" % tag - http_archive( - name = archive_name, - build_file = "@workerd//:build/BUILD.all_pyodide_wheels", - sha256 = all_wheels_hash, - urls = ["https://github.com/cloudflare/pyodide-build-scripts/releases/download/%s/all_wheels.zip" % tag], +# Individual wheels, so they can be embedded into the Pyodide bundle one data module at a time. +exports_files(glob(["*"], exclude = ["BUILD.bazel"])) +""") + +_stdlib_wheels_repo = repository_rule( + implementation = _stdlib_wheels_repo_impl, + attrs = { + "tag": attr.string(mandatory = True), + "lockfile": attr.label(mandatory = True, allow_single_file = True), + }, +) + +def _pyodide_packages(*, tag, **_kwds): + name = "all_pyodide_wheels_%s" % tag + _stdlib_wheels_repo( + name = name, + tag = tag, + lockfile = "@workerd//src/pyodide:python-lock/pyodide-lock_%s.json" % tag, ) - return [lock_name, archive_name] + return [name] VENDOR_R2 = "https://pub-25a5b2f2f1b84655b185a505c7a3ad23.r2.dev/" @@ -83,16 +111,10 @@ def _snapshot_http_files_version( baseline_snapshot = None, baseline_snapshot_hash = None, baseline_snapshot_integrity = None, - numpy_snapshot = None, - numpy_snapshot_integrity = None, - fastapi_snapshot = None, - fastapi_snapshot_integrity = None, dedicated_fastapi_snapshot = None, dedicated_fastapi_snapshot_integrity = None, **_kwds): return (_snapshot_http_file(name, "baseline-snapshot/", baseline_snapshot, baseline_snapshot_integrity, baseline_snapshot_hash) + - 
_snapshot_http_file(name, "test-snapshot/", numpy_snapshot, numpy_snapshot_integrity, None) + - _snapshot_http_file(name, "test-snapshot/", fastapi_snapshot, fastapi_snapshot_integrity, None) + _snapshot_http_file(name, "", dedicated_fastapi_snapshot, dedicated_fastapi_snapshot_integrity, None, VENDOR_R2)) def _snapshot_http_files(): diff --git a/build/deps/deps.jsonc b/build/deps/deps.jsonc index e92e34a1422..b127866e403 100644 --- a/build/deps/deps.jsonc +++ b/build/deps/deps.jsonc @@ -56,7 +56,7 @@ "owner": "fastfloat", "repo": "fast_float", "branch": "main", - "freeze_commit": "cb1d42aaa1e14b09e1452cfdef373d051b8c02a4", + "freeze_commit": "05087a303dad9c98768b33c829d398223a649bc6", "build_file_content": "cc_library(name = 'fast_float', hdrs = glob(['include/fast_float/*.h']), visibility = ['//visibility:public'], include_prefix = 'third_party/fast_float/src')", "use_module_bazel_from_bcr": "8.0.2" }, @@ -78,7 +78,7 @@ "use_bazel_dep": true, "owner": "google", "repo": "highway", - "freeze_commit": "84379d1c73de9681b54fbe1c035a23c7bd5d272d", + "freeze_commit": "2607d3b5b0113992fe84d3848859eae13b3b52c1", "use_module_bazel_from_bcr": "1.3.0" }, { @@ -102,7 +102,7 @@ "repo": "perfetto", "patches": [ "//:patches/perfetto/0001-Don-t-attempt-to-use-rules_android.patch", - "//:patches/perfetto/0002-disable-info-level-logging.patch" + "//:patches/perfetto/0002-disable-info-level-logging-re2.patch" ] }, { diff --git a/build/deps/gen/build_deps.MODULE.bazel b/build/deps/gen/build_deps.MODULE.bazel index f8857886eee..ffd40abfffe 100644 --- a/build/deps/gen/build_deps.MODULE.bazel +++ b/build/deps/gen/build_deps.MODULE.bazel @@ -5,7 +5,7 @@ http = use_extension("@//:build/exts/http.bzl", "http") git_repository = use_repo_rule("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") # abseil-cpp -bazel_dep(name = "abseil-cpp", version = "20260107.1") +bazel_dep(name = "abseil-cpp", version = "20260526.0") # apple_support bazel_dep(name = "apple_support", version = 
"2.5.4") @@ -14,10 +14,10 @@ bazel_dep(name = "apple_support", version = "2.5.4") bazel_dep(name = "aspect_rules_esbuild", version = "0.26.0") # aspect_rules_js -bazel_dep(name = "aspect_rules_js", version = "3.1.1") +bazel_dep(name = "aspect_rules_js", version = "3.1.2") # aspect_rules_ts -bazel_dep(name = "aspect_rules_ts", version = "3.8.9") +bazel_dep(name = "aspect_rules_ts", version = "3.8.10") # bazel_lib bazel_dep(name = "bazel_lib", version = "3.3.1") @@ -79,7 +79,7 @@ bazel_dep(name = "rules_nodejs", version = "6.7.4") bazel_dep(name = "rules_oci", version = "2.3.0") # rules_python -bazel_dep(name = "rules_python", version = "2.0.1") +bazel_dep(name = "rules_python", version = "2.0.2") # rules_rust bazel_dep(name = "rules_rust", version = "0.70.0") @@ -91,10 +91,10 @@ bazel_dep(name = "rules_shell", version = "0.8.0") http.archive( name = "wasm_tools_linux_arm64", build_file_content = "exports_files([\"wasm-tools\"])", - sha256 = "cb7a3ae7a79aeb3dbcdb1d06eedea7bb45e6d5c7a21e960e14e45d582b2b9f97", - strip_prefix = "wasm-tools-1.248.0-aarch64-linux", + sha256 = "a6f7684f4bc618068cf9ae09cf3c1ccfe72a97061324c8f7a15f409f6a4c18c3", + strip_prefix = "wasm-tools-1.250.0-aarch64-linux", type = "tgz", - url = "https://github.com/bytecodealliance/wasm-tools/releases/download/v1.248.0/wasm-tools-1.248.0-aarch64-linux.tar.gz", + url = "https://github.com/bytecodealliance/wasm-tools/releases/download/v1.250.0/wasm-tools-1.250.0-aarch64-linux.tar.gz", ) use_repo(http, "wasm_tools_linux_arm64") @@ -102,10 +102,10 @@ use_repo(http, "wasm_tools_linux_arm64") http.archive( name = "wasm_tools_linux_x64", build_file_content = "exports_files([\"wasm-tools\"])", - sha256 = "dcd7d587b0f4644aabc85cd4471cb795de84f36a68ee01201d5261f87c0d6349", - strip_prefix = "wasm-tools-1.248.0-x86_64-linux", + sha256 = "b746c34e7c4162b8812eb29397ebe076834e496a8c46fe68d793379a2741eb50", + strip_prefix = "wasm-tools-1.250.0-x86_64-linux", type = "tgz", - url = 
"https://github.com/bytecodealliance/wasm-tools/releases/download/v1.248.0/wasm-tools-1.248.0-x86_64-linux.tar.gz", + url = "https://github.com/bytecodealliance/wasm-tools/releases/download/v1.250.0/wasm-tools-1.250.0-x86_64-linux.tar.gz", ) use_repo(http, "wasm_tools_linux_x64") @@ -113,10 +113,10 @@ use_repo(http, "wasm_tools_linux_x64") http.archive( name = "wasm_tools_macos_arm64", build_file_content = "exports_files([\"wasm-tools\"])", - sha256 = "4e03e9e342176a9c52e0c25b9707c7f809daeb0f4986742258c69749681efe79", - strip_prefix = "wasm-tools-1.248.0-aarch64-macos", + sha256 = "1efe40e1923a80947db3a9a8b84c64442c539988a25cfd4ebe516d00bb5c4ba3", + strip_prefix = "wasm-tools-1.250.0-aarch64-macos", type = "tgz", - url = "https://github.com/bytecodealliance/wasm-tools/releases/download/v1.248.0/wasm-tools-1.248.0-aarch64-macos.tar.gz", + url = "https://github.com/bytecodealliance/wasm-tools/releases/download/v1.250.0/wasm-tools-1.250.0-aarch64-macos.tar.gz", ) use_repo(http, "wasm_tools_macos_arm64") @@ -124,10 +124,10 @@ use_repo(http, "wasm_tools_macos_arm64") http.archive( name = "wasm_tools_macos_x64", build_file_content = "exports_files([\"wasm-tools\"])", - sha256 = "188568c2990bb4c09a0936d84bfb6255199f97e4844cd45f418b59c3d6238788", - strip_prefix = "wasm-tools-1.248.0-x86_64-macos", + sha256 = "491edeb43ba81154b44da5f8da7dba64a4a3fed5ab4985c58e9a48d1b75ad41b", + strip_prefix = "wasm-tools-1.250.0-x86_64-macos", type = "tgz", - url = "https://github.com/bytecodealliance/wasm-tools/releases/download/v1.248.0/wasm-tools-1.248.0-x86_64-macos.tar.gz", + url = "https://github.com/bytecodealliance/wasm-tools/releases/download/v1.250.0/wasm-tools-1.250.0-x86_64-macos.tar.gz", ) use_repo(http, "wasm_tools_macos_x64") @@ -135,9 +135,9 @@ use_repo(http, "wasm_tools_macos_x64") http.archive( name = "wasm_tools_windows_x64", build_file_content = "exports_files([\"wasm-tools.exe\"])", - sha256 = "09063f9c0bc07f412d58a8c1a0202260231d8a94a9dfb7b81892d517de995c1c", - 
strip_prefix = "wasm-tools-1.248.0-x86_64-windows/", + sha256 = "e6ab7924618d1caeb6eaa9debdf2a20ad9248f830731493776b283e42e1cd62e", + strip_prefix = "wasm-tools-1.250.0-x86_64-windows/", type = "zip", - url = "https://github.com/bytecodealliance/wasm-tools/releases/download/v1.248.0/wasm-tools-1.248.0-x86_64-windows.zip", + url = "https://github.com/bytecodealliance/wasm-tools/releases/download/v1.250.0/wasm-tools-1.250.0-x86_64-windows.zip", ) use_repo(http, "wasm_tools_windows_x64") diff --git a/build/deps/gen/deps.MODULE.bazel b/build/deps/gen/deps.MODULE.bazel index ef4f84cf7b9..e09d1457887 100644 --- a/build/deps/gen/deps.MODULE.bazel +++ b/build/deps/gen/deps.MODULE.bazel @@ -27,10 +27,10 @@ bazel_dep(name = "brotli", version = "1.2.0.bcr.1") # capnp-cpp http.archive( name = "capnp-cpp", - sha256 = "a87651d1772c138643e1fc28ca0cbc8eda0445ca265bc200c083c31a75919386", - strip_prefix = "capnproto-capnproto-911e53d/c++", + sha256 = "b2c065b37cd6daac4d30943bd1574f6e70b59ac1a217c859cef7d0cf7ba94efa", + strip_prefix = "capnproto-capnproto-fd6aad7/c++", type = "tgz", - url = "https://github.com/capnproto/capnproto/tarball/911e53d67841687afe9a349fd8c0d39fe024515a", + url = "https://github.com/capnproto/capnproto/tarball/fd6aad7ca96cf4c9e2bcbd74d4132691bfa8e898", ) use_repo(http, "capnp-cpp") @@ -52,10 +52,10 @@ archive_override( build_file_content = "cc_library(name = 'fast_float', hdrs = glob(['include/fast_float/*.h']), visibility = ['//visibility:public'], include_prefix = 'third_party/fast_float/src')", remote_file_integrity = {"MODULE.bazel": "sha256-Q1BGZO/fpMbPE0libIcTXJuHkmMlxyBFjzlu7iVWjto="}, remote_file_urls = {"MODULE.bazel": ["https://raw.githubusercontent.com/bazelbuild/bazel-central-registry/refs/heads/main/modules/fast_float/8.0.2/MODULE.bazel"]}, - sha256 = "8f1dc06ac2ea1a39343c1bfbd8319134f295677ed04f0a4e63c296f5bd4d20d6", - strip_prefix = "fastfloat-fast_float-cb1d42a", + sha256 = "aa2ab8d370d1011a7a6d4ab90589b298b7e5973fe00041c06ecc7298328c25b4", + 
strip_prefix = "fastfloat-fast_float-05087a3", type = "tgz", - url = "https://github.com/fastfloat/fast_float/tarball/cb1d42aaa1e14b09e1452cfdef373d051b8c02a4", + url = "https://github.com/fastfloat/fast_float/tarball/05087a303dad9c98768b33c829d398223a649bc6", ) # fp16 @@ -77,10 +77,10 @@ archive_override( module_name = "highway", remote_file_integrity = {"MODULE.bazel": "sha256-2UVSfmwaox6VsgqN+q+Ci+ofGKIJCDc+psSq2YsurfQ="}, remote_file_urls = {"MODULE.bazel": ["https://raw.githubusercontent.com/bazelbuild/bazel-central-registry/refs/heads/main/modules/highway/1.3.0/MODULE.bazel"]}, - sha256 = "840fa6f31239aa9f900e2aa8a62330950881501981343f70d7db868529bcd15b", - strip_prefix = "google-highway-84379d1", + sha256 = "4fa8749ccaf7d47c4a9e0ff635d0347eecf432901c92a131f64e30ff5094c2e9", + strip_prefix = "google-highway-2607d3b", type = "tgz", - url = "https://github.com/google/highway/tarball/84379d1c73de9681b54fbe1c035a23c7bd5d272d", + url = "https://github.com/google/highway/tarball/2607d3b5b0113992fe84d3848859eae13b3b52c1", ) # nbytes @@ -111,12 +111,12 @@ archive_override( patch_strip = 1, patches = [ "//:patches/perfetto/0001-Don-t-attempt-to-use-rules_android.patch", - "//:patches/perfetto/0002-disable-info-level-logging.patch", + "//:patches/perfetto/0002-disable-info-level-logging-re2.patch", ], - sha256 = "90aea67f5ac88ae7bb56bc24574beb5cd924a5ae9d861826a6fd151c13b4767b", - strip_prefix = "google-perfetto-b34c975", + sha256 = "daa181f99c264de1edd2e5c67890f79dd2ea728a29ed3a0f1699dc2624b7521e", + strip_prefix = "google-perfetto-4c2a81c", type = "tgz", - url = "https://api.github.com/repos/google/perfetto/tarball/v54.0", + url = "https://api.github.com/repos/google/perfetto/tarball/v56.0", ) # simdutf @@ -136,10 +136,10 @@ bazel_dep(name = "tcmalloc", version = "0.0.0-20250927-12f2552") # workerd-cxx http.archive( name = "workerd-cxx", - sha256 = "31052a6fec0da501196a4f026469b837ef688c49b455fc437cdb70281f6b38cb", - strip_prefix = "cloudflare-workerd-cxx-a53da2e", + 
sha256 = "9646dec14f91a4be66f6232ffa0feaac9f529cc22f507db162c7752ffb4e28dc", + strip_prefix = "cloudflare-workerd-cxx-ece882c", type = "tgz", - url = "https://github.com/cloudflare/workerd-cxx/tarball/a53da2e9d35710dcad089574625b6c01cf9535d3", + url = "https://github.com/cloudflare/workerd-cxx/tarball/ece882c9b1ba99ddaddaeb97fe3da27874eff79e", ) use_repo(http, "workerd-cxx") @@ -148,7 +148,7 @@ bazel_dep(name = "zlib") git_override( module_name = "zlib", build_file = "//:build/BUILD.zlib", - commit = "5c1dfd53066bf58d3d28197f715717dd88762443", + commit = "3246f1b60849cc505e231c5d19d0cbf358093555", patch_strip = 1, patches = [ "//:patches/zlib/0001-Add-dummy-MODULE.bazel.patch", diff --git a/build/deps/python.MODULE.bazel b/build/deps/python.MODULE.bazel index 74f03c39d48..fe68a743049 100644 --- a/build/deps/python.MODULE.bazel +++ b/build/deps/python.MODULE.bazel @@ -18,4 +18,4 @@ pip.parse( use_repo(pip, "py_deps", "v8_python_deps") pyodide = use_extension("//build/deps:dep_pyodide.bzl", "pyodide") -use_repo(pyodide, "all_pyodide_wheels_20240829.4", "all_pyodide_wheels_20250808", "beautifulsoup4_src_0.26.0a2", "beautifulsoup4_src_0.28.2", "beautifulsoup4_src_development", "fastapi_src_0.26.0a2", "fastapi_src_0.28.2", "fastapi_src_development", "pyodide-0.26.0a2", "pyodide-0.28.2", "pyodide-lock_20240829.4.json", "pyodide-lock_20250808.json", "pyodide-snapshot-baseline-4569679fb.bin", "pyodide-snapshot-baseline-61eedf943.bin", "pyodide-snapshot-ew-py-package-snapshot_fastapi-v2.bin", "pyodide-snapshot-ew-py-package-snapshot_numpy-v2.bin", "pyodide-snapshot-package_snapshot_fastapi-a6ccb56fe.bin", "pyodide-snapshot-package_snapshot_numpy-60c9cb28e.bin", "pyodide-snapshot-snapshot_a6b652a95810783f5078b9a5dbd4a07c30718acb4ff724e82c25db7353dd7f2d.bin", "pyodide_0.26.0a2_2024-03-01_79.capnp.bin", "pyodide_0.28.2_2025-01-16_10.capnp.bin", "pyodide_dev.capnp.bin", "pytest-asyncio_src_0.26.0a2", "pytest-asyncio_src_0.28.2", "pytest-asyncio_src_development", 
"python-workers-runtime-sdk_src_0.26.0a2", "python-workers-runtime-sdk_src_0.28.2", "python-workers-runtime-sdk_src_development", "scipy_src_0.26.0a2", "shapely_src_0.28.2", "shapely_src_development") +use_repo(pyodide, "all_pyodide_wheels_20240829.4", "all_pyodide_wheels_20250808", "beautifulsoup4_src_0.26.0a2", "beautifulsoup4_src_0.28.2", "beautifulsoup4_src_development", "fastapi_src_0.26.0a2", "fastapi_src_0.28.2", "fastapi_src_development", "numpy_src_0.28.2", "numpy_src_development", "pyodide-0.26.0a2", "pyodide-0.28.2", "pyodide-snapshot-baseline-4569679fb.bin", "pyodide-snapshot-baseline-61eedf943.bin", "pyodide-snapshot-snapshot_a6b652a95810783f5078b9a5dbd4a07c30718acb4ff724e82c25db7353dd7f2d.bin", "pyodide_0.26.0a2_2024-03-01_83.capnp.bin", "pyodide_0.28.2_2025-01-16_14.capnp.bin", "pyodide_dev.capnp.bin", "pytest-asyncio_src_0.26.0a2", "pytest-asyncio_src_0.28.2", "pytest-asyncio_src_development", "python-workers-runtime-sdk_src_0.26.0a2", "python-workers-runtime-sdk_src_0.28.2", "python-workers-runtime-sdk_src_development", "scipy_src_0.26.0a2", "shapely_src_0.28.2", "shapely_src_development") diff --git a/build/deps/update-deps.py b/build/deps/update-deps.py index c781c7a70d1..e2f6059f3c3 100755 --- a/build/deps/update-deps.py +++ b/build/deps/update-deps.py @@ -658,10 +658,10 @@ def process_config(deps_file): def run(): - if TARGET_FILTER is None: - global GITHUB_ACCESS_TOKEN - GITHUB_ACCESS_TOKEN = read_access_token() + global GITHUB_ACCESS_TOKEN + GITHUB_ACCESS_TOKEN = read_access_token() + if TARGET_FILTER is None: # Clean all generated .bazel files for f in GEN_DIR.glob("*.bazel"): f.unlink() diff --git a/build/deps/v8.MODULE.bazel b/build/deps/v8.MODULE.bazel index bab02ad75e3..45bdd6bad07 100644 --- a/build/deps/v8.MODULE.bazel +++ b/build/deps/v8.MODULE.bazel @@ -18,9 +18,9 @@ http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "ht git_repository = use_repo_rule("@bazel_tools//tools/build_defs/repo:git.bzl", 
"git_repository") -VERSION = "14.8.180" +VERSION = "14.9.207.7" -INTEGRITY = "sha256-VQdRoKnkJKBl0GAChhM2Gk+X5HOnZqh8A3PhTtRXoBs=" +INTEGRITY = "sha256-VYxDt58kH84sbpuTa8tsfNzwqh2mcDSGP1Sg0Gh7u3M=" PATCHES = [ "0001-Allow-manually-setting-ValueDeserializer-format-vers.patch", @@ -58,8 +58,7 @@ PATCHES = [ "0033-Return-false-on-Object.hasOwnProperty-with-intercept.patch", "0034-Remove-V8-MODULE.bazel-llvm-toolchain-and-libcxx-rep.patch", "0035-Remove-libcxx-dep-from-defs.bzl-not-resolvable-via-h.patch", - "0036-Fix-non-conforming-braced-init-list-in-value_or.patch", - "0037-Fix-non-portable-std-atomic_flag-construction-in-run.patch", + "0036-Fix-non-portable-std-atomic_flag-construction-in-run.patch", ] http_archive( diff --git a/build/perfetto/MODULE.bazel b/build/perfetto/MODULE.bazel index 6e170ea0ef2..2ec92934a0d 100644 --- a/build/perfetto/MODULE.bazel +++ b/build/perfetto/MODULE.bazel @@ -2,4 +2,4 @@ module(name = "perfetto_cfg") bazel_dep(name = "rules_python", version = "1.6.3") bazel_dep(name = "rules_cc", version = "0.2.11") -bazel_dep(name = "protobuf", version = "34.1") +bazel_dep(name = "protobuf", version = "35.0") diff --git a/build/perfetto/perfetto_cfg.bzl b/build/perfetto/perfetto_cfg.bzl index 33783eddb22..3c32482ae5e 100644 --- a/build/perfetto/perfetto_cfg.bzl +++ b/build/perfetto/perfetto_cfg.bzl @@ -50,6 +50,7 @@ PERFETTO_CONFIG = struct( # overridden in Google internal builds. 
base_platform = ["//:perfetto_base_default_platform"], zlib = ["@zlib//:zlib"], + re2 = [], expat = ["@perfetto_dep_expat//:expat"], jsoncpp = ["@perfetto_dep_jsoncpp//:jsoncpp"], linenoise = ["@perfetto_dep_linenoise//:linenoise"], @@ -60,6 +61,7 @@ PERFETTO_CONFIG = struct( protobuf_lite = ["@protobuf//:protobuf_lite"], protobuf_full = ["@protobuf//:protobuf"], protobuf_descriptor_proto = ["@protobuf//:descriptor_proto"], + error_prone_annotations = [], # The Python targets are empty on the standalone build because we assume # any relevant deps are installed on the system or are not applicable. @@ -115,6 +117,7 @@ PERFETTO_CONFIG = struct( # Go protos have all sorts of strange behavior in Google3 so need special # handling as the rules for other languages do not work for Go. go_proto_library_visibility = "//visibility:private", + trace_processor_proto_library_visibility = ["//visibility:public"], # This struct allows the embedder to customize copts and other args passed # to rules like cc_binary. Prefixed rules (e.g. perfetto_cc_binary) will diff --git a/build/python/packages_20240829_4.bzl b/build/python/packages_20240829_4.bzl deleted file mode 100644 index ebd5facb4c0..00000000000 --- a/build/python/packages_20240829_4.bzl +++ /dev/null @@ -1,175 +0,0 @@ -# This file is automatically generated by the Pyodide build script repo -# (https://github.com/cloudflare/pyodide-build-scripts) and should not be manually modified. 
- -PACKAGES_20240829_4 = { - "info": { - "tag": "20240829.4", - "lockfile_hash": "c2d9c67ea55a672b95a3beb8d66bfbe7df736edb4bb657383b263151e7e85ef4", - "all_wheels_hash": "94653dc8cfbea62b8013db3b8584bc02544ad6fc647b0d83bdee5dfcda5d4b62", - }, - "import_tests": { - "aiohttp": [ - "aiohttp", - ], - "aiosignal": [ - "aiosignal", - ], - "annotated-types": [ - "annotated_types", - ], - "anyio": [ - "anyio", - ], - "async-timeout": [ - "async_timeout", - ], - "attrs": [ - "attr", - "attrs", - ], - "certifi": [ - "certifi", - ], - "charset-normalizer": [ - "charset_normalizer", - ], - "distro": [ - "distro", - ], - "fastapi": [ - "fastapi", - ], - "frozenlist": [ - "frozenlist", - ], - "h11": [ - "h11", - ], - "hashlib": [ - "_hashlib", - ], - "httpcore": [ - "httpcore", - ], - "httpx": [ - "httpx", - ], - "idna": [ - "idna", - ], - "jsonpatch": [ - "jsonpatch", - ], - "jsonpointer": [ - "jsonpointer", - ], - "langchain": [ - "langchain", - ], - "langchain-core": [ - "langchain_core", - "langchain_core.callbacks", - "langchain_core.language_models.llms", - "langchain_core.output_parsers", - "langchain_core.prompts", - ], - "langchain_openai": [ - "langchain_openai", - "langchain_openai.chat_models.base", - ], - "langsmith": [ - "langsmith", - "langsmith.client", - ], - "lzma": [ - "_lzma", - "lzma", - ], - "micropip": [ - "micropip", - ], - "multidict": [ - "multidict", - ], - "numpy": [ - "numpy", - ], - "openai": [ - "openai", - ], - "packaging": [ - "packaging", - ], - "pydantic": [ - "pydantic", - ], - "pydantic_core": [ - "pydantic_core", - ], - "pydecimal": [ - "_pydecimal", - ], - "pydoc_data": [ - "pydoc_data", - ], - "pyyaml": [ - "_yaml", - "yaml", - ], - "regex": [ - "regex", - ], - "requests": [ - "requests", - ], - "six": [ - "six", - ], - "sniffio": [ - "sniffio", - ], - "sqlite3": [ - "_sqlite3", - "sqlite3", - ], - "ssl": [ - "_ssl", - "ssl", - ], - "starlette": [ - "starlette", - "starlette.applications", - "starlette.authentication", - 
"starlette.background", - "starlette.concurrency", - "starlette.config", - "starlette.convertors", - "starlette.datastructures", - "starlette.endpoints", - "starlette.exceptions", - "starlette.formparsers", - "starlette.middleware", - "starlette.middleware.base", - "starlette.requests", - "starlette.responses", - "starlette.routing", - "starlette.schemas", - ], - "tenacity": [ - "tenacity", - ], - "tiktoken": [ - "tiktoken", - "tiktoken_ext", - ], - "typing-extensions": [ - "typing_extensions", - ], - "urllib3": [ - "urllib3", - ], - "yarl": [ - "yarl", - ], - }, -} diff --git a/build/python/packages_20250808.bzl b/build/python/packages_20250808.bzl deleted file mode 100644 index 56ab55f7738..00000000000 --- a/build/python/packages_20250808.bzl +++ /dev/null @@ -1,284 +0,0 @@ -# This file is automatically generated by the Pyodide build script repo -# (https://github.com/cloudflare/pyodide-build-scripts) and should not be manually modified. - -PACKAGES_20250808 = { - "import_tests": { - "Jinja2": [ - "jinja2", - ], - "MarkupSafe": [ - "markupsafe", - ], - "aiohappyeyeballs": [ - "aiohappyeyeballs", - ], - "aiohttp": [ - "aiohttp", - ], - "aiohttp-tests": [ - "aiohttp", - ], - "aiosignal": [ - "aiosignal", - ], - "annotated-types": [ - "annotated_types", - ], - "annotated-types-tests": [ - "annotated_types", - ], - "anyio": [ - "anyio", - ], - "async-timeout": [ - "async_timeout", - ], - "attrs": [ - "attr", - "attrs", - ], - "beautifulsoup4": [ - "bs4", - ], - "beautifulsoup4-tests": [ - "bs4", - ], - "certifi": [ - "certifi", - ], - "cffi": [ - "cffi", - ], - "charset-normalizer": [ - "charset_normalizer", - ], - "cryptography": [ - "cryptography", - "cryptography.fernet", - "cryptography.hazmat", - "cryptography.utils", - "cryptography.x509", - ], - "distro": [ - "distro", - ], - "fastapi": [ - "fastapi", - ], - "frozenlist": [ - "frozenlist", - ], - "h11": [ - "h11", - ], - "h11-tests": [ - "h11", - ], - "hashlib": [ - "_hashlib", - ], - "httpcore": [ - 
"httpcore", - ], - "httpx": [ - "httpx", - ], - "idna": [ - "idna", - ], - "jiter": [ - "jiter", - ], - "jsonpatch": [ - "jsonpatch", - ], - "jsonpointer": [ - "jsonpointer", - ], - "langchain": [ - "langchain", - ], - "langchain-community": [ - "langchain_community", - "langchain_community.chat_message_histories", - "langchain_community.utilities", - ], - "langchain-core": [ - "langchain_core", - "langchain_core.callbacks", - "langchain_core.language_models.llms", - "langchain_core.output_parsers", - "langchain_core.prompts", - ], - "langchain-text-splitters": [ - "langchain_text_splitters", - ], - "langchain_openai": [ - "langchain_openai", - "langchain_openai.chat_models.base", - ], - "langsmith": [ - "langsmith", - "langsmith.client", - ], - "lzma": [ - "_lzma", - "lzma", - ], - "micropip": [ - "micropip", - ], - "multidict": [ - "multidict", - ], - "numpy": [ - "numpy", - ], - "openai": [ - "openai", - ], - "packaging": [ - "packaging", - ], - "propcache": [ - "propcache", - ], - "pycparser": [ - "pycparser", - ], - "pydantic": [ - "pydantic", - "pydantic.alias_generators", - "pydantic.aliases", - "pydantic.annotated_handlers", - "pydantic.class_validators", - "pydantic.color", - "pydantic.config", - "pydantic.dataclasses", - "pydantic.datetime_parse", - "pydantic.decorator", - "pydantic.deprecated", - "pydantic.env_settings", - "pydantic.error_wrappers", - "pydantic.errors", - "pydantic.experimental", - "pydantic.fields", - "pydantic.functional_serializers", - "pydantic.functional_validators", - "pydantic.generics", - "pydantic.json", - "pydantic.json_schema", - "pydantic.main", - "pydantic.networks", - "pydantic.parse", - "pydantic.plugin", - "pydantic.root_model", - "pydantic.schema", - "pydantic.tools", - "pydantic.type_adapter", - "pydantic.types", - "pydantic.typing", - "pydantic.utils", - "pydantic.v1", - "pydantic.validate_call_decorator", - "pydantic.validators", - "pydantic.version", - "pydantic.warnings", - ], - "pydantic_core": [ - "pydantic_core", 
- ], - "pydecimal": [ - "_pydecimal", - ], - "pydoc_data": [ - "pydoc_data", - ], - "pyparsing": [ - "pyparsing", - ], - "pyyaml": [ - "_yaml", - "yaml", - ], - "regex": [ - "regex", - ], - "regex-tests": [ - "regex", - ], - "requests": [ - "requests", - ], - "requests-toolbelt": [ - "requests_toolbelt", - ], - "setuptools": [ - "_distutils_hack", - "pkg_resources", - "setuptools", - ], - "setuptools-tests": [ - "_distutils_hack", - "pkg_resources", - "setuptools", - ], - "six": [ - "six", - ], - "sniffio": [ - "sniffio", - ], - "sniffio-tests": [ - "sniffio", - ], - "soupsieve": [ - "soupsieve", - ], - "sqlalchemy": [ - "sqlalchemy", - ], - "sqlalchemy-tests": [ - "sqlalchemy", - ], - "sqlite3": [ - "_sqlite3", - "sqlite3", - ], - "ssl": [ - "_ssl", - "ssl", - ], - "starlette": [ - "starlette", - ], - "tblib": [ - "tblib", - ], - "tenacity": [ - "tenacity", - ], - "tiktoken": [ - "tiktoken", - "tiktoken_ext", - ], - "typing-extensions": [ - "typing_extensions", - ], - "urllib3": [ - "urllib3", - "urllib3.contrib.emscripten", - ], - "yarl": [ - "yarl", - ], - "zstandard": [ - "zstandard", - ], - }, - "info": { - "all_wheels_hash": "7228cf17e569e31238f74b00e4cb702f0b4fc1fa55e6a5144be461e75240048b", - "lockfile_hash": "315f5f3922d40253b3d9dae9ecea08110a9764c43fdfb240276d902769684dee", - "tag": "20250808", - }, -} diff --git a/build/python_metadata.bzl b/build/python_metadata.bzl index d9bc7f136ae..ca6d36f8fdb 100644 --- a/build/python_metadata.bzl +++ b/build/python_metadata.bzl @@ -1,8 +1,6 @@ # After updating this file, make sure to run "bazel mod tidy" load("@bazel_lib//lib:base64.bzl", "base64") load("@bazel_lib//lib:strings.bzl", "chr") -load("//:build/python/packages_20240829_4.bzl", "PACKAGES_20240829_4") -load("//:build/python/packages_20250808.bzl", "PACKAGES_20250808") def _chunk(data, length): return [data[i:i + length] for i in range(0, len(data), length)] @@ -24,38 +22,21 @@ PYODIDE_VERSIONS = [ }, ] -# This is the list of all the package metadata that 
we use. +# The below is a list of package tags for the old builtin packages support. # -# IMPORTANT: packages that are present here should never be removed after the package version is -# released to the public. This is so that we don't break workers using those packages. -# -# ORDER MATTERS: the order of the keys in this dictionary matters, older package bundles should come -# first. -_package_lockfiles = [ - PACKAGES_20240829_4, - PACKAGES_20250808, +# Now that built-in package support is gone, the only packages we load are the CPython stdlib +# modules and the shared libraries they depend on. Newer Pyodide versions bundle all of these +# builtin modules directly into the core distribution, so future package bundle versions won't +# need a lock file (or per-package wheel downloads) here at all. +PYTHON_LOCKFILES = [ + { + "tag": "20240829.4", + }, + { + "tag": "20250808", + }, ] -# The below is a list of pyodide-lock.json files for each package bundle version that we support. -# Each of these gets embedded in the workerd and EW binary. -PYTHON_LOCKFILES = [meta["info"] for meta in _package_lockfiles] - -# Used to generate the import tests, where we import each top level name from each package and check -# that it doesn't fail. -PYTHON_IMPORTS_TO_TEST = {meta["info"]["tag"]: meta["import_tests"] for meta in _package_lockfiles} - -# Each new package bundle should contain the same packages as the previous. We verify this -# constraint here. 
-def verify_no_packages_were_removed(): - for curr_info, next_info in zip(_package_lockfiles[:-1], _package_lockfiles[1:]): - curr_pkgs = curr_info["import_tests"] - next_pkgs = next_info["import_tests"] - missing_pkgs = [pkg for pkg in curr_pkgs if pkg not in next_pkgs] - if missing_pkgs: - fail("Some packages from version ", curr_info["info"]["tag"], " missing in version", next_info["info"]["tag"], ":\n", " ", ", ".join(missing_pkgs), "\n\n") - -verify_no_packages_were_removed() - def _bundle_id(*, pyodide_version, pyodide_date, backport, **_kwds): return "%s_%s_%s" % (pyodide_version, pyodide_date, backport) @@ -90,8 +71,6 @@ def _make_bundle_version_info(versions): entry["real_pyodide_version"] = entry["pyodide_version"] entry["feature_flags"] = [entry["flag"]] entry["feature_string_flags"] = [entry["enable_flag_name"]] - if "packages" in entry: - entry["packages"] = entry["packages"]["info"]["tag"] _add_integrity(entry) result[name] = entry _make_vendored_packages(entry) @@ -129,19 +108,15 @@ BUNDLE_VERSION_INFO = _make_bundle_version_info([ "released": True, "pyodide_version": "0.26.0a2", "pyodide_date": "2024-03-01", - "packages": PACKAGES_20240829_4, - "backport": "79", - "integrity": "sha256-LO3jNW3PXEiwHm10GgnssxwKw+v37KMGZBiBwjUReVk=", + "packages": "20240829.4", + "backport": "83", + "integrity": "sha256-b5xYvWAd5U7jolloM/yW2xESIrvmGMRHXzYUktezCGk=", "flag": "pythonWorkers", "enable_flag_name": "python_workers", "emscripten_version": "3.1.52", "python_version": "3.12.1", "baseline_snapshot": "baseline-61eedf943.bin", "baseline_snapshot_hash": "61eedf9432d635bdf091b26efece020b3543429a609fad7af9e8d4de2ec44f47", - "numpy_snapshot": "ew-py-package-snapshot_numpy-v2.bin", - "numpy_snapshot_hash": "5055deb53f404afacba73642fd10e766b123e661847e8fdf4f1ec92d8ca624dc", - "fastapi_snapshot": "ew-py-package-snapshot_fastapi-v2.bin", - "fastapi_snapshot_hash": "d204956a074cd74f7fe72e029e9a82686fcb8a138b509f765e664a03bfdd50fb", "vendored_packages_for_tests": 
VENDORED_VERSION_INDEPENDENT + [ { # Downloaded from https://pub-25a5b2f2f1b84655b185a505c7a3ad23.r2.dev/fastapi-312-vendored-for-ew-testing.zip @@ -161,19 +136,15 @@ BUNDLE_VERSION_INFO = _make_bundle_version_info([ "released": True, "pyodide_version": "0.28.2", "pyodide_date": "2025-01-16", - "packages": PACKAGES_20250808, - "backport": "10", - "integrity": "sha256-k37ELtvRw8fd3QHsMgja0Tl+4QKP1qGTnNdjxUiqb2E=", + "packages": "20250808", + "backport": "14", + "integrity": "sha256-dFxfG3CZ3z3B6fKYJ9SYVMtvGuY+6zZSoElCIbF4xw0=", "flag": "pythonWorkers20250116", "enable_flag_name": "python_workers_20250116", "emscripten_version": "4.0.9", "python_version": "3.13.2", "baseline_snapshot": "baseline-4569679fb.bin", "baseline_snapshot_hash": "4569679fb78a3c5c8dbfa73d57c61c6a5394617632fbac7b5873ba322c85463d", - "numpy_snapshot": "package_snapshot_numpy-60c9cb28e.bin", - "numpy_snapshot_hash": "60c9cb28e6dc1ea6ab38b25471ddaa315b667637c9dd6f94aceb2acc6519c623", - "fastapi_snapshot": "package_snapshot_fastapi-a6ccb56fe.bin", - "fastapi_snapshot_hash": "a6ccb56fe9eac265d139727d0134e8d6432c5fe25c8c0b8ec95252b13493b297", "dedicated_fastapi_snapshot": "snapshot_a6b652a95810783f5078b9a5dbd4a07c30718acb4ff724e82c25db7353dd7f2d.bin", "dedicated_fastapi_snapshot_hash": "4af6f012a5fb32f31a426e6f109e88ae85b18ee3dd131e1caaaad989cd962bbe", "vendored_packages_for_tests": VENDORED_VERSION_INDEPENDENT + [ @@ -182,6 +153,11 @@ BUNDLE_VERSION_INFO = _make_bundle_version_info([ "abi": "3.13", "sha256": "955091f1bd2eb33255ff2633df990bedc96e2f6294e78f2b416078777394f942", }, + { + "name": "numpy", + "abi": "3.13", + "sha256": "dc77accd1313a87dadd2ed31bffad3b698dcb9829804e84fc857a9a669a94d3f", + }, { "name": "shapely", "abi": "3.13", diff --git a/build/tools/clang_tidy/BUILD b/build/tools/clang_tidy/BUILD index 1f995c978aa..5013c6a733f 100644 --- a/build/tools/clang_tidy/BUILD +++ b/build/tools/clang_tidy/BUILD @@ -1 +1,9 @@ +load("@rules_python//python:defs.bzl", "py_binary") + 
exports_files(["clang_tidy_wrapper.sh"]) + +py_binary( + name = "merge_clang_tidy_configs", + srcs = ["merge_clang_tidy_configs.py"], + visibility = ["//visibility:public"], +) diff --git a/build/tools/clang_tidy/clang_tidy.bzl b/build/tools/clang_tidy/clang_tidy.bzl index 6a6b291df0d..e7c2630a424 100644 --- a/build/tools/clang_tidy/clang_tidy.bzl +++ b/build/tools/clang_tidy/clang_tidy.bzl @@ -101,6 +101,7 @@ def _clang_tidy_aspect_impl(target, ctx): ctx.attr._clang_tidy_plugin.files, ] + clang_tidy_config = ctx.attr._clang_tidy_config.files.to_list()[0] plugin_path = ctx.attr._clang_tidy_plugin.files.to_list()[0].path outs = [] @@ -120,7 +121,8 @@ def _clang_tidy_aspect_impl(target, ctx): # clang-tidy arguments # do not print statistics args.add("--quiet") - args.add("--config-file=" + ctx.attr._clang_tidy_config.files.to_list()[0].short_path) + args.add("--experimental-custom-checks") + args.add("--config-file=" + clang_tidy_config.path) if ctx.attr.clang_tidy_args: args.add_all(ctx.attr.clang_tidy_args.split(" ")) @@ -151,11 +153,19 @@ def _clang_tidy_aspect_impl(target, ctx): # TODO(cleanup): These paths provide required includes, but if the toolchain was working # properly we wouldn't need them in the first place... # Linux includes - args.add("-isystem/usr/lib/llvm-19/include/c++/v1") - args.add("-isystem/usr/lib/llvm-19/lib/clang/19/include") + # Prefer the newest installed LLVM headers, but keep older fallbacks for developers on + # older distro/toolchain packages. Clang's include_next searches this list in order. + # As of May '26, LLVM head is version 23, so an upper bound of 24 will be OK for quite a while. + for llvm_version in range(24, 18, -1): + args.add("-isystem/usr/lib/llvm-{}/include/c++/v1".format(llvm_version)) args.add("-isystem/usr/include") args.add("-isystem/usr/include/x86_64-linux-gnu") + # Keep clang resource headers after glibc headers so include_next from the newest resource + # directory cannot skip into an older resource directory with the same include guard.
+ for llvm_version in range(24, 18, -1): + args.add("-isystem/usr/lib/llvm-{}/lib/clang/{}/include".format(llvm_version, llvm_version)) + # macOS includes args.add("-isystem/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/c++/v1") diff --git a/build/tools/clang_tidy/merge_clang_tidy_configs.py b/build/tools/clang_tidy/merge_clang_tidy_configs.py new file mode 100644 index 00000000000..4bf952028c4 --- /dev/null +++ b/build/tools/clang_tidy/merge_clang_tidy_configs.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 +"""Merge clang-tidy config files for Bazel clang-tidy actions. + +The first config is the base config. It is emitted with all of its settings, +except that its `Checks` and `CustomChecks` sections are replaced by merged +sections from every input config. + +Additional configs only contribute: + +* `Checks`: appended to the base checks. A leading `-*` from additional configs + is ignored so that a reusable config cannot reset the primary config's checks. + Duplicate checks are removed, preserving first occurrence. +* `CustomChecks`: appended verbatim after the base config's custom checks. + +All other settings from additional configs are ignored. This lets workerd use +its own `WarningsAsErrors`, `HeaderFilterRegex`, `CheckOptions`, etc. while +still reusing custom checks defined by dependency configs like capnproto's. +""" + +import re +import sys +from pathlib import Path + +TOP_LEVEL_KEY = re.compile(r"^([A-Za-z][A-Za-z0-9_]*)\s*:") +BLOCK_SCALAR = re.compile(r":\s*[>|][1-9]?[+-]?\s*$") + + +def split_sections(lines): + config = { + "preamble": [], + "sections": {}, + } + current_key = None + current_lines = [] + # Avoid treating key-looking lines inside YAML block scalars as new sections. 
+ block_scalar_indent = None + pending_block_scalar_parent_indent = None + + for line in lines: + stripped = line.strip() + indent = len(line) - len(line.lstrip(" ")) + + if block_scalar_indent is not None: + if not stripped or indent >= block_scalar_indent: + current_lines.append(line) + continue + block_scalar_indent = None + + if pending_block_scalar_parent_indent is not None: + if not stripped: + current_lines.append(line) + continue + if indent > pending_block_scalar_parent_indent: + block_scalar_indent = indent + pending_block_scalar_parent_indent = None + current_lines.append(line) + continue + pending_block_scalar_parent_indent = None + + match = TOP_LEVEL_KEY.match(line) + if match: + if current_key is None: + config["preamble"].extend(current_lines) + else: + config["sections"][current_key] = current_lines + current_key = match.group(1) + current_lines = [line] + else: + current_lines.append(line) + + if BLOCK_SCALAR.search(strip_yaml_comment(line)): + pending_block_scalar_parent_indent = indent + + if current_key is None: + config["preamble"].extend(current_lines) + else: + config["sections"][current_key] = current_lines + + return config + + +def strip_yaml_comment(line): + quote = None + index = 0 + while index < len(line): + char = line[index] + if char in ("'", '"'): + if ( + quote == "'" + and char == "'" + and index + 1 < len(line) + and line[index + 1] == "'" + ): + index += 1 + elif quote == char: + quote = None + elif quote is None: + quote = char + elif quote == '"' and char == "\\": + index += 1 + elif char == "#" and quote is None: + return line[:index] + index += 1 + return line + + +def parse_checks(section_lines, skip_reset): + if not section_lines: + return [] + + first = section_lines[0] + _, value = first.split(":", 1) + first_value = value.strip() + if first_value in (">", "|", ">-", "|-", ">+", "|+"): + check_lines = section_lines[1:] + else: + check_lines = [first_value, *section_lines[1:]] + + checks = [] + for line in check_lines: 
+ for part in strip_yaml_comment(line).split(","): + check = part.strip() + if not check: + continue + if skip_reset and check == "-*": + continue + checks.append(check) + return checks + + +def parse_custom_checks(section_lines): + if not section_lines: + return [] + + return [line.rstrip("\n") for line in section_lines[1:]] + + +def emit_checks(checks): + if not checks: + return [] + + lines = ["Checks: >\n"] + for index, check in enumerate(checks): + suffix = "," if index + 1 < len(checks) else "" + lines.append(f" {check}{suffix}\n") + return lines + + +def emit_custom_checks(custom_checks): + if not custom_checks: + return [] + + return ["CustomChecks:\n"] + [line + "\n" for line in custom_checks] + + +def merge_configs(config_paths): + parsed_configs = [] + for path in config_paths: + with Path(path).open(encoding="utf-8") as config: + parsed_configs.append(split_sections(config.readlines())) + + first_config = parsed_configs[0] + checks = [] + custom_checks = [] + + for index, config in enumerate(parsed_configs): + sections = config["sections"] + checks.extend(parse_checks(sections.get("Checks", []), skip_reset=index > 0)) + custom_checks.extend(parse_custom_checks(sections.get("CustomChecks", []))) + + checks = list(dict.fromkeys(checks)) + + output = [] + inserted_checks = False + output.extend(first_config["preamble"]) + + for key, lines in first_config["sections"].items(): + if key == "Checks": + output.extend(emit_checks(checks)) + output.append("\n") + output.extend(emit_custom_checks(custom_checks)) + if custom_checks: + output.append("\n") + inserted_checks = True + elif key != "CustomChecks": + output.extend(lines) + + if not inserted_checks: + output.extend(emit_checks(checks)) + if checks: + output.append("\n") + output.extend(emit_custom_checks(custom_checks)) + + return output + + +def main(): + if len(sys.argv) < 3: + sys.exit("usage: merge_clang_tidy_configs.py OUTPUT CONFIG [CONFIG ...]") + + output_path = sys.argv[1] + config_paths = 
sys.argv[2:] + with Path(output_path).open("w", encoding="utf-8") as output: + output.writelines(merge_configs(config_paths)) + + +if __name__ == "__main__": + main() diff --git a/build/wd_test.bzl b/build/wd_test.bzl index 6091637a8f5..e95b53253ad 100644 --- a/build/wd_test.bzl +++ b/build/wd_test.bzl @@ -1,4 +1,3 @@ -load("@aspect_rules_ts//ts:defs.bzl", "ts_config", "ts_project") load("@workerd//:build/lint_test.bzl", "lint_test") def wd_test( @@ -6,7 +5,6 @@ def wd_test( data = [], name = None, args = [], - ts_deps = [], lint = True, python_snapshot_test = False, generate_default_variant = True, @@ -50,24 +48,6 @@ def wd_test( name = src.removesuffix(".capnp").removesuffix(".wd-test").removesuffix(".ts-wd-test") if len(ts_srcs) != 0: - # NOTE: We intentionally do not use isolated_typecheck here. While isolated_typecheck can - # improve build parallelism by separating transpilation from type-checking, it requires - # isolatedDeclarations in tsconfig (which mandates explicit return type annotations on all - # exports) and uses --noResolve during transpilation. The --noResolve flag prevents - # TypeScript from finding @types/node, breaking IDE support for Node.js imports like - # 'node:assert'. Since wd_test TypeScript files are typically standalone test files (leaf - # nodes in the dependency graph), the parallelization benefits would be minimal anyway. 
- ts_config( - name = name + "@ts_config", - src = "tsconfig.json", - deps = ["@workerd//tools:base-tsconfig"], - ) - ts_project( - name = name + "@ts_project", - srcs = ts_srcs, - tsconfig = ":" + name + "@ts_config", - deps = ["//src/node:node@tsproject"] + ts_deps, - ) data += [js_src.removesuffix(".ts") + ".js" for js_src in ts_srcs] if lint: diff --git a/build/wd_ts_bundle.bzl b/build/wd_ts_bundle.bzl index ed0e319e2cd..9d5faaefc17 100644 --- a/build/wd_ts_bundle.bzl +++ b/build/wd_ts_bundle.bzl @@ -64,6 +64,7 @@ def wd_ts_bundle( srcs = ts_srcs, allow_js = True, declaration = True, + composite = True, tsconfig = ":" + name + "@tsconfig", deps = deps, out_dir = out_dir.removesuffix("/"), diff --git a/build/wd_ts_project.bzl b/build/wd_ts_project.bzl index d3e4b44cb49..25697eedd57 100644 --- a/build/wd_ts_project.bzl +++ b/build/wd_ts_project.bzl @@ -1,7 +1,7 @@ load("@aspect_rules_ts//ts:defs.bzl", "ts_config", "ts_project") load("@workerd//:build/lint_test.bzl", "lint_test") -def wd_ts_project(name, srcs, deps, tsconfig_json, eslintrc_json = None, source_map = True, testonly = False, composite = False): +def wd_ts_project(name, srcs, deps, tsconfig_json, eslintrc_json = None, source_map = True, testonly = False, composite = True): """Bazel rule for a workerd TypeScript project, setting common options""" ts_config( diff --git a/build/wd_ts_test.bzl b/build/wd_ts_test.bzl index cedc3f1ee2d..373292e4eea 100644 --- a/build/wd_ts_test.bzl +++ b/build/wd_ts_test.bzl @@ -2,7 +2,7 @@ load("@aspect_rules_js//js:defs.bzl", "js_test") load("//:build/typescript.bzl", "js_name", "module_name") load("//:build/wd_ts_project.bzl", "wd_ts_project") -def wd_ts_test(src, tsconfig_json, deps = [], eslintrc_json = None, composite = False, **kwargs): +def wd_ts_test(src, tsconfig_json, deps = [], eslintrc_json = None, composite = True, **kwargs): """Bazel rule to compile and run a TypeScript test""" name = module_name(src) diff --git a/cfsetup.yaml b/cfsetup.yaml index 
c312e7ee13d..443fde3c564 100644 --- a/cfsetup.yaml +++ b/cfsetup.yaml @@ -16,6 +16,15 @@ trixie: &default-build - &pre-bazel-write-gcp-creds python3 -c 'import os; p="/tmp/bazel_cache_gcp_creds.json"; fd=os.open(p, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600); os.write(fd, os.environ["GCP_CREDS"].encode()); os.close(fd)' - bazel test -k --config=ci --config=ci-limit-storage --config=ci-linux-common --config=ci-test //... --announce_rc --remote_cache=https://storage.googleapis.com/cloudflare-edgeworker-bazel-build-cache --google_credentials=/tmp/bazel_cache_gcp_creds.json --remote_local_fallback=True --remote_timeout=10 + ci-bazel-x64-release: + nosubmodule: true + base_image: *ci-image-bazel-amd64 + tmpfs_tmp: true + post-cache: + - *pre-bazel-install-deps + - *pre-bazel-write-gcp-creds + - bazel test -k --config=ci --config=ci-limit-storage --config=ci-linux-common --config=ci-test --config=release_linux //... --announce_rc --remote_cache=https://storage.googleapis.com/cloudflare-edgeworker-bazel-build-cache --google_credentials=/tmp/bazel_cache_gcp_creds.json --remote_local_fallback=True --remote_timeout=10 + ci-bazel-x64-asan: nosubmodule: true base_image: *ci-image-bazel-amd64 diff --git a/ci/build.yml b/ci/build.yml index 56b31662869..937cae857fb 100644 --- a/ci/build.yml +++ b/ci/build.yml @@ -32,6 +32,12 @@ include: jobPrefix: "linux-x64" CFSETUP_TARGET: "ci-bazel-x64" + - component: $CI_SERVER_FQDN/cloudflare/ci/cfsetup/build@~latest + inputs: + <<: *cfsetup-input-template + jobPrefix: "linux-x64-release" + CFSETUP_TARGET: "ci-bazel-x64-release" + - component: $CI_SERVER_FQDN/cloudflare/ci/cfsetup/build@~latest inputs: <<: *cfsetup-input-template @@ -91,6 +97,9 @@ linux-x64-build: linux-x64-asan-build: <<: *job-template +linux-x64-release-build: + <<: *job-template + linux-x64-lint-build: <<: *job-template diff --git a/compile_flags.txt b/compile_flags.txt index 2a8ac4e0268..6f852a6ce5e 100644 --- a/compile_flags.txt +++ b/compile_flags.txt @@ -1,7 +1,6 @@ 
-std=c++23 -stdlib=libc++ -xc++ --nostdinc -Ibazel-bin/external/+new_local_repository+com_cloudflare_lol_html/_virtual_includes/lolhtml -Ibazel-bin/external/perfetto+/ -Iexternal/ada-url+/ @@ -15,22 +14,6 @@ -Iexternal/+http+ncrypto/include -isystembazel-bin/external/sqlite3+ -Isrc --isystem/usr/lib/llvm-22/include/c++/v1 --isystem/usr/lib/llvm-22/lib/clang/22/include --isystem/usr/lib/llvm-21/include/c++/v1 --isystem/usr/lib/llvm-21/lib/clang/21/include --isystem/usr/lib/llvm-20/include/c++/v1 --isystem/usr/lib/llvm-20/lib/clang/20/include --isystem/usr/lib/llvm-19/include/c++/v1 --isystem/usr/lib/llvm-19/lib/clang/19/include --isystem/usr/include/x86_64-linux-gnu --isystem/usr/include/aarch64-linux-gnu --isystem/usr/include --isystem/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/c++/v1 --isystem/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/21/include --isystem/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/17/include --isystem/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include --isystem/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/Kernel.framework/Versions/Current/Headers -isystembazel-bin/_virtual_includes/icudata-embed -isystembazel-bin/external/+http+capnp-cpp/src -isystembazel-bin/external/+http+capnp-cpp/src/capnp/_virtual_includes/capnp diff --git a/deps/rust/Cargo.lock b/deps/rust/Cargo.lock index f8bab379876..a45a75c760e 100644 --- a/deps/rust/Cargo.lock +++ b/deps/rust/Cargo.lock @@ -88,9 +88,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +checksum = 
"f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" [[package]] name = "base64" @@ -157,9 +157,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.20.2" +version = "3.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" dependencies = [ "allocator-api2", ] @@ -231,9 +231,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.61" +version = "1.2.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d" +checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" dependencies = [ "find-msvc-tools", "jobserver", @@ -327,9 +327,9 @@ dependencies = [ [[package]] name = "compact_str" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" +checksum = "9dfdd1c2274d9aa354115b09dc9a901d6c5576818cdf70d14cae2bdb47df00ab" dependencies = [ "castaway", "cfg-if", @@ -477,9 +477,9 @@ dependencies = [ [[package]] name = "displaydoc" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f" dependencies = [ "proc-macro2", "quote", @@ -509,9 +509,9 @@ dependencies = [ [[package]] name = "either" -version = "1.15.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" [[package]] name = "embedded-io" @@ -740,9 +740,9 @@ dependencies = [ [[package]] name = 
"hashbrown" -version = "0.17.0" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" [[package]] name = "heck" @@ -758,9 +758,9 @@ checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" [[package]] name = "hstr" -version = "3.0.4" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faa57007c3c9dab34df2fa4c1fb52fe9c34ec5a27ed9d8edea53254b50cd7887" +checksum = "83bb87e4b300d73412f6dcc7022ee7741452b51b155c2b06e5994d0770c2dbe2" dependencies = [ "hashbrown 0.14.5", "new_debug_unreachable", @@ -886,7 +886,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", - "hashbrown 0.17.0", + "hashbrown 0.17.1", ] [[package]] @@ -928,9 +928,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.97" +version = "0.3.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1840c94c045fbcf8ba2812c95db44499f7c64910a912551aaaa541decebcacf" +checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11" dependencies = [ "cfg-if", "futures-util", @@ -958,9 +958,9 @@ checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" [[package]] name = "log" -version = "0.4.29" +version = "0.4.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +checksum = "616ec5685824bcc94416c6d4a7a446eea774a31efd7062c8480ba6fd06d7a6e5" [[package]] name = "lol_html" @@ -993,9 +993,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.8.0" +version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8" [[package]] name = "mime" @@ -1015,9 +1015,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" +checksum = "02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda" dependencies = [ "libc", "wasi", @@ -1032,9 +1032,9 @@ checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" [[package]] name = "nix" -version = "0.31.2" +version = "0.31.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d6d0705320c1e6ba1d912b5e37cf18071b6c2e9b7fa8215a1e8a7651966f5d3" +checksum = "cf20d2fde8ff38632c426f1165ed7436270b44f199fc55284c38276f9db47c3d" dependencies = [ "bitflags", "cfg-if", @@ -1345,7 +1345,7 @@ source = "git+https://github.com/astral-sh/ruff?tag=0.12.1#32c54189cb45a9d0409a1 dependencies = [ "aho-corasick", "bitflags", - "compact_str 0.9.0", + "compact_str 0.9.1", "is-macro", "itertools", "memchr", @@ -1363,7 +1363,7 @@ source = "git+https://github.com/astral-sh/ruff?tag=0.12.1#32c54189cb45a9d0409a1 dependencies = [ "bitflags", "bstr", - "compact_str 0.9.0", + "compact_str 0.9.1", "memchr", "ruff_python_ast", "ruff_python_trivia", @@ -1502,9 +1502,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.149" +version = "1.0.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" dependencies = [ "itoa", "memchr", @@ -1582,9 +1582,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51" dependencies = [ "libc", "windows-sys", @@ -1627,9 +1627,9 @@ dependencies = [ [[package]] name = "swc_atoms" -version = "9.0.0" +version = "9.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4ccbe2ecad10ad7432100f878a107b1d972a8aee83ca53184d00c23a078bb8a" +checksum = "845f31910b5236db42dba106e8277681098d183b9b65b8dfa88ca8abe464aeff" dependencies = [ "hstr", "once_cell", @@ -1638,9 +1638,9 @@ dependencies = [ [[package]] name = "swc_common" -version = "21.0.1" +version = "21.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "078f2144aa2c33ff8485773f1b81b9985fa2d00f4ad60879158ad6897db2de88" +checksum = "da38f2cee8e659bf0ec7f51ec5b37ec58c9127de755d3fe0b2c2353ec9474b09" dependencies = [ "anyhow", "ast_node", @@ -1690,9 +1690,9 @@ dependencies = [ [[package]] name = "swc_ecma_ast" -version = "23.0.0" +version = "23.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39f4173ab7e676eed4938d5ad8bbdf14418f87c9a8d36e6cdda82ac9645912b0" +checksum = "550ee54eab536fe357090fec6d42d083c28cf44cc9bcfa93b1ea5e1606f3b2f7" dependencies = [ "bitflags", "is-macro", @@ -1709,9 +1709,9 @@ dependencies = [ [[package]] name = "swc_ecma_codegen" -version = "26.0.1" +version = "26.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fafbcdd29cc03b0c04860bb0143e781e13a4e2dac03eb8747df520f602e0aa94" +checksum = "39230b073d1d785ac7a905354161e21970d15956e348f46a85deb0da0d4d5132" dependencies = [ "ascii", "compact_str 0.7.1", @@ -1755,9 +1755,9 @@ dependencies = [ [[package]] name = "swc_ecma_parser" -version = "39.0.2" +version = "39.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b13829b24cbdb2d7a08282bd968af8a258fd762c918df9a7b82291d44068bbc" +checksum = 
"bca883cdbd6107a96f60a23fd90623c9a90cf37741fc08a3337cee9bbd6c4c1a" dependencies = [ "bitflags", "either", @@ -1775,9 +1775,9 @@ dependencies = [ [[package]] name = "swc_ecma_transforms_base" -version = "42.0.0" +version = "42.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45e58612045e827e7d3ae9f1dc6ae3590ba9abc6a3d93ff2adf27350ab409822" +checksum = "1f0c8ee943a8f9099391cecef5b3eafc98aba64dfa5f6f7cd336a32989d92d1a" dependencies = [ "better_scoped_tls", "indexmap", @@ -1840,9 +1840,9 @@ dependencies = [ [[package]] name = "swc_ecma_utils" -version = "29.1.0" +version = "29.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e64243f2c9e9c9e631a18b42ad51f62137cf4f57b21fb93b1d58836322c2c81" +checksum = "3d69b480aa02b5ff2ab951478d8e7633eeda42940aeb5fe0386eebc19dd3b1e4" dependencies = [ "dragonbox_ecma", "indexmap", @@ -2027,9 +2027,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.52.2" +version = "1.52.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "110a78583f19d5cdb2c5ccf321d1290344e71313c6c37d43520d386027d18386" +checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe" dependencies = [ "libc", "mio", @@ -2191,9 +2191,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.120" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df52b6d9b87e0c74c9edfa1eb2d9bf85e5d63515474513aa50fa181b3c4f5db1" +checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409" dependencies = [ "cfg-if", "once_cell", @@ -2204,9 +2204,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.120" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b1041f495fb322e64aca85f5756b2172e35cd459376e67f2a6c9dffcedb103" +checksum = 
"916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2214,9 +2214,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.120" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dcd0ff20416988a18ac686d4d4d0f6aae9ebf08a389ff5d29012b05af2a1b41" +checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e" dependencies = [ "bumpalo", "proc-macro2", @@ -2227,9 +2227,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.120" +version = "0.2.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49757b3c82ebf16c57d69365a142940b384176c24df52a087fb748e2085359ea" +checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437" dependencies = [ "unicode-ident", ] @@ -2304,18 +2304,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.48" +version = "0.8.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +checksum = "bce33a6288fa3f072a8c2c7d0f2fdbb90e28298f0135c1f99b96c3db2efcc60b" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.48" +version = "0.8.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +checksum = "8fd425244944f4ab65ccff928e7323354c5a018c75838362fdce749dfad2ee1e" dependencies = [ "proc-macro2", "quote", @@ -2324,9 +2324,9 @@ dependencies = [ [[package]] name = "zerofrom" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" +checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272" dependencies = [ "zerofrom-derive", ] diff --git 
a/docs/jsg.md b/docs/jsg.md index 8469e82a782..e8cfeac8ae2 100644 --- a/docs/jsg.md +++ b/docs/jsg.md @@ -2301,6 +2301,11 @@ v8::Local handle = backing.createHandle(js); ### `jsg::BufferSource` +**Deprecated: Do not use.** The `jsg::BufferSource` and `jsg::BackingStore` APIs are in the +process of being replaced by the `jsg::JsBufferSource`, `jsg::JsArrayBuffer`, and related APIs +in `jsvalue.h`. The `jsg::BufferSource` and `jsg::BackingStore` will be removed once all of the +replacements are fully applied. + Wraps a JavaScript ArrayBuffer or ArrayBufferView, retaining the original reference and supporting detachment. diff --git a/docs/pyodide.md b/docs/pyodide.md deleted file mode 100644 index 61cb9aeeecf..00000000000 --- a/docs/pyodide.md +++ /dev/null @@ -1,15 +0,0 @@ -# Pyodide Package Indices - -workerd is linked against a Pyodide lock file, which is located within an R2 bucket. At build time this lock file is fetched and bundled into the binary. (See WORKSPACE and search for `pyodide-lock.json`) - -If you know where the R2 bucket is (See build/pyodide_bucket.bzl) then the `pyodide-lock.json` file is located inside the root of the R2 directory for the Pyodide package bundle release. - -This lock file contains some information used by workerd to pull in package requirements, including but not limited to: - -- The versions of each package included in the package bundle -- The file names and SHA hashes of each package available for download in the bucket -- What the dependencies are for each package - -## Generating pyodide_bucket.bzl - -We have scripts and GitHub actions set up for building and uploading Pyodide package bundles onto R2. These are available [here](https://github.com/cloudflare/pyodide-build-scripts). Simply follow the instructions on that repo to build a new version of Pyodide or a new package bundle release.
diff --git a/patches/perfetto/0001-Don-t-attempt-to-use-rules_android.patch b/patches/perfetto/0001-Don-t-attempt-to-use-rules_android.patch index 36998fd8b10..603b33cdc62 100644 --- a/patches/perfetto/0001-Don-t-attempt-to-use-rules_android.patch +++ b/patches/perfetto/0001-Don-t-attempt-to-use-rules_android.patch @@ -1,14 +1,14 @@ -From 418089631def5cb0cb92b550f2500bcff1230980 Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Felix Hanau Date: Sat, 15 Nov 2025 16:55:07 -0500 Subject: Don't attempt to use rules_android diff --git a/MODULE.bazel b/MODULE.bazel -index 47f7f25cfd..dc006ebba8 100644 +index f94686dcb5d94d0bc7c9e5113203450086e8fb36..87eb246bfe8502d5a6c667b395a26c218b81e6f5 100644 --- a/MODULE.bazel +++ b/MODULE.bazel -@@ -14,102 +14,10 @@ +@@ -14,10 +14,8 @@ """Perfetto Bazel module configuration for bzlmod.""" @@ -19,6 +19,11 @@ index 47f7f25cfd..dc006ebba8 100644 +module(name = "perfetto") +bazel_dep(name = "perfetto_cfg", version = "0.0.0") + bazel_dep(name = "abseil-cpp", version = "20250127.0", repo_name = "com_google_absl") + git_override( +@@ -28,97 +26,6 @@ git_override( + + bazel_dep(name = "re2", version = "2024-07-02.bcr.1") bazel_dep(name = "bazel_skylib", version = "1.7.1") -bazel_dep(name = "platforms", version = "0.0.10") -bazel_dep(name = "protobuf", version = "31.1", repo_name = "com_google_protobuf") @@ -100,23 +105,24 @@ index 47f7f25cfd..dc006ebba8 100644 -) -maven.install( - name = "perfetto_maven", +- # Use rules_android's aar_import to avoid toolchain type mismatch. 
+- aar_import_bzl_label = "@rules_android//rules:rules.bzl", - artifacts = [ - "androidx.test:runner:1.6.2", - "androidx.test:monitor:1.7.2", - "com.google.truth:truth:1.4.4", - "junit:junit:4.13.2", - "androidx.test.ext:junit:1.2.1", +- "com.google.errorprone:error_prone_annotations:2.36.0", - ], - repositories = [ - "https://maven.google.com", - "https://repo1.maven.org/maven2", - ], -- # Use rules_android's aar_import to avoid toolchain type mismatch. -- aar_import_bzl_label = "@rules_android//rules:rules.bzl", -) -use_repo(maven, "perfetto_maven") diff --git a/bazel/rules.bzl b/bazel/rules.bzl -index 563382a18b..d7e77cf214 100644 +index 958e9bc0aabb47a4039f67e39e4868ea2676d36d..2e11ef26b27597679241b5577d4fff7a7e2331e3 100644 --- a/bazel/rules.bzl +++ b/bazel/rules.bzl @@ -13,9 +13,7 @@ diff --git a/patches/perfetto/0002-disable-info-level-logging.patch b/patches/perfetto/0002-disable-info-level-logging-re2.patch similarity index 59% rename from patches/perfetto/0002-disable-info-level-logging.patch rename to patches/perfetto/0002-disable-info-level-logging-re2.patch index 798104dae25..0afbfd40df5 100644 --- a/patches/perfetto/0002-disable-info-level-logging.patch +++ b/patches/perfetto/0002-disable-info-level-logging-re2.patch @@ -1,11 +1,11 @@ -From cf26a646ce050b734771bfc212e6c9f1cc9f7f14 Mon Sep 17 00:00:00 2001 +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Dan Carney Date: Wed, 17 Dec 2025 10:59:18 +0000 -Subject: disable info level logging +Subject: disable info level logging, re2 diff --git a/include/perfetto/base/build_configs/bazel/perfetto_build_flags.h b/include/perfetto/base/build_configs/bazel/perfetto_build_flags.h -index a9bfdbc2aa..278785fb7b 100644 +index 8fb2bc7cab11ccc7a669ba023bca62569c0b151c..149c2b18c4e8c553dea7f8ab90dd12ea661d3534 100644 --- a/include/perfetto/base/build_configs/bazel/perfetto_build_flags.h +++ b/include/perfetto/base/build_configs/bazel/perfetto_build_flags.h @@ -36,7 +36,7 @@ @@ -17,6 
+17,12 @@ index a9bfdbc2aa..278785fb7b 100644 #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_VERSION_GEN() (1) #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_TP_PERCENTILE() (1) #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_TP_LINENOISE() (1) --- -2.51.0 - +@@ -55,7 +55,7 @@ + #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_ENABLE_RT_MUTEX() (1) + #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_ENABLE_LOCKFREE_TASKRUNNER() (1) + #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_ENABLE_SOCK_INOTIFY() (1) +-#define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_RE2() (1) ++#define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_RE2() (0) + #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_PCRE2() (0) + + struct PerfettoBuildFlag { diff --git a/patches/sqlite/0005-authorizer-rename-to-destination-name.patch b/patches/sqlite/0005-authorizer-rename-to-destination-name.patch new file mode 100644 index 00000000000..e4a6f14fa3d --- /dev/null +++ b/patches/sqlite/0005-authorizer-rename-to-destination-name.patch @@ -0,0 +1,16 @@ +diff --git a/src/alter.c b/src/alter.c +index 7e6ab32557..e5f0eac4e6 100644 +--- a/src/alter.c ++++ b/src/alter.c +@@ -179,8 +179,9 @@ void sqlite3AlterRenameTable( + #endif + + #ifndef SQLITE_OMIT_AUTHORIZATION +- /* Invoke the authorization callback. */ +- if( sqlite3AuthCheck(pParse, SQLITE_ALTER_TABLE, zDb, pTab->zName, 0) ){ ++ /* Invoke the authorization callback. Pass the new table name as the 4th ++ ** argument so the authorizer can reject renames into reserved namespaces. 
*/ ++ if( sqlite3AuthCheck(pParse, SQLITE_ALTER_TABLE, zDb, pTab->zName, zName) ){ + goto exit_rename_table; + } + #endif diff --git a/patches/v8/0001-Allow-manually-setting-ValueDeserializer-format-vers.patch b/patches/v8/0001-Allow-manually-setting-ValueDeserializer-format-vers.patch index ca156bf25d7..fe5c5b51817 100644 --- a/patches/v8/0001-Allow-manually-setting-ValueDeserializer-format-vers.patch +++ b/patches/v8/0001-Allow-manually-setting-ValueDeserializer-format-vers.patch @@ -37,7 +37,7 @@ index 0cb3e045bc46ec732956318b980e749d1847d06d..40ad805c7970cc9379e69f046205836d * Reads raw data in various common formats to the buffer. * Note that integer types are read in base-128 varint format, not with a diff --git a/src/api/api.cc b/src/api/api.cc -index 32fc059d6d2375fd87cb2f263a5846444fd9d0d1..e98a1d272b663f26e41313f10b99cd564793c5f8 100644 +index 688c2b935e2515bef645714ba572b1e9ba0ef076..aa5bedf6064524bff7b711b6c27034bafadd86e4 100644 --- a/src/api/api.cc +++ b/src/api/api.cc @@ -3706,6 +3706,10 @@ uint32_t ValueDeserializer::GetWireFormatVersion() const { @@ -52,7 +52,7 @@ index 32fc059d6d2375fd87cb2f263a5846444fd9d0d1..e98a1d272b663f26e41313f10b99cd56 PrepareForExecutionScope api_scope{context, RCCId::kAPI_ValueDeserializer_ReadValue}; diff --git a/src/objects/value-serializer.h b/src/objects/value-serializer.h -index 43dc34d6189d7332e019db758760eb5c71a9fe99..b84dcc77d4060d13c389b4afed101847c85998da 100644 +index bef656e4ed0de07435da6fb2c044171784ca2cbe..edde26bf9cc19d7a1a50ec780484cd1871602060 100644 --- a/src/objects/value-serializer.h +++ b/src/objects/value-serializer.h @@ -221,6 +221,13 @@ class ValueDeserializer { diff --git a/patches/v8/0002-Allow-manually-setting-ValueSerializer-format-versio.patch b/patches/v8/0002-Allow-manually-setting-ValueSerializer-format-versio.patch index 3ba2899ffa3..1b5b32e383e 100644 --- a/patches/v8/0002-Allow-manually-setting-ValueSerializer-format-versio.patch +++ 
b/patches/v8/0002-Allow-manually-setting-ValueSerializer-format-versio.patch @@ -23,7 +23,7 @@ index 40ad805c7970cc9379e69f046205836dbd760373..596be18adeb3a5a81794aaa44b1d347d * Writes out a header, which includes the format version. */ diff --git a/src/api/api.cc b/src/api/api.cc -index e98a1d272b663f26e41313f10b99cd564793c5f8..73f38d9a2b25d7420b73f96c34e8aa1764a69223 100644 +index aa5bedf6064524bff7b711b6c27034bafadd86e4..8edaf36e3b8f657c94c4c65c7a02a83f4f312a88 100644 --- a/src/api/api.cc +++ b/src/api/api.cc @@ -3578,6 +3578,10 @@ ValueSerializer::ValueSerializer(Isolate* v8_isolate, Delegate* delegate) @@ -38,10 +38,10 @@ index e98a1d272b663f26e41313f10b99cd564793c5f8..73f38d9a2b25d7420b73f96c34e8aa17 void ValueSerializer::SetTreatArrayBufferViewsAsHostObjects(bool mode) { diff --git a/src/objects/value-serializer.cc b/src/objects/value-serializer.cc -index b32852867593bcfd3d0d1b87539d9f904f054aa8..6dc0e13d885aa537a09d580cfc147546cf6fc432 100644 +index 15611eea993e686c6a94e1e0113c97c1588c5830..949a81b3610ff373836e5e248387479bb3c7358a 100644 --- a/src/objects/value-serializer.cc +++ b/src/objects/value-serializer.cc -@@ -298,6 +298,7 @@ ValueSerializer::ValueSerializer(Isolate* isolate, +@@ -303,6 +303,7 @@ ValueSerializer::ValueSerializer(Isolate* isolate, : isolate_(isolate), delegate_(delegate), zone_(isolate->allocator(), ZONE_NAME), @@ -49,7 +49,7 @@ index b32852867593bcfd3d0d1b87539d9f904f054aa8..6dc0e13d885aa537a09d580cfc147546 id_map_(isolate->heap(), ZoneAllocationPolicy(&zone_)), array_buffer_transfer_map_(isolate->heap(), ZoneAllocationPolicy(&zone_)) { -@@ -317,9 +318,17 @@ ValueSerializer::~ValueSerializer() { +@@ -322,9 +323,17 @@ ValueSerializer::~ValueSerializer() { } } @@ -68,23 +68,25 @@ index b32852867593bcfd3d0d1b87539d9f904f054aa8..6dc0e13d885aa537a09d580cfc147546 } void ValueSerializer::SetTreatArrayBufferViewsAsHostObjects(bool mode) { -@@ -1112,10 +1121,12 @@ Maybe ValueSerializer::WriteJSArrayBufferView( +@@ -1110,10 +1119,12 @@ Maybe 
ValueSerializer::WriteJSArrayBufferView( WriteVarint(static_cast(tag)); - WriteVarint(static_cast(view->byte_offset())); - WriteVarint(static_cast(view->byte_length())); + WriteVarint(view->byte_offset()); + WriteVarint(view->byte_length()); - uint32_t flags = +- JSArrayBufferViewIsLengthTracking::encode(view->is_length_tracking()) | +- JSArrayBufferViewIsBackedByRab::encode(view->is_backed_by_rab()); +- WriteVarint(flags); + if (version_ >= 14) { + uint32_t flags = - JSArrayBufferViewIsLengthTracking::encode(view->is_length_tracking()) | - JSArrayBufferViewIsBackedByRab::encode(view->is_backed_by_rab()); -- WriteVarint(flags); ++ JSArrayBufferViewIsLengthTracking::encode(view->is_length_tracking()) | ++ JSArrayBufferViewIsBackedByRab::encode(view->is_backed_by_rab()); + WriteVarint(flags); + } return ThrowIfOutOfMemory(); } diff --git a/src/objects/value-serializer.h b/src/objects/value-serializer.h -index b84dcc77d4060d13c389b4afed101847c85998da..06475f7b9c2a797066f5cfd32b232e5aa55f1f75 100644 +index edde26bf9cc19d7a1a50ec780484cd1871602060..1409bf5d0009f9b663892913ebc1e773921f585b 100644 --- a/src/objects/value-serializer.h +++ b/src/objects/value-serializer.h @@ -54,6 +54,11 @@ class ValueSerializer { diff --git a/patches/v8/0003-Allow-Windows-builds-under-Bazel.patch b/patches/v8/0003-Allow-Windows-builds-under-Bazel.patch index 74197dba5a8..1317fd0d681 100644 --- a/patches/v8/0003-Allow-Windows-builds-under-Bazel.patch +++ b/patches/v8/0003-Allow-Windows-builds-under-Bazel.patch @@ -6,10 +6,10 @@ Subject: Allow Windows builds under Bazel Signed-off-by: James M Snell diff --git a/BUILD.bazel b/BUILD.bazel -index ae9c73762bef46c302a10a85076cbe913617965a..f79d7f3e434a126daa41b8effd6e98f0d487e773 100644 +index b432f8649854f3bf78e6b9eda54b7867c020da12..d0fd6fafc5d130b2a9150fba3433478c75bf4ad2 100644 --- a/BUILD.bazel +++ b/BUILD.bazel -@@ -4107,6 +4107,8 @@ filegroup( +@@ -4116,6 +4116,8 @@ filegroup( "@v8//bazel/config:is_inline_asm_x64": 
["src/heap/base/asm/x64/push_registers_asm.cc"], "@v8//bazel/config:is_inline_asm_arm": ["src/heap/base/asm/arm/push_registers_asm.cc"], "@v8//bazel/config:is_inline_asm_arm64": ["src/heap/base/asm/arm64/push_registers_asm.cc"], diff --git a/patches/v8/0005-Speed-up-V8-bazel-build-by-always-using-target-cfg.patch b/patches/v8/0005-Speed-up-V8-bazel-build-by-always-using-target-cfg.patch index 1c73122d768..f61cbab5625 100644 --- a/patches/v8/0005-Speed-up-V8-bazel-build-by-always-using-target-cfg.patch +++ b/patches/v8/0005-Speed-up-V8-bazel-build-by-always-using-target-cfg.patch @@ -10,7 +10,7 @@ both target and exec configurations as generator tools depend on them. Signed-off-by: James M Snell diff --git a/BUILD.bazel b/BUILD.bazel -index f79d7f3e434a126daa41b8effd6e98f0d487e773..1ab690b670cbb9c717fe1f368f0e073e85830d4a 100644 +index d0fd6fafc5d130b2a9150fba3433478c75bf4ad2..83b2e07dd3911ad7c9bcc4fc019c02c53638d90f 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -19,6 +19,7 @@ load( @@ -21,7 +21,7 @@ index f79d7f3e434a126daa41b8effd6e98f0d487e773..1ab690b670cbb9c717fe1f368f0e073e ) load(":bazel/v8-non-pointer-compression.bzl", "v8_binary_non_pointer_compression") -@@ -4504,22 +4505,20 @@ filegroup( +@@ -4505,22 +4506,20 @@ filegroup( ], ) @@ -50,7 +50,7 @@ index f79d7f3e434a126daa41b8effd6e98f0d487e773..1ab690b670cbb9c717fe1f368f0e073e ) v8_mksnapshot( -@@ -4740,7 +4739,6 @@ v8_binary( +@@ -4741,7 +4740,6 @@ v8_binary( srcs = [ "src/regexp/gen-regexp-special-case.cc", "src/regexp/special-case.h", diff --git a/patches/v8/0006-Implement-Promise-Context-Tagging.patch b/patches/v8/0006-Implement-Promise-Context-Tagging.patch index a35d1e1f891..0d7ce5a9e23 100644 --- a/patches/v8/0006-Implement-Promise-Context-Tagging.patch +++ b/patches/v8/0006-Implement-Promise-Context-Tagging.patch @@ -58,10 +58,10 @@ index 44bde532a6253f7c1891dbb51dc3de21daf7a238..8f620d08c0b8919fc3312c53bd9efa5d #endif // INCLUDE_V8_ISOLATE_H_ diff --git a/src/api/api.cc b/src/api/api.cc -index 
73f38d9a2b25d7420b73f96c34e8aa1764a69223..2c226e1467d952fd80c5356f7993f8af00c5f35d 100644 +index 8edaf36e3b8f657c94c4c65c7a02a83f4f312a88..2bafa0935cb48cd379ea55485c3587e2054febda 100644 --- a/src/api/api.cc +++ b/src/api/api.cc -@@ -12679,6 +12679,25 @@ std::string SourceLocation::ToString() const { +@@ -12690,6 +12690,25 @@ std::string SourceLocation::ToString() const { .str(); } @@ -187,40 +187,40 @@ index f83ee777f596738f1a71606ba61a3a7fdbc2cd30..abfa23d9d5087148a25261e8a4aefdfc return instance; } diff --git a/src/compiler/js-create-lowering.cc b/src/compiler/js-create-lowering.cc -index 9150e7c14af4399897d8b6e474e6503499fcb30a..af2893886586b59ce5efb4758828d017be439522 100644 +index bbf3b14898eaa5caf9e90ff054048a3b197e15d5..56aa2697ba7377430222d09e149ce3bdc72ed116 100644 --- a/src/compiler/js-create-lowering.cc +++ b/src/compiler/js-create-lowering.cc -@@ -1122,10 +1122,12 @@ Reduction JSCreateLowering::ReduceJSCreatePromise(Node* node) { - jsgraph()->EmptyFixedArrayConstant()); - a.Store(AccessBuilder::ForJSObjectOffset(JSPromise::kReactionsOrResultOffset), +@@ -1123,10 +1123,12 @@ Reduction JSCreateLowering::ReduceJSCreatePromise(Node* node) { + a.Store(AccessBuilder::ForJSObjectOffset( + offsetof(JSPromise, reactions_or_result_)), jsgraph()->ZeroConstant()); + a.Store(AccessBuilder::ForJSObjectOffset(JSPromise::kContextTagOffset), + jsgraph()->ZeroConstant()); static_assert(v8::Promise::kPending == 0); - a.Store(AccessBuilder::ForJSObjectOffset(JSPromise::kFlagsOffset), + a.Store(AccessBuilder::ForJSObjectOffset(offsetof(JSPromise, flags_)), jsgraph()->ZeroConstant()); -- static_assert(JSPromise::kHeaderSize == 5 * kTaggedSize); -+ static_assert(JSPromise::kHeaderSize == 6 * kTaggedSize); - for (int offset = JSPromise::kHeaderSize; - offset < JSPromise::kSizeWithEmbedderFields; offset += kTaggedSize) { - a.Store(AccessBuilder::ForJSObjectOffset(offset), +- static_assert(sizeof(JSPromise) == 5 * kTaggedSize); ++ static_assert(sizeof(JSPromise) == 6 * 
kTaggedSize); + for (int offset = static_cast(sizeof(JSPromise)); + offset < static_cast(sizeof(JSPromise)) + + v8::Promise::kEmbedderFieldCount * kEmbedderDataSlotSize; diff --git a/src/diagnostics/objects-printer.cc b/src/diagnostics/objects-printer.cc -index e6b0a5cf4660606752518b976b14d7519a7538c8..ac218930c9de64bfae3a6ccbc9332c794de76018 100644 +index 97d027774870112a7b9050c2cd8bc0f9dc27a971..974359132e4d6c4d7a0aa02fbfe7a1663bb483c7 100644 --- a/src/diagnostics/objects-printer.cc +++ b/src/diagnostics/objects-printer.cc -@@ -996,6 +996,7 @@ void JSPromise::JSPromisePrint(std::ostream& os) { +@@ -1021,6 +1021,7 @@ void JSPromise::JSPromisePrint(std::ostream& os) { } os << "\n - has_handler: " << has_handler(); os << "\n - is_silent: " << is_silent(); + os << "\n - context_tag: " << Brief(context_tag()); - JSObjectPrintBody(os, *this); + JSObjectPrintBody(os, this); } diff --git a/src/execution/isolate-inl.h b/src/execution/isolate-inl.h -index 393b3d611743c86e7760760a41bdd6a6c5216691..5e0c1c62b6168e12af1ad067cd57604c17b17ce2 100644 +index 50b50e7517fa9683b484fc16bbcba309bcdaab3d..7b5988dc0cf5ceadac74136e61a3b20bcf0ac7c0 100644 --- a/src/execution/isolate-inl.h +++ b/src/execution/isolate-inl.h -@@ -133,6 +133,25 @@ bool Isolate::is_execution_terminating() { +@@ -126,6 +126,25 @@ bool Isolate::is_execution_terminating() { i::ReadOnlyRoots(this).termination_exception(); } @@ -247,10 +247,10 @@ index 393b3d611743c86e7760760a41bdd6a6c5216691..5e0c1c62b6168e12af1ad067cd57604c Tagged Isolate::VerifyBuiltinsResult(Tagged result) { if (is_execution_terminating() && !v8_flags.strict_termination_checks) { diff --git a/src/execution/isolate.cc b/src/execution/isolate.cc -index f4812695e9c53a85be0c6e554a99dda317d4807f..51666de8200590c2fc26c38090cbed41238ea489 100644 +index 682c93049ee8c1776bbd1db323eaa4812d15ac83..fd8817a012c2221f35c442bbf4b092a86cb5c23b 100644 --- a/src/execution/isolate.cc +++ b/src/execution/isolate.cc -@@ -629,6 +629,8 @@ void 
Isolate::Iterate(RootVisitor* v, ThreadLocalTop* thread) { +@@ -681,6 +681,8 @@ void Isolate::Iterate(RootVisitor* v, ThreadLocalTop* thread) { FullObjectSlot(&thread->pending_message_)); v->VisitRootPointer(Root::kStackRoots, nullptr, FullObjectSlot(&thread->context_)); @@ -259,7 +259,7 @@ index f4812695e9c53a85be0c6e554a99dda317d4807f..51666de8200590c2fc26c38090cbed41 for (v8::TryCatch* block = thread->try_catch_handler_; block != nullptr; block = block->next_) { -@@ -6233,6 +6235,7 @@ bool Isolate::Init(SnapshotData* startup_snapshot_data, +@@ -6339,6 +6341,7 @@ bool Isolate::Init(SnapshotData* startup_snapshot_data, shared_heap_object_cache_.push_back(ReadOnlyRoots(this).undefined_value()); } @@ -267,7 +267,7 @@ index f4812695e9c53a85be0c6e554a99dda317d4807f..51666de8200590c2fc26c38090cbed41 InitializeThreadLocal(); // Profiler has to be created after ThreadLocal is initialized -@@ -8400,5 +8403,40 @@ void Isolate::PrintNumberStringCacheStats(const char* comment, +@@ -8494,5 +8497,40 @@ void Isolate::PrintNumberStringCacheStats(const char* comment, PrintF("\n"); } @@ -309,10 +309,10 @@ index f4812695e9c53a85be0c6e554a99dda317d4807f..51666de8200590c2fc26c38090cbed41 } // namespace internal } // namespace v8 diff --git a/src/execution/isolate.h b/src/execution/isolate.h -index e11bb4083042e2b6fd4101eed0f0d06cae1b0ef1..633f3f8cdef1eceee6edfc921259b7a9895f5a84 100644 +index d0131fa4e09c8ba6e8ff7e92ae2a68dea9edcf4c..786652f5fe1d337aa92f89ea19f4c8feefea4ce2 100644 --- a/src/execution/isolate.h +++ b/src/execution/isolate.h -@@ -2450,6 +2450,15 @@ class V8_EXPORT_PRIVATE Isolate final : private HiddenFactory { +@@ -2466,6 +2466,15 @@ class V8_EXPORT_PRIVATE Isolate final : private HiddenFactory { v8::ExceptionContext callback_kind); void SetExceptionPropagationCallback(ExceptionPropagationCallback callback); @@ -328,7 +328,7 @@ index e11bb4083042e2b6fd4101eed0f0d06cae1b0ef1..633f3f8cdef1eceee6edfc921259b7a9 #ifdef V8_ENABLE_WASM_SIMD256_REVEC void 
set_wasm_revec_verifier_for_test( compiler::turboshaft::WasmRevecVerifier* verifier) { -@@ -2978,6 +2987,12 @@ class V8_EXPORT_PRIVATE Isolate final : private HiddenFactory { +@@ -3001,6 +3010,12 @@ class V8_EXPORT_PRIVATE Isolate final : private HiddenFactory { bool is_frozen_ = false; @@ -341,7 +341,7 @@ index e11bb4083042e2b6fd4101eed0f0d06cae1b0ef1..633f3f8cdef1eceee6edfc921259b7a9 friend class GlobalSafepoint; friend class heap::HeapTester; friend class IsolateForPointerCompression; -@@ -2985,6 +3000,7 @@ class V8_EXPORT_PRIVATE Isolate final : private HiddenFactory { +@@ -3008,6 +3023,7 @@ class V8_EXPORT_PRIVATE Isolate final : private HiddenFactory { friend class IsolateGroup; friend class TestSerializer; friend class SharedHeapNoClientsTest; @@ -350,10 +350,10 @@ index e11bb4083042e2b6fd4101eed0f0d06cae1b0ef1..633f3f8cdef1eceee6edfc921259b7a9 // The current entered Isolate and its thread data. Do not access these diff --git a/src/heap/factory.cc b/src/heap/factory.cc -index 00f8b2addf9c646b453cee15c1a71d8aeea9d928..40118ddd5b357d3cdead407ae580c9f5856f13e2 100644 +index 5afe0042de2cb045ff86ce4ed380c2dc841a568d..ae04a98df1ed8a290095324b5daeff9146b73686 100644 --- a/src/heap/factory.cc +++ b/src/heap/factory.cc -@@ -4859,6 +4859,12 @@ Handle Factory::NewJSPromiseWithoutHook() { +@@ -5040,6 +5040,12 @@ Handle Factory::NewJSPromiseWithoutHook() { DisallowGarbageCollection no_gc; Tagged raw = *promise; raw->set_reactions_or_result(Smi::zero(), SKIP_WRITE_BARRIER); @@ -367,26 +367,85 @@ index 00f8b2addf9c646b453cee15c1a71d8aeea9d928..40118ddd5b357d3cdead407ae580c9f5 // TODO(v8) remove once embedder data slots are always zero-initialized. 
InitEmbedderFields(*promise, Smi::zero()); diff --git a/src/maglev/maglev-graph-builder.cc b/src/maglev/maglev-graph-builder.cc -index 0168d146584cc3af44e6d21b886e73a072243435..53b178db65cac7ea1a47620442a3fbb7f2687c89 100644 +index 46c258d132a7119d09d899e7eefb3d83e0cbb9fb..92e11e55f8672022282e7a393637136b788e97b3 100644 --- a/src/maglev/maglev-graph-builder.cc +++ b/src/maglev/maglev-graph-builder.cc -@@ -14772,9 +14772,10 @@ VirtualObject* MaglevGraphBuilder::CreateJSPromiseObject() { +@@ -15376,9 +15376,10 @@ VirtualObject* MaglevGraphBuilder::CreateJSPromiseObject() { vobj->set(JSPromise::kElementsOffset, GetRootConstant(RootIndex::kEmptyFixedArray)); - vobj->set(JSPromise::kReactionsOrResultOffset, GetSmiConstant(0)); + vobj->set(offsetof(JSPromise, reactions_or_result_), GetSmiConstant(0)); + vobj->set(JSPromise::kContextTagOffset, GetSmiConstant(0)); static_assert(v8::Promise::kPending == 0); - vobj->set(JSPromise::kFlagsOffset, GetSmiConstant(0)); -- static_assert(JSPromise::kHeaderSize == 5 * kTaggedSize); -+ static_assert(JSPromise::kHeaderSize == 6 * kTaggedSize); - for (int offset = JSPromise::kHeaderSize; - offset < JSPromise::kSizeWithEmbedderFields; offset += kTaggedSize) { - vobj->set(offset, GetSmiConstant(0)); + vobj->set(offsetof(JSPromise, flags_), GetSmiConstant(0)); +- static_assert(sizeof(JSPromise) == 5 * kTaggedSize); ++ static_assert(sizeof(JSPromise) == 6 * kTaggedSize); + for (int offset = sizeof(JSPromise); + offset < static_cast(sizeof(JSPromise)) + + v8::Promise::kEmbedderFieldCount * kEmbedderDataSlotSize; +diff --git a/src/objects/js-promise-inl.h b/src/objects/js-promise-inl.h +index 21dfbffe3795544efb54d1b01dff2925c6de82f2..fc47da509721868f88fbfa0da566b8367d9db354 100644 +--- a/src/objects/js-promise-inl.h ++++ b/src/objects/js-promise-inl.h +@@ -27,6 +27,12 @@ void JSPromise::set_reactions_or_result( + reactions_or_result_.store(this, value, mode); + } + ++Tagged JSPromise::context_tag() const { return context_tag_.load(); } ++ 
++void JSPromise::set_context_tag(Tagged value, WriteBarrierMode mode) { ++ context_tag_.store(this, value, mode); ++} ++ + int JSPromise::flags() const { return flags_.load().value(); } + + void JSPromise::set_flags(int value) { +diff --git a/src/objects/js-promise.h b/src/objects/js-promise.h +index 19e3f89938b04136972d82ee66d8ff8f2c2447b5..fad6740ef43573befce77eb54053f5a43b3bf2b6 100644 +--- a/src/objects/js-promise.h ++++ b/src/objects/js-promise.h +@@ -38,6 +38,10 @@ V8_OBJECT class JSPromise : public JSObjectWithEmbedderSlots { + Tagged> value, + WriteBarrierMode mode = UPDATE_WRITE_BARRIER); + ++ inline Tagged context_tag() const; ++ inline void set_context_tag(Tagged value, ++ WriteBarrierMode mode = UPDATE_WRITE_BARRIER); ++ + inline int flags() const; + inline void set_flags(int value); + +@@ -105,9 +109,16 @@ V8_OBJECT class JSPromise : public JSObjectWithEmbedderSlots { + // Smi 0 terminated list of PromiseReaction objects in case the JSPromise + // was not settled yet, otherwise the result. + TaggedMember> reactions_or_result_; ++ // The context tag (workerd Promise Context Tagging extension). ++ TaggedMember context_tag_; + // SmiTagged. + TaggedMember flags_; + ++ // Back-compat offset constant for the workerd Promise Context Tagging ++ // extension. Defined after the class body, like JSRegExp::kFlagsOffset etc. 
++ static const int kContextTagOffset; ++ ++ + private: + // https://tc39.es/ecma262/#sec-triggerpromisereactions + static Handle TriggerPromiseReactions(Isolate* isolate, +@@ -116,6 +127,9 @@ V8_OBJECT class JSPromise : public JSObjectWithEmbedderSlots { + PromiseReaction::Type type); + } V8_OBJECT_END; + ++inline constexpr int JSPromise::kContextTagOffset = ++ offsetof(JSPromise, context_tag_); ++ + } // namespace internal + } // namespace v8 + diff --git a/src/objects/js-promise.tq b/src/objects/js-promise.tq -index f3078f569e4f8fe919a84190b40e5da31098a5d3..3832d2655e2db35286345c0c1c79b9aa8959a25b 100644 +index 11c4aff5cd69699aa6813498478962005c302e9f..532e163d98a5bc42658824ecdc0fdab96cc48b68 100644 --- a/src/objects/js-promise.tq +++ b/src/objects/js-promise.tq -@@ -32,6 +32,7 @@ extern class JSPromise extends JSObjectWithEmbedderSlots { +@@ -33,6 +33,7 @@ extern class JSPromise extends JSObjectWithEmbedderSlots { // Smi 0 terminated list of PromiseReaction objects in case the JSPromise was // not settled yet, otherwise the result. 
reactions_or_result: Zero|PromiseReaction|JSAny; @@ -394,29 +453,14 @@ index f3078f569e4f8fe919a84190b40e5da31098a5d3..3832d2655e2db35286345c0c1c79b9aa flags: SmiTagged; } -diff --git a/src/objects/value-serializer.cc b/src/objects/value-serializer.cc -index 6dc0e13d885aa537a09d580cfc147546cf6fc432..fc4dc2477d48c97145237e922d3bcba16140c47d 100644 ---- a/src/objects/value-serializer.cc -+++ b/src/objects/value-serializer.cc -@@ -1123,8 +1123,8 @@ Maybe ValueSerializer::WriteJSArrayBufferView( - WriteVarint(static_cast(view->byte_length())); - if (version_ >= 14) { - uint32_t flags = -- JSArrayBufferViewIsLengthTracking::encode(view->is_length_tracking()) | -- JSArrayBufferViewIsBackedByRab::encode(view->is_backed_by_rab()); -+ JSArrayBufferViewIsLengthTracking::encode(view->is_length_tracking()) | -+ JSArrayBufferViewIsBackedByRab::encode(view->is_backed_by_rab()); - WriteVarint(flags); - } - return ThrowIfOutOfMemory(); diff --git a/src/profiler/heap-snapshot-generator.cc b/src/profiler/heap-snapshot-generator.cc -index 694b3b19bb2ddf5ca8db05068aec37252e411dfa..857d659f05cbea5b31631700848dae3d5b3b22b5 100644 +index 8edad48dcf13067ae838e8ffe9372c9da4a754d1..91f9ea62d2d0a94fdf5fafdf688e0c8f3a1b2a3b 100644 --- a/src/profiler/heap-snapshot-generator.cc +++ b/src/profiler/heap-snapshot-generator.cc -@@ -2248,6 +2248,8 @@ void V8HeapExplorer::ExtractJSPromiseReferences(HeapEntry* entry, +@@ -2238,6 +2238,8 @@ void V8HeapExplorer::ExtractJSPromiseReferences(HeapEntry* entry, SetInternalReference(entry, "reactions_or_result", promise->reactions_or_result(), - JSPromise::kReactionsOrResultOffset); + offsetof(JSPromise, reactions_or_result_)); + SetInternalReference(entry, "context_tag", promise->context_tag(), + JSPromise::kContextTagOffset); } @@ -469,7 +513,7 @@ index cbe68d70430188fceab54bf3911c5d617e76cd62..896bac667ce40ef23c8c4fcd6174fcd2 } // namespace internal } // namespace v8 diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h -index 
b3fcf55dc8a5418183bb4aa1c874cb8a075698ed..9599b2c393ba3c68ee69d8441b053e6afa23dbfd 100644 +index 098819e04b21e838b7ed8d03c1897f585bc78444..d91af102ab39d4b4355181bb5cf525a64d3f64d0 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -434,20 +434,22 @@ constexpr bool CanTriggerGC(T... properties) { diff --git a/patches/v8/0008-increase-visibility-of-virtual-method.patch b/patches/v8/0008-increase-visibility-of-virtual-method.patch index d3c77bee7d9..1c881840374 100644 --- a/patches/v8/0008-increase-visibility-of-virtual-method.patch +++ b/patches/v8/0008-increase-visibility-of-virtual-method.patch @@ -9,10 +9,10 @@ v8-platform-wrapper.h implementation. Signed-off-by: James M Snell diff --git a/include/v8-platform.h b/include/v8-platform.h -index 3484e988d9fec1a132a435c63d873225ab07b0ec..99ccec9e23c5b860a890b2c52253edb3e2f6ea90 100644 +index 8f42ac0c878819434fcf075403a698fa4e3a20fc..264d096bcdaffd2b2a88414741d67f18aa3816a8 100644 --- a/include/v8-platform.h +++ b/include/v8-platform.h -@@ -1516,7 +1516,7 @@ class Platform { +@@ -1526,7 +1526,7 @@ class Platform { return &default_observer; } diff --git a/patches/v8/0009-Add-ValueSerializer-SetTreatFunctionsAsHostObjects.patch b/patches/v8/0009-Add-ValueSerializer-SetTreatFunctionsAsHostObjects.patch index 5968b1313a6..be8a4c9f536 100644 --- a/patches/v8/0009-Add-ValueSerializer-SetTreatFunctionsAsHostObjects.patch +++ b/patches/v8/0009-Add-ValueSerializer-SetTreatFunctionsAsHostObjects.patch @@ -30,7 +30,7 @@ index 596be18adeb3a5a81794aaa44b1d347dec6c0c7d..141f138e08de849e3e02b3b2b346e643 * Write raw data in various common formats to the buffer. 
* Note that integer types are written in base-128 varint format, not with a diff --git a/src/api/api.cc b/src/api/api.cc -index 2c226e1467d952fd80c5356f7993f8af00c5f35d..877765bf5f57a2953aa2d1e0869ae5db12e8b6b1 100644 +index 2bafa0935cb48cd379ea55485c3587e2054febda..b512d3d4f1967f0213b0f09d40095fbf61ea0dc9 100644 --- a/src/api/api.cc +++ b/src/api/api.cc @@ -3588,6 +3588,10 @@ void ValueSerializer::SetTreatArrayBufferViewsAsHostObjects(bool mode) { @@ -45,10 +45,10 @@ index 2c226e1467d952fd80c5356f7993f8af00c5f35d..877765bf5f57a2953aa2d1e0869ae5db Local value) { auto i_isolate = i::Isolate::Current(); diff --git a/src/objects/value-serializer.cc b/src/objects/value-serializer.cc -index fc4dc2477d48c97145237e922d3bcba16140c47d..97b7f51664dda24ffb0c94e4033b2eff2ba4daee 100644 +index 949a81b3610ff373836e5e248387479bb3c7358a..f190ac6f694f8c600666ca2685ff6907f020b9aa 100644 --- a/src/objects/value-serializer.cc +++ b/src/objects/value-serializer.cc -@@ -335,6 +335,10 @@ void ValueSerializer::SetTreatArrayBufferViewsAsHostObjects(bool mode) { +@@ -340,6 +340,10 @@ void ValueSerializer::SetTreatArrayBufferViewsAsHostObjects(bool mode) { treat_array_buffer_views_as_host_objects_ = mode; } @@ -59,7 +59,7 @@ index fc4dc2477d48c97145237e922d3bcba16140c47d..97b7f51664dda24ffb0c94e4033b2eff void ValueSerializer::WriteTag(SerializationTag tag) { uint8_t raw_tag = static_cast(tag); WriteRawBytes(&raw_tag, sizeof(raw_tag)); -@@ -604,13 +608,17 @@ Maybe ValueSerializer::WriteJSReceiver( +@@ -609,13 +613,17 @@ Maybe ValueSerializer::WriteJSReceiver( // Eliminate callable and exotic objects, which should not be serialized. 
InstanceType instance_type = receiver->map()->instance_type(); @@ -81,7 +81,7 @@ index fc4dc2477d48c97145237e922d3bcba16140c47d..97b7f51664dda24ffb0c94e4033b2eff } diff --git a/src/objects/value-serializer.h b/src/objects/value-serializer.h -index 06475f7b9c2a797066f5cfd32b232e5aa55f1f75..ddc5f27a80f93bae209f3fe8731d4df4baa58ead 100644 +index 1409bf5d0009f9b663892913ebc1e773921f585b..309df6a18eb7a3c24996b3e30a89ece37c47cc1c 100644 --- a/src/objects/value-serializer.h +++ b/src/objects/value-serializer.h @@ -102,6 +102,15 @@ class ValueSerializer { diff --git a/patches/v8/0010-Modify-where-to-look-for-fp16-dependency.-This-depen.patch b/patches/v8/0010-Modify-where-to-look-for-fp16-dependency.-This-depen.patch index ee09e33616c..d60e0573ee7 100644 --- a/patches/v8/0010-Modify-where-to-look-for-fp16-dependency.-This-depen.patch +++ b/patches/v8/0010-Modify-where-to-look-for-fp16-dependency.-This-depen.patch @@ -8,10 +8,10 @@ Subject: Modify where to look for fp16 dependency. This dependency is normally Signed-off-by: James M Snell diff --git a/BUILD.bazel b/BUILD.bazel -index 1ab690b670cbb9c717fe1f368f0e073e85830d4a..23f52ed12cecfcd7383cc3d389935ca487b8533e 100644 +index 83b2e07dd3911ad7c9bcc4fc019c02c53638d90f..6f917087bd2e188fd291db265cdda4353e9537fa 100644 --- a/BUILD.bazel +++ b/BUILD.bazel -@@ -4130,17 +4130,23 @@ v8_library( +@@ -4139,17 +4139,23 @@ v8_library( ], ) diff --git a/patches/v8/0011-Revert-heap-Add-masm-specific-unwinding-annotations-.patch b/patches/v8/0011-Revert-heap-Add-masm-specific-unwinding-annotations-.patch index a6e1e1056cb..67f8bc173e9 100644 --- a/patches/v8/0011-Revert-heap-Add-masm-specific-unwinding-annotations-.patch +++ b/patches/v8/0011-Revert-heap-Add-masm-specific-unwinding-annotations-.patch @@ -14,10 +14,10 @@ of getting our V8 upgrade unblocked. 
Signed-off-by: James M Snell diff --git a/BUILD.gn b/BUILD.gn -index 382c7d3ed44eab5df1f33082d0d0ef85121bc47c..5d5320c956b322ac9beef18688c9faa0bb10477f 100644 +index dd22a8954e19e836405a7e6a2fcdb3241abbbf3d..a84a278a1c1cff1ec8a6c50239779bb11a03655a 100644 --- a/BUILD.gn +++ b/BUILD.gn -@@ -4638,8 +4638,8 @@ v8_header_set("v8_internal_headers") { +@@ -4646,8 +4646,8 @@ v8_header_set("v8_internal_headers") { "src/tasks/operations-barrier.h", "src/tasks/task-utils.h", "src/torque/runtime-macro-shims.h", @@ -27,7 +27,7 @@ index 382c7d3ed44eab5df1f33082d0d0ef85121bc47c..5d5320c956b322ac9beef18688c9faa0 "src/tracing/trace-id.h", "src/tracing/traced-value.h", "src/tracing/tracing-category-observer.h", -@@ -7560,12 +7560,7 @@ v8_source_set("v8_heap_base") { +@@ -7575,12 +7575,7 @@ v8_source_set("v8_heap_base") { ] if (current_cpu == "x64") { diff --git a/patches/v8/0012-Update-illegal-invocation-error-message-in-v8.patch b/patches/v8/0012-Update-illegal-invocation-error-message-in-v8.patch index 63fc9d23810..db67751b1fb 100644 --- a/patches/v8/0012-Update-illegal-invocation-error-message-in-v8.patch +++ b/patches/v8/0012-Update-illegal-invocation-error-message-in-v8.patch @@ -23,7 +23,7 @@ index 03d61c6130d8b3e082200599771f683536b6ac12..85e1f080247e598e94dfef776bb40beb "Immutable prototype object '%' cannot have their prototype set") \ T(ImportAttributesDuplicateKey, "Import attribute has duplicate key '%'") \ diff --git a/test/cctest/test-api.cc b/test/cctest/test-api.cc -index def78150b205855608f4fe475ecc6e9af5ba20b9..949aa3a74d2233d09061b1ca15dc6a0d8feed554 100644 +index b712a219829f12ce7bfd79b9dee37faab550d67c..5047c1d33ed828ff0579b0f7c3b8b551fee5302d 100644 --- a/test/cctest/test-api.cc +++ b/test/cctest/test-api.cc @@ -223,6 +223,17 @@ THREADED_TEST(IsolateOfContext) { diff --git a/patches/v8/0013-Implement-cross-request-context-promise-resolve-hand.patch b/patches/v8/0013-Implement-cross-request-context-promise-resolve-hand.patch index 09fc6fb366b..826b4b6902d 
100644 --- a/patches/v8/0013-Implement-cross-request-context-promise-resolve-hand.patch +++ b/patches/v8/0013-Implement-cross-request-context-promise-resolve-hand.patch @@ -1,26 +1,12 @@ From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: James M Snell -Date: Mon, 16 Sep 2024 09:56:04 -0700 +Date: Fri, 5 Jun 2026 20:44:30 +0000 Subject: Implement cross-request context promise resolve handling Signed-off-by: James M Snell -diff --git a/BUILD.gn b/BUILD.gn -index 5d5320c956b322ac9beef18688c9faa0bb10477f..bef571374183084957964816e4d6422ec913df05 100644 ---- a/BUILD.gn -+++ b/BUILD.gn -@@ -4638,8 +4638,8 @@ v8_header_set("v8_internal_headers") { - "src/tasks/operations-barrier.h", - "src/tasks/task-utils.h", - "src/torque/runtime-macro-shims.h", -- "src/tracing/trace-event.h", - "src/tracing/trace-event-no-perfetto.h", -+ "src/tracing/trace-event.h", - "src/tracing/trace-id.h", - "src/tracing/traced-value.h", - "src/tracing/tracing-category-observer.h", diff --git a/include/v8-callbacks.h b/include/v8-callbacks.h -index cfba4bb26f865c0e38574f796200ffc5e0dc60fc..d5d937b0e852066b95a62d7bcf49668205a55391 100644 +index cfba4bb26f865c0e38574f796200ffc5e0dc60fc..da110b6546a712c2d1c4acc3d8d35f7ab55b2522 100644 --- a/include/v8-callbacks.h +++ b/include/v8-callbacks.h @@ -536,6 +536,25 @@ using FilterETWSessionByURL2Callback = FilterETWSessionByURLResult (*)( @@ -35,9 +21,9 @@ index cfba4bb26f865c0e38574f796200ffc5e0dc60fc..d5d937b0e852066b95a62d7bcf496682 + * reactions to the resolved promise to be enqueued. The idea is that the + * embedder sets this callback in the case it needs to defer the actual + * scheduling of the reactions to the given promise to a later time. -+ * Importantly, when this callback is invoked, the state of the promise -+ * should have already been updated. We're simply possibly deferring the -+ * enqueue of the reactions to the promise. 
++ * Importantly, when this callback is invoked, the promise is still in ++ * the pending state. The entire settlement (status update, result storage, ++ * and reaction triggering) is deferred to the owning IoContext. + */ +using PromiseCrossContextResolveCallback = Maybe (*)( + v8::Isolate* isolate, Local tag, Local reactions, @@ -63,26 +49,32 @@ index 8f620d08c0b8919fc3312c53bd9efa5d11ded1c6..141fece655b6003921452b493f4879ba Isolate() = delete; ~Isolate() = delete; diff --git a/src/api/api.cc b/src/api/api.cc -index 877765bf5f57a2953aa2d1e0869ae5db12e8b6b1..178b429ad06ea349bb43dff578b27ae46ae14da7 100644 +index b512d3d4f1967f0213b0f09d40095fbf61ea0dc9..929bfca37dd63575524faf7c23586bf29c41fa95 100644 --- a/src/api/api.cc +++ b/src/api/api.cc -@@ -12695,7 +12695,13 @@ Isolate::PromiseContextScope::PromiseContextScope(Isolate* isolate, +@@ -12700,13 +12700,19 @@ void Isolate::SetPromiseCrossContextCallback( + isolate->set_promise_cross_context_callback(callback); + } + ++void Isolate::SetPromiseCrossContextResolveCallback( ++ PromiseCrossContextResolveCallback callback) { ++ i::Isolate* isolate = reinterpret_cast(this); ++ isolate->set_promise_cross_context_resolve_callback(callback); ++} ++ + Isolate::PromiseContextScope::PromiseContextScope(Isolate* isolate, + v8::Local tag) + : isolate_(reinterpret_cast(isolate)) { DCHECK(!isolate_->has_promise_context_tag()); DCHECK(!tag.IsEmpty()); i::Handle handle = Utils::OpenHandle(*tag); - isolate_->set_promise_context_tag(*handle); + isolate_->set_promise_context_tag(handle); -+} -+ -+void Isolate::SetPromiseCrossContextResolveCallback( -+ PromiseCrossContextResolveCallback callback) { -+ i::Isolate* isolate = reinterpret_cast(this); -+ isolate->set_promise_cross_context_resolve_callback(callback); } Isolate::PromiseContextScope::~PromiseContextScope() { diff --git a/src/builtins/promise-abstract-operations.tq b/src/builtins/promise-abstract-operations.tq -index 
59b8d8d5e243cf46a8093c76613ae2ce420e22e8..838382738236c99b557989dcad53a2ffd32757f7 100644 +index 59b8d8d5e243cf46a8093c76613ae2ce420e22e8..fd7418198103f0cf31b47155794db048012cd110 100644 --- a/src/builtins/promise-abstract-operations.tq +++ b/src/builtins/promise-abstract-operations.tq @@ -23,6 +23,9 @@ extern transitioning runtime PromiseRejectEventFromStack( @@ -95,7 +87,15 @@ index 59b8d8d5e243cf46a8093c76613ae2ce420e22e8..838382738236c99b557989dcad53a2ff } // https://tc39.es/ecma262/#sec-promise-abstract-operations -@@ -252,7 +255,8 @@ transitioning builtin RejectPromise( +@@ -246,13 +249,16 @@ extern macro PromiseBuiltinsAssembler:: + transitioning builtin RejectPromise( + implicit context: Context)(promise: JSPromise, reason: JSAny, + debugEvent: Boolean): JSAny { ++ // Assert: The value of promise.[[PromiseState]] is "pending". ++ dcheck(promise.Status() == PromiseState::kPending); + const promiseHookFlags = PromiseHookFlags(); + + // If promise hook is enabled or the debugger is active, let // the runtime handle this operation, which greatly reduces // the complexity here and also avoids a couple of back and // forth between JavaScript and C++ land. @@ -106,25 +106,24 @@ index 59b8d8d5e243cf46a8093c76613ae2ce420e22e8..838382738236c99b557989dcad53a2ff !promise.HasHandler()) { // 7. If promise.[[PromiseIsHandled]] is false, perform diff --git a/src/builtins/promise-resolve.tq b/src/builtins/promise-resolve.tq -index 202180adbbae91a689a667c40d20b4b1b9cb6edd..c93ac5905d7b349d1c59e9fa86b48662313ea1c3 100644 +index 202180adbbae91a689a667c40d20b4b1b9cb6edd..5e618fcc7521d6c9ba15d83cca949099b9320264 100644 --- a/src/builtins/promise-resolve.tq +++ b/src/builtins/promise-resolve.tq -@@ -96,7 +96,9 @@ transitioning builtin ResolvePromise( +@@ -96,7 +96,8 @@ transitioning builtin ResolvePromise( // We also let the runtime handle it if promise == resolution. 
// We can use pointer comparison here, since the {promise} is guaranteed // to be a JSPromise inside this function and thus is reference comparable. - if (IsIsolatePromiseHookEnabledOrDebugIsActiveOrHasAsyncEventDelegate() || -+ + if (ToBoolean(runtime::PromiseResolveContextCheck(promise)) || + IsIsolatePromiseHookEnabledOrDebugIsActiveOrHasAsyncEventDelegate() || TaggedEqual(promise, resolution)) deferred { return runtime::ResolvePromise(promise, resolution); diff --git a/src/execution/isolate-inl.h b/src/execution/isolate-inl.h -index 5e0c1c62b6168e12af1ad067cd57604c17b17ce2..c07ac183137862444753a96a0a80149bf85cc44a 100644 +index 7b5988dc0cf5ceadac74136e61a3b20bcf0ac7c0..cffe632814a0ce4e103038fc5e2c011ccccaeb4f 100644 --- a/src/execution/isolate-inl.h +++ b/src/execution/isolate-inl.h -@@ -133,18 +133,20 @@ bool Isolate::is_execution_terminating() { +@@ -126,18 +126,20 @@ bool Isolate::is_execution_terminating() { i::ReadOnlyRoots(this).termination_exception(); } @@ -150,7 +149,7 @@ index 5e0c1c62b6168e12af1ad067cd57604c17b17ce2..c07ac183137862444753a96a0a80149b } void Isolate::set_promise_cross_context_callback( -@@ -152,6 +154,15 @@ void Isolate::set_promise_cross_context_callback( +@@ -145,6 +147,15 @@ void Isolate::set_promise_cross_context_callback( promise_cross_context_callback_ = callback; } @@ -167,10 +166,10 @@ index 5e0c1c62b6168e12af1ad067cd57604c17b17ce2..c07ac183137862444753a96a0a80149b Tagged Isolate::VerifyBuiltinsResult(Tagged result) { if (is_execution_terminating() && !v8_flags.strict_termination_checks) { diff --git a/src/execution/isolate.cc b/src/execution/isolate.cc -index 51666de8200590c2fc26c38090cbed41238ea489..e5b8c171873e461fdd9ba051b4240f5070b5fe86 100644 +index fd8817a012c2221f35c442bbf4b092a86cb5c23b..d8f0ba3aefd6cc6992f0dcc28937a1b50453785e 100644 --- a/src/execution/isolate.cc +++ b/src/execution/isolate.cc -@@ -629,8 +629,6 @@ void Isolate::Iterate(RootVisitor* v, ThreadLocalTop* thread) { +@@ -681,8 +681,6 @@ void 
Isolate::Iterate(RootVisitor* v, ThreadLocalTop* thread) { FullObjectSlot(&thread->pending_message_)); v->VisitRootPointer(Root::kStackRoots, nullptr, FullObjectSlot(&thread->context_)); @@ -179,21 +178,24 @@ index 51666de8200590c2fc26c38090cbed41238ea489..e5b8c171873e461fdd9ba051b4240f50 for (v8::TryCatch* block = thread->try_catch_handler_; block != nullptr; block = block->next_) { -@@ -8438,5 +8436,20 @@ MaybeHandle Isolate::RunPromiseCrossContextCallback( +@@ -8532,5 +8530,23 @@ MaybeHandle Isolate::RunPromiseCrossContextCallback( return v8::Utils::OpenHandle(*result); } +Maybe Isolate::RunPromiseCrossContextResolveCallback( -+ v8::Isolate* isolate, Handle tag, DirectHandle reactions, ++ v8::Isolate* isolate, Handle tag, DirectHandle promise_or_data, + DirectHandle argument, PromiseReaction::Type type) { + CHECK(promise_cross_context_resolve_callback_ != nullptr); + return promise_cross_context_resolve_callback_( -+ isolate, v8::Utils::ToLocal(tag), v8::Utils::ToLocal(reactions), ++ isolate, v8::Utils::ToLocal(tag), v8::Utils::ToLocal(promise_or_data), + v8::Utils::ToLocal(argument), -+ [type](v8::Isolate* isolate, v8::Local reactions, ++ [type](v8::Isolate* isolate, v8::Local promise_data, + v8::Local argument) { -+ JSPromise::ContinueTriggerPromiseReactions( -+ reinterpret_cast(isolate), Utils::OpenHandle(*reactions), ++ // The deferred action runs in the owning IoContext. Settle the promise ++ // and trigger its reactions here, where the context tag matches. 
++ JSPromise::ContinueSettleAndTriggerReactions( ++ reinterpret_cast(isolate), ++ Cast(Utils::OpenHandle(*promise_data)), + Utils::OpenHandle(*argument), type); + }); +} @@ -201,7 +203,7 @@ index 51666de8200590c2fc26c38090cbed41238ea489..e5b8c171873e461fdd9ba051b4240f50 } // namespace internal } // namespace v8 diff --git a/src/execution/isolate.h b/src/execution/isolate.h -index 633f3f8cdef1eceee6edfc921259b7a9895f5a84..bdd57dc4a0eeff42e1918303fca8167414e3cb62 100644 +index 786652f5fe1d337aa92f89ea19f4c8feefea4ce2..6404d506dbf7183275137bd418b32396ca3ba07f 100644 --- a/src/execution/isolate.h +++ b/src/execution/isolate.h @@ -45,6 +45,7 @@ @@ -212,7 +214,7 @@ index 633f3f8cdef1eceee6edfc921259b7a9895f5a84..bdd57dc4a0eeff42e1918303fca81674 #include "src/objects/tagged.h" #include "src/runtime/runtime.h" #include "src/sandbox/code-pointer-table.h" -@@ -2450,14 +2451,22 @@ class V8_EXPORT_PRIVATE Isolate final : private HiddenFactory { +@@ -2466,15 +2467,24 @@ class V8_EXPORT_PRIVATE Isolate final : private HiddenFactory { v8::ExceptionContext callback_kind); void SetExceptionPropagationCallback(ExceptionPropagationCallback callback); @@ -228,16 +230,18 @@ index 633f3f8cdef1eceee6edfc921259b7a9895f5a84..bdd57dc4a0eeff42e1918303fca81674 + PromiseCrossContextResolveCallback callback); MaybeHandle RunPromiseCrossContextCallback( Handle context, Handle promise); + + Maybe RunPromiseCrossContextResolveCallback( + v8::Isolate* isolate, Handle tag, -+ DirectHandle reactions, DirectHandle argument, ++ DirectHandle promise_or_data, DirectHandle argument, + PromiseReaction::Type type); + + inline bool has_promise_context_resolve_callback(); - ++ #ifdef V8_ENABLE_WASM_SIMD256_REVEC void set_wasm_revec_verifier_for_test( -@@ -2987,9 +2996,11 @@ class V8_EXPORT_PRIVATE Isolate final : private HiddenFactory { + compiler::turboshaft::WasmRevecVerifier* verifier) { +@@ -3010,9 +3020,11 @@ class V8_EXPORT_PRIVATE Isolate final : private HiddenFactory { bool is_frozen_ = false; @@ 
-252,10 +256,10 @@ index 633f3f8cdef1eceee6edfc921259b7a9895f5a84..bdd57dc4a0eeff42e1918303fca81674 class PromiseCrossContextCallbackScope; diff --git a/src/heap/factory.cc b/src/heap/factory.cc -index 40118ddd5b357d3cdead407ae580c9f5856f13e2..6f40f75197ac0e59daf2bbe1c83192c86fb107ef 100644 +index ae04a98df1ed8a290095324b5daeff9146b73686..9c7d4e4790058bab32ac12c679486cc7e9538277 100644 --- a/src/heap/factory.cc +++ b/src/heap/factory.cc -@@ -4857,18 +4857,17 @@ Handle Factory::NewJSPromiseWithoutHook() { +@@ -5038,18 +5038,17 @@ Handle Factory::NewJSPromiseWithoutHook() { Handle promise = Cast(NewJSObject(isolate()->promise_function())); DisallowGarbageCollection no_gc; @@ -280,29 +284,37 @@ index 40118ddd5b357d3cdead407ae580c9f5856f13e2..6f40f75197ac0e59daf2bbe1c83192c8 } diff --git a/src/objects/js-promise.h b/src/objects/js-promise.h -index fd1f207420aae54ada4ccfebfef1f0345e987af1..d7ce50c130f2e32b0e4ab6fe682ac7e740f5586f 100644 +index fad6740ef43573befce77eb54053f5a43b3bf2b6..d87be7cfc2aca92dfca356c0ef2e459e68548e5d 100644 --- a/src/objects/js-promise.h +++ b/src/objects/js-promise.h -@@ -94,6 +94,11 @@ class JSPromise +@@ -105,6 +105,13 @@ V8_OBJECT class JSPromise : public JSObjectWithEmbedderSlots { static_assert(v8::Promise::kFulfilled == 1); static_assert(v8::Promise::kRejected == 2); -+ static void ContinueTriggerPromiseReactions(Isolate* isolate, -+ DirectHandle reactions, -+ DirectHandle argument, -+ PromiseReaction::Type type); ++ // Used by the cross-context settlement deferral: the entire settlement is ++ // deferred to the owning IoContext so that the promise context tag matches ++ // and reactions run in the correct request scope. 
++ static void ContinueSettleAndTriggerReactions( ++ Isolate* isolate, DirectHandle promise, ++ DirectHandle argument, PromiseReaction::Type type); + - private: - // https://tc39.es/ecma262/#sec-triggerpromisereactions - static Handle TriggerPromiseReactions(Isolate* isolate, + public: + // Smi 0 terminated list of PromiseReaction objects in case the JSPromise + // was not settled yet, otherwise the result. diff --git a/src/objects/objects.cc b/src/objects/objects.cc -index ce4beebce1db30e934dede7cc889013a690d1340..081ffe3a15aa21556031d4d6db7951987e8e2ae8 100644 +index e55ab41b8e71abee7a3cfdffe2c98540b871d2b4..8e4c2a6407112fe80a394b9d65f54b4210d769fa 100644 --- a/src/objects/objects.cc +++ b/src/objects/objects.cc -@@ -4677,6 +4677,22 @@ Handle JSPromise::Fulfill(DirectHandle promise, - // 6. Set promise.[[PromiseState]] to "fulfilled". - promise->set_status(Promise::kFulfilled); +@@ -4692,6 +4692,28 @@ Handle JSPromise::Fulfill(DirectHandle promise, + } + #endif ++ // Cross-context check: if the promise belongs to a different request ++ // context, defer the ENTIRE settlement (status update + reaction ++ // triggering) to the owning IoContext. This ensures the promise stays ++ // kPending until settlement runs in the correct context, avoiding the ++ // CHECK(kPending) crash when a cross-context promise is settled while ++ // another request's microtask checkpoint is active. + Handle obj(promise->context_tag(), isolate); + bool needs_promise_context_switch = + !(*obj == Smi::zero() || @@ -312,20 +324,23 @@ index ce4beebce1db30e934dede7cc889013a690d1340..081ffe3a15aa21556031d4d6db795198 + if (isolate + ->RunPromiseCrossContextResolveCallback( + reinterpret_cast(isolate), Cast(obj), -+ reactions, value, PromiseReaction::kFulfill) ++ promise, value, PromiseReaction::kFulfill) + .IsNothing()) { + return {}; + } + return isolate->factory()->undefined_value(); + } + - // 7. Return TriggerPromiseReactions(reactions, value). 
- return TriggerPromiseReactions(isolate, reactions, value, - PromiseReaction::kFulfill); -@@ -4735,6 +4751,22 @@ Handle JSPromise::Reject(DirectHandle promise, - isolate->ReportPromiseReject(promise, reason, kPromiseRejectWithNoHandler); - } + // 1. Assert: The value of promise.[[PromiseState]] is "pending". + CHECK_EQ(Promise::kPending, promise->status()); + +@@ -4744,6 +4766,25 @@ Handle JSPromise::Reject(DirectHandle promise, + isolate->RunAllPromiseHooks(PromiseHookType::kResolve, promise, + isolate->factory()->undefined_value()); ++ // Cross-context check: if the promise belongs to a different request ++ // context, defer the ENTIRE settlement (status update + reaction ++ // triggering) to the owning IoContext. See JSPromise::Fulfill for details. + Handle obj(promise->context_tag(), isolate); + bool needs_promise_context_switch = + !(*obj == Smi::zero() || @@ -335,56 +350,55 @@ index ce4beebce1db30e934dede7cc889013a690d1340..081ffe3a15aa21556031d4d6db795198 + if (isolate + ->RunPromiseCrossContextResolveCallback( + reinterpret_cast(isolate), Cast(obj), -+ reactions, reason, PromiseReaction::kReject) ++ promise, reason, PromiseReaction::kReject) + .IsNothing()) { + return {}; + } + return isolate->factory()->undefined_value(); + } + - // 8. Return TriggerPromiseReactions(reactions, reason). - return TriggerPromiseReactions(isolate, reactions, reason, - PromiseReaction::kReject); -@@ -4843,6 +4875,14 @@ MaybeHandle JSPromise::Resolve(DirectHandle promise, + // 1. Assert: The value of promise.[[PromiseState]] is "pending". + CHECK_EQ(Promise::kPending, promise->status()); + +@@ -4872,6 +4913,33 @@ MaybeHandle JSPromise::Resolve(DirectHandle promise, } // static ++void JSPromise::ContinueSettleAndTriggerReactions( ++ Isolate* isolate, DirectHandle promise, ++ DirectHandle argument, PromiseReaction::Type type) { ++ // This runs in the owning IoContext after cross-context settlement deferral. 
++ // The promise should still be pending — no other path should have settled it ++ // because the deferral kept it in kPending state. ++ CHECK_EQ(Promise::kPending, promise->status()); ++ ++ // Extract reactions before overwriting reactions_or_result. ++ DirectHandle reactions(promise->reactions(), isolate); ++ ++ // Set the result. ++ promise->set_reactions_or_result(Cast(*argument)); ++ ++ if (type == PromiseReaction::kReject) { ++ promise->set_status(Promise::kRejected); ++ if (!promise->has_handler()) { ++ isolate->ReportPromiseReject(Cast(promise), argument, ++ kPromiseRejectWithNoHandler); ++ } ++ } else { ++ promise->set_status(Promise::kFulfilled); ++ } + -+void JSPromise::ContinueTriggerPromiseReactions(Isolate* isolate, -+ DirectHandle reactions, -+ DirectHandle argument, -+ PromiseReaction::Type type) { + TriggerPromiseReactions(isolate, reactions, argument, type); +} + Handle JSPromise::TriggerPromiseReactions( Isolate* isolate, DirectHandle reactions, DirectHandle argument, PromiseReaction::Type type) { -diff --git a/src/objects/value-serializer.cc b/src/objects/value-serializer.cc -index 97b7f51664dda24ffb0c94e4033b2eff2ba4daee..8c0bf0824b200489919f46b18d240c8c5c15a8ec 100644 ---- a/src/objects/value-serializer.cc -+++ b/src/objects/value-serializer.cc -@@ -614,11 +614,12 @@ Maybe ValueSerializer::WriteJSReceiver( - } - return ThrowDataCloneError(MessageTemplate::kDataCloneError, receiver); - } else if (IsSpecialReceiverInstanceType(instance_type) && -- instance_type != JS_SPECIAL_API_OBJECT_TYPE -+ instance_type != JS_SPECIAL_API_OBJECT_TYPE - #if V8_ENABLE_WEBASSEMBLY -- && instance_type != WASM_STRUCT_TYPE && instance_type != WASM_ARRAY_TYPE -+ && instance_type != WASM_STRUCT_TYPE && -+ instance_type != WASM_ARRAY_TYPE - #endif -- ) { -+ ) { - return ThrowDataCloneError(MessageTemplate::kDataCloneError, receiver); - } - diff --git a/src/roots/roots.h b/src/roots/roots.h -index 
47109e31a25db96a56a35a92bf0dabd90e0e42e5..391ad2ebeb504c73e679e80641cbe9b8a2e703a5 100644 +index c0374fe8adb34076c76a8b2d405306a5addb97b7..144f78bd13b743f862015a1164de436c444d8365 100644 --- a/src/roots/roots.h +++ b/src/roots/roots.h -@@ -427,7 +427,8 @@ class RootVisitor; +@@ -450,7 +450,8 @@ class RootVisitor; V(FunctionTemplateInfo, error_stack_getter_fun_template, \ ErrorStackGetterSharedFun) \ V(FunctionTemplateInfo, error_stack_setter_fun_template, \ @@ -459,7 +473,7 @@ index 896bac667ce40ef23c8c4fcd6174fcd2ebc2076f..0168c239decb00e8f5a722f7e2cb2c0f } // namespace internal } // namespace v8 diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h -index 9599b2c393ba3c68ee69d8441b053e6afa23dbfd..1319f166b415c3fe99d0d959615b795df1cf48e0 100644 +index d91af102ab39d4b4355181bb5cf525a64d3f64d0..af64dde6894b637055107cad82e374d3030ee0a8 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -449,7 +449,8 @@ constexpr bool CanTriggerGC(T... properties) { diff --git a/patches/v8/0014-Add-another-slot-in-the-isolate-for-embedder.patch b/patches/v8/0014-Add-another-slot-in-the-isolate-for-embedder.patch index 8d82f7a6852..5cfe99d60e1 100644 --- a/patches/v8/0014-Add-another-slot-in-the-isolate-for-embedder.patch +++ b/patches/v8/0014-Add-another-slot-in-the-isolate-for-embedder.patch @@ -6,10 +6,10 @@ Subject: Add another slot in the isolate for embedder Signed-off-by: James M Snell diff --git a/include/v8-internal.h b/include/v8-internal.h -index d958e8d8dbb78720b0b54b8fc053fba29b286790..b453ac38c288a928f4bdf33d4f573cf7294a377d 100644 +index a4c21eca749c005783f7560e404c9857481f5b36..706c18f5c80cf87ab3570f0b124139e325ba3c1d 100644 --- a/include/v8-internal.h +++ b/include/v8-internal.h -@@ -1027,7 +1027,7 @@ class Internals { +@@ -1053,7 +1053,7 @@ class Internals { // AccessorInfo::data and InterceptorInfo::data field. 
static const int kCallbackInfoDataOffset = 1 * kApiTaggedSize; diff --git a/patches/v8/0015-Add-ValueSerializer-SetTreatProxiesAsHostObjects.patch b/patches/v8/0015-Add-ValueSerializer-SetTreatProxiesAsHostObjects.patch index d1b3c920636..c5ba12f771f 100644 --- a/patches/v8/0015-Add-ValueSerializer-SetTreatProxiesAsHostObjects.patch +++ b/patches/v8/0015-Add-ValueSerializer-SetTreatProxiesAsHostObjects.patch @@ -30,7 +30,7 @@ index 141f138e08de849e3e02b3b2b346e643b9e40c70..bdcb2831c55e21c6d511f56dfc79a507 * Write raw data in various common formats to the buffer. * Note that integer types are written in base-128 varint format, not with a diff --git a/src/api/api.cc b/src/api/api.cc -index 178b429ad06ea349bb43dff578b27ae46ae14da7..8754d87f1db985d4021faf9ce275783ae1229dc8 100644 +index 929bfca37dd63575524faf7c23586bf29c41fa95..726e1c8ee519133450a3bfb2f58404743d80be89 100644 --- a/src/api/api.cc +++ b/src/api/api.cc @@ -3592,6 +3592,10 @@ void ValueSerializer::SetTreatFunctionsAsHostObjects(bool mode) { @@ -45,10 +45,10 @@ index 178b429ad06ea349bb43dff578b27ae46ae14da7..8754d87f1db985d4021faf9ce275783a Local value) { auto i_isolate = i::Isolate::Current(); diff --git a/src/objects/value-serializer.cc b/src/objects/value-serializer.cc -index 8c0bf0824b200489919f46b18d240c8c5c15a8ec..13d1a1340de579b8242bc3193c8c9002ecfd0468 100644 +index f190ac6f694f8c600666ca2685ff6907f020b9aa..bfc160df062c56a59b34794bf9ea1f2de8425ed7 100644 --- a/src/objects/value-serializer.cc +++ b/src/objects/value-serializer.cc -@@ -339,6 +339,10 @@ void ValueSerializer::SetTreatFunctionsAsHostObjects(bool mode) { +@@ -344,6 +344,10 @@ void ValueSerializer::SetTreatFunctionsAsHostObjects(bool mode) { treat_functions_as_host_objects_ = mode; } @@ -59,7 +59,7 @@ index 8c0bf0824b200489919f46b18d240c8c5c15a8ec..13d1a1340de579b8242bc3193c8c9002 void ValueSerializer::WriteTag(SerializationTag tag) { uint8_t raw_tag = static_cast(tag); WriteRawBytes(&raw_tag, sizeof(raw_tag)); -@@ -610,7 +614,12 @@ Maybe 
ValueSerializer::WriteJSReceiver( +@@ -615,7 +619,12 @@ Maybe ValueSerializer::WriteJSReceiver( InstanceType instance_type = receiver->map()->instance_type(); if (IsCallable(*receiver)) { if (treat_functions_as_host_objects_) { @@ -73,7 +73,7 @@ index 8c0bf0824b200489919f46b18d240c8c5c15a8ec..13d1a1340de579b8242bc3193c8c9002 } return ThrowDataCloneError(MessageTemplate::kDataCloneError, receiver); } else if (IsSpecialReceiverInstanceType(instance_type) && -@@ -1288,7 +1297,7 @@ Maybe ValueSerializer::WriteSharedObject( +@@ -1286,7 +1295,7 @@ Maybe ValueSerializer::WriteSharedObject( return ThrowIfOutOfMemory(); } @@ -83,7 +83,7 @@ index 8c0bf0824b200489919f46b18d240c8c5c15a8ec..13d1a1340de579b8242bc3193c8c9002 if (!delegate_) { isolate_->Throw(*isolate_->factory()->NewError( diff --git a/src/objects/value-serializer.h b/src/objects/value-serializer.h -index ddc5f27a80f93bae209f3fe8731d4df4baa58ead..496aab365007a45806264c8d3b981bd7a494f903 100644 +index 309df6a18eb7a3c24996b3e30a89ece37c47cc1c..515356ac3def15ace4167810f9ae1c9dd15feb67 100644 --- a/src/objects/value-serializer.h +++ b/src/objects/value-serializer.h @@ -111,6 +111,15 @@ class ValueSerializer { diff --git a/patches/v8/0017-Enable-V8-shared-linkage.patch b/patches/v8/0017-Enable-V8-shared-linkage.patch index 1e9495152c8..1f393184553 100644 --- a/patches/v8/0017-Enable-V8-shared-linkage.patch +++ b/patches/v8/0017-Enable-V8-shared-linkage.patch @@ -6,10 +6,10 @@ Subject: Enable V8 shared linkage Signed-off-by: James M Snell diff --git a/BUILD.bazel b/BUILD.bazel -index 23f52ed12cecfcd7383cc3d389935ca487b8533e..8049d631cc50f2ca1059f81b59f5955337189d47 100644 +index 6f917087bd2e188fd291db265cdda4353e9537fa..0ed98573e821a3104ce529ef27af88ac80ccacbf 100644 --- a/BUILD.bazel +++ b/BUILD.bazel -@@ -1505,6 +1505,7 @@ filegroup( +@@ -1508,6 +1508,7 @@ filegroup( "src/builtins/constants-table-builder.cc", "src/builtins/constants-table-builder.h", "src/builtins/data-view-ops.h", @@ -17,7 +17,7 @@ index 
23f52ed12cecfcd7383cc3d389935ca487b8533e..8049d631cc50f2ca1059f81b59f59553 "src/builtins/profile-data-reader.h", "src/builtins/superspread.h", "src/codegen/aligned-slot-allocator.cc", -@@ -1690,7 +1691,6 @@ filegroup( +@@ -1693,7 +1694,6 @@ filegroup( "src/execution/futex-emulation.h", "src/execution/interrupts-scope.cc", "src/execution/interrupts-scope.h", @@ -25,7 +25,7 @@ index 23f52ed12cecfcd7383cc3d389935ca487b8533e..8049d631cc50f2ca1059f81b59f59553 "src/execution/isolate.h", "src/execution/isolate-data.h", "src/execution/isolate-data-fields.h", -@@ -3314,7 +3314,6 @@ filegroup( +@@ -3322,7 +3322,6 @@ filegroup( filegroup( name = "v8_compiler_files", srcs = [ @@ -33,7 +33,7 @@ index 23f52ed12cecfcd7383cc3d389935ca487b8533e..8049d631cc50f2ca1059f81b59f59553 "src/compiler/access-builder.cc", "src/compiler/access-builder.h", "src/compiler/access-info.cc", -@@ -3920,8 +3919,6 @@ filegroup( +@@ -3929,8 +3928,6 @@ filegroup( "src/builtins/growable-fixed-array-gen.cc", "src/builtins/growable-fixed-array-gen.h", "src/builtins/number-builtins-reducer-inl.h", @@ -42,7 +42,7 @@ index 23f52ed12cecfcd7383cc3d389935ca487b8533e..8049d631cc50f2ca1059f81b59f59553 "src/builtins/setup-builtins-internal.cc", "src/builtins/torque-csa-header-includes.h", "src/codegen/turboshaft-builtins-assembler-inl.h", -@@ -4193,6 +4190,7 @@ filegroup( +@@ -4202,6 +4199,7 @@ filegroup( "src/snapshot/snapshot-empty.cc", "src/snapshot/static-roots-gen.cc", "src/snapshot/static-roots-gen.h", @@ -50,7 +50,7 @@ index 23f52ed12cecfcd7383cc3d389935ca487b8533e..8049d631cc50f2ca1059f81b59f59553 ], ) -@@ -4303,6 +4301,10 @@ filegroup( +@@ -4312,6 +4310,10 @@ filegroup( name = "noicu/snapshot_files", srcs = [ "src/init/setup-isolate-deserialize.cc", @@ -61,7 +61,7 @@ index 23f52ed12cecfcd7383cc3d389935ca487b8533e..8049d631cc50f2ca1059f81b59f59553 ] + select({ "@v8//bazel/config:v8_target_arm": [ "google3/snapshots/arm/noicu/embedded.S", -@@ -4320,6 +4322,7 @@ filegroup( +@@ -4329,6 +4331,7 @@ filegroup( 
name = "icu/snapshot_files", srcs = [ "src/init/setup-isolate-deserialize.cc", diff --git a/patches/v8/0018-Modify-where-to-look-for-fast_float-and-simdutf.patch b/patches/v8/0018-Modify-where-to-look-for-fast_float-and-simdutf.patch index 2193e8d9c08..0ded2812d84 100644 --- a/patches/v8/0018-Modify-where-to-look-for-fast_float-and-simdutf.patch +++ b/patches/v8/0018-Modify-where-to-look-for-fast_float-and-simdutf.patch @@ -12,10 +12,10 @@ include changes are needed. Signed-off-by: James M Snell diff --git a/BUILD.bazel b/BUILD.bazel -index 8049d631cc50f2ca1059f81b59f5955337189d47..8eb045f0260fc30a786acfabfcae3d62879aaef3 100644 +index 0ed98573e821a3104ce529ef27af88ac80ccacbf..c1eb923ec676d0a982a53d884bb7102fc1cc2c16 100644 --- a/BUILD.bazel +++ b/BUILD.bazel -@@ -4612,17 +4612,19 @@ cc_library( +@@ -4613,17 +4613,19 @@ cc_library( ], ) @@ -46,7 +46,7 @@ index 8049d631cc50f2ca1059f81b59f5955337189d47..8eb045f0260fc30a786acfabfcae3d62 v8_library( name = "v8_libshared", -@@ -4653,15 +4655,15 @@ v8_library( +@@ -4654,15 +4656,15 @@ v8_library( ], deps = [ ":lib_dragonbox", diff --git a/patches/v8/0020-Add-methods-to-get-heap-and-external-memory-sizes-di.patch b/patches/v8/0020-Add-methods-to-get-heap-and-external-memory-sizes-di.patch index 64f0083e214..84a9c439e9f 100644 --- a/patches/v8/0020-Add-methods-to-get-heap-and-external-memory-sizes-di.patch +++ b/patches/v8/0020-Add-methods-to-get-heap-and-external-memory-sizes-di.patch @@ -29,10 +29,10 @@ index 141fece655b6003921452b493f4879baefb9169a..33900f10e20b5046b57643755c0c8d5f * Returns heap profiler for this isolate. Will return NULL until the isolate * is initialized. 
diff --git a/src/api/api.cc b/src/api/api.cc -index 8754d87f1db985d4021faf9ce275783ae1229dc8..e5c24959acc33dd61f42afee2c81d19e3d8332a2 100644 +index 726e1c8ee519133450a3bfb2f58404743d80be89..1e620bc345212093f2fa03e4d91a469e073a29c8 100644 --- a/src/api/api.cc +++ b/src/api/api.cc -@@ -10443,6 +10443,14 @@ void Isolate::GetHeapStatistics(HeapStatistics* heap_statistics) { +@@ -10456,6 +10456,14 @@ void Isolate::GetHeapStatistics(HeapStatistics* heap_statistics) { #endif // V8_ENABLE_WEBASSEMBLY } diff --git a/patches/v8/0021-Port-concurrent-mksnapshot-support.patch b/patches/v8/0021-Port-concurrent-mksnapshot-support.patch index 2579ed771bb..a11939b42dd 100644 --- a/patches/v8/0021-Port-concurrent-mksnapshot-support.patch +++ b/patches/v8/0021-Port-concurrent-mksnapshot-support.patch @@ -6,7 +6,7 @@ Subject: Port concurrent mksnapshot support Change-Id: I57c8158ff5d624e5379e6b072f27ac7a40419522 diff --git a/BUILD.bazel b/BUILD.bazel -index 8eb045f0260fc30a786acfabfcae3d62879aaef3..71db68eef5cce90ec721dabdef446aee224bce9a 100644 +index c1eb923ec676d0a982a53d884bb7102fc1cc2c16..caae1031551408325b65df9d0e19d994d5fb7eef 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -120,6 +120,11 @@ v8_flag(name = "v8_enable_hugepage") @@ -21,7 +21,7 @@ index 8eb045f0260fc30a786acfabfcae3d62879aaef3..71db68eef5cce90ec721dabdef446aee v8_flag(name = "v8_enable_future") # NOTE: Transitions are not recommended in library targets: -@@ -4541,6 +4546,13 @@ v8_mksnapshot( +@@ -4542,6 +4547,13 @@ v8_mksnapshot( "--no-turbo-verify-allocation", ], "//conditions:default": [], diff --git a/patches/v8/0022-Port-V8_USE_ZLIB-support.patch b/patches/v8/0022-Port-V8_USE_ZLIB-support.patch index 834d4bdcd19..51191279128 100644 --- a/patches/v8/0022-Port-V8_USE_ZLIB-support.patch +++ b/patches/v8/0022-Port-V8_USE_ZLIB-support.patch @@ -6,7 +6,7 @@ Subject: Port V8_USE_ZLIB support Change-Id: Icfedf3e90522f1ff5037517a39a5f0e3d44abace diff --git a/BUILD.bazel b/BUILD.bazel -index 
71db68eef5cce90ec721dabdef446aee224bce9a..42ed7c61f0a5bcb5316e624e8c5b29c19d355970 100644 +index caae1031551408325b65df9d0e19d994d5fb7eef..4824be3f8fdec30f3a9defcc057195175360df46 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -162,6 +162,11 @@ v8_flag(name = "v8_enable_verify_predictable") @@ -29,7 +29,7 @@ index 71db68eef5cce90ec721dabdef446aee224bce9a..42ed7c61f0a5bcb5316e624e8c5b29c1 }, defines = [ "GOOGLE3", -@@ -4676,6 +4682,8 @@ v8_library( +@@ -4677,6 +4683,8 @@ v8_library( "@highway//:hwy", "@fast_float", "@simdutf", diff --git a/patches/v8/0023-Modify-where-to-look-for-dragonbox.patch b/patches/v8/0023-Modify-where-to-look-for-dragonbox.patch index 19f369a81f4..35b8a7ca1b3 100644 --- a/patches/v8/0023-Modify-where-to-look-for-dragonbox.patch +++ b/patches/v8/0023-Modify-where-to-look-for-dragonbox.patch @@ -5,10 +5,10 @@ Subject: Modify where to look for dragonbox diff --git a/BUILD.bazel b/BUILD.bazel -index 42ed7c61f0a5bcb5316e624e8c5b29c19d355970..9aca2fe40898d5963e2e0fed5ede9ac9ff0e7103 100644 +index 4824be3f8fdec30f3a9defcc057195175360df46..cef9c9e0141811331f898818d30b1ed70ca24675 100644 --- a/BUILD.bazel +++ b/BUILD.bazel -@@ -4128,14 +4128,9 @@ filegroup( +@@ -4137,14 +4137,9 @@ filegroup( ) v8_library( diff --git a/patches/v8/0026-Implement-additional-Exception-construction-methods.patch b/patches/v8/0026-Implement-additional-Exception-construction-methods.patch index 10ebd9cf5d9..cffe70a1a39 100644 --- a/patches/v8/0026-Implement-additional-Exception-construction-methods.patch +++ b/patches/v8/0026-Implement-additional-Exception-construction-methods.patch @@ -25,10 +25,10 @@ index f240d9a609e92b4a3055256996ad69d8fc14ac49..f8546f34d207e4e2e6fd1c5d8b87b83b /** * Creates an error message for the given exception. 
diff --git a/src/api/api.cc b/src/api/api.cc -index e5c24959acc33dd61f42afee2c81d19e3d8332a2..8afb25744487519738fd58f49f197537ef070b4a 100644 +index 1e620bc345212093f2fa03e4d91a469e073a29c8..7aba7c60f762ddd3693c6237e616252ee37a3680 100644 --- a/src/api/api.cc +++ b/src/api/api.cc -@@ -11328,6 +11328,10 @@ DEFINE_ERROR(WasmCompileError, wasm_compile_error) +@@ -11341,6 +11341,10 @@ DEFINE_ERROR(WasmCompileError, wasm_compile_error) DEFINE_ERROR(WasmLinkError, wasm_link_error) DEFINE_ERROR(WasmRuntimeError, wasm_runtime_error) DEFINE_ERROR(WasmSuspendError, wasm_suspend_error) diff --git a/patches/v8/0028-bind-icu-to-googlesource.patch b/patches/v8/0028-bind-icu-to-googlesource.patch index 8401a781a6c..91fd20928aa 100644 --- a/patches/v8/0028-bind-icu-to-googlesource.patch +++ b/patches/v8/0028-bind-icu-to-googlesource.patch @@ -5,10 +5,10 @@ Subject: bind icu to googlesource diff --git a/BUILD.bazel b/BUILD.bazel -index 9aca2fe40898d5963e2e0fed5ede9ac9ff0e7103..90a1f55b11158ccdf35e13ac4a04505434eb7385 100644 +index cef9c9e0141811331f898818d30b1ed70ca24675..98789ec4cd98d9b076c78f8f4e78ec2c7d7e7614 100644 --- a/BUILD.bazel +++ b/BUILD.bazel -@@ -4653,7 +4653,7 @@ v8_library( +@@ -4654,7 +4654,7 @@ v8_library( copts = ["-Wno-implicit-fallthrough"], icu_deps = [ ":icu/generated_torque_definitions_headers", @@ -17,7 +17,7 @@ index 9aca2fe40898d5963e2e0fed5ede9ac9ff0e7103..90a1f55b11158ccdf35e13ac4a045054 ], icu_srcs = [ ":generated_regexp_special_case", -@@ -4776,7 +4776,7 @@ v8_binary( +@@ -4777,7 +4777,7 @@ v8_binary( ], deps = [ ":v8_libbase", diff --git a/patches/v8/0029-Add-v8-String-IsFlat-API.patch b/patches/v8/0029-Add-v8-String-IsFlat-API.patch index bf12eb3265f..9cb8b20e503 100644 --- a/patches/v8/0029-Add-v8-String-IsFlat-API.patch +++ b/patches/v8/0029-Add-v8-String-IsFlat-API.patch @@ -24,10 +24,10 @@ index 2b443d97d34fc6e69c47b9fd842898b9a2e43449..068adcc87d02e7c3333c3c6633b51be7 enum { kNone = 0, diff --git a/src/api/api.cc b/src/api/api.cc -index 
8afb25744487519738fd58f49f197537ef070b4a..c1b6bb583314a90dfe77e2f2184db9f4cf722d77 100644 +index 7aba7c60f762ddd3693c6237e616252ee37a3680..c44b5b2c8bbca26a962927738d94ca6d3acc3e83 100644 --- a/src/api/api.cc +++ b/src/api/api.cc -@@ -5819,6 +5819,10 @@ bool String::IsOneByte() const { +@@ -5831,6 +5831,10 @@ bool String::IsOneByte() const { return Utils::OpenDirectHandle(this)->IsOneByteRepresentation(); } diff --git a/patches/v8/0031-Add-verify_write_barriers-flag-in-V8-s-bazel-config.patch b/patches/v8/0031-Add-verify_write_barriers-flag-in-V8-s-bazel-config.patch index 90ddb4919b7..cef696f9c9c 100644 --- a/patches/v8/0031-Add-verify_write_barriers-flag-in-V8-s-bazel-config.patch +++ b/patches/v8/0031-Add-verify_write_barriers-flag-in-V8-s-bazel-config.patch @@ -5,7 +5,7 @@ Subject: Add verify_write_barriers flag in V8's bazel config diff --git a/BUILD.bazel b/BUILD.bazel -index 90a1f55b11158ccdf35e13ac4a04505434eb7385..a5a0d7331b52b6228cf23aaacf968f335bf16307 100644 +index 98789ec4cd98d9b076c78f8f4e78ec2c7d7e7614..d285f32154b83e686b0f3805e601210ce5114eb1 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -552,6 +552,7 @@ v8_config( diff --git a/patches/v8/0032-Change-lamba-signature-to-get-around-windows-build-f.patch b/patches/v8/0032-Change-lamba-signature-to-get-around-windows-build-f.patch index c706a5ec44c..306e67b3d4c 100644 --- a/patches/v8/0032-Change-lamba-signature-to-get-around-windows-build-f.patch +++ b/patches/v8/0032-Change-lamba-signature-to-get-around-windows-build-f.patch @@ -5,10 +5,10 @@ Subject: Change lamba signature to get around windows build failure diff --git a/src/objects/backing-store.cc b/src/objects/backing-store.cc -index a1ad5db082346601d7c0a6f688f90deef78edcd6..776ec547f6dc186b17a5c209a6ea51874359a905 100644 +index 8e2c36a7c3d01f34f2717507d46e85dd78075ad3..25be05ef7fdc4b8e8ec23cc0aef76720fef95bf0 100644 --- a/src/objects/backing-store.cc +++ b/src/objects/backing-store.cc -@@ -322,7 +322,7 @@ std::unique_ptr 
BackingStore::TryAllocateAndPartiallyCommitMemory( +@@ -321,7 +321,7 @@ std::unique_ptr BackingStore::TryAllocateAndPartiallyCommitMemory( // For accounting purposes, whether a GC was necessary. bool did_retry = false; diff --git a/patches/v8/0033-Return-false-on-Object.hasOwnProperty-with-intercept.patch b/patches/v8/0033-Return-false-on-Object.hasOwnProperty-with-intercept.patch index fbc53ab118b..1249d63e0c3 100644 --- a/patches/v8/0033-Return-false-on-Object.hasOwnProperty-with-intercept.patch +++ b/patches/v8/0033-Return-false-on-Object.hasOwnProperty-with-intercept.patch @@ -5,7 +5,7 @@ Subject: Return false on Object.hasOwnProperty with interceptors diff --git a/src/objects/js-objects.cc b/src/objects/js-objects.cc -index af6cdd90b7382960d4e230fd9357467d2d155249..066ef6fb2f0524d48b162044f3b829f4f6c466f0 100644 +index 15fce882065d99d1d01e5c138e651118727e9a28..76ceae5d9878df28cd4aa6780fc1ae449a9ea89b 100644 --- a/src/objects/js-objects.cc +++ b/src/objects/js-objects.cc @@ -158,6 +158,9 @@ Maybe JSReceiver::HasOwnProperty(Isolate* isolate, diff --git a/patches/v8/0034-Remove-V8-MODULE.bazel-llvm-toolchain-and-libcxx-rep.patch b/patches/v8/0034-Remove-V8-MODULE.bazel-llvm-toolchain-and-libcxx-rep.patch index e7f1553bfc8..6f5f3f85b3b 100644 --- a/patches/v8/0034-Remove-V8-MODULE.bazel-llvm-toolchain-and-libcxx-rep.patch +++ b/patches/v8/0034-Remove-V8-MODULE.bazel-llvm-toolchain-and-libcxx-rep.patch @@ -7,20 +7,21 @@ These reference third_party/ sources that are not present in the GitHub tarball. Workerd provides its own toolchain, so these are not needed. 
diff --git a/MODULE.bazel b/MODULE.bazel -index 7d7ba53b579605a6f469fe01ddf699d1284110e3..7ddd8259e790ebbfce0bfbb08e08ed4130592f9b 100644 +index b8bf8bd29c7cd5834b20cc2f762f45befd093e1b..804316256e142358ddf8a4eb6d75393d1c8ae9ca 100644 --- a/MODULE.bazel +++ b/MODULE.bazel -@@ -22,167 +22,7 @@ pip.parse( +@@ -22,170 +22,6 @@ pip.parse( ) use_repo(pip, "v8_python_deps") -# Define the local LLVM toolchain repository -llvm_toolchain_repository = use_repo_rule("//bazel/toolchain:llvm_repository.bzl", "llvm_toolchain_repository") - +- -llvm_toolchain_repository( - name = "llvm_toolchain", - path = "third_party/llvm-build/Release+Asserts", - config_file_content = """ +-load("@rules_cc//cc:defs.bzl", "CcToolchainConfigInfo", "cc_common") -load("@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl", "feature", "flag_group", "flag_set", "tool_path") - -def _impl(ctx): @@ -101,6 +102,7 @@ index 7d7ba53b579605a6f469fe01ddf699d1284110e3..7ddd8259e790ebbfce0bfbb08e08ed41 - - return cc_common.create_cc_toolchain_config_info( - ctx = ctx, +- toolchain_identifier = "local_clang", - features = features, - cxx_builtin_include_directories = [ - "{WORKSPACE_ROOT}/buildtools/third_party/libc++", @@ -113,7 +115,6 @@ index 7d7ba53b579605a6f469fe01ddf699d1284110e3..7ddd8259e790ebbfce0bfbb08e08ed41 - "{WORKSPACE_ROOT}/build/linux/debian_bullseye_amd64-sysroot/usr/include", - "{WORKSPACE_ROOT}/build/linux/debian_bullseye_amd64-sysroot/usr/local/include", - ], -- toolchain_identifier = "local_clang", - host_system_name = "local", - target_system_name = "local", - target_cpu = "k8", @@ -131,6 +132,7 @@ index 7d7ba53b579605a6f469fe01ddf699d1284110e3..7ddd8259e790ebbfce0bfbb08e08ed41 -) -""", - build_file_content = """ +-load("@rules_cc//cc:defs.bzl", "cc_toolchain") -load(":cc_toolchain_config.bzl", "cc_toolchain_config") - -package(default_visibility = ["//visibility:public"]) @@ -175,14 +177,15 @@ index 7d7ba53b579605a6f469fe01ddf699d1284110e3..7ddd8259e790ebbfce0bfbb08e08ed41 -) - 
-register_toolchains("@llvm_toolchain//:cc_toolchain_k8") - +- # Define local repository for libc++ from third_party sources libcxx_repository = use_repo_rule("//bazel/toolchain:libcxx_repository.bzl", "libcxx_repository") + diff --git a/bazel/toolchain/libcxx_repository.bzl b/bazel/toolchain/libcxx_repository.bzl -index a7d5f11053dd333c5bec614c27168a0effb7b4aa..ed367ef64871133867e9eccb07c6482a4de08ceb 100644 +index 60aca4bf91d107716929ff71682b720222e14587..ed367ef64871133867e9eccb07c6482a4de08ceb 100644 --- a/bazel/toolchain/libcxx_repository.bzl +++ b/bazel/toolchain/libcxx_repository.bzl -@@ -1,99 +1,17 @@ +@@ -1,106 +1,17 @@ -"""Repository rule for building libc++ from third_party sources.""" +"""Stub repository rule: workerd uses the system/toolchain libc++, so @libcxx +is an empty shim that satisfies the dep in bazel/defs.bzl without pulling in @@ -196,7 +199,11 @@ index a7d5f11053dd333c5bec614c27168a0effb7b4aa..ed367ef64871133867e9eccb07c6482a - ctx.symlink(workspace_root.get_child("third_party").get_child("libc++"), "libc++") - ctx.symlink(workspace_root.get_child("third_party").get_child("libc++abi"), "libc++abi") - ctx.symlink(workspace_root.get_child("third_party").get_child("llvm-libc"), "llvm-libc") -- ctx.symlink(workspace_root.get_child("buildtools").get_child("third_party").get_child("libc++"), "buildtools_libc++") +- +- # Symlink config files +- buildtools_libcxx = workspace_root.get_child("buildtools").get_child("third_party").get_child("libc++") +- ctx.symlink(buildtools_libcxx.get_child("__config_site"), "buildtools_libc++/__config_site") +- ctx.symlink(buildtools_libcxx.get_child("__assertion_handler"), "buildtools_libc++/__assertion_handler") - - # Get the external repository path for include flags - # In bzlmod, repo names may have prefixes, so we need to determine the actual path @@ -207,6 +214,8 @@ index a7d5f11053dd333c5bec614c27168a0effb7b4aa..ed367ef64871133867e9eccb07c6482a - # that conflict with the toolchain's absolute paths, breaking 
#include_next. - # The toolchain provides the libc++ include paths via -isystem flags. - build_content = ''' +-load("@rules_cc//cc:defs.bzl", "cc_library") +- + ctx.file("BUILD.bazel", """ package(default_visibility = ["//visibility:public"]) @@ -219,6 +228,7 @@ index a7d5f11053dd333c5bec614c27168a0effb7b4aa..ed367ef64871133867e9eccb07c6482a - "-D_LIBCPP_BUILDING_LIBRARY", - "-D_LIBCPP_HARDENING_MODE_DEFAULT=_LIBCPP_HARDENING_MODE_NONE", - "-DLIBC_NAMESPACE=__llvm_libc_cr", +- "-D_LIBCPP_CONSTINIT=constinit", -] - -cc_library( @@ -237,9 +247,8 @@ index a7d5f11053dd333c5bec614c27168a0effb7b4aa..ed367ef64871133867e9eccb07c6482a - "libc++/src/include/**/*", - "libc++/src/src/include/*.h", - "libc++abi/src/src/demangle/*.def", -- "buildtools_libc++/__config_site", -- "buildtools_libc++/__assertion_handler", - "llvm-libc/src/**/*.h", +- "buildtools_libc++/*", - ]), - copts = LIBCXX_COPTS + [ - "-DLIBCXXABI_SILENT_TERMINATE", @@ -262,9 +271,8 @@ index a7d5f11053dd333c5bec614c27168a0effb7b4aa..ed367ef64871133867e9eccb07c6482a - ]) + glob(["libc++/src/src/support/**/*.ipp"], allow_empty = True), - hdrs = glob([ - "libc++/src/include/**/*", -- "buildtools_libc++/__config_site", -- "buildtools_libc++/__assertion_handler", - "llvm-libc/src/**/*.h", +- "buildtools_libc++/*", - ]), - copts = LIBCXX_COPTS + [ - "-DLIBCXX_BUILDING_LIBCXXABI", @@ -275,7 +283,9 @@ index a7d5f11053dd333c5bec614c27168a0effb7b4aa..ed367ef64871133867e9eccb07c6482a - "-lpthread", - "-lm", - ], -- deps = [":libc++abi"], +- deps = [ +- ":libc++abi", +- ], - linkstatic = True, -) -'''.format(REPO_PATH=repo_path) diff --git a/patches/v8/0036-Fix-non-conforming-braced-init-list-in-value_or.patch b/patches/v8/0036-Fix-non-conforming-braced-init-list-in-value_or.patch deleted file mode 100644 index 902b987f1e2..00000000000 --- a/patches/v8/0036-Fix-non-conforming-braced-init-list-in-value_or.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: 
Joyee Cheung -Date: Mon, 6 Apr 2026 18:15:41 +0200 -Subject: Fix non-conforming braced-init-list in value_or() - -value_or() is a function template that deduces its argument type. -Passing a braced-init-list {} causes deduction to fail in GCC -because {} has no type: - - wasm-shuffle-reducer.cc:576: error: no matching member function - for call to 'value_or' - note: candidate template ignored: couldn't infer template argument - -Clang only accepted it by chance. Replace {} with uint8_t{0} to make -the type explicit. - -Refs: https://eel.is/c++draft/temp.deduct.call -Refs: https://github.com/nodejs/node/pull/62572 -Change-Id: I589617c78fbc1a65a3475957f846ffb87364ff28 -Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/7726131 -Reviewed-by: Matthias Liedtke -Commit-Queue: Joyee Cheung -Cr-Commit-Position: refs/heads/main@{#106357} - -diff --git a/src/compiler/turboshaft/wasm-shuffle-reducer.cc b/src/compiler/turboshaft/wasm-shuffle-reducer.cc -index 192af4738276fbc0400eb8d087d6292e8b2df2d1..0f8275fedede955baa6bbe4cdd2da0b1e3245fc4 100644 ---- a/src/compiler/turboshaft/wasm-shuffle-reducer.cc -+++ b/src/compiler/turboshaft/wasm-shuffle-reducer.cc -@@ -573,7 +573,7 @@ void WasmShuffleAnalyzer::TryReduceFromMSB(OpIndex input, - uint8_t index = shuffle.shuffle[i]; - if (index >= lower_limit && index <= upper_limit) { - max = std::max(static_cast(index % kSimd128Size), -- max.value_or({})); -+ max.value_or(uint8_t{0})); - } - } - if (max) { diff --git a/patches/v8/0037-Fix-non-portable-std-atomic_flag-construction-in-run.patch b/patches/v8/0036-Fix-non-portable-std-atomic_flag-construction-in-run.patch similarity index 87% rename from patches/v8/0037-Fix-non-portable-std-atomic_flag-construction-in-run.patch rename to patches/v8/0036-Fix-non-portable-std-atomic_flag-construction-in-run.patch index d736db17c5e..3ce6d726f32 100644 --- a/patches/v8/0037-Fix-non-portable-std-atomic_flag-construction-in-run.patch +++ 
b/patches/v8/0036-Fix-non-portable-std-atomic_flag-construction-in-run.patch @@ -10,10 +10,10 @@ before C++20, but cleared from C++20 onward, which matches the intent of the original code (which used the equivalent of false). diff --git a/src/runtime/runtime-test.cc b/src/runtime/runtime-test.cc -index 12a2ead2d07900e70100d862f58db68401d96901..00ff940e335df0224081cd93198c293f748b964a 100644 +index 478a697dd7eea78ae8dcc44f4ae70148aaf88a4c..6979ba62427161db69b863e0734c616ecbeb6588 100644 --- a/src/runtime/runtime-test.cc +++ b/src/runtime/runtime-test.cc -@@ -1173,7 +1173,7 @@ RUNTIME_FUNCTION(Runtime_SetAllocationTimeout) { +@@ -1194,7 +1194,7 @@ RUNTIME_FUNCTION(Runtime_SetAllocationTimeout) { CONVERT_INT32_ARG_FUZZ_SAFE(timeout, 1); isolate->heap()->set_allocation_timeout(timeout); #else // !V8_ENABLE_ALLOCATION_TIMEOUT diff --git a/samples/pyodide-fastapi/config.capnp b/samples/pyodide-fastapi/config.capnp deleted file mode 100644 index 114e65afaa2..00000000000 --- a/samples/pyodide-fastapi/config.capnp +++ /dev/null @@ -1,34 +0,0 @@ -using Workerd = import "/workerd/workerd.capnp"; - -const config :Workerd.Config = ( - services = [ - (name = "main", worker = .mainWorker), - ], - - sockets = [ - # Serve HTTP on port 8080. 
- ( name = "http", - address = "*:8080", - http = (), - service = "main" - ), - ], -); - -const mainWorker :Workerd.Worker = ( - modules = [ - (name = "worker.py", pythonModule = embed "./worker.py"), - (name = "fastapi", pythonRequirement = ""), - (name = "anyio", pythonRequirement = ""), - ], - bindings = [ - ( - name = "secret", - text = "thisisasecret" - ), - ], - compatibilityDate = "2023-12-18", - compatibilityFlags = ["python_workers"], - # Learn more about compatibility dates at: - # https://developers.cloudflare.com/workers/platform/compatibility-dates/ -); diff --git a/samples/pyodide-fastapi/worker.py b/samples/pyodide-fastapi/worker.py deleted file mode 100644 index 7802d29235e..00000000000 --- a/samples/pyodide-fastapi/worker.py +++ /dev/null @@ -1,57 +0,0 @@ -from asgi import env -from workers import WorkerEntrypoint - - -class Default(WorkerEntrypoint): - async def fetch(self, request, env): - import asgi - - return await asgi.fetch(app, request, env) - - -# Set up fastapi app - -from fastapi import FastAPI -from pydantic import BaseModel - -app = FastAPI() - - -@app.get("/hello") -async def hello(env=env): - return {"message": "Hello World", "secret": env.secret} - - -@app.get("/route") -async def route(): - return {"message": "this is my custom route"} - - -@app.get("/favicon.ico") -async def favicon(): - return {"message": "here's a favicon I guess?"} - - -@app.get("/items/{item_id}") -async def read_item(item_id: int): - return {"item_id": item_id} - - -class Item(BaseModel): - name: str - description: str | None = None - price: float - tax: float | None = None - - -@app.post("/items/") -async def create_item(item: Item): - return item - - -@app.put("/items/{item_id}") -async def create_item2(item_id: int, item: Item, q: str | None = None): - result = {"item_id": item_id, **item.model_dump()} - if q: - result.update({"q": q}) - return result diff --git a/samples/pyodide-langchain/config.capnp b/samples/pyodide-langchain/config.capnp deleted file 
mode 100644 index 7d1f43b8b4f..00000000000 --- a/samples/pyodide-langchain/config.capnp +++ /dev/null @@ -1,28 +0,0 @@ -using Workerd = import "/workerd/workerd.capnp"; - -const config :Workerd.Config = ( - services = [ - (name = "main", worker = .mainWorker), - ], - - sockets = [ - # Serve HTTP on port 8080. - ( name = "http", - address = "*:8080", - http = (), - service = "main" - ), - ], -); - -const mainWorker :Workerd.Worker = ( - modules = [ - (name = "worker.py", pythonModule = embed "./worker.py"), - (name = "langchain_core", pythonRequirement = ""), - (name = "langchain_openai", pythonRequirement = ""), - ], - compatibilityDate = "2023-12-18", - compatibilityFlags = ["python_workers"], - # Learn more about compatibility dates at: - # https://developers.cloudflare.com/workers/platform/compatibility-dates/ -); diff --git a/samples/pyodide-langchain/worker.py b/samples/pyodide-langchain/worker.py deleted file mode 100644 index ae61132abd5..00000000000 --- a/samples/pyodide-langchain/worker.py +++ /dev/null @@ -1,15 +0,0 @@ -from langchain_core.prompts import PromptTemplate -from langchain_openai import OpenAI - -API_KEY = "sk-abcdefg" - - -async def test(request): - prompt = PromptTemplate.from_template( - "Complete the following sentence: I am a {profession} and " - ) - llm = OpenAI(api_key=API_KEY) - chain = prompt | llm - - res = await chain.ainvoke({"profession": "electrician"}) - print(res) diff --git a/src/cloudflare/internal/test-tracing-wrapper.ts b/src/cloudflare/internal/test-tracing-wrapper.ts index 4ef71393a60..1bd02674719 100644 --- a/src/cloudflare/internal/test-tracing-wrapper.ts +++ b/src/cloudflare/internal/test-tracing-wrapper.ts @@ -7,15 +7,20 @@ // It must be in the internal/ directory to be compiled as part of the cloudflare bundle, // but it should never be used outside of test configurations. 
-import { withSpan } from 'cloudflare-internal:tracing-helpers'; +import { + startActiveSpan, + withSpan, +} from 'cloudflare-internal:tracing-helpers'; interface TestWrapper { + startActiveSpan: typeof startActiveSpan; withSpan: typeof withSpan; } // Wrapper function that provides test utilities for tracing export default function (_env: unknown): TestWrapper { return { + startActiveSpan, // Export withSpan for testing withSpan, }; diff --git a/src/cloudflare/internal/test/tracing/BUILD.bazel b/src/cloudflare/internal/test/tracing/BUILD.bazel index 159201c3bef..7023fd47b53 100644 --- a/src/cloudflare/internal/test/tracing/BUILD.bazel +++ b/src/cloudflare/internal/test/tracing/BUILD.bazel @@ -2,18 +2,15 @@ load("//:build/wd_test.bzl", "wd_test") wd_test( src = "tracing-helpers-test.wd-test", - args = ["--experimental"], data = glob(["*.js"]) + ["//src/cloudflare/internal/test:instrumentation-test-helper.js"], ) wd_test( src = "tracing-hierarchy-test.wd-test", - args = ["--experimental"], data = glob(["*.js"]) + ["//src/cloudflare/internal/test:instrumentation-test-helper.js"], ) wd_test( src = "tracing-log-attribution-test.wd-test", - args = ["--experimental"], data = glob(["*.js"]), ) diff --git a/src/cloudflare/internal/test/tracing/tracing-helpers-instrumentation-test.js b/src/cloudflare/internal/test/tracing/tracing-helpers-instrumentation-test.js index 45bac858d27..bbce68e8cde 100644 --- a/src/cloudflare/internal/test/tracing/tracing-helpers-instrumentation-test.js +++ b/src/cloudflare/internal/test/tracing/tracing-helpers-instrumentation-test.js @@ -35,7 +35,17 @@ export const validateSpans = { expectedSpan: 'undefined-attr-op', }, { test: 'publicImportTracing', expectedSpan: 'public-import-op' }, + { + test: 'publicImportStartActiveSpan', + expectedSpan: 'public-start-active-op', + }, { test: 'ctxTracing', expectedSpan: 'ctx-tracing-op' }, + { + test: 'detachedSpanEndsAfterStreamDrain', + expectedSpan: 'detached-stream-op', + }, + { test: 
'helperStartActiveSpan', expectedSpan: 'helper-detached-op' }, + { test: 'startActiveSpanSyncThrow', expectedSpan: 'manual-throw-op' }, ]; for (const { test, expectedSpan } of testValidations) { @@ -58,6 +68,44 @@ export const validateSpans = { ); } + { + const span = ( + spansByTest.get('detachedSpanEndsAfterStreamDrain') || [] + ).find((s) => s.name === 'detached-stream-op'); + assert(span, 'detachedSpanEndsAfterStreamDrain: span present'); + assert.strictEqual(span['phase.created'], true); + assert.strictEqual(span['phase.drained'], true); + assert(span.closed, 'Detached stream span should be explicitly closed'); + } + + { + const span = (spansByTest.get('publicImportStartActiveSpan') || []).find( + (s) => s.name === 'public-start-active-op' + ); + assert(span, 'publicImportStartActiveSpan: span present'); + assert.strictEqual(span.path, 'import-from-cloudflare-workers'); + assert.strictEqual(span['ended.explicitly'], true); + assert(span.closed, 'Public startActiveSpan span should be closed'); + } + + { + const span = (spansByTest.get('helperStartActiveSpan') || []).find( + (s) => s.name === 'helper-detached-op' + ); + assert(span, 'helperStartActiveSpan: span present'); + assert.strictEqual(span['ended.explicitly'], true); + assert(span.closed, 'Helper-created span should be explicitly closed'); + } + + { + const span = (spansByTest.get('startActiveSpanSyncThrow') || []).find( + (s) => s.name === 'manual-throw-op' + ); + assert(span, 'startActiveSpanSyncThrow: span present'); + assert.strictEqual(span['after.throw'], true); + assert(span.closed, 'Manual throw span should be explicitly closed'); + } + // Nested spans: verify both outer and inner spans exist and both are closed. // This exercises the AsyncContextFrame push path used by enterSpan for nesting. 
for (const testName of ['nestedSyncSpans', 'nestedAsyncSpans']) { diff --git a/src/cloudflare/internal/test/tracing/tracing-helpers-test.js b/src/cloudflare/internal/test/tracing/tracing-helpers-test.js index e8c8166c117..7a194f7b75b 100644 --- a/src/cloudflare/internal/test/tracing/tracing-helpers-test.js +++ b/src/cloudflare/internal/test/tracing/tracing-helpers-test.js @@ -197,6 +197,28 @@ export const publicImportTracing = { }, }; +export const publicImportStartActiveSpan = { + async test(ctrl, env, ctx) { + let capturedSpan = null; + const result = publicTracing.startActiveSpan( + 'public-start-active-op', + (span) => { + capturedSpan = span; + span.setAttribute('test', 'publicImportStartActiveSpan'); + span.setAttribute('path', 'import-from-cloudflare-workers'); + assert.strictEqual(span.isTraced, true); + return 'public-start-active-value'; + } + ); + + assert.strictEqual(result, 'public-start-active-value'); + assert.strictEqual(capturedSpan.isTraced, true); + capturedSpan.setAttribute('ended.explicitly', true); + capturedSpan.end(); + assert.strictEqual(capturedSpan.isTraced, false); + }, +}; + // Verify ctx.tracing: same Tracing instance should be reachable off the execution context. 
export const ctxTracing = { async test(ctrl, env, ctx) { @@ -216,3 +238,118 @@ export const ctxTracing = { assert.strictEqual(result, 'ctx-tracing-value'); }, }; + +export const detachedSpanEndsAfterStreamDrain = { + async test(ctrl, env, ctx) { + assert.ok(ctx.tracing, 'ctx.tracing should be defined'); + assert.strictEqual( + typeof ctx.tracing.startActiveSpan, + 'function', + 'ctx.tracing.startActiveSpan should be a function' + ); + + let capturedSpan = null; + const stream = ctx.tracing.startActiveSpan( + 'detached-stream-op', + (span) => { + capturedSpan = span; + span.setAttribute('test', 'detachedSpanEndsAfterStreamDrain'); + span.setAttribute('phase.created', true); + + return new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode('hello')); + controller.enqueue(new TextEncoder().encode(' world')); + controller.close(); + }, + }).pipeThrough( + new TransformStream({ + transform(chunk, controller) { + controller.enqueue(chunk); + }, + flush() { + span.setAttribute('phase.drained', true); + span.end(); + }, + }) + ); + } + ); + + assert.strictEqual( + capturedSpan.isTraced, + true, + 'Detached span should stay open after callback returns' + ); + assert.strictEqual(await new Response(stream).text(), 'hello world'); + assert.strictEqual( + capturedSpan.isTraced, + false, + 'Detached span should stop tracing after explicit end()' + ); + }, +}; + +export const helperStartActiveSpan = { + async test(ctrl, env, ctx) { + const { startActiveSpan } = env.tracingTest; + assert.strictEqual( + typeof startActiveSpan, + 'function', + 'tracing helpers should export startActiveSpan' + ); + + let capturedSpan = null; + const result = startActiveSpan('helper-detached-op', (span) => { + capturedSpan = span; + span.setAttribute('test', 'helperStartActiveSpan'); + return 'helper-detached-value'; + }); + + assert.strictEqual(result, 'helper-detached-value'); + assert.strictEqual( + capturedSpan.isTraced, + true, + 'Helper-created span should stay 
open after callback returns' + ); + capturedSpan.setAttribute('ended.explicitly', true); + capturedSpan.end(); + assert.strictEqual( + capturedSpan.isTraced, + false, + 'Helper-created span should stop tracing after explicit end()' + ); + }, +}; + +export const startActiveSpanSyncThrow = { + async test(ctrl, env, ctx) { + let capturedSpan = null; + let caught = false; + + try { + ctx.tracing.startActiveSpan('manual-throw-op', (span) => { + capturedSpan = span; + span.setAttribute('test', 'startActiveSpanSyncThrow'); + throw new Error('manual lifecycle throw'); + }); + } catch (e) { + caught = true; + assert.strictEqual(e.message, 'manual lifecycle throw'); + } + + assert(caught, 'startActiveSpan callback error should be rethrown'); + assert.strictEqual( + capturedSpan.isTraced, + true, + 'Manual span should stay open after callback throws' + ); + capturedSpan.setAttribute('after.throw', true); + capturedSpan.end(); + assert.strictEqual( + capturedSpan.isTraced, + false, + 'Manual span should stop tracing after explicit end()' + ); + }, +}; diff --git a/src/cloudflare/internal/test/tracing/tracing-helpers-test.wd-test b/src/cloudflare/internal/test/tracing/tracing-helpers-test.wd-test index 1a58cfe9d5a..3a3a2e26fc3 100644 --- a/src/cloudflare/internal/test/tracing/tracing-helpers-test.wd-test +++ b/src/cloudflare/internal/test/tracing/tracing-helpers-test.wd-test @@ -7,7 +7,11 @@ const unitTests :Workerd.Config = ( modules = [ (name = "worker", esModule = embed "tracing-helpers-test.js"), ], - compatibilityFlags = ["experimental", "nodejs_compat"], + compatibilityFlags = [ + "nodejs_compat", + "streams_enable_constructors", + "transformstream_enable_standard_constructor" + ], streamingTails = ["tail"], bindings = [ ( @@ -28,5 +32,5 @@ const tailWorker :Workerd.Worker = ( (name = "worker", esModule = embed "tracing-helpers-instrumentation-test.js"), (name = "instrumentation-test-helper", esModule = embed "../instrumentation-test-helper.js") ], - compatibilityFlags = 
["experimental", "nodejs_compat"], + compatibilityFlags = ["nodejs_compat"], ); diff --git a/src/cloudflare/internal/test/tracing/tracing-hierarchy-test.wd-test b/src/cloudflare/internal/test/tracing/tracing-hierarchy-test.wd-test index a5782bcca9a..065cccd6c82 100644 --- a/src/cloudflare/internal/test/tracing/tracing-hierarchy-test.wd-test +++ b/src/cloudflare/internal/test/tracing/tracing-hierarchy-test.wd-test @@ -11,7 +11,7 @@ const unitTests :Workerd.Config = ( modules = [ (name = "worker", esModule = embed "tracing-hierarchy-test.js"), ], - compatibilityFlags = ["experimental", "nodejs_compat"], + compatibilityFlags = ["nodejs_compat"], streamingTails = ["tail"], bindings = [ ( @@ -29,7 +29,7 @@ const unitTests :Workerd.Config = ( ), ( name = "tracing-hierarchy-mock", worker = ( - compatibilityFlags = ["experimental", "nodejs_compat"], + compatibilityFlags = ["nodejs_compat"], modules = [ (name = "worker", esModule = embed "tracing-hierarchy-mock.js"), ], @@ -44,5 +44,5 @@ const tailWorker :Workerd.Worker = ( (name = "worker", esModule = embed "tracing-hierarchy-instrumentation-test.js"), (name = "instrumentation-test-helper", esModule = embed "../instrumentation-test-helper.js") ], - compatibilityFlags = ["experimental", "nodejs_compat"], + compatibilityFlags = ["nodejs_compat"], ); diff --git a/src/cloudflare/internal/test/tracing/tracing-log-attribution-test.wd-test b/src/cloudflare/internal/test/tracing/tracing-log-attribution-test.wd-test index 2151da339a8..b7c8ee3421c 100644 --- a/src/cloudflare/internal/test/tracing/tracing-log-attribution-test.wd-test +++ b/src/cloudflare/internal/test/tracing/tracing-log-attribution-test.wd-test @@ -12,7 +12,7 @@ const unitTests :Workerd.Config = ( modules = [ (name = "worker", esModule = embed "tracing-log-attribution-test.js"), ], - compatibilityFlags = ["experimental", "nodejs_compat"], + compatibilityFlags = ["nodejs_compat"], streamingTails = ["tail"], bindings = [ ( @@ -32,5 +32,5 @@ const tailWorker 
:Workerd.Worker = ( modules = [ (name = "worker", esModule = embed "tracing-log-attribution-instrumentation-test.js"), ], - compatibilityFlags = ["experimental", "nodejs_compat"], + compatibilityFlags = ["nodejs_compat"], ); diff --git a/src/cloudflare/internal/tracing-helpers.ts b/src/cloudflare/internal/tracing-helpers.ts index bf5c2a64053..8486689a4ed 100644 --- a/src/cloudflare/internal/tracing-helpers.ts +++ b/src/cloudflare/internal/tracing-helpers.ts @@ -40,3 +40,26 @@ export function withSpan(name: string, fn: (span: Span) => T): T { // promise-reject) auto-ending, so this is a pure passthrough. return tracing.enterSpan(name, fn); } + +/** + * Helper function to start a span that is active while `fn` runs, but whose + * lifecycle is controlled explicitly by the caller via `span.end()`. + * + * @param name - The operation name for the span + * @param fn - The function to execute while the span is active + * @returns The result of the function + * + * @example + * // Explicit lifecycle usage for stream-drain instrumentation + * const stream = startActiveSpan('stream', (span) => { + * return body.pipeThrough(new TransformStream({ + * flush() { + * span.setAttribute('phase.drained', true); + * span.end(); + * }, + * })); + * }); + */ +export function startActiveSpan(name: string, fn: (span: Span) => T): T { + return tracing.startActiveSpan(name, fn); +} diff --git a/src/cloudflare/internal/tracing.d.ts b/src/cloudflare/internal/tracing.d.ts index b6ef9ddbc6c..4d8e3b17ddd 100644 --- a/src/cloudflare/internal/tracing.d.ts +++ b/src/cloudflare/internal/tracing.d.ts @@ -7,14 +7,16 @@ type SpanValue = string | number | boolean; declare class Span { // Returns true if this span will be recorded to the tracing system. False when the - // current async context is not being traced, or when the span has already been submitted - // (which happens automatically when the enterSpan callback returns). Callers can gate - // expensive attribute-computation code on this. 
+ // current async context is not being traced, or when the span has already been submitted. + // Callers can gate expensive attribute-computation code on this. readonly isTraced: boolean; // Sets a single attribute on the span. If `value` is undefined, the attribute is not set, // which is convenient for optional fields. setAttribute(key: string, value: SpanValue | undefined): void; + + // Ends the span and submits its attributes to the tracing system. Idempotent. + end(): void; } // The default export is a singleton instance of the C++ `Tracing` class (see @@ -34,6 +36,15 @@ declare const tracing: { ...args: A ): T; + // Creates a span, makes it active while invoking `callback(span, ...args)`, and + // returns the callback result without automatically ending the span. Callers must + // invoke `span.end()` explicitly. + startActiveSpan( + name: string, + callback: (span: Span, ...args: A) => T, + ...args: A + ): T; + // The `Span` class is exposed as a nested type so callers can reference the type via // `InstanceType` (see `tracing-helpers.ts`). readonly Span: typeof Span; diff --git a/src/cloudflare/internal/workers.d.ts b/src/cloudflare/internal/workers.d.ts index af948c1d0bc..aeabe8dc786 100644 --- a/src/cloudflare/internal/workers.d.ts +++ b/src/cloudflare/internal/workers.d.ts @@ -63,8 +63,3 @@ export interface CacheContext { export function getCtxCache(): CacheContext | undefined; export function abortIsolate(reason?: string): never; - -// True when the workerd_experimental compat flag is enabled. Use this for gating experimental -// re-exports in user-facing wrappers; Cloudflare.compatibilityFlags filters out experimental -// flags themselves so it cannot be used to detect this. 
-export const isExperimental: boolean; diff --git a/src/cloudflare/workers.ts b/src/cloudflare/workers.ts index 8aebf545be2..e3fe0cbd93c 100644 --- a/src/cloudflare/workers.ts +++ b/src/cloudflare/workers.ts @@ -207,11 +207,5 @@ export const cache = new Proxy( export const tracing = innerTracing; export function abortIsolate(reason?: string): never { - if (entrypoints.isExperimental) { - entrypoints.abortIsolate(reason); - } else { - throw new Error( - 'abortIsolate() requires the "experimental" compatibility flag.' - ); - } + entrypoints.abortIsolate(reason); } diff --git a/src/node/internal/crypto_random.ts b/src/node/internal/crypto_random.ts index d1e456ae7a3..2b19a81654d 100644 --- a/src/node/internal/crypto_random.ts +++ b/src/node/internal/crypto_random.ts @@ -36,6 +36,7 @@ import { import { isAnyArrayBuffer, isArrayBufferView, + isDataView, } from 'node-internal:internal_types'; import { @@ -81,7 +82,9 @@ export function randomFillSync( buffer ); } - const maxLength = (buffer as Uint8Array).length; + // Use byteLength, not length — DataView has no .length property and + // TypedArray .length is element count, not bytes. + const maxLength = (buffer as Uint8Array).byteLength; if (offset !== undefined) { validateInteger(offset, 'offset', 0, kMaxLength); } else offset = 0; @@ -90,6 +93,12 @@ export function randomFillSync( } else size = maxLength - offset; if (isAnyArrayBuffer(buffer)) { buffer = Buffer.from(buffer); + } else if (isDataView(buffer)) { + buffer = new Uint8Array( + buffer.buffer, + buffer.byteOffset, + buffer.byteLength + ); } buffer = (buffer as Buffer).subarray(offset, offset + size); return crypto.getRandomValues(buffer as Uint8Array); @@ -130,7 +139,9 @@ export function randomFill( let offset = 0; let size = 0; - const maxLength = (buffer as Uint8Array).length; + // Use byteLength, not length — DataView has no .length property and + // TypedArray .length is element count, not bytes. 
+ const maxLength = (buffer as Uint8Array).byteLength; if (typeof callback === 'function') { validateInteger(offsetOrCallback, 'offset', 0, maxLength); offset = offsetOrCallback; diff --git a/src/node/internal/internal_fs_utils.ts b/src/node/internal/internal_fs_utils.ts index a93c5cfc4a8..936a6853009 100644 --- a/src/node/internal/internal_fs_utils.ts +++ b/src/node/internal/internal_fs_utils.ts @@ -569,14 +569,14 @@ export function validateReadArgs( length = buffer.byteLength - offset, position = null, } = offsetOrOptions; - actualOffset = offset; + actualOffset += offset; actualLength = length; actualPosition = position; } // Handle the case where the third argument is a number (offset) else if (typeof offsetOrOptions === 'number') { - actualOffset = offsetOrOptions; - actualLength = length ?? buffer.byteLength - actualOffset; + actualOffset += offsetOrOptions; + actualLength = length ?? buffer.byteLength - offsetOrOptions; actualPosition = position; } else { throw new ERR_INVALID_ARG_TYPE( @@ -590,8 +590,9 @@ export function validateReadArgs( validateUint32(actualLength, 'length'); validatePosition(actualPosition, 'position'); - // The actualOffset plus actualLength must not exceed the buffer's byte length. - if (actualOffset + actualLength > buffer.byteLength) { + // The actualOffset plus actualLength must not exceed the backing buffer's byte length. 
+ const backingBufferLength = buffer.buffer.byteLength; + if (actualOffset + actualLength > backingBufferLength) { throw new ERR_INVALID_ARG_VALUE('offset', actualOffset, 'out of bounds'); } diff --git a/src/node/internal/internal_tls_wrap.ts b/src/node/internal/internal_tls_wrap.ts index 7bd91618fd4..90931e7aba0 100644 --- a/src/node/internal/internal_tls_wrap.ts +++ b/src/node/internal/internal_tls_wrap.ts @@ -32,7 +32,6 @@ import { tryReadStart, } from 'node-internal:internal_net'; import { JSStreamSocket } from 'node-internal:internal_tls_jsstream'; -import { checkServerIdentity } from 'node-internal:internal_tls'; import type { ConnectionOptions, TlsOptions, @@ -56,6 +55,7 @@ import { ERR_TLS_INVALID_CONTEXT, } from 'node-internal:internal_errors'; import { SecureContext } from 'node-internal:internal_tls_common'; +import { default as processImpl } from 'node-internal:process'; import { ok } from 'node-internal:internal_assert'; const kConnectOptions = Symbol('connect-options'); @@ -220,13 +220,18 @@ export function TLSSocket( throw new ERR_OPTION_NOT_IMPLEMENTED('options.pskCallback'); } - // TODO(soon): Call this on secureConnect once connect() api supports - // getting peer certificate. + // checkServerIdentity requires access to the peer certificate via + // getPeerCertificate(), which is not yet implemented. When the + // throw_on_not_implemented_tls_options compat flag is enabled we throw; + // otherwise we silently continue so that existing workers are not broken. 
if (tlsOptions.checkServerIdentity !== undefined) { validateFunction( tlsOptions.checkServerIdentity, 'options.checkServerIdentity' ); + if (processImpl.shouldThrowOnNotImplementedTlsOption()) { + throw new ERR_OPTION_NOT_IMPLEMENTED('options.checkServerIdentity'); + } } this._tlsOptions = tlsOptions; @@ -235,7 +240,7 @@ export function TLSSocket( this._newSessionPending = false; this._controlReleased = false; this.secureConnecting = true; - this.servername = null; + this.servername = tlsOptions.servername ?? null; this.authorized = false; this[kRes] = null; this[kIsVerified] = false; @@ -495,7 +500,11 @@ TLSSocket.prototype._start = function _start(this: TLSSocket): void { try { const { host, port, addressType } = this._handle.options; - const socket = this._handle.socket.startTls(); + const tlsOpts = + this.servername != null + ? { expectedServerHostname: this.servername } + : undefined; + const socket = this._handle.socket.startTls(tlsOpts); this._handle = { socket: socket, @@ -537,9 +546,7 @@ TLSSocket.prototype.setServername = function setServername( name: string ): void { validateString(name, 'name'); - // Pipefitter currently does not provide us a way on the internal - // system and possibly KJ's TLS implementation doesn't provides a way, - // but it is something we will need sooner than later. 
+ this.servername = name; }; TLSSocket.prototype.setSession = function (_session: string | Buffer): void { @@ -700,6 +707,9 @@ export function connect(...args: unknown[]): TLSSocket { options.checkServerIdentity, 'options.checkServerIdentity' ); + if (processImpl.shouldThrowOnNotImplementedTlsOption()) { + throw new ERR_OPTION_NOT_IMPLEMENTED('options.checkServerIdentity'); + } } // @ts-expect-error TS2345 Type incompatibility between Node.js Duplex and internal Duplex @@ -710,7 +720,6 @@ export function connect(...args: unknown[]): TLSSocket { enableTrace: options.enableTrace, highWaterMark: options.highWaterMark, secureContext: options.secureContext, - checkServerIdentity: options.checkServerIdentity ?? checkServerIdentity, onread: options.onread, signal: options.signal, lookup: options.lookup, @@ -722,6 +731,14 @@ export function connect(...args: unknown[]): TLSSocket { tlssock[kConnectOptions] = options; + // In Node.js, servername defaults to options.host when not explicitly set. + // Store it on the TLSSocket so _start() can pass it as expectedServerHostname + // to the native Socket.startTls() call for correct TLS certificate identity + // validation. 
+ if (options.servername !== undefined) { + tlssock.servername = options.servername; + } + if (cb) { tlssock.once('secureConnect', cb); } diff --git a/src/node/internal/process.d.ts b/src/node/internal/process.d.ts index 55e978ebedf..5d5531f902b 100644 --- a/src/node/internal/process.d.ts +++ b/src/node/internal/process.d.ts @@ -9,6 +9,7 @@ export function getCwd(): string; export function setCwd(path: string): void; export const versions: Record; export const platform: string; +export function shouldThrowOnNotImplementedTlsOption(): boolean; declare global { const Cloudflare: { diff --git a/src/node/internal/sockets.d.ts b/src/node/internal/sockets.d.ts index 47c3649a64d..51c86852a70 100644 --- a/src/node/internal/sockets.d.ts +++ b/src/node/internal/sockets.d.ts @@ -16,13 +16,17 @@ export interface Writer extends WritableStream { releaseLock(): void; } +export interface TlsOptions { + expectedServerHostname?: string; +} + export interface Socket { opened: Promise; closed: Promise; close(): Promise; readable: Reader; writable: Writer; - startTls(): Socket; + startTls(options?: TlsOptions): Socket; readonly upgraded: boolean; readonly secureTransport: 'on' | 'off' | 'starttls'; diff --git a/src/pyodide/AGENTS.md b/src/pyodide/AGENTS.md index e6effc5ff02..43b8c37545b 100644 --- a/src/pyodide/AGENTS.md +++ b/src/pyodide/AGENTS.md @@ -12,7 +12,8 @@ Python Workers runtime layer. 
Replaces Pyodide's loader with a minimal substitut | `internal/python.ts` | Core bridge: Emscripten init, Pyodide bootstrap, snapshot orchestration | | `internal/snapshot.ts` | Memory snapshot collect/restore; baseline vs dedicated snapshot types | | `internal/setupPackages.ts`, `loadPackage.ts` | Package mounting, sys.path, vendor dir setup | -| `internal/tar.ts`, `tarfs.ts` | Tar archive parsing + read-only filesystem for bundles | +| `internal/tarfs.ts` | Read-only Emscripten filesystem backing site-packages / dynlib mounts | +| `python_packages.capnp` + `pack_python_packages.py` | Build-time: stdlib wheels are extracted into a `PythonPackages` message embedded in the bundle | | `internal/topLevelEntropy/` | TS+Python: patches `getRandomValues` with deterministic entropy during import, reseeds before request | | `internal/pool/` | Emscripten setup in plain V8 isolate; `emscriptenSetup.ts` has NO access to C++ extensions | | `internal/workers-api/` | Python SDK package (frozen) | diff --git a/src/pyodide/BUILD.bazel b/src/pyodide/BUILD.bazel index 8d0390ad7ea..01c5f3d2f85 100644 --- a/src/pyodide/BUILD.bazel +++ b/src/pyodide/BUILD.bazel @@ -1,15 +1,34 @@ load("@bazel_skylib//rules:write_file.bzl", "write_file") -load("@rules_python//python:defs.bzl", "py_test") +load("@capnp-cpp//src/capnp:cc_capnp_library.bzl", "cc_capnp_library") +load("@rules_python//python:defs.bzl", "py_binary", "py_test") load("//:build/python_metadata.bzl", "BUNDLE_VERSION_INFO") load("//:build/wd_cc_embed.bzl", "wd_cc_embed") load(":helpers.bzl", "pyodide_extra", "pyodide_static", "python_bundles") +cc_capnp_library( + name = "python_packages_capnp", + srcs = ["python_packages.capnp"], + visibility = ["//visibility:public"], +) + +# Build-time tool that extracts the stdlib wheels into a PythonPackages capnp message embedded in +# the Pyodide bundle. Used by python_bundles() in helpers.bzl. 
+py_binary( + name = "pack_python_packages", + srcs = ["pack_python_packages.py"], + visibility = ["//visibility:public"], +) + pyodide_extra() python_bundles() pyodide_static() +# The checked-in package lock files. Exported so the `pyodide` module extension +# (build/deps/dep_pyodide.bzl) can read them to download the stdlib wheels. +exports_files(glob(["python-lock/*.json"])) + DEV_VERSION = BUNDLE_VERSION_INFO["development"]["real_pyodide_version"] alias( diff --git a/src/pyodide/helpers.bzl b/src/pyodide/helpers.bzl index 201d478f7e9..f71c9a9c0e8 100644 --- a/src/pyodide/helpers.bzl +++ b/src/pyodide/helpers.bzl @@ -47,6 +47,7 @@ def _fmt_python_snapshot_release( baseline_snapshot_hash, flag, real_pyodide_version, + integrity, **_kwds): content = ", ".join( [ @@ -57,6 +58,7 @@ def _fmt_python_snapshot_release( "backport = %s" % backport, 'baselineSnapshotHash = "%s"' % baseline_snapshot_hash, 'flagName = "%s"' % flag, + 'integrity = "%s"' % integrity, ], ) return "(%s)" % content @@ -88,7 +90,7 @@ def pyodide_extra(): ) for tag in package_tags: - _copy_and_capnp_embed("@pyodide-lock_" + tag + ".json//file") + _copy_and_capnp_embed("python-lock/pyodide-lock_" + tag + ".json") cc_capnp_library( name = "pyodide_extra_capnp", @@ -105,6 +107,17 @@ def pyodide_extra(): }), ) +def _packages_tag_for_version(version): + # Maps a Pyodide version to the package lock tag whose stdlib wheels should be embedded into + # that version's bundle. Newer Pyodide versions bundle the stdlib directly and have no lock + # file / wheels to embed, in which case this returns None. 
+ for name, info in BUNDLE_VERSION_INFO.items(): + if name == "development": + continue + if info["pyodide_version"] == version and "packages" in info: + return info["packages"] + return None + def python_bundles(overrides = {}): srcs = [_python_bundle_helper(info, overrides) for info in PYODIDE_VERSIONS] native.filegroup( @@ -122,7 +135,6 @@ def pyodide_static(): "internal/*.py", "internal/workers-api/src/*.py", "internal/workers-api/src/workers/*.py", - "internal/patches/*.py", "internal/topLevelEntropy/*.py", ]) internal_modules = native.glob( @@ -328,6 +340,41 @@ def _python_bundle(version, *, pyodide_asm_wasm = None, pyodide_asm_js = None, p deps = ["pyodide.asm.js@rule_js@" + version], ) + # The CPython stdlib modules and the shared libraries they depend on are embedded directly into + # the bundle so that the runtime no longer has to download or unpack them at request time. The + # wheels listed in the (pre-filtered) lock file are extracted at build time into a single + # PythonPackages capnp message (one entry per file, keyed by install_dir + path; see + # python_packages.capnp / pack_python_packages.py) which is embedded as the `python_packages` + # data module. Newer Pyodide versions bundle the stdlib directly and have no wheels to embed. 
+ packages_tag = _packages_tag_for_version(version) + internal_data_modules = [ + _out_path("python_stdlib.zip", version), + ] + extra_deps = [] + has_packages = bool(packages_tag) + if has_packages: + lockfile = "python-lock/pyodide-lock_%s.json" % packages_tag + wheels = "@all_pyodide_wheels_%s//:whls" % packages_tag + native.genrule( + name = "python_packages.bin@rule@" + version, + srcs = [wheels, lockfile, "python_packages.capnp"], + outs = [_out_path("python_packages.bin", version)], + cmd = " ".join([ + "$(execpath :pack_python_packages)", + "--capnp $(execpath @capnp-cpp//src/capnp:capnp_tool)", + "--schema $(location python_packages.capnp)", + "--lock $(location %s)" % lockfile, + "--out $@", + "$(locations %s)" % wheels, + ]), + tools = [ + ":pack_python_packages", + "@capnp-cpp//src/capnp:capnp_tool", + ], + ) + internal_data_modules.append(_out_path("python_packages.bin", version)) + extra_deps.append("python_packages.bin@rule@" + version) + import_name = "pyodideRuntime" wd_js_bundle( name = "pyodide@" + version, @@ -340,27 +387,28 @@ def _python_bundle(version, *, pyodide_asm_wasm = None, pyodide_asm_js = None, p internal_wasm_modules = [ _out_path("pyodide.asm.wasm", version), ], - internal_data_modules = [ - _out_path("python_stdlib.zip", version), - ], + internal_data_modules = internal_data_modules, deps = [ "emscriptenSetup@" + version, "pyodide.asm.wasm@copy@" + version, "python_stdlib.zip@copy@" + version, - ], + ] + extra_deps, out_dir = _out_path("", version), ) pyodide_cappn_bin_rule = "pyodide.capnp.bin@rule@" + version + bin_srcs = [ + ":pyodide@%s.capnp" % version, + "//src/workerd/jsg:modules.capnp", + _ts_bundle_out(import_name + "-internal_", "emscriptenSetup", version), + _ts_bundle_out(import_name + "-internal_", "pyodide.asm.wasm", version), + _ts_bundle_out(import_name + "-internal_", "python_stdlib.zip", version), + ] + if has_packages: + bin_srcs.append(_ts_bundle_out(import_name + "-internal_", "python_packages.bin", version)) 
native.genrule( name = pyodide_cappn_bin_rule, - srcs = [ - ":pyodide@%s.capnp" % version, - "//src/workerd/jsg:modules.capnp", - _ts_bundle_out(import_name + "-internal_", "emscriptenSetup", version), - _ts_bundle_out(import_name + "-internal_", "pyodide.asm.wasm", version), - _ts_bundle_out(import_name + "-internal_", "python_stdlib.zip", version), - ], + srcs = bin_srcs, outs = [_out_path("pyodide.capnp.bin", version)], cmd = " ".join([ # Annoying logic to deal with different paths in workerd vs downstream. diff --git a/src/pyodide/internal/loadPackage.ts b/src/pyodide/internal/loadPackage.ts index 1ad2ece9215..b5d1818dd87 100644 --- a/src/pyodide/internal/loadPackage.ts +++ b/src/pyodide/internal/loadPackage.ts @@ -3,82 +3,28 @@ // https://opensource.org/licenses/Apache-2.0 /** - * This file contains code that roughly replaces pyodide.loadPackage, with workerd-specific - * optimizations: - * - Wheels are decompressed with a DecompressionStream instead of in Python - * - Wheels are overlaid onto the site-packages dir instead of actually being copied - * - Wheels are fetched from a disk cache if available. + * This file mounts the CPython stdlib packages (and the shared libraries they depend on) into the + * Pyodide filesystem. * - * Note that loadPackages is only used in local dev for now, internally we use the full big bundle - * that contains all the packages ready to go. + * The packages are extracted at build time and embedded directly in the Pyodide bundle as + * individual files (see src/pyodide/pack_python_packages.py and python_packages.capnp), so there is + * no gzip/tar work and no download at runtime: we simply overlay each embedded file onto + * site-packages or /usr/lib according to its install_dir. 
*/ -import { - LOCKFILE, - PACKAGES_VERSION, - USING_OLDEST_PACKAGES_VERSION, -} from 'pyodide-internal:metadata'; -import { - VIRTUALIZED_DIR, - STDLIB_PACKAGES, -} from 'pyodide-internal:setupPackages'; -import { parseTarInfo } from 'pyodide-internal:tar'; +import { default as EmbeddedPackages } from 'pyodide-internal:packages'; +import { VIRTUALIZED_DIR } from 'pyodide-internal:setupPackages'; import { createTarFS } from 'pyodide-internal:tarfs'; -import { default as ArtifactBundler } from 'pyodide-internal:artifacts'; -import { - PythonUserError, - PythonWorkersInternalError, -} from 'pyodide-internal:util'; - -function getPackageMetadata(requirement: string): PackageDeclaration { - const obj = LOCKFILE['packages'][requirement]; - if (!obj) { - throw new PythonUserError( - 'Requirement ' + requirement + ' not found in lockfile' - ); - } - - return obj; -} - -function loadBundleFromArtifactBundler(requirement: string): Reader { - const filename = getPackageMetadata(requirement).file_name; - const fullPath = `python-package-bucket/${PACKAGES_VERSION}/${filename}`; - const reader = ArtifactBundler.getPackage(fullPath); - if (!reader) { - throw new PythonWorkersInternalError( - 'Failed to get package ' + fullPath + ' from ArtifactBundler' - ); - } - return reader; -} /** - * Downloads the requirements specified and loads them into Pyodide. Note that this does not - * do any dependency resolution, it just installs the requirements that are specified. See - * `getTransitiveRequirements` for the code that deals with this. + * Mounts every embedded stdlib package file into the virtualized filesystem. */ -export function loadPackages(Module: Module, requirements: Set): void { - let pkgsToLoad = requirements; - // TODO: Package snapshot created with '20240829.4' needs the stdlib packages to be added here. - // We should remove this check once the next Python and packages versions are rolled - // out. 
- if (USING_OLDEST_PACKAGES_VERSION) { - pkgsToLoad = pkgsToLoad.union(new Set(STDLIB_PACKAGES)); - } - - for (const req of pkgsToLoad) { - if (req === 'test') { - continue; // Skip the test package, it is only useful for internal Python regression testing. - } - if (VIRTUALIZED_DIR.hasRequirementLoaded(req)) { - continue; - } - - const reader = loadBundleFromArtifactBundler(req); - const [tarInfo, soFiles] = parseTarInfo(reader); - const pkg = getPackageMetadata(req); - VIRTUALIZED_DIR.addSmallBundle(tarInfo, soFiles, req, pkg.install_dir); +export function loadPackages(Module: Module): void { + // A single bulk call (each entry carries its reader) rather than a reader accessor per file, to + // avoid a JS<->C++ round-trip for every stdlib file. + const files = EmbeddedPackages.getFiles(); + for (const file of files) { + VIRTUALIZED_DIR.addFile(file.installDir, file.path, file.reader, file.size); } const tarFS = createTarFS(Module); diff --git a/src/pyodide/internal/metadata.ts b/src/pyodide/internal/metadata.ts index 8a69a909b66..7d73df751f0 100644 --- a/src/pyodide/internal/metadata.ts +++ b/src/pyodide/internal/metadata.ts @@ -7,8 +7,6 @@ import { default as ArtifactBundler } from 'pyodide-internal:artifacts'; export const IS_WORKERD = MetadataReader.isWorkerd(); export const IS_TRACING = MetadataReader.isTracing(); -export const SHOULD_ABORT_ISOLATE_ON_FATAL_ERROR = - MetadataReader.shouldAbortIsolateOnFatalError(); // Snapshots export const SHOULD_SNAPSHOT_TO_DISK = MetadataReader.shouldSnapshotToDisk(); @@ -32,16 +30,11 @@ export const MEMORY_SNAPSHOT_READER = MetadataReader.hasMemorySnapshot() // Packages export const PACKAGES_VERSION = MetadataReader.getPackagesVersion(); -export const USING_OLDEST_PACKAGES_VERSION = PACKAGES_VERSION === '20240829.4'; // The package lock is embedded in the binary. See `getPyodideLock` and `packageLocks`. 
export const LOCKFILE = JSON.parse( MetadataReader.getPackagesLock() ) as PackageLock; -export const REQUIREMENTS = MetadataReader.getRequirements(); -export const TRANSITIVE_REQUIREMENTS = - MetadataReader.getTransitiveRequirements(); - // Entrypoints export const MAIN_MODULE_NAME = MetadataReader.getMainModule(); diff --git a/src/pyodide/internal/patches/aiohttp.py b/src/pyodide/internal/patches/aiohttp.py deleted file mode 100644 index 0fabf993630..00000000000 --- a/src/pyodide/internal/patches/aiohttp.py +++ /dev/null @@ -1,259 +0,0 @@ -""" -Monkeypatch aiohttp to introduce Fetch API support. - -Based on https://github.com/pyodide/pyodide/issues/3711#issuecomment-1773523301 -with some modifications. -""" - -# ruff: noqa: PLR0913, TRY301, TRY300 - -from collections.abc import Iterable -from contextlib import suppress -from typing import Any - -from aiohttp import ClientSession, ClientTimeout, CookieJar, InvalidURL, hdrs, payload -from aiohttp.client_reqrep import _merge_ssl_params -from aiohttp.helpers import TimeoutHandle, get_env_proxy_for_url, strip_auth_from_url -from multidict import CIMultiDict, istr -from yarl import URL - - -class Content: - __slots__ = ("_exception", "_jsresp") - - def __init__(self, _jsresp): - self._jsresp = _jsresp - self._exception = None - - async def read(self): - if self._exception: - raise self._exception - buf = await self._jsresp.arrayBuffer() - self._jsresp = None - return buf.to_bytes() - - def exception(self): - return self._exception - - def set_exception(self, exc: BaseException) -> None: - self._exception = exc - - -async def _request( - self, - method: str, - str_or_url, - *, - params=None, - data: Any = None, - json: Any = None, - cookies=None, - headers=None, - skip_auto_headers: Iterable[str] | None = None, - auth=None, - allow_redirects: bool = True, - max_redirects: int = 10, - compress: str | None = None, - chunked: bool | None = None, - expect100: bool = False, - raise_for_status=None, - read_until_eof: bool = 
True, - proxy=None, - proxy_auth=None, - timeout=None, - verify_ssl: bool | None = None, - fingerprint: bytes | None = None, - ssl_context=None, - ssl=None, - proxy_headers=None, - trace_request_ctx=None, - read_bufsize: int | None = None, -): - # NOTE: timeout clamps existing connect and read timeouts. We cannot - # set the default to None because we need to detect if the user wants - # to use the existing timeouts by setting timeout to None. - - if self.closed: - raise RuntimeError("Session is closed") - - ssl = _merge_ssl_params(ssl, verify_ssl, ssl_context, fingerprint) - - if data is not None and json is not None: - raise ValueError("data and json parameters can not be used at the same time") - elif json is not None: - data = payload.JsonPayload(json, dumps=self._json_serialize) - - history = [] - version = self._version - params = params or {} - - # Merge with default headers and transform to CIMultiDict - headers = self._prepare_headers(headers) - proxy_headers = self._prepare_headers(proxy_headers) - - try: - url = self._build_url(str_or_url) - except ValueError as e: - raise InvalidURL(str_or_url) from e - - skip_headers = set(self._skip_auto_headers) - if skip_auto_headers is not None: - for i in skip_auto_headers: - skip_headers.add(istr(i)) - - if proxy is not None: - try: - proxy = URL(proxy) - except ValueError as e: - raise InvalidURL(proxy) from e - - if timeout is None: - real_timeout = self._timeout - elif not isinstance(timeout, ClientTimeout): - real_timeout = ClientTimeout(total=timeout) # type: ignore[arg-type] - else: - real_timeout = timeout - # timeout is cumulative for all request operations - # (request, redirects, responses, data consuming) - tm = TimeoutHandle(self._loop, real_timeout.total) - handle = tm.start() - - if read_bufsize is None: - read_bufsize = self._read_bufsize - - traces = [] - - timer = tm.timer() - try: - with timer: - url, auth_from_url = strip_auth_from_url(url) - if auth and auth_from_url: - raise ValueError( - 
"Cannot combine AUTH argument with credentials encoded in URL" - ) - - if auth is None: - auth = auth_from_url - if auth is None: - auth = self._default_auth - # It would be confusing if we support explicit - # Authorization header with auth argument - if auth is not None and hdrs.AUTHORIZATION in headers: - raise ValueError( - "Cannot combine AUTHORIZATION header " - "with AUTH argument or credentials " - "encoded in URL" - ) - - all_cookies = self._cookie_jar.filter_cookies(url) - - if cookies is not None: - tmp_cookie_jar = CookieJar() - tmp_cookie_jar.update_cookies(cookies) - req_cookies = tmp_cookie_jar.filter_cookies(url) - if req_cookies: - all_cookies.load(req_cookies) - - if proxy is not None: - proxy = URL(proxy) - elif self._trust_env: - with suppress(LookupError): - proxy, proxy_auth = get_env_proxy_for_url(url) - - req = self._request_class( - method, - url, - params=params, - headers=headers, - skip_auto_headers=skip_headers, - data=data, - cookies=all_cookies, - auth=auth, - version=version, - compress=compress, - chunked=chunked, - expect100=expect100, - loop=self._loop, - response_class=self._response_class, - proxy=proxy, - proxy_auth=proxy_auth, - timer=timer, - session=self, - ssl=ssl, - proxy_headers=proxy_headers, - traces=traces, - ) - - req.response = resp = req.response_class( - req.method, - req.original_url, - writer=None, - continue100=req._continue, - timer=req._timer, - request_info=req.request_info, - traces=req._traces, - loop=req.loop, - session=req._session, - ) - from js import Headers, fetch - - from pyodide.ffi import to_js - - body = None - if req.body: - body = to_js(req.body._value) - jsheaders = Headers.new() - for k, v in headers.items(): - jsheaders.append(k, v) - jsresp = await fetch( - str(req.url), method=req.method, headers=jsheaders, body=body - ) - resp.version = version - resp.status = jsresp.status - resp.reason = jsresp.statusText - # This is not quite correct in handling of repeated headers - resp._headers = 
CIMultiDict(jsresp.headers) - resp._raw_headers = tuple(tuple(e) for e in jsresp.headers) - resp.content = Content(jsresp) - - # check response status - if raise_for_status is None: - raise_for_status = self._raise_for_status - - if raise_for_status is None: - pass - elif callable(raise_for_status): - await raise_for_status(resp) - elif raise_for_status: - resp.raise_for_status() - - # register connection - if handle is not None: - if resp.connection is not None: - resp.connection.add_callback(handle.cancel) - else: - handle.cancel() - - resp._history = tuple(history) - - for trace in traces: - await trace.send_request_end( - method, url.update_query(params), headers, resp - ) - return resp - - except BaseException as e: - # cleanup timer - tm.close() - if handle: - handle.cancel() - handle = None - - for trace in traces: - await trace.send_request_exception( - method, url.update_query(params), headers, e - ) - raise - - -ClientSession._request = _request diff --git a/src/pyodide/internal/patches/httpx.py b/src/pyodide/internal/patches/httpx.py deleted file mode 100644 index 9e4ff509a1c..00000000000 --- a/src/pyodide/internal/patches/httpx.py +++ /dev/null @@ -1,96 +0,0 @@ -"""A patch to make async httpx work using JavaScript fetch.""" - -from contextlib import contextmanager - -from httpx._client import AsyncClient, BoundAsyncStream, logger -from httpx._models import Headers, Request, Response -from httpx._transports.default import AsyncResponseStream -from httpx._types import AsyncByteStream -from httpx._utils import Timer -from js import Headers as js_Headers -from js import fetch - -from pyodide.ffi import create_proxy - - -@contextmanager -def acquire_buffer(content): - """Acquire a Uint8Array view of a bytes object""" - if not content: - yield None - return - body_px = create_proxy(content) - body_buf = body_px.getBuffer("u8") - try: - yield body_buf.data - finally: - body_px.destroy() - body_buf.release() - - -async def 
js_readable_stream_iter(js_readable_stream): - """Readable streams are supposed to be async iterators some day but they - aren't yet. In the meantime, this is an adaptor that produces an async - iterator from a readable stream. - """ - reader = js_readable_stream.getReader() - while True: - res = await reader.read() - if res.done: - return - b = res.value.to_bytes() - print("js_readable_stream_iter", b) - yield b - - -async def _send_single_request(self, request: Request) -> Response: - """ - Sends a single request, without handling any redirections. - - This is the function we're patching here... - """ - timer = Timer() - await timer.async_start() - - if not isinstance(request.stream, AsyncByteStream): - raise TypeError( - "Attempted to send an sync request with an AsyncClient instance." - ) - - # BEGIN MODIFIED PART - js_headers = js_Headers.new(request.headers.multi_items()) - with acquire_buffer(request.content) as body: - js_resp = await fetch( - str(request.url), method=request.method, headers=js_headers, body=body - ) - - py_headers = Headers(js_resp.headers) - # Unset content-encoding b/c Javascript fetch already handled unpacking. If - # we leave it we will get errors when httpx tries to unpack a second time. 
- py_headers.pop("content-encoding", None) - response = Response( - status_code=js_resp.status, - headers=py_headers, - stream=AsyncResponseStream(js_readable_stream_iter(js_resp.body)), - ) - # END MODIFIED PART - - assert isinstance(response.stream, AsyncByteStream) - response.request = request - response.stream = BoundAsyncStream(response.stream, response=response, timer=timer) - self.cookies.extract_cookies(response) - response.default_encoding = self._default_encoding - - logger.info( - 'HTTP Request: %s %s "%s %d %s"', - request.method, - request.url, - response.http_version, - response.status_code, - response.reason_phrase, - ) - - return response - - -AsyncClient._send_single_request = _send_single_request diff --git a/src/pyodide/internal/pool/builtin_wrappers.ts b/src/pyodide/internal/pool/builtin_wrappers.ts index af2ce325436..26bb7fb57a6 100644 --- a/src/pyodide/internal/pool/builtin_wrappers.ts +++ b/src/pyodide/internal/pool/builtin_wrappers.ts @@ -255,7 +255,7 @@ function prepareStackTrace( } /** - * This is a fix for a problem with package snapshots in 0.26.0a2. 0.26.0a2 tests if + * This is a fix for a problem with snapshots in 0.26.0a2. 0.26.0a2 tests if * wasm-type-reflection is supported by the runtime and if so uses it to avoid function pointer * casting instead of a JS trampoline. We cannot stack switch through the JS trampoline so we need * to make sure that when stack switching is available, we don't use JS trampolines. When 0.26.0a2 diff --git a/src/pyodide/internal/pool/emscriptenSetup.ts b/src/pyodide/internal/pool/emscriptenSetup.ts index 769d67d1dbf..57a452aa222 100644 --- a/src/pyodide/internal/pool/emscriptenSetup.ts +++ b/src/pyodide/internal/pool/emscriptenSetup.ts @@ -144,7 +144,6 @@ function getInstantiateWasm( * This isn't public API of Pyodide so it's a bit fiddly. 
*/ function getEmscriptenSettings( - isWorkerd: boolean, pythonStdlib: ArrayBuffer, pyodideWasmModule: WebAssembly.Module ): EmscriptenSettings { @@ -162,13 +161,10 @@ function getEmscriptenSettings( lockFileURL: '', enableRunUntilComplete: true, }; - let lockFilePromise; - if (isWorkerd) { - lockFilePromise = new Promise( - (res) => (config.resolveLockFilePromise = res) - ); - } - const API = { config, lockFilePromise }; + // We mount the stdlib packages directly (see loadPackage.ts) rather than going through Pyodide's + // package manager, so we deliberately leave `API.lockFilePromise` unset. Pyodide's bootstrap + // guards on it (`API.lockFilePromise && ...`), so the package index is simply not initialised. + const API = { config }; let resolveReadyPromise: (mod: Module) => void; let rejectReadyPromise: (e: any) => void = () => {}; const readyPromise: Promise = new Promise((res, rej) => { @@ -227,15 +223,10 @@ function* featureDetectionMonkeyPatchesContextManager(): Generator { * Returns the instantiated emscriptenModule object. */ export async function instantiateEmscriptenModule( - isWorkerd: boolean, pythonStdlib: ArrayBuffer, wasmModule: WebAssembly.Module ): Promise { - const emscriptenSettings = getEmscriptenSettings( - isWorkerd, - pythonStdlib, - wasmModule - ); + const emscriptenSettings = getEmscriptenSettings(pythonStdlib, wasmModule); for (const _ of featureDetectionMonkeyPatchesContextManager()) { // Ignore the returned promise, it won't resolve until we're done preloading dynamic // libraries. 
diff --git a/src/pyodide/internal/python.ts b/src/pyodide/internal/python.ts index b213ea10fc0..3a8af9d98f1 100644 --- a/src/pyodide/internal/python.ts +++ b/src/pyodide/internal/python.ts @@ -8,7 +8,7 @@ import { mountWorkerFiles, } from 'pyodide-internal:setupPackages'; import { - maybeCollectSnapshot, + maybeCollectBaselineSnapshot, maybeRestoreSnapshot, finalizeBootstrap, isRestoringSnapshot, @@ -24,7 +24,6 @@ import { import { LEGACY_VENDOR_PATH, PROCESS_PTH_FILES, - SHOULD_ABORT_ISOLATE_ON_FATAL_ERROR, setCpuLimitNearlyExceededCallback, } from 'pyodide-internal:metadata'; import { default as FatalReporter } from 'pyodide-internal:fatal-reporter'; @@ -42,7 +41,6 @@ import { import { loadPackages } from 'pyodide-internal:loadPackage'; import { default as MetadataReader } from 'pyodide-internal:runtime-generated/metadata'; import { default as setupPythonSearchPathSource } from 'pyodide-internal:setup_python_search_path.py'; -import { TRANSITIVE_REQUIREMENTS, IS_WORKERD } from 'pyodide-internal:metadata'; import { getTrustedReadFunc } from 'pyodide-internal:readOnlyFS'; import { PyodideVersion } from 'pyodide-internal:const'; import { default as pythonStdlibZip } from 'pyodideRuntime-internal:python_stdlib.zip'; @@ -225,13 +223,11 @@ function compileModuleFromReadOnlyFS( } export async function loadPyodide( - isWorkerd: boolean, - lockfile: PackageLock, customSerializedObjects: CustomSerializedObjects ): Promise { try { const Module = await enterJaegerSpan('instantiate_emscripten', () => - instantiateEmscriptenModule(IS_WORKERD, pythonStdlibZip, pyodideAsmWasm) + instantiateEmscriptenModule(pythonStdlibZip, pyodideAsmWasm) ); Module.compileModuleFromReadOnlyFS = compileModuleFromReadOnlyFS; if (Module.API.version === PyodideVersion.V0_28_2) { @@ -242,9 +238,6 @@ export async function loadPyodide( } else { Module.API.config.jsglobals = globalThis; } - if (isWorkerd) { - Module.API.config.resolveLockFilePromise!(lockfile); - } 
Module.setGetRandomValues(getRandomValues); Module.setSetTimeout( makeSetTimeout(Module), @@ -257,14 +250,14 @@ export async function loadPyodide( enterJaegerSpan('load_packages', () => { // NB. loadPackages adds the packages to the `VIRTUALIZED_DIR` global which then gets used in // preloadDynamicLibs. - loadPackages(Module, TRANSITIVE_REQUIREMENTS); + loadPackages(Module); }); enterJaegerSpan('prepare_wasm_linear_memory', () => { prepareWasmLinearMemory(Module, customSerializedObjects); }); - maybeCollectSnapshot(Module, customSerializedObjects); + maybeCollectBaselineSnapshot(Module, customSerializedObjects); // Mount worker files after doing snapshot upload so we ensure that data from the files is never // present in snapshot memory. mountWorkerFiles(Module); @@ -285,11 +278,9 @@ export async function loadPyodide( } catch (_e) { FatalReporter.reportFatal('Internal error reporting fatal error'); } - if (SHOULD_ABORT_ISOLATE_ON_FATAL_ERROR) { - cloudflareWorkers.abortIsolate( - `Python worker fatal error: ${String(error)}` - ); - } + cloudflareWorkers.abortIsolate( + `Python worker fatal error: ${String(error)}` + ); }; return pyodide; } catch (e) { diff --git a/src/pyodide/internal/setupPackages.ts b/src/pyodide/internal/setupPackages.ts index 6f7575e789b..a33135f2ee6 100644 --- a/src/pyodide/internal/setupPackages.ts +++ b/src/pyodide/internal/setupPackages.ts @@ -2,35 +2,15 @@ // Licensed under the Apache 2.0 license found in the LICENSE file or at: // https://opensource.org/licenses/Apache-2.0 -import { parseTarInfo } from 'pyodide-internal:tar'; import { createMetadataFS } from 'pyodide-internal:metadatafs'; -import { LOCKFILE } from 'pyodide-internal:metadata'; import { invalidateCaches, PythonWorkersInternalError, - PythonUserError, simpleRunPython, } from 'pyodide-internal:util'; -import { default as EmbeddedPackagesTarReader } from 'pyodide-internal:packages_tar_reader'; -const canonicalizeNameRegex = /[-_.]+/g; const DYNLIB_PATH = '/usr/lib'; -/** - * 
Canonicalize a package name. Port of Python's packaging.utils.canonicalize_name. - * @param name The package name to canonicalize. - * @returns The canonicalize package name. - * @private - */ -function canonicalizePackageName(name: string): string { - return name.replace(canonicalizeNameRegex, '-').toLowerCase(); -} - -// The "name" field in the lockfile is not canonicalized -export const STDLIB_PACKAGES: string[] = Object.values(LOCKFILE.packages) - .filter(({ package_type }) => package_type === 'cpython_module') - .map(({ name }) => canonicalizePackageName(name)); - // Each item in the list is an element of the file path, for example // `folder/file.txt` -> `["folder", "file.txt"] export type FilePath = string[]; @@ -64,87 +44,78 @@ class VirtualizedDir { // TODO(soon): Can we use the # syntax here? // eslint-disable-next-line no-restricted-syntax private soFiles: FilePath[]; - // TODO(soon): Can we use the # syntax here? - // eslint-disable-next-line no-restricted-syntax - private loadedRequirements: Set; constructor() { this.rootInfo = createTarFsInfo(); this.dynlibTarFs = createTarFsInfo(); this.soFiles = []; - this.loadedRequirements = new Set(); } /** - * mountOverlay "overlays" a directory onto the site-packages root directory. - * All files and subdirectories in the overlay will be accessible at site-packages by the worker. - * If a file or directory already exists, an error is thrown. - * @param {TarInfo} overlayInfo The directory that is to be "copied" into site-packages + * Adds a single extracted package file to the virtualized filesystem, creating intermediate + * directories as needed. Files are routed to /usr/lib (dynlib) or site-packages (everything else) + * based on their `installDir`. The package stdlib is embedded in the bundle as individual files + * (see loadPackage.ts), so this is called once per file. + * + * @param installDir The package's `install_dir` (from the lock file). + * @param path The file's path within `installDir`, e.g. 
"ssl/__init__.py". + * @param reader Reads the file's contents. + * @param size The file's size in bytes. */ - mountOverlay(overlayInfo: TarFSInfo, dir: InstallDir): void { - const dest = dir == 'dynlib' ? this.dynlibTarFs : this.rootInfo; - overlayInfo.children!.forEach((val, key) => { - if (dest.children!.has(key)) { + addFile( + installDir: InstallDir, + path: string, + reader: Reader, + size: number + ): void { + const dest = installDir == 'dynlib' ? this.dynlibTarFs : this.rootInfo; + const parts = path.split('/'); + let dir = dest; + for (let i = 0; i < parts.length - 1; i++) { + const part = parts[i]!; + let child = dir.children!.get(part); + if (!child) { + child = { + children: new Map(), + mode: 0o777, + type: '5', + modtime: 0, + size: 0, + path: parts.slice(0, i + 1).join('/'), + name: part, + parts: [], + reader: null, + }; + dir.children!.set(part, child); + } + if (!child.children) { throw new PythonWorkersInternalError( - `File/folder ${key} being written by multiple packages` + `File/folder ${path} conflicts with a file written by another package` ); } - dest.children!.set(key, val); - }); - } - - /** - * A small bundle contains just a single package, it can be thought of as a wheel. - * - * The entire bundle will be overlaid onto site-packages or /usr/lib depending on its install_dir. 
- * - * @param {TarInfo} tarInfo The root tarInfo for the small bundle (See tar.js) - * @param {List} soFiles A list of .so files contained in the small bundle - * @param {String} requirement The canonicalized package name this small bundle corresponds to - * @param {InstallDir} installDir The `install_dir` field from the metadata about the package taken from the lockfile - */ - addSmallBundle( - tarInfo: TarFSInfo, - soFiles: string[], - requirement: string, - installDir: InstallDir - ): void { - for (const soFile of soFiles) { - this.soFiles.push(soFile.split('/')); + dir = child; } - this.mountOverlay(tarInfo, installDir); - this.loadedRequirements.add(requirement); - } - /** - * A big bundle contains multiple packages, each package contained in a folder whose name is the canonicalized package name. - * This function overlays the requested packages onto the site-packages directory. - * @param {TarInfo} tarInfo The root tarInfo for the big bundle (See tar.js) - * @param {List} soFiles A list of .so files contained in the big bundle - * @param {List} requirements canonicalized list of packages to pick from the big bundle - */ - addBigBundle( - tarInfo: TarFSInfo, - soFiles: string[], - requirements: Set - ): void { - // add all the .so files we will need to preload from the big bundle - for (const soFile of soFiles) { - // If folder is in list of requirements include .so file in list to preload. 
- const [pkg, ...rest] = soFile.split('/'); - if (requirements.has(pkg!)) { - this.soFiles.push(rest); - } + const name = parts.at(-1)!; + if (dir.children!.has(name)) { + throw new PythonWorkersInternalError( + `File ${path} being written by multiple packages` + ); } + dir.children!.set(name, { + children: undefined, + mode: 0o755, + type: '0', + modtime: 0, + size, + path, + name, + parts: [], + contentsOffset: 0, + reader, + }); - for (const req of requirements) { - const child = tarInfo.children!.get(req); - if (!child) { - throw new PythonUserError( - `Requirement ${req} not found in pyodide packages tar` - ); - } - this.mountOverlay(child, 'site'); - this.loadedRequirements.add(req); + if (path.endsWith('.so')) { + this.soFiles.push(parts); } } @@ -161,10 +132,6 @@ class VirtualizedDir { return this.soFiles; } - hasRequirementLoaded(req: string): boolean { - return this.loadedRequirements.has(req); - } - mount(Module: Module, tarFS: EmscriptenFS): void { Module.FS.mkdirTree(Module.FS.sessionSitePackages); Module.FS.mount( @@ -177,51 +144,6 @@ class VirtualizedDir { } } -/** - * This stitches together the view of the site packages directory. Each - * requirement corresponds to a folder in the original tar file. For each - * requirement in the list we grab the corresponding folder and stitch them - * together into a combined folder. - * - * This also returns the list of soFiles in the resulting site-packages - * directory so we can preload them. - * - * TODO(later): This needs to be removed when external package loading is enabled. - */ -export function buildVirtualizedDir(): VirtualizedDir { - if (EmbeddedPackagesTarReader.read === undefined) { - // Package retrieval is enabled, so the embedded tar reader isn't initialized. - // All packages, including STDLIB_PACKAGES, are loaded in `loadPackages`. 
- return new VirtualizedDir(); - } - - const [bigTarInfo, bigTarSoFiles] = parseTarInfo(EmbeddedPackagesTarReader); - - const requirementsInBigBundle = new Set(STDLIB_PACKAGES); - const res = new VirtualizedDir(); - res.addBigBundle(bigTarInfo, bigTarSoFiles, requirementsInBigBundle); - - return res; -} - -/** - * Patch loadPackage: - * - in workerd, disable integrity checks - * - otherwise, disable it entirely - * - * TODO: stop using loadPackage in workerd. - */ -export function patchLoadPackage(pyodide: Pyodide): void { - pyodide.loadPackage = disabledLoadPackage; - return; -} - -function disabledLoadPackage(): never { - throw new PythonWorkersInternalError( - 'pyodide.loadPackage is disabled because packages are encoded in the binary' - ); -} - /** * This mounts the metadataFS (which contains user code). */ @@ -244,4 +166,4 @@ export function adjustSysPath(Module: Module): void { ); } -export const VIRTUALIZED_DIR = buildVirtualizedDir(); +export const VIRTUALIZED_DIR = new VirtualizedDir(); diff --git a/src/pyodide/internal/snapshot.ts b/src/pyodide/internal/snapshot.ts index 8c450a15db5..ada81eb15f8 100644 --- a/src/pyodide/internal/snapshot.ts +++ b/src/pyodide/internal/snapshot.ts @@ -7,12 +7,10 @@ import { default as ArtifactBundler } from 'pyodide-internal:artifacts'; import { default as UnsafeEval } from 'internal:unsafe-eval'; import { default as DiskCache } from 'pyodide-internal:disk_cache'; import { type FilePath, VIRTUALIZED_DIR } from 'pyodide-internal:setupPackages'; -import { default as EmbeddedPackagesTarReader } from 'pyodide-internal:packages_tar_reader'; import { SHOULD_SNAPSHOT_TO_DISK, IS_CREATING_BASELINE_SNAPSHOT, MEMORY_SNAPSHOT_READER, - REQUIREMENTS, IS_CREATING_SNAPSHOT, IS_EW_VALIDATING, IS_DYNAMIC_WORKER, @@ -76,8 +74,6 @@ type SnapshotSettings = { // The new wire format, with additional information about the hiwire state, the order that dsos were // loaded in, and their memory bases. We also moved settings out of the dsoHandles. 
type SnapshotMeta = { - // We just store importedModulesList to help with testing and introspection - readonly importedModulesList: ReadonlyArray | undefined; readonly hiwire: SnapshotConfig | undefined; readonly dsoHandles: DsoHandles; readonly settings: SnapshotSettings; @@ -196,9 +192,9 @@ function loadDynlib( * This function is used to ensure the order in which we load SO_FILES stays the same. It is only * used for 0.26.0a2, later we look at SNAPSHOT_META.loadOrder to decide what order to load libs. * - * The sort always puts _lzma.so and _ssl.so first, because these SO_FILES are loaded in the - * baseline snapshot, and if we want to generate a package snapshot while a baseline snapshot is - * loaded we need them to be first. The rest of the files are sorted alphabetically. + * The sort always puts _lzma.so and _ssl.so first, because these SO_FILES are loaded first when + * creating the baseline snapshot, so we keep them first here to preserve the load order. The rest + * of the files are sorted alphabetically. * * The `filePaths` list is of the form [["folder", "file.so"], ["file.so"]], so each element in it * is effectively a file path. @@ -296,24 +292,23 @@ function loadDynlibFromTarFs( for (const part of soFile) { node = node?.children?.get(part); } - if (!node?.contentsOffset) { + // Note: contentsOffset can legitimately be 0 (embedded package files are read from their own + // per-file reader starting at offset 0), so compare against undefined rather than truthiness. 
+ if (node?.contentsOffset === undefined) { node = VIRTUALIZED_DIR.getDynlibRoot(); for (const part of soFile) { node = node?.children?.get(part); } } - if (!node?.contentsOffset) { + if (node?.contentsOffset === undefined) { throw Error(`fs node could not be found for ${soFile.join('/')}`); } - const { contentsOffset, size } = node; - if (contentsOffset === undefined) { - throw Error(`contentsOffset not defined for ${soFile.join('/')}`); + const { contentsOffset, size, reader } = node; + if (!reader) { + throw Error(`reader not defined for ${soFile.join('/')}`); } const wasmModuleData = new Uint8Array(size); - (node.reader ?? EmbeddedPackagesTarReader).read( - contentsOffset, - wasmModuleData - ); + reader.read(contentsOffset, wasmModuleData); const path = base + soFile.join('/'); loadDynlib(Module, path, wasmModuleData); } @@ -448,19 +443,13 @@ function recordDsoHandles(Module: Module): DsoHandles { * can't snapshot the JS runtime state so we have no ffi. Thus some imports from * user code will fail. * - * If we are doing a baseline snapshot, just import everything from - * baselineSnapshotImports. These will all succeed. - * - * If doing a more dedicated "package" snap shot, also try to import each - * user import that is importing non-vendored modules. + * We import everything from baselineSnapshotImports. These will all succeed. * * All of this is being done in the __main__ global scope, so be careful not to * pollute it with extra included-by-default names (user code is executed in its * own separate module scope though so it's not _that_ important). - * - * This function returns a list of modules that have been imported. 
*/ -function memorySnapshotDoImports(Module: Module): string[] { +function memorySnapshotDoImports(Module: Module): void { const baselineSnapshotImports = MetadataReader.constructor.getBaselineSnapshotImports(); const toImport = baselineSnapshotImports.join(','); @@ -472,31 +461,6 @@ function memorySnapshotDoImports(Module: Module): string[] { simpleRunPython(Module, 'sysconfig.get_config_vars()'); // Delete to avoid polluting globals simpleRunPython(Module, `del ${toDelete}`); - if (IS_CREATING_BASELINE_SNAPSHOT) { - // We've done all the imports for the baseline snapshot. - return []; - } - if (REQUIREMENTS.length == 0) { - // Don't attempt to scan for package imports if the Worker has specified no package - // requirements, as this means their code isn't going to be importing any modules that we need - // to include in a snapshot. - return []; - } - - // The `importedModules` list will contain all modules that have been imported, including local - // modules, the usual `js` and other stdlib modules. We want to filter out local imports, so we - // grab them and put them into a set for fast filtering. - const importedModules: string[] = MetadataReader.getPackageSnapshotImports( - Module.API.version - ); - const deduplicatedModules = [...new Set(importedModules)]; - - // Import the modules list so they are included in the snapshot. 
- if (deduplicatedModules.length > 0) { - simpleRunPython(Module, 'import ' + deduplicatedModules.join(',')); - } - - return deduplicatedModules; } function describeValue(val: any): string { @@ -653,7 +617,6 @@ function getHiwireDeserializer( */ function makeLinearMemorySnapshot( Module: Module, - importedModulesList: string[], customSerializedObjects: CustomSerializedObjects, snapshotType: ArtifactBundler.SnapshotType ): Uint8Array { @@ -666,7 +629,7 @@ function makeLinearMemorySnapshot( ); } const settings: SnapshotSettings = { - baselineSnapshot: IS_CREATING_BASELINE_SNAPSHOT, + baselineSnapshot: snapshotType === 'baseline', snapshotType, compatFlags: COMPATIBILITY_FLAGS, }; @@ -674,7 +637,6 @@ function makeLinearMemorySnapshot( version: 1, dsoHandles, hiwire, - importedModulesList, jsModuleNames: Array.from(jsModuleNames), settings, ...CREATED_SNAPSHOT_META, @@ -743,13 +705,12 @@ function decodeSnapshot( if (!meta?.version) { return { version: 1, - importedModulesList: undefined, dsoHandles: meta, hiwire: undefined, loadOrder: [], soMemoryBases: {}, settings: { - snapshotType: meta.settings?.baselineSnapshot ? 'baseline' : 'package', + snapshotType: 'baseline', compatFlags: {}, ...meta.settings, }, @@ -762,9 +723,7 @@ function decodeSnapshot( ...extras, settings: { ...meta.settings, - snapshotType: - meta.settings.snapshotType ?? - (meta.settings.baselineSnapshot ? 'baseline' : 'package'), + snapshotType: meta.settings.snapshotType ?? 'baseline', compatFlags: meta.settings.compatFlags ?? 
{}, }, }; @@ -844,7 +803,6 @@ export function maybeRestoreSnapshot(Module: Module): void { function collectSnapshot( Module: Module, - importedModulesList: string[], customSerializedObjects: CustomSerializedObjects, snapshotType: ArtifactBundler.SnapshotType ): void { @@ -855,7 +813,6 @@ function collectSnapshot( } const snapshot = makeLinearMemorySnapshot( Module, - importedModulesList, customSerializedObjects, snapshotType ); @@ -863,7 +820,9 @@ function collectSnapshot( if (IS_EW_VALIDATING) { ArtifactBundler.storeMemorySnapshot({ snapshot, - importedModulesList, + // This field is no longer used but is still required by the C++ + // MemorySnapshotResult struct consumed by the validator. + importedModulesList: [], snapshotType, }); } else if (SHOULD_SNAPSHOT_TO_DISK) { @@ -881,11 +840,7 @@ export function maybeCollectDedicatedSnapshot( Module: Module, customSerializedObjects: CustomSerializedObjects | null ): void { - if (!IS_CREATING_SNAPSHOT) { - return; - } - - if (!IS_DEDICATED_SNAPSHOT_ENABLED) { + if (!IS_CREATING_SNAPSHOT || !IS_DEDICATED_SNAPSHOT_ENABLED) { return; } @@ -902,38 +857,26 @@ export function maybeCollectDedicatedSnapshot( 'customSerializedObjects is required for dedicated snapshot' ); } - collectSnapshot(Module, [], customSerializedObjects, 'dedicated'); + collectSnapshot(Module, customSerializedObjects, 'dedicated'); } /** - * Collects either a baseline or package snapshot. This is called prior to running the top-level - * of the worker and crucially before the worker files are mounted. + * Collects a baseline snapshot if appropriate. This is called prior to running + * the top-level of the worker and crucially before the worker files are + * mounted. * * Dedicated snapshots are collected in `maybeCollectDedicatedSnapshot`. 
*/ -export function maybeCollectSnapshot( +export function maybeCollectBaselineSnapshot( Module: Module, customSerializedObjects: CustomSerializedObjects ): void { // In order to surface any problems that occur in `memorySnapshotDoImports` to // users in local development, always call it even if we aren't actually - const importedModulesList = memorySnapshotDoImports(Module); - if (!IS_CREATING_SNAPSHOT) { - return; - } - - if (IS_DEDICATED_SNAPSHOT_ENABLED) { - // We are not interested in collecting a baseline/package snapshot here if this feature flag - // is enabled. - return; + memorySnapshotDoImports(Module); + if (IS_CREATING_SNAPSHOT && !IS_DEDICATED_SNAPSHOT_ENABLED) { + collectSnapshot(Module, customSerializedObjects, 'baseline'); } - - collectSnapshot( - Module, - importedModulesList, - customSerializedObjects, - IS_CREATING_BASELINE_SNAPSHOT ? 'baseline' : 'package' - ); } export function finalizeBootstrap( @@ -961,6 +904,5 @@ export function finalizeBootstrap( Module.API.public_api.registerJsModule('_cf_internal_snapshot_info', { loadedSnapshot: !!LOADED_SNAPSHOT_META, loadedBaselineSnapshot: LOADED_SNAPSHOT_META?.settings.baselineSnapshot, - importedModulesList: LOADED_SNAPSHOT_META?.importedModulesList, }); } diff --git a/src/pyodide/internal/tar.ts b/src/pyodide/internal/tar.ts deleted file mode 100644 index 931b5d078e4..00000000000 --- a/src/pyodide/internal/tar.ts +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright (c) 2026 Cloudflare, Inc. -// Licensed under the Apache 2.0 license found in the LICENSE file or at: -// https://opensource.org/licenses/Apache-2.0 - -// This is based on the info about the tar file format on wikipedia -// And some trial and error with real tar files. 
-// https://en.wikipedia.org/wiki/Tar_(computing)#File_format - -import { PythonWorkersInternalError } from 'pyodide-internal:util'; - -const decoder = new TextDecoder(); -function decodeString(buf: Uint8Array): string { - const nullIdx = buf.indexOf(0); - if (nullIdx >= 0) { - buf = buf.subarray(0, nullIdx); - } - return decoder.decode(buf); -} -function decodeField(buf: Uint8Array, offset: number, size: number): string { - return decodeString(buf.subarray(offset, offset + size)); -} -function decodeNumber(buf: Uint8Array, offset: number, size: number): number { - return parseInt(decodeField(buf, offset, size), 8); -} - -function decodeHeader(buf: Uint8Array, reader: Reader): TarFSInfo { - const nameBase = decodeField(buf, 0, 100); - const namePrefix = decodeField(buf, 345, 155); - let path = namePrefix + nameBase; - // Trim possible leading ./ - if (path.startsWith('./')) { - path = path.slice(2); - } - const mode = decodeNumber(buf, 100, 8); - const size = decodeNumber(buf, 124, 12); - const modtime = decodeNumber(buf, 136, 12); - const type = String.fromCharCode(buf[156]!); - return { - path, - name: path, - mode, - size, - modtime, - type, - parts: [], - children: undefined, - reader, - }; -} - -export function parseTarInfo(reader: Reader): [TarFSInfo, string[]] { - const directories: TarFSInfo[] = []; - const soFiles = []; - const root: TarFSInfo = { - children: new Map(), - mode: 0o777, - type: '5', - modtime: 0, - size: 0, - path: '', - name: '', - parts: [], - reader, - }; - let directory = root; - const buf = new Uint8Array(512); - let offset = 0; - let longName = null; // if truthy, overwrites the filename of the next header - while (true) { - reader.read(offset, buf); - const info = decodeHeader(buf, reader); - if (isNaN(info.mode)) { - // Invalid mode means we're done - return [root, soFiles]; - } - if (longName) { - info.path = longName; - info.name = longName; - longName = null; - } - const contentsOffset = offset + 512; - offset += 512 * 
Math.ceil(info.size / 512 + 1); - if (info.path === '') { - // skip possible leading ./ directory - continue; - } - if (info.path.includes('PaxHeader')) { - // Ignore PaxHeader extension - // These metadata directories don't actually have a directory entry which - // is going to cause us to crash below. - // Our tar files shouldn't have these anyways... - continue; - } - if (info.type === 'L') { - const buf = new Uint8Array(info.size); - reader.read(contentsOffset, buf); - longName = decodeString(buf); - continue; - } - - // Navigate to the correct directory by going up until we're at the common - // ancestor of the current position and the target then back down. - // - // Most tar files I run into are lexicographically sorted, so the "go back - // down" step is not necessary. But some tar files are a bit out of order. - // - // We do rely on the fact that the entry for a given directory appears - // before any files in the directory. I don't see anywhere in the spec where - // it says this is required but I think it would be weird and annoying for a - // tar file to violate this property. 
- - // go up to common ancestor - while (directories.length && !info.name.startsWith(directory.path)) { - directory = directories.pop()!; - } - // go down to target (in many tar files this second loop body is evaluated 0 - // times) - const parts = info.path.slice(0, -1).split('/'); - for (let i = directories.length; i < parts.length - 1; i++) { - directories.push(directory); - directory = directory.children!.get(parts[i]!)!; - } - if (info.type === '5') { - // a directory - directories.push(directory); - info.parts = parts; - info.name = info.parts.at(-1)!; - info.children = new Map(); - directory.children!.set(info.name, info); - directory = info; - } else if (info.type === '0') { - // a normal file - info.contentsOffset = contentsOffset; - info.name = info.path.slice(directory.path.length); - if (info.name.endsWith('.so')) { - soFiles.push(info.path); - } - directory.children!.set(info.name, info); - } else { - // fail if we encounter other values of type (e.g., symlink, LongName, etc) - throw new PythonWorkersInternalError( - `Python TarFS error: Unexpected type ${info.type}` - ); - } - } -} diff --git a/src/pyodide/make_snapshots.py b/src/pyodide/make_snapshots.py index 5df871d9f00..a8ed0f36270 100644 --- a/src/pyodide/make_snapshots.py +++ b/src/pyodide/make_snapshots.py @@ -55,7 +55,6 @@ def bundle_version_info(): const mainWorker :Workerd.Worker = ( modules = [ (name = "worker.py", pythonModule = embed "./worker.py"), - {requirements} ], compatibilityDate = "2025-08-05", compatibilityFlags = ["python_no_global_handlers", {compat_flags}], @@ -67,23 +66,15 @@ def bundle_version_info(): def make_config( flags: list[str], - reqs: list[str], ) -> str: - requirements = "" - for name in reqs: - requirements += f'(name="{name}", pythonRequirement=""),' - compat_flags = "" for flag in flags: compat_flags += f'"{flag}", ' - return TEMPLATE.format(requirements=requirements, compat_flags=compat_flags) + return TEMPLATE.format(compat_flags=compat_flags) -def 
make_worker(imports: list[str]) -> str: - contents = "" - for i in imports: - contents += f"import {i}\n" - contents += dedent("""\ +def make_worker() -> str: + contents = dedent("""\ from workers import WorkerEntrypoint class Default(WorkerEntrypoint): def test(self): @@ -92,22 +83,17 @@ def test(self): return contents -def make_snapshot( # noqa: PLR0913 +def make_snapshot( d: Path, outdir: Path, outprefix: str, compat_flags: list[str], - requirements: list[str], - imports: list[str], ) -> str: config_path = d / "config.capnp" - config_path.write_text(make_config(compat_flags, requirements)) + config_path.write_text(make_config(compat_flags)) worker_path = d / "worker.py" - worker_path.write_text(make_worker(imports)) - if imports: - snapshot_flag = "--python-save-snapshot" - else: - snapshot_flag = "--python-save-baseline-snapshot" + worker_path.write_text(make_worker()) + snapshot_flag = "--python-save-baseline-snapshot" if "WORKERD_BINARY" in environ: workerd = [environ["WORKERD_BINARY"]] @@ -144,42 +130,13 @@ def make_snapshot( # noqa: PLR0913 def make_baseline_snapshot( cache: Path, outdir: Path, compat_flags: list[str] ) -> list[tuple[str, str]]: - name, digest = make_snapshot(cache, outdir, "baseline", compat_flags, [], []) + name, digest = make_snapshot(cache, outdir, "baseline", compat_flags) return [ ("baseline_snapshot", name), ("baseline_snapshot_hash", digest), ] -def make_numpy_snapshot( - cache: Path, outdir: Path, compat_flags: list[str] -) -> list[tuple[str, str]]: - name, digest = make_snapshot( - cache, outdir, "package_snapshot_numpy", compat_flags, ["numpy"], ["numpy"] - ) - return [ - ("numpy_snapshot", name), - ("numpy_snapshot_hash", digest), - ] - - -def make_fastapi_snapshot( - cache: Path, outdir: Path, compat_flags: list[str] -) -> list[tuple[str, str]]: - name, digest = make_snapshot( - cache, - outdir, - "package_snapshot_fastapi", - compat_flags, - ["fastapi"], - ["fastapi", "pydantic"], - ) - return [ - ("fastapi_snapshot", name), - 
("fastapi_snapshot_hash", digest), - ] - - def make_snapshots( cache: Path, outdir: Path, update_released: bool ) -> tuple[str, tuple[str, str]]: @@ -195,10 +152,6 @@ def make_snapshots( with timing(f"version {ver} snapshots"): with timing("baseline snapshot"): ver_info += make_baseline_snapshot(cache, outdir, compat_flags) - with timing("numpy snapshot"): - ver_info += make_numpy_snapshot(cache, outdir, compat_flags) - with timing("fastapi snapshot"): - ver_info += make_fastapi_snapshot(cache, outdir, compat_flags) res.append((ver, ver_info)) return res @@ -241,10 +194,7 @@ def upload_snapshots(outdir: Path): ) for file in outdir.glob("*.bin"): - if file.name.startswith("baseline-"): - key = "baseline-snapshot/" + hexdigest(file) - else: - key = "test-snapshot/" + file.name + key = "baseline-snapshot/" + hexdigest(file) s3.upload_file(str(file), "pyodide-capnp-bin", key) diff --git a/src/pyodide/pack_python_packages.py b/src/pyodide/pack_python_packages.py new file mode 100644 index 00000000000..ee1e56dd6ce --- /dev/null +++ b/src/pyodide/pack_python_packages.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +"""Build-time tool: extract Python stdlib package wheels into a PythonPackages capnp message. + +The CPython stdlib modules (and the shared libraries they depend on) used to be downloaded and +unpacked at request time. Instead we extract every file from each wheel here, at build time, and +embed them directly into the Pyodide bundle as a single PythonPackages message (schema in +src/pyodide/python_packages.capnp). The runtime mounts these files directly, with no gzip/tar work. + +Each wheel's `install_dir` (from the pre-filtered lock file) determines where its files mount in the +worker filesystem ("site"/"stdlib" -> site-packages, "dynlib" -> /usr/lib). + +Usage: + pack_python_packages.py --capnp --lock --out ... 
+""" + +import argparse +import json +import subprocess +import sys +import tarfile +import tempfile +from pathlib import Path + +# capnp text "string" / embed-filename escaping (paths are POSIX, but be safe). +def capnp_escape(s: str) -> str: + return s.replace("\\", "\\\\").replace('"', '\\"') + + +def extract(wheels: list[Path], lock: dict, work_dir: Path) -> list[tuple[str, str, Path]]: + """Extract every regular file from each wheel listed in the lock file. + + Returns (install_dir, path, on_disk) tuples. Inputs not referenced by the lock file (e.g. the + wheel repo's BUILD.bazel / REPO.bazel) are ignored. + """ + by_name = {wheel.name: wheel for wheel in wheels} + entries: list[tuple[str, str, Path]] = [] + files_dir = work_dir / "files" + for index, pkg in enumerate(lock["packages"].values()): + file_name = pkg["file_name"] + install_dir = pkg["install_dir"] + wheel = by_name.get(file_name) + if wheel is None: + raise SystemExit(f"Wheel {file_name} from lock file was not provided") + with tarfile.open(wheel, "r:gz") as tar: + for member in tar.getmembers(): + if member.isdir(): + continue + if not member.isfile(): + raise SystemExit( + f"Unsupported tar entry type in {wheel.name}: {member.name}" + ) + path = member.name + if path.startswith("./"): + path = path[2:] + if not path: + continue + on_disk = files_dir / str(index) / path + on_disk.parent.mkdir(parents=True, exist_ok=True) + src = tar.extractfile(member) + assert src is not None + on_disk.write_bytes(src.read()) + entries.append((install_dir, path, on_disk)) + return entries + + +def write_capnp( + entries: list[tuple[str, str, Path]], work_dir: Path, schema_src: Path +) -> Path: + # Copy the canonical schema into the work dir so the generated const file can `import` it. + # Using the real schema (rather than re-declaring the structs here) keeps a single source of + # truth: changes to python_packages.capnp can't silently diverge from what we serialize. 
+ (work_dir / "python_packages.capnp").write_text(schema_src.read_text()) + + capnp_path = work_dir / "packages.capnp" + lines = [ + "@0xf1b2c3d4e5a60798;", + 'using Schema = import "python_packages.capnp";', + "", + "const packages :Schema.PythonPackages = (files = [", + ] + for install_dir, path, on_disk in entries: + embed = capnp_escape(str(on_disk.relative_to(work_dir))) + lines.append( + ' (installDir = "%s", path = "%s", contents = embed "%s"),' + % (capnp_escape(install_dir), capnp_escape(path), embed) + ) + lines.append("]);") + capnp_path.write_text("\n".join(lines) + "\n") + return capnp_path + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--capnp", required=True, help="Path to the capnp tool") + parser.add_argument("--schema", required=True, help="Path to python_packages.capnp") + parser.add_argument("--lock", required=True, help="Path to the pre-filtered lock file") + parser.add_argument("--out", required=True, help="Output path for the binary message") + parser.add_argument("wheels", nargs="+", help="Wheel (.tar.gz) files to embed") + args = parser.parse_args() + + # Resolve to absolute paths up front since capnp eval runs with cwd set to the work dir. 
+ capnp = str(Path(args.capnp).resolve()) + out_path = str(Path(args.out).resolve()) + schema_src = Path(args.schema).resolve() + lock = json.loads(Path(args.lock).read_text()) + wheels = [Path(w) for w in args.wheels] + + with tempfile.TemporaryDirectory() as tmp: + work_dir = Path(tmp) + entries = extract(wheels, lock, work_dir) + capnp_path = write_capnp(entries, work_dir, schema_src) + with open(out_path, "wb") as out: + subprocess.run( + [capnp, "eval", capnp_path.name, "packages", "-o", "binary"], + cwd=work_dir, + stdout=out, + check=True, + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/pyodide/pyodide_extra.capnp b/src/pyodide/pyodide_extra.capnp index afe1517d663..df717e3e656 100644 --- a/src/pyodide/pyodide_extra.capnp +++ b/src/pyodide/pyodide_extra.capnp @@ -39,6 +39,9 @@ struct PythonSnapshotRelease @0x89c66fb883cb6975 { # Name of the corresponding feature flag flagName @6 :Text; realPyodideVersion @7 :Text; + integrity @8 :Text; + # Subresource-integrity-style checksum ("sha256-") of the Pyodide capnp bundle that gets + # downloaded for this release. Used to verify the integrity of the bundle at runtime. 
} const releases :List(PythonSnapshotRelease) = [ diff --git a/src/pyodide/python-entrypoint-helper.ts b/src/pyodide/python-entrypoint-helper.ts index eb591daa928..d79d431d6b6 100644 --- a/src/pyodide/python-entrypoint-helper.ts +++ b/src/pyodide/python-entrypoint-helper.ts @@ -14,10 +14,8 @@ import { IS_WORKERD, LEGACY_GLOBAL_HANDLERS, EXTERNAL_SDK, - LOCKFILE, MAIN_MODULE_NAME, SHOULD_SNAPSHOT_TO_DISK, - TRANSITIVE_REQUIREMENTS, WORKFLOWS_ENABLED, } from 'pyodide-internal:metadata'; import { @@ -25,7 +23,6 @@ import { clearSignals, loadPyodide, } from 'pyodide-internal:python'; -import { patchLoadPackage } from 'pyodide-internal:setupPackages'; import { fillSnapshotJsModules, LOADED_SNAPSHOT_TYPE, @@ -165,7 +162,7 @@ async function getPyodide(): Promise { return pyodidePromise; } pyodidePromise = (async function (): Promise { - const pyodide = await loadPyodide(IS_WORKERD, LOCKFILE, { + const pyodide = await loadPyodide({ pyodide_entrypoint_helper: get_pyodide_entrypoint_helper(), cloudflare_compat_flags: COMPATIBILITY_FLAGS, }); @@ -193,21 +190,6 @@ async function injectSitePackagesModule( ); } -/** - * Put the patch into site_packages and import it. - * - * TODO: Ideally we should only import the patch lazily when the package that it patches is - * imported. Or just apply the patch directly or upstream a fix. 
- */ -async function applyPatch(pyodide: Pyodide, patchName: string): Promise { - await injectSitePackagesModule( - pyodide, - `patches/${patchName}`, - patchName + '_patch' - ); - pyodide.pyimport(patchName + '_patch'); -} - async function injectWorkersApi(pyodide: Pyodide): Promise { if (EXTERNAL_SDK) { pyodide.FS.mkdir(`${pyodide.FS.sitePackages}/workers`); @@ -263,9 +245,15 @@ async function injectWorkersApi(pyodide: Pyodide): Promise { await injectSitePackagesModule(pyodide, 'workers-api/src/asgi', 'asgi'); } +function disabledLoadPackage(): never { + throw new PythonWorkersInternalError( + 'pyodide.loadPackage is disabled' + ); +} + async function setupPatches(pyodide: Pyodide): Promise { await enterJaegerSpan('setup_patches', async () => { - patchLoadPackage(pyodide); + pyodide.loadPackage = disabledLoadPackage; // install any extra packages into the site-packages directory // Expose the doAnImport function and global modules to Python globals @@ -278,18 +266,6 @@ async function setupPatches(pyodide: Pyodide): Promise { // Inject modules that enable JS features to be used idiomatically from Python. await injectWorkersApi(pyodide); - - // Install patches as needed - if (TRANSITIVE_REQUIREMENTS.has('aiohttp')) { - await applyPatch(pyodide, 'aiohttp'); - } - // Other than the oldest version of httpx, we apply the patch at the build step. 
- if ( - pyodide._module.API.version === PyodideVersion.V0_26_0a2 && - TRANSITIVE_REQUIREMENTS.has('httpx') - ) { - await applyPatch(pyodide, 'httpx'); - } }); } diff --git a/src/pyodide/python-lock/pyodide-lock_20240829.4.json b/src/pyodide/python-lock/pyodide-lock_20240829.4.json new file mode 100644 index 00000000000..86b047106f0 --- /dev/null +++ b/src/pyodide/python-lock/pyodide-lock_20240829.4.json @@ -0,0 +1,45 @@ +{ + "info": { + "arch": "wasm32", + "platform": "emscripten_3_1_52", + "python": "3.12.3", + "version": "0.26.0a3" + }, + "packages": { + "hashlib": { + "file_name": "hashlib-1.0.0.tar.gz", + "install_dir": "stdlib", + "sha256": "f70fb8ecf9a89401dfdb8af1327db61b5a668ac5ac43cc04cf69aeced1b4627b" + }, + "lzma": { + "file_name": "lzma-1.0.0.tar.gz", + "install_dir": "stdlib", + "sha256": "8b6b4ce208fa4f3b47dd65b17633736effdfc7f61f658cb64fe21c9171f5626c" + }, + "openssl": { + "file_name": "openssl-1.1.1n.tar.gz", + "install_dir": "dynlib", + "sha256": "00073d8e6d070ce4ff21edaf0e4458f2329f7146c2cbd3bebb8158b8c5088d02" + }, + "pydecimal": { + "file_name": "pydecimal-1.0.0.tar.gz", + "install_dir": "stdlib", + "sha256": "8b767f6092c429008e4a6910ab61d88bdb4a58db0f79aeaf32d98732cd806ea3" + }, + "pydoc-data": { + "file_name": "pydoc_data-1.0.0.tar.gz", + "install_dir": "stdlib", + "sha256": "ed0d79d3f2df7caf6c5a6d64ec526ad96ff1dd50e302e1296b80d31ddfdf08ee" + }, + "sqlite3": { + "file_name": "sqlite3-1.0.0.tar.gz", + "install_dir": "stdlib", + "sha256": "784b6041b0021190b02cc21967419e48b0e554ded3a19d7048d40a8b7b308591" + }, + "ssl": { + "file_name": "ssl-1.0.0.tar.gz", + "install_dir": "stdlib", + "sha256": "5a291867843055a2a86723c2595eed9db3ccf51e39cf7c43c5e5286dc2487f12" + } + } +} diff --git a/src/pyodide/python-lock/pyodide-lock_20250808.json b/src/pyodide/python-lock/pyodide-lock_20250808.json new file mode 100644 index 00000000000..73fe327e27e --- /dev/null +++ b/src/pyodide/python-lock/pyodide-lock_20250808.json @@ -0,0 +1,46 @@ +{ + "info": { + 
"abi_version": "2025_0", + "arch": "wasm32", + "platform": "emscripten_4_0_9", + "python": "3.13.2", + "version": "0.28.1" + }, + "packages": { + "hashlib": { + "file_name": "hashlib-1.0.0-cp313-cp313-pyodide_2025_0_wasm32.tar.gz", + "install_dir": "site", + "sha256": "3d48f7b026f94f2df4b35e60dcd53862c1451e5570bab60446bb5ca8c1a476de" + }, + "libopenssl": { + "file_name": "libopenssl-1.1.1w.tar.gz", + "install_dir": "dynlib", + "sha256": "45617501d5e4a22e4a99d97da37f8547649c34a6f80ab63dc799058a83f8aee8" + }, + "lzma": { + "file_name": "lzma-1.0.0-cp313-cp313-pyodide_2025_0_wasm32.tar.gz", + "install_dir": "site", + "sha256": "356f6f412ce9643137c2c4a3ad7d4e33bfaa90fb4db2dfe7f9ebf22b1c937c67" + }, + "pydecimal": { + "file_name": "pydecimal-1.0.0-cp313-cp313-pyodide_2025_0_wasm32.tar.gz", + "install_dir": "site", + "sha256": "60609b9765a140b7ab55b92859d5737ee3e9b6e9e8060b68cf20edf11e1b9ca0" + }, + "pydoc-data": { + "file_name": "pydoc_data-1.0.0-cp313-cp313-pyodide_2025_0_wasm32.tar.gz", + "install_dir": "site", + "sha256": "ee3ae17d6923b8f0c9979fad3fc5fa92d3277191b402d9a1b025ca2d5954ee05" + }, + "sqlite3": { + "file_name": "sqlite3-1.0.0-cp313-cp313-pyodide_2025_0_wasm32.tar.gz", + "install_dir": "site", + "sha256": "12b24e6c9e0bbe66f4f35703641f60801dba3c94a81c24ac2046f9d418960cd5" + }, + "ssl": { + "file_name": "ssl-1.0.0-cp313-cp313-pyodide_2025_0_wasm32.tar.gz", + "install_dir": "site", + "sha256": "efb2f31bd3db13118d3ed1bf4e6579f86dc95f55f4df69f5d8b4c2469efaaa8b" + } + } +} diff --git a/src/pyodide/python_packages.capnp b/src/pyodide/python_packages.capnp new file mode 100644 index 00000000000..95924690a98 --- /dev/null +++ b/src/pyodide/python_packages.capnp @@ -0,0 +1,26 @@ +# Copyright (c) 2026 Cloudflare, Inc. 
+# Licensed under the Apache 2.0 license found in the LICENSE file or at: +# https://opensource.org/licenses/Apache-2.0 + +@0xc3f6a2b1e4d50789; + +using Cxx = import "/capnp/c++.capnp"; +$Cxx.namespace("workerd::api::pyodide"); + +# A single file extracted from a Python stdlib package wheel at build time. The CPython stdlib +# modules and the shared libraries they depend on are extracted and embedded directly in the +# Pyodide bundle (see src/pyodide/pack_python_packages.py and helpers.bzl) so the runtime no longer +# downloads or unpacks wheels at request time. +struct PythonPackageFile { + # The mount root this file belongs to, taken from the package's `install_dir` in the lock file + # ("site" / "stdlib" -> site-packages, "dynlib" -> /usr/lib). + installDir @0 :Text; + # The file's path within `installDir`, e.g. "ssl/__init__.py". + path @1 :Text; + # The (already-decompressed) file contents. + contents @2 :Data; +} + +struct PythonPackages { + files @0 :List(PythonPackageFile); +} diff --git a/src/pyodide/types/artifacts.d.ts b/src/pyodide/types/artifacts.d.ts index 8e25cf921e2..7adc70f5b3c 100644 --- a/src/pyodide/types/artifacts.d.ts +++ b/src/pyodide/types/artifacts.d.ts @@ -3,7 +3,7 @@ // https://opensource.org/licenses/Apache-2.0 declare namespace ArtifactBundler { - type SnapshotType = 'baseline' | 'dedicated' | 'package'; + type SnapshotType = 'baseline' | 'dedicated'; type MemorySnapshotResult = { snapshot: Uint8Array; importedModulesList: string[]; @@ -20,7 +20,6 @@ declare namespace ArtifactBundler { const getMemorySnapshotSize: () => number; const disposeMemorySnapshot: () => void; const storeMemorySnapshot: (snap: MemorySnapshotResult) => void; - const getPackage: (path: string) => Reader | null; } export default ArtifactBundler; diff --git a/src/pyodide/types/modules.d.ts b/src/pyodide/types/modules.d.ts index 2e44d65d51b..4b1457d5462 100644 --- a/src/pyodide/types/modules.d.ts +++ b/src/pyodide/types/modules.d.ts @@ -14,7 +14,6 @@ declare module 
'pyodide-internal:setup_python_search_path.py' { declare module 'pyodideRuntime-internal:emscriptenSetup' { function instantiateEmscriptenModule( - isWorkerd: boolean, pythonStdlib: ArrayBuffer, pyodideWasmModule: WebAssembly.Module ): Promise; diff --git a/src/pyodide/types/packages.d.ts b/src/pyodide/types/packages.d.ts new file mode 100644 index 00000000000..c812ce55522 --- /dev/null +++ b/src/pyodide/types/packages.d.ts @@ -0,0 +1,16 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +declare namespace EmbeddedPackages { + interface PackageFile { + installDir: InstallDir; + path: string; + size: number; + reader: Reader; + } + + const getFiles: () => PackageFile[]; +} + +export default EmbeddedPackages; diff --git a/src/pyodide/types/pyodide-lock.d.ts b/src/pyodide/types/pyodide-lock.d.ts index 9403ec87abe..1f707a2d7c7 100644 --- a/src/pyodide/types/pyodide-lock.d.ts +++ b/src/pyodide/types/pyodide-lock.d.ts @@ -3,17 +3,13 @@ // https://opensource.org/licenses/Apache-2.0 type InstallDir = 'site' | 'stdlib' | 'dynlib'; +// The checked-in lock files are filtered down to just the fields that are still +// consumed: file_name + install_dir for the runtime loader, and sha256 for the +// build-time wheel download. 
interface PackageDeclaration { - depends: string[]; file_name: string; - imports: string[]; install_dir: InstallDir; - name: string; - package_type: string; sha256: string; - shared_library: boolean; - unvendored_tests: boolean; - version: string; } interface PackageLock { diff --git a/src/pyodide/types/runtime-generated/metadata.d.ts b/src/pyodide/types/runtime-generated/metadata.d.ts index 4432c36c993..245691a50c8 100644 --- a/src/pyodide/types/runtime-generated/metadata.d.ts +++ b/src/pyodide/types/runtime-generated/metadata.d.ts @@ -19,12 +19,9 @@ declare namespace MetadataReader { const isTracing: () => boolean; const shouldSnapshotToDisk: () => boolean; const isCreatingBaselineSnapshot: () => boolean; - const shouldAbortIsolateOnFatalError: () => boolean; - const getRequirements: () => string[]; const getMainModule: () => string; const hasMemorySnapshot: () => boolean; const getNames: () => string[]; - const getPackageSnapshotImports: (version: string) => string[]; const getSizes: () => number[]; const readMemorySnapshot: ( offset: number, @@ -36,7 +33,6 @@ declare namespace MetadataReader { const getPackagesVersion: () => string; const getPackagesLock: () => string; const read: (index: number, position: number, buffer: Uint8Array) => number; - const getTransitiveRequirements: () => Set; const getCompatibilityFlags: () => CompatibilityFlags; const setCpuLimitNearlyExceededCallback: ( buf: Uint8Array, diff --git a/src/rust/cxx-integration-test/cxx-rust-integration-test.c++ b/src/rust/cxx-integration-test/cxx-rust-integration-test.c++ index e638040c0d8..bd6c83e92c7 100644 --- a/src/rust/cxx-integration-test/cxx-rust-integration-test.c++ +++ b/src/rust/cxx-integration-test/cxx-rust-integration-test.c++ @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/src/rust/jsg-test/tests/unwrap.rs b/src/rust/jsg-test/tests/unwrap.rs index 86077c9cb85..b5757391e56 100644 --- a/src/rust/jsg-test/tests/unwrap.rs +++ 
b/src/rust/jsg-test/tests/unwrap.rs @@ -136,20 +136,21 @@ fn v8_unwrap_string_returns_correct_values() { /// /// Rust wrappables use `WORKERD_RUST_WRAPPABLE_TAG` (0xeb05), while C++ JSG objects use /// `WORKERD_WRAPPABLE_TAG` (0xeb04). Attempting to unwrap a C++ object through the Rust path -/// must return nullptr to prevent reading garbage from non-existent `data[2]` fields. +/// must return None to prevent reading garbage from non-existent `data[2]` fields. #[test] fn unwrap_resource_rejects_cpp_tagged_object() { let harness = crate::Harness::new(); harness.run_in_context(|lock, _ctx| { let cpp_obj = crate::Harness::create_cpp_tagged_object(lock); - // unwrap_resource returns nullptr because the object has the C++ tag, not the Rust tag. - let result = + // unwrap_resource returns None because the object has the C++ tag, not the Rust tag. + let result: Option> = // SAFETY: isolate is valid and locked, value is a valid Local. - unsafe { jsg::v8::ffi::unwrap_resource(lock.isolate().as_ffi(), cpp_obj.into_ffi()) }; + unsafe { jsg::v8::ffi::unwrap_resource(lock.isolate().as_ffi(), cpp_obj.into_ffi()) } + .into(); assert!( - result.get().is_null(), - "unwrap_resource should return null for a C++ tagged object" + result.is_none(), + "unwrap_resource should return None for a C++ tagged object" ); Ok(()) @@ -166,12 +167,13 @@ fn unwrap_resource_rejects_plain_js_object() { harness.run_in_context(|lock, ctx| { let plain_obj = ctx.eval_raw("({})").unwrap(); - let result = + let result: Option> = // SAFETY: isolate is valid and locked, value is a valid Local. 
- unsafe { jsg::v8::ffi::unwrap_resource(lock.isolate().as_ffi(), plain_obj.into_ffi()) }; + unsafe { jsg::v8::ffi::unwrap_resource(lock.isolate().as_ffi(), plain_obj.into_ffi()) } + .into(); assert!( - result.get().is_null(), - "unwrap_resource should return null for a plain JS object" + result.is_none(), + "unwrap_resource should return None for a plain JS object" ); Ok(()) diff --git a/src/rust/jsg/ffi.c++ b/src/rust/jsg/ffi.c++ index a518e510202..0d5963fc6d9 100644 --- a/src/rust/jsg/ffi.c++ +++ b/src/rust/jsg/ffi.c++ @@ -656,10 +656,10 @@ double unwrap_number(Isolate* isolate, Local value) { ->Value(); } -kj::Rc unwrap_resource(Isolate* isolate, Local value) { +kj::Maybe> unwrap_resource(Isolate* isolate, Local value) { auto v8_val = local_from_ffi(kj::mv(value)); // Non-object values (numbers, strings, booleans, etc.) are never wrapped resources. - if (!v8_val->IsObject()) return nullptr; + if (!v8_val->IsObject()) return kj::none; auto v8_obj = v8_val.As(); // Plain JS objects have no internal fields; check before reading to avoid V8 fatal error. 
if (v8_obj->InternalFieldCount() < ::workerd::jsg::Wrappable::INTERNAL_FIELD_COUNT || @@ -668,7 +668,7 @@ kj::Rc unwrap_resource(Isolate* isolate, Local value) { static_cast( ::workerd::jsg::Wrappable::WRAPPABLE_TAG_FIELD_INDEX)) != const_cast(&::workerd::jsg::Wrappable::WORKERD_RUST_WRAPPABLE_TAG)) { - return nullptr; + return kj::none; } auto* ptr = static_cast( reinterpret_cast<::workerd::jsg::Wrappable*>(v8_obj->GetAlignedPointerFromInternalField( diff --git a/src/rust/jsg/ffi.h b/src/rust/jsg/ffi.h index d9df79c4907..2fda21608e4 100644 --- a/src/rust/jsg/ffi.h +++ b/src/rust/jsg/ffi.h @@ -271,7 +271,7 @@ void wrappable_attach_wrapper(kj::Rc wrappable, FunctionCallbackInfo& ::rust::String unwrap_string(Isolate* isolate, Local value); bool unwrap_boolean(Isolate* isolate, Local value); double unwrap_number(Isolate* isolate, Local value); -kj::Rc unwrap_resource(Isolate* isolate, Local value); +kj::Maybe> unwrap_resource(Isolate* isolate, Local value); ::rust::Vec unwrap_uint8_array(Isolate* isolate, Local value); ::rust::Vec unwrap_uint16_array(Isolate* isolate, Local value); ::rust::Vec unwrap_uint32_array(Isolate* isolate, Local value); diff --git a/src/rust/jsg/v8.rs b/src/rust/jsg/v8.rs index 56cf8d52110..26260175134 100644 --- a/src/rust/jsg/v8.rs +++ b/src/rust/jsg/v8.rs @@ -638,7 +638,7 @@ pub mod ffi { pub unsafe fn unwrap_resource( isolate: *mut Isolate, value: Local, /* v8::LocalValue */ - ) -> KjRc; + ) -> KjMaybe>; pub unsafe fn function_template_get_function( isolate: *mut Isolate, @@ -3478,17 +3478,14 @@ impl WrappableRc { /// Returns `None` if the value is not a Rust-tagged Wrappable /// (e.g. a C++ JSG object, a plain JS object, or a primitive). /// - /// The C++ `unwrap_resource` returns a `KjRc` whose inner - /// pointer is null when the value doesn't contain a Rust Wrappable. - /// We check `get().is_null()` to distinguish that case. + /// The C++ `unwrap_resource` returns `None` when the value doesn't contain + /// a Rust Wrappable. 
#[doc(hidden)] pub fn from_js(isolate: IsolatePtr, value: Local) -> Option { // SAFETY: isolate is valid and locked; value handle is valid. - let handle = unsafe { ffi::unwrap_resource(isolate.as_ffi(), value.into_ffi()) }; - if handle.get().is_null() { - return None; - } - Some(Self { handle }) + let handle: Option> = + unsafe { ffi::unwrap_resource(isolate.as_ffi(), value.into_ffi()) }.into(); + handle.map(|handle| Self { handle }) } /// Wraps this Wrappable as a JavaScript object using the given constructor template. diff --git a/src/rust/worker/bridge.h b/src/rust/worker/bridge.h index 71f9386bc3a..e9bdf57b024 100644 --- a/src/rust/worker/bridge.h +++ b/src/rust/worker/bridge.h @@ -36,6 +36,8 @@ inline workerd::EventOutcome fromImpl(kj_rs::Rust*, workerd::rust::worker::Event return workerd::EventOutcome::RESPONSE_STREAM_DISCONNECTED; case workerd::rust::worker::EventOutcome::InternalError: return workerd::EventOutcome::INTERNAL_ERROR; + case workerd::rust::worker::EventOutcome::ExceededWallTime: + return workerd::EventOutcome::EXCEEDED_WALL_TIME; } } diff --git a/src/rust/worker/exception.rs b/src/rust/worker/exception.rs index 25b79801c82..68eeaaf932a 100644 --- a/src/rust/worker/exception.rs +++ b/src/rust/worker/exception.rs @@ -10,5 +10,8 @@ pub const CPU_LIMIT_DETAIL_ID: u64 = 0xfdcb_787b_a424_0576; /// If an exception is thrown for exceeding memory limits, it will contain this detail. pub const MEMORY_LIMIT_DETAIL_ID: u64 = 0xbaf7_6dd7_ce5b_d8cf; +/// If an exception is thrown for exceeding wall time limits, it will contain this detail. +pub const WALL_TIME_LIMIT_DETAIL_ID: u64 = 0x6e8f_2b4a_1c9d_3e5b; + /// If an exception is thrown for worker killed before start, it will contain this detail. 
pub const SCRIPT_KILLED_DETAIL_ID: u64 = 0xf893_5d57_9c20_da70; diff --git a/src/rust/worker/ffi.rs b/src/rust/worker/ffi.rs index 7c7a7ebe8d6..9a5670c130c 100644 --- a/src/rust/worker/ffi.rs +++ b/src/rust/worker/ffi.rs @@ -39,6 +39,7 @@ pub mod bridge { LoadShed = 9, ResponseStreamDisconnected = 10, InternalError = 11, + ExceededWallTime = 12, } #[derive(Debug, Clone, PartialEq, Eq)] @@ -241,6 +242,7 @@ impl From for bridge::EventOutcome { Self::ResponseStreamDisconnected } outcome_capnp::EventOutcome::InternalError => Self::InternalError, + outcome_capnp::EventOutcome::ExceededWallTime => Self::ExceededWallTime, } } } diff --git a/src/rust/worker/kill_switch.rs b/src/rust/worker/kill_switch.rs index df7231a963d..307aab33031 100644 --- a/src/rust/worker/kill_switch.rs +++ b/src/rust/worker/kill_switch.rs @@ -43,7 +43,7 @@ pub struct Worker {} impl Worker { fn error(file: &str, line: u32) -> Result<()> { Err(KjError::new( - cxx::KjExceptionType::Overloaded, + cxx::KjExceptionType::Failed, "jsg.Error: This script has been killed.".to_owned(), ) .with_details(vec![(SCRIPT_KILLED_DETAIL_ID, vec![])]) diff --git a/src/rust/worker/test.c++ b/src/rust/worker/test.c++ index c3c788246d3..e4aecf97d19 100644 --- a/src/rust/worker/test.c++ +++ b/src/rust/worker/test.c++ @@ -40,7 +40,7 @@ KJ_TEST("kill_switch worker") { }); auto& e = KJ_ASSERT_NONNULL(exception); - KJ_ASSERT(e.getType() == kj::Exception::Type::OVERLOADED); + KJ_ASSERT(e.getType() == kj::Exception::Type::FAILED); KJ_ASSERT(e.getDescription() == "jsg.Error: This script has been killed."); KJ_ASSERT(e.getDetail(SCRIPT_KILLED_DETAIL_ID) != kj::none); } @@ -73,7 +73,7 @@ KJ_TEST("kill_switch worker connect") { }); auto& e = KJ_ASSERT_NONNULL(exception); - KJ_ASSERT(e.getType() == kj::Exception::Type::OVERLOADED); + KJ_ASSERT(e.getType() == kj::Exception::Type::FAILED); KJ_ASSERT(e.getDescription() == "jsg.Error: This script has been killed."); KJ_ASSERT(e.getDetail(SCRIPT_KILLED_DETAIL_ID) != kj::none); } diff 
--git a/src/workerd/api/BUILD.bazel b/src/workerd/api/BUILD.bazel index 39964a3d067..f630ec43be3 100644 --- a/src/workerd/api/BUILD.bazel +++ b/src/workerd/api/BUILD.bazel @@ -377,6 +377,7 @@ wd_cc_library( "pyodide/requirements.h", "//src/pyodide:generated/pyodide_extra.capnp.h", "//src/pyodide:pyodide_static.capnp.h", + "//src/pyodide:python_packages.capnp.h", ], implementation_deps = [ "//src/workerd/io", @@ -386,12 +387,14 @@ wd_cc_library( deps = [ "//src/pyodide:pyodide_extra_capnp", "//src/pyodide:pyodide_static", + "//src/pyodide:python_packages_capnp", "//src/workerd/io:compatibility-date_capnp", "//src/workerd/jsg", "@capnp-cpp//src/kj:kj-async", "@capnp-cpp//src/kj/compat:kj-gzip", "@capnp-cpp//src/kj/compat:kj-http", "@capnp-cpp//src/kj/compat:kj-tls", + "@ssl", ], ) @@ -524,10 +527,12 @@ wd_cc_library( ], ) for f in [ + "streams/pendingabort-gc-uaf-test.c++", "actor-state-test.c++", "basics-test.c++", "crypto/aes-test.c++", "crypto/impl-test.c++", + "crypto/prime-test.c++", ] ] @@ -541,6 +546,7 @@ wd_cc_library( ], ) for f in [ + "streams/draining-read-uaf-test.c++", "streams/queue-test.c++", "streams/standard-test.c++", ] @@ -615,7 +621,6 @@ kj_test( "//src/workerd/io", "//src/workerd/io:worker-interface", "//src/workerd/tests:test-fixture", - "//src/workerd/util:autogate", ], ) @@ -658,6 +663,15 @@ kj_test( deps = ["//src/workerd/tests:test-fixture"], ) +kj_test( + src = "fetch-body-rewindable-test.c++", + deps = [ + "//src/workerd/io", + "//src/workerd/io:worker-interface", + "//src/workerd/tests:test-fixture", + ], +) + kj_test( src = "streams/writable-sink-test.c++", deps = [ diff --git a/src/workerd/api/actor-state-iocontext-test.c++ b/src/workerd/api/actor-state-iocontext-test.c++ index ef500184506..97d7b653529 100644 --- a/src/workerd/api/actor-state-iocontext-test.c++ +++ b/src/workerd/api/actor-state-iocontext-test.c++ @@ -4,6 +4,7 @@ #include #include +#include #include #include diff --git a/src/workerd/api/actor-state-test.c++ 
b/src/workerd/api/actor-state-test.c++ index 1157cfad512..26556afab98 100644 --- a/src/workerd/api/actor-state-test.c++ +++ b/src/workerd/api/actor-state-test.c++ @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -33,7 +34,7 @@ KJ_TEST("v8 serialization version tag hasn't changed") { e.getIsolate().runInLockScope([&](ActorStateIsolate::Lock& isolateLock) { JSG_WITHIN_CONTEXT_SCOPE(isolateLock, isolateLock.newContext().getHandle(isolateLock), [&](jsg::Lock& js) { - auto buf = serializeV8Value(isolateLock, isolateLock.boolean(true)); + auto buf = serializeV8Value(isolateLock, "some-key"_kj, isolateLock.boolean(true)); // Confirm that a version header is appropriately written and that it contains the expected // current version. When the version increases, we need to write a v8 patch that allows it @@ -110,10 +111,10 @@ KJ_TEST("wire format version does not change deserialization behavior on real da KJ_EXPECT(!dataIn.hadErrors, kj::str(hexStr.c_str())); auto oldVal = oldDeserializeV8Value(isolateLock, dataIn); - auto oldOutput = serializeV8Value(isolateLock, oldVal); + auto oldOutput = serializeV8Value(isolateLock, key, oldVal); auto newVal = deserializeV8Value(isolateLock, key, dataIn); - auto newOutput = serializeV8Value(isolateLock, newVal); + auto newOutput = serializeV8Value(isolateLock, key, newVal); KJ_EXPECT(oldOutput == newOutput, kj::str(hexStr.c_str())); } }); diff --git a/src/workerd/api/actor-state.c++ b/src/workerd/api/actor-state.c++ index 3f4b406c363..73379d3b57f 100644 --- a/src/workerd/api/actor-state.c++ +++ b/src/workerd/api/actor-state.c++ @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -38,7 +39,7 @@ uint32_t billingUnits(size_t bytes, BillAtLeastOne billAtLeastOne = BillAtLeastO } jsg::JsValue deserializeMaybeV8Value( - jsg::Lock& js, kj::ArrayPtr key, kj::Maybe> buf) { + jsg::Lock& js, kj::StringPtr key, kj::Maybe> buf) { KJ_IF_SOME(b, buf) { return deserializeV8Value(js, key, b); } 
else { @@ -213,24 +214,6 @@ kj::Promise updateStorageDeletes( metrics.addStorageDeletes(deleted); }; -// Return the id of the current actor (or the empty string if there is no current actor). -kj::Maybe getCurrentActorId() { - KJ_IF_SOME(ioContext, IoContext::tryCurrent()) { - KJ_IF_SOME(actor, ioContext.getActor()) { - KJ_SWITCH_ONEOF(actor.getId()) { - KJ_CASE_ONEOF(s, kj::String) { - return kj::heapString(s); - } - KJ_CASE_ONEOF(actorId, kj::Own) { - return actorId->toString(); - } - } - KJ_UNREACHABLE; - } - } - return kj::none; -} - } // namespace DurableObjectStorage::DurableObjectStorage(jsg::Lock& js, @@ -512,7 +495,7 @@ jsg::Promise DurableObjectStorageOperations::setAlarm( jsg::Promise DurableObjectStorageOperations::putOne( jsg::Lock& js, kj::String key, jsg::JsValue value, const PutOptions& options) { - kj::Array buffer = serializeV8Value(js, value); + kj::Array buffer = serializeV8Value(js, key, value); auto units = billingUnits(key.size() + buffer.size()); @@ -588,6 +571,10 @@ jsg::Promise DurableObjectStorageOperations::deleteOne( jsg::Lock& js, kj::String key, const PutOptions& options) { auto& context = IoContext::current(); + KJ_IF_SOME(handler, KJ_ASSERT_NONNULL(context.getActor()).getStoredExternalHandler()) { + handler.cancelPutExternals(key); + } + return transformCacheResult(js, getCache(OP_DELETE).delete_(kj::mv(key), options, context.getCurrentTraceSpan()), options, [](jsg::Lock&, bool value) { @@ -615,7 +602,7 @@ jsg::Promise DurableObjectStorageOperations::putMultiple( // deleting an undefined field is confusing, throwing could break otherwise working code, and // a stray undefined here or there is probably closer to what the user desires. 
- kj::Array buffer = serializeV8Value(js, field.value); + kj::Array buffer = serializeV8Value(js, field.name, field.value); units += billingUnits(field.name.size() + buffer.size()); @@ -638,6 +625,12 @@ jsg::Promise DurableObjectStorageOperations::deleteMultiple( auto& context = IoContext::current(); + KJ_IF_SOME(handler, KJ_ASSERT_NONNULL(context.getActor()).getStoredExternalHandler()) { + for (auto& key: keys) { + handler.cancelPutExternals(key); + } + } + return transformCacheResult(js, getCache(OP_DELETE).delete_(kj::mv(keys), options, context.getCurrentTraceSpan()), options, [numKeys](jsg::Lock&, uint count) -> int { @@ -657,50 +650,15 @@ jsg::Promise> DurableObjectStorage::transaction(jsg::Lo auto& context = IoContext::current(); auto traceContext = context.makeUserTraceSpan("durable_object_storage_transaction"_kjc); - struct TxnResult { - jsg::JsRef value; - bool isError; - }; - return context.attachSpans(js, context .blockConcurrencyWhile(js, - [callback = kj::mv(callback), &context, &cache = *cache]( - jsg::Lock& js) mutable -> jsg::Promise { - // Note that the call to `startTransaction()` is when the SQLite-backed implementation will - // actually invoke `BEGIN TRANSACTION`, so it's important that we're inside the - // blockConcurrencyWhile block before that point so we don't accidentally catch some other - // asynchronous event in our transaction. - // - // For the ActorCache-based implementation, it doesn't matter when we call `startTransaction()` - // as the method merely allocates an object and returns it with no side effects. - auto txn = js.alloc(context.addObject(cache.startTransaction())); - - return js.resolvedPromise(txn.addRef()) - .then(js, kj::mv(callback)) - .then(js, [txn = txn.addRef()](jsg::Lock& js, jsg::JsRef value) mutable { - // In correct usage, `context` should not have changed here, particularly because we're in - // a critical section so it should have been impossible for any other context to receive - // control. 
However, depending on all that is a bit precarious. jsg::Promise::then() itself - // does NOT guarantee it runs in the same context (the application could have returned a - // custom Promise and then resolved in from some other context). So let's be safe and grab - // IoContext::current() again here, rather than capture it in the lambda. - auto& context = IoContext::current(); - return context.awaitIoWithInputLock(js, txn->maybeCommit(), - [value = kj::mv(value)](jsg::Lock&) mutable { return TxnResult{kj::mv(value), false}; }); - }, [txn = txn.addRef()](jsg::Lock& js, jsg::Value exception) mutable { - // The transaction callback threw an exception. We don't actually want to reset the object, - // we only want to roll back the transaction and propagate the exception. So, we carefully - // pack the exception away into a value. - txn->maybeRollback(); - return js.resolvedPromise(TxnResult{ - // TODO(cleanup): Simplify this once exception is passed using jsg::JsRef instead - // of jsg::V8Ref - jsg::JsValue(exception.getHandle(js)).addRef(js), true}); - }); + [callback = kj::mv(callback), &cache = *cache]( + jsg::Lock& js, IoContext& context) mutable -> jsg::Promise { + return asyncTransactionImpl(js, context, cache, kj::mv(callback)); }) .then(js, - [](jsg::Lock& js, TxnResult result) -> jsg::JsRef { + [](jsg::Lock& js, AsyncTxnResult result) -> jsg::JsRef { if (result.isError) { js.throwException(result.value.getHandle(js)); } else { @@ -710,9 +668,62 @@ jsg::Promise> DurableObjectStorage::transaction(jsg::Lo kj::mv(traceContext)); } +jsg::Promise DurableObjectStorage::asyncTransactionImpl( + jsg::Lock& js, IoContext& context, ActorCacheInterface& cache, AsyncTxnCallback callback) { + // Note that the call to `startTransaction()` is when the SQLite-backed implementation will + // actually invoke `BEGIN TRANSACTION`, so it's important that we're inside the + // blockConcurrencyWhile block before that point so we don't accidentally catch some other + // asynchronous 
event in our transaction. + // + // For the ActorCache-based implementation, it doesn't matter when we call `startTransaction()` + // as the method merely allocates an object and returns it with no side effects. + kj::Own rawTxn; + KJ_SWITCH_ONEOF(cache.startTransaction()) { + KJ_CASE_ONEOF(t, kj::Own) { + rawTxn = kj::mv(t); + } + KJ_CASE_ONEOF(promise, kj::Promise) { + // Whoops, we can't start the transaction yet. Wait and try again. + return context.awaitIoWithInputLock(js, kj::mv(promise), + [&context, &cache, callback = kj::mv(callback)](jsg::Lock& js) mutable { + return asyncTransactionImpl(js, context, cache, kj::mv(callback)); + }); + } + } + + auto txn = js.alloc(context.addObject(kj::mv(rawTxn))); + + return js.resolvedPromise(txn.addRef()) + .then(js, kj::mv(callback)) + .then(js, [txn = txn.addRef()](jsg::Lock& js, jsg::JsRef value) mutable { + // In correct usage, `context` should not have changed here, particularly because we're in + // a critical section so it should have been impossible for any other context to receive + // control. However, depending on all that is a bit precarious. jsg::Promise::then() itself + // does NOT guarantee it runs in the same context (the application could have returned a + // custom Promise and then resolved in from some other context). So let's be safe and grab + // IoContext::current() again here, rather than capture it in the lambda. + auto& context = IoContext::current(); + return context.awaitIoWithInputLock( + js, txn->maybeCommit(), [value = kj::mv(value)](jsg::Lock&) mutable { + return AsyncTxnResult{kj::mv(value), false}; + }); + }, [txn = txn.addRef()](jsg::Lock& js, jsg::Value exception) mutable { + // The transaction callback threw an exception. We don't actually want to reset the object, + // we only want to roll back the transaction and propagate the exception. So, we carefully + // pack the exception away into a value. 
+ txn->maybeRollback(); + return js.resolvedPromise(AsyncTxnResult{ + // TODO(cleanup): Simplify this once exception is passed using jsg::JsRef instead + // of jsg::V8Ref + jsg::JsValue(exception.getHandle(js)).addRef(js), true}); + }); +} + jsg::JsRef DurableObjectStorage::transactionSync( jsg::Lock& js, jsg::Function()> callback) { KJ_IF_SOME(sqlite, cache->getSqliteDatabase()) { + auto& context = IoContext::current(); + // SAVEPOINT is a readonly statement, but we need to trigger an outer TRANSACTION sqlite.notifyWrite(); @@ -726,6 +737,10 @@ jsg::JsRef DurableObjectStorage::transactionSync( sqlite.run( {.regulator = SqliteDatabase::TRUSTED}, kj::str("SAVEPOINT _cf_sync_savepoint_", depth)); + + StoredExternalHandler::SyncNestedTransaction syncExternalTxn( + context.getActorOrThrow().getOrCreateStoredExternalHandler()); + return js.tryCatch([&]() { auto result = callback(js); @@ -736,6 +751,7 @@ jsg::JsRef DurableObjectStorage::transactionSync( sqlite.run( {.regulator = SqliteDatabase::TRUSTED}, kj::str("RELEASE _cf_sync_savepoint_", depth)); + syncExternalTxn.commit(); return kj::mv(result); }, [&](jsg::Value exception) -> jsg::JsRef { // If a critical error forced an automatic rollback, we skip the rollback and release @@ -1008,7 +1024,7 @@ jsg::Ref DurableObjectFacets::get(jsg::Lock& js, auto& ioCtx = IoContext::current(); kj::Function()> getStartInfo = - ioCtx.makeReentryCallback( + ioCtx.makeReentryCallbackWeak( [&ioCtx, getStartupOptions = kj::mv(getStartupOptions)](jsg::Lock& js) mutable { return getStartupOptions(js).then(js, [&ioCtx](jsg::Lock& js, StartupOptions options) { Worker::Actor::Id id; @@ -1070,6 +1086,12 @@ void DurableObjectFacets::delete_(jsg::Lock& js, kj::String name) { getFacetManager().deleteFacet(name); } +void DurableObjectFacets::clone(jsg::Lock& js, kj::String src, kj::String dst) { + requireValidFacetName(src); + requireValidFacetName(dst); + getFacetManager().cloneFacet(src, dst); +} + ActorState::ActorState(Worker::Actor::Id 
actorId, kj::Maybe> transient, kj::Maybe> persistent) @@ -1164,10 +1186,10 @@ Worker::Actor::HibernationManager& DurableObjectState::maybeInitHibernationManag } void DurableObjectState::acceptWebSocket( - jsg::Ref ws, jsg::Optional> tags) { + jsg::Lock& js, jsg::Ref ws, jsg::Optional> tags) { JSG_ASSERT(!ws->isAccepted(), Error, "Cannot call `acceptWebSocket()` if the WebSocket was already accepted via `accept()`"); - JSG_ASSERT(ws->peerIsAwaitingCoupling(), Error, + JSG_ASSERT(ws->peerIsAwaitingCoupling(js), Error, "Cannot call `acceptWebSocket()` on this WebSocket because its pair has already been " "accepted or used in a Response."); @@ -1319,60 +1341,4 @@ jsg::Promise DurableObjectState::configureReadReplication( return context.attachSpans(js, context.awaitIo(js, kj::mv(promise)), kj::mv(traceContext)); } -kj::Array serializeV8Value(jsg::Lock& js, const jsg::JsValue& value) { - jsg::Serializer serializer(js, - jsg::Serializer::Options{ - .version = 15, - .omitHeader = false, - }); - serializer.write(js, value); - auto released = serializer.release(); - return kj::mv(released.data); -} - -jsg::JsValue deserializeV8Value( - jsg::Lock& js, kj::ArrayPtr key, kj::ArrayPtr buf) { - - KJ_ASSERT(buf.size() > 0, "unexpectedly empty value buffer", key); - try { - // The js.tryCatch will handle the normal exception path. We wrap this in an - // additional try/catch in case the js.tryCatch hits an exception that is - // terminal for the isolate, causing exception to be rethrown, in which case - // we throw a kj::Exception wrapping a jsg.Error. - return js.tryCatch([&]() -> jsg::JsValue { - jsg::Deserializer::Options options{}; - if (buf[0] != 0xFF) { - // When Durable Objects was first released, it did not properly write headers when serializing - // to storage. 
If we find that the header is missing (as indicated by the first byte not being - // 0xFF), it's safe to assume that the data was written at the only serialization version we - // used during that early time period, so we explicitly set that version here. - options.version = 13; - options.readHeader = false; - } - - jsg::Deserializer deserializer(js, buf, kj::none, kj::none, options); - - return deserializer.readValue(js); - }, [&](jsg::Value&& exception) mutable -> jsg::JsValue { - // If we do hit a deserialization error, we log information that will be helpful in - // understanding the problem but that won't leak too much about the customer's data. We - // include the key (to help find the data in the database if it hasn't been deleted), the - // length of the value, and the first three bytes of the value (which is just the v8-internal - // version header and the tag that indicates the type of the value, but not its contents). - kj::String actorId = getCurrentActorId().orDefault([]() { return kj::String(); }); - KJ_FAIL_ASSERT("actor storage deserialization failed", "failed to deserialize stored value", - actorId, exception.getHandle(js), key, buf.size(), - buf.first(std::min(static_cast(3), buf.size()))); - }); - } catch (jsg::JsExceptionThrown&) { - // We can occasionally hit an isolate termination here -- we prefix the error with jsg to avoid - // counting it against our internal storage error metrics but also throw a KJ exception rather - // than a jsExceptionThrown error to avoid confusing the normal termination handling code. - // We don't expect users to ever actually see this error. 
- JSG_FAIL_REQUIRE(Error, - "isolate terminated while deserializing value from Durable Object " - "storage; contact us if you're wondering why you're seeing this"); - } -} - } // namespace workerd::api diff --git a/src/workerd/api/actor-state.h b/src/workerd/api/actor-state.h index 67be1b4e968..39ca9b78c46 100644 --- a/src/workerd/api/actor-state.h +++ b/src/workerd/api/actor-state.h @@ -30,11 +30,6 @@ class DurableObjectClass; class LoopbackDurableObjectNamespace; class LoopbackColoLocalActorNamespace; -kj::Array serializeV8Value(jsg::Lock& js, const jsg::JsValue& value); - -jsg::JsValue deserializeV8Value( - jsg::Lock& js, kj::ArrayPtr key, kj::ArrayPtr buf); - // Common implementation of DurableObjectStorage and DurableObjectTransaction. This class is // designed to be used as a mixin. class DurableObjectStorageOperations { @@ -228,10 +223,11 @@ class DurableObjectStorage: public jsg::Object, public DurableObjectStorageOpera // Omit from definitions }; - jsg::Promise> transaction(jsg::Lock& js, - jsg::Function>(jsg::Ref)> - closure, - jsg::Optional options); + using AsyncTxnCallback = + jsg::Function>(jsg::Ref)>; + + jsg::Promise> transaction( + jsg::Lock& js, AsyncTxnCallback closure, jsg::Optional options); jsg::JsRef transactionSync( jsg::Lock& js, jsg::Function()> callback); @@ -361,6 +357,14 @@ class DurableObjectStorage: public jsg::Object, public DurableObjectStorageOpera void visitForGc(jsg::GcVisitor& visitor) { visitor.visit(maybePrimary); } + + struct AsyncTxnResult { + jsg::JsRef value; + bool isError; + }; + + static jsg::Promise asyncTransactionImpl( + jsg::Lock& js, IoContext& context, ActorCacheInterface& cache, AsyncTxnCallback callback); }; class DurableObjectTransaction final: public jsg::Object, public DurableObjectStorageOperations { @@ -467,11 +471,13 @@ class DurableObjectFacets: public jsg::Object { void abort(jsg::Lock& js, kj::String name, jsg::JsValue reason); void delete_(jsg::Lock& js, kj::String name); + void clone(jsg::Lock& js, 
kj::String src, kj::String dst); JSG_RESOURCE_TYPE(DurableObjectFacets) { JSG_METHOD(get); JSG_METHOD(abort); JSG_METHOD_NAMED(delete, delete_); + JSG_METHOD(clone); JSG_TS_OVERRIDE({ get( @@ -650,7 +656,8 @@ class DurableObjectState: public jsg::Object { // // `tags` are string tags which can be used to look up // the WebSocket with getWebSockets(). - void acceptWebSocket(jsg::Ref ws, jsg::Optional> tags); + void acceptWebSocket( + jsg::Lock& js, jsg::Ref ws, jsg::Optional> tags); // Gets an array of accepted WebSockets matching the given tag. // If no tag is provided, an array of all accepted WebSockets is returned. diff --git a/src/workerd/api/actor.c++ b/src/workerd/api/actor.c++ index 5d5784a8353..aa50b6ca0db 100644 --- a/src/workerd/api/actor.c++ +++ b/src/workerd/api/actor.c++ @@ -5,6 +5,7 @@ #include "actor.h" #include +#include #include #include @@ -22,6 +23,23 @@ namespace { // accumulate. constexpr size_t ESTIMATED_EXTERNAL_MEMORY_PER_ACTOR_CHANNEL = 32768; +} // namespace + +IoChannelFactory::ActorChannel& LocalActorOutgoingFactory::getOrCreateActorChannel( + IoContext& context, SpanParent parentSpan) { + if (actorChannel == kj::none) { + actorChannel = context.getColoLocalActorChannel(channelId, actorId, kj::mv(parentSpan)); + + // The ActorChannelImpl we just created holds a Cap'n Proto Pipeline::Client representing an + // open connection to the target DO's routing supervisor. Register external memory to pressure + // V8 into collecting this factory's owning stub promptly when it becomes unreachable, + // preventing connection/FD accumulation from stubs that are created and discarded in a loop. 
+ jsg::Lock& js = context.getCurrentLock(); + channelMemoryAdjustment = + js.getExternalMemoryAdjustment(ESTIMATED_EXTERNAL_MEMORY_PER_ACTOR_CHANNEL); + } + + return *KJ_REQUIRE_NONNULL(actorChannel); } kj::Own LocalActorOutgoingFactory::newSingleUseClient( @@ -32,19 +50,8 @@ kj::Own LocalActorOutgoingFactory::newSingleUseClient( [&](TraceContext& tracing, IoChannelFactory& ioChannelFactory) { tracing.setTag("objectId"_kjc, actorId.asPtr()); - // Lazily initialize actorChannel - if (actorChannel == kj::none) { - actorChannel = - context.getColoLocalActorChannel(channelId, actorId, tracing.getInternalSpanParent()); - - // As in GlobalActorOutgoingFactory, account for external memory used by the open connection. - jsg::Lock& js = context.getCurrentLock(); - channelMemoryAdjustment = - js.getExternalMemoryAdjustment(ESTIMATED_EXTERNAL_MEMORY_PER_ACTOR_CHANNEL); - } - - return KJ_REQUIRE_NONNULL(actorChannel) - ->startRequest({.cfBlobJson = kj::mv(cfStr), + return getOrCreateActorChannel(context, tracing.getInternalSpanParent()) + .startRequest({.cfBlobJson = kj::mv(cfStr), .parentSpan = tracing.getInternalSpanParent(), .userSpanParent = tracing.getUserSpanParent()}); }, @@ -53,6 +60,34 @@ kj::Own LocalActorOutgoingFactory::newSingleUseClient( .operationName = kj::ConstString("durable_object_subrequest"_kjc)})); } +kj::Own LocalActorOutgoingFactory::getSubrequestChannel() { + auto& context = IoContext::current(); + return kj::addRef(getOrCreateActorChannel(context, context.getCurrentTraceSpan())); +} + +IoChannelFactory::ActorChannel& GlobalActorOutgoingFactory::getOrCreateActorChannel( + IoContext& context, SpanParent parentSpan) { + if (actorChannel == kj::none) { + KJ_SWITCH_ONEOF(channelIdOrFactory) { + KJ_CASE_ONEOF(channelId, uint) { + actorChannel = + context.getGlobalActorChannel(channelId, id->getInner(), kj::mv(locationHint), mode, + enableReplicaRouting, routingMode, kj::mv(parentSpan), kj::mv(version)); + } + KJ_CASE_ONEOF(factory, kj::Own) { + actorChannel 
= factory->getGlobalActor(id->getInner(), kj::mv(locationHint), mode, + enableReplicaRouting, routingMode, kj::mv(parentSpan), kj::mv(version)); + } + } + + jsg::Lock& js = context.getCurrentLock(); + channelMemoryAdjustment = + js.getExternalMemoryAdjustment(ESTIMATED_EXTERNAL_MEMORY_PER_ACTOR_CHANNEL); + } + + return *KJ_REQUIRE_NONNULL(actorChannel); +} + kj::Own GlobalActorOutgoingFactory::newSingleUseClient( kj::Maybe cfStr) { auto& context = IoContext::current(); @@ -61,31 +96,8 @@ kj::Own GlobalActorOutgoingFactory::newSingleUseClient( [&](TraceContext& tracing, IoChannelFactory& ioChannelFactory) { tracing.setTag("objectId"_kjc, id->toString()); - // Lazily initialize actorChannel - if (actorChannel == kj::none) { - KJ_SWITCH_ONEOF(channelIdOrFactory) { - KJ_CASE_ONEOF(channelId, uint) { - actorChannel = context.getGlobalActorChannel(channelId, id->getInner(), - kj::mv(locationHint), mode, enableReplicaRouting, routingMode, - tracing.getInternalSpanParent(), kj::mv(version)); - } - KJ_CASE_ONEOF(factory, kj::Own) { - actorChannel = factory->getGlobalActor(id->getInner(), kj::mv(locationHint), mode, - enableReplicaRouting, routingMode, tracing.getInternalSpanParent(), kj::mv(version)); - } - } - - // The ActorChannelImpl we just created holds a Cap'n Proto Pipeline::Client representing an - // open connection to the target DO's routing supervisor. Register external memory to pressure - // V8 into collecting this factory's owning stub promptly when it becomes unreachable, - // preventing connection/FD accumulation from stubs that are created and discarded in a loop. 
- jsg::Lock& js = context.getCurrentLock(); - channelMemoryAdjustment = - js.getExternalMemoryAdjustment(ESTIMATED_EXTERNAL_MEMORY_PER_ACTOR_CHANNEL); - } - - return KJ_REQUIRE_NONNULL(actorChannel) - ->startRequest({.cfBlobJson = kj::mv(cfStr), + return getOrCreateActorChannel(context, tracing.getInternalSpanParent()) + .startRequest({.cfBlobJson = kj::mv(cfStr), .parentSpan = tracing.getInternalSpanParent(), .userSpanParent = tracing.getUserSpanParent()}); }, @@ -94,6 +106,11 @@ kj::Own GlobalActorOutgoingFactory::newSingleUseClient( .operationName = kj::ConstString("durable_object_subrequest"_kjc)})); } +kj::Own GlobalActorOutgoingFactory::getSubrequestChannel() { + auto& context = IoContext::current(); + return kj::addRef(getOrCreateActorChannel(context, context.getCurrentTraceSpan())); +} + kj::Own ReplicaActorOutgoingFactory::newSingleUseClient( kj::Maybe cfStr) { auto& context = IoContext::current(); @@ -113,6 +130,10 @@ kj::Own ReplicaActorOutgoingFactory::newSingleUseClient( .operationName = kj::ConstString("durable_object_subrequest"_kjc)})); } +kj::Own ReplicaActorOutgoingFactory::getSubrequestChannel() { + return kj::addRef(*actorChannel); +} + jsg::Ref ColoLocalActorNamespace::get(jsg::Lock& js, kj::String actorId) { JSG_REQUIRE(actorId.size() > 0 && actorId.size() <= 2048, TypeError, "Actor ID length must be in the range [1, 2048]."); @@ -284,25 +305,35 @@ void DurableObjectClass::serialize(jsg::Lock& js, jsg::Serializer& serializer) { } } return; + } else KJ_IF_SOME(storedHandler, kj::tryDowncast(handler)) { + // The allow_irrevocable_stub_storage flag allows us to just embed the token inline. This + // format is temporary, anyone using this will lose their data later. 
+ JSG_REQUIRE(FeatureFlags::get(js).getAllowIrrevocableStubStorage(), DOMDataCloneError, + "DurableObjectClass cannot be serialized in this context."); + KJ_SWITCH_ONEOF(channel->getTokenMaybeSync(IoChannelFactory::ChannelTokenUsage::STORAGE)) { + KJ_CASE_ONEOF(token, kj::Array) { + // Token is available synchronously. For backwards compatibility, write it directly into + // the serialized value. + // TODO(cleanup): As soon as all of production is updated to understand externals, stop + // writing inline tokens. + serializer.writeLengthDelimited(token); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { + storedHandler.writeChannel(kj::mv(channel), kj::mv(promise)); + + // Write an empty array to signal that we're using an external rather than an inline + // token. + serializer.writeLengthDelimited(kj::ArrayPtr()); + } + } + return; } + // TODO(someday): structuredClone() should have special handling that just reproduces the same // local object. At present we have no way to recognize structuredClone() here though. } - // The allow_irrevocable_stub_storage flag allows us to just embed the token inline. This format - // is temporary, anyone using this will lose their data later. - JSG_REQUIRE(FeatureFlags::get(js).getAllowIrrevocableStubStorage(), DOMDataCloneError, - "DurableObjectClass cannot be serialized in this context."); - KJ_SWITCH_ONEOF(channel->getTokenMaybeSync(IoChannelFactory::ChannelTokenUsage::STORAGE)) { - KJ_CASE_ONEOF(token, kj::Array) { - serializer.writeLengthDelimited(token); - } - KJ_CASE_ONEOF(promise, kj::Promise>) { - // TODO(stub-storage): Eventually we'll serialize by pointing to an external table. 
- KJ_UNIMPLEMENTED( - "tried to store ActorClassChannel whose token is not synchronously available"); - } - } + JSG_FAIL_REQUIRE(DOMDataCloneError, "DurableObjectClass cannot be serialized in this context."); } jsg::Ref DurableObjectClass::deserialize( @@ -344,18 +375,29 @@ jsg::Ref DurableObjectClass::deserialize( KJ_FAIL_REQUIRE("wrong external type for DurableObjectClass", external.which()); } + return js.alloc(ioctx.addObject(kj::mv(channel))); + } else KJ_IF_SOME(storedHandler, + kj::tryDowncast(handler)) { + // The allow_irrevocable_stub_storage flag allows us to just embed the token inline. This + // format is temporary, anyone using this will lose their data later. + JSG_REQUIRE(FeatureFlags::get(js).getAllowIrrevocableStubStorage(), DOMDataCloneError, + "DurableObjectClass cannot be deserialized in this context."); + auto& ioctx = IoContext::current(); + auto token = deserializer.readLengthDelimitedBytes(); + kj::Own channel; + if (token.size() > 0) { + // Token embedded inline, just use it. + channel = ioctx.getIoChannelFactory().actorClassFromToken( + IoChannelFactory::ChannelTokenUsage::STORAGE, token); + } else { + // Token stored out-of-line as an external. + channel = storedHandler.readActorClassChannel(ioctx.getIoChannelFactory()); + } return js.alloc(ioctx.addObject(kj::mv(channel))); } } - // The allow_irrevocable_stub_storage flag allows us to just embed the token inline. This format - // is temporary, anyone using this will lose their data later. 
- JSG_REQUIRE(FeatureFlags::get(js).getAllowIrrevocableStubStorage(), DOMDataCloneError, - "DOMDataCloneError cannot be deserialized in this context."); - auto& ioctx = IoContext::current(); - auto channel = ioctx.getIoChannelFactory().actorClassFromToken( - IoChannelFactory::ChannelTokenUsage::STORAGE, deserializer.readLengthDelimitedBytes()); - return js.alloc(ioctx.addObject(kj::mv(channel))); + JSG_FAIL_REQUIRE(DOMDataCloneError, "DurableObjectClass cannot be deserialized in this context."); } } // namespace workerd::api diff --git a/src/workerd/api/actor.h b/src/workerd/api/actor.h index a4194513b2e..87161bc7504 100644 --- a/src/workerd/api/actor.h +++ b/src/workerd/api/actor.h @@ -126,6 +126,29 @@ class DurableObject final: public Fetcher { // the interface implemented by users' Durable Object classes. } + // Even though it ought to be inherited, we have to declare this explicitly or the serialization + // JSG template magic gets mad. + void serialize(jsg::Lock& js, jsg::Serializer& serializer) { + return Fetcher::serialize(js, serializer); + } + + // DurableObject stubs serialize as ServiceStubs, aka Fetchers. We just forward to the + // implementation of serialize() from Fetcher, which is our parent class anyway. On the other + // end, it is deserialized as a Fetcher. + // + // Because DO stubs serialize as `Fetcher`, the `id` and `name` properties get dropped when + // serialized. Some arguments why this is the right thing: + // - Honestly, these properties shouldn't be there. They are blocking the ability for a DO to + // implement RPC methods named `id` or `name`. The app can just as easily store the ID + // alongside the stub. Arguably we should remove these properties (with a compat flag). + // - You may not WANT to send them over RPC. You may not want the recipient of the stub to know + // the ID or name of the thing it is talking to. If you do want it to know, you should tell it + // so explicitly. 
+ // - `DurableObjectId` is not serializable, and it would actually be tricky to make it + // serializable due to the fact that you need an `ActorIdFactory` to construct a valid ID for + // a given namespace. This would take some work to solve. + JSG_ONEWAY_SERIALIZABLE(rpc::SerializationTag::SERVICE_STUB); + void visitForMemoryInfo(jsg::MemoryTracker& tracker) const { tracker.trackField("id", id); } @@ -211,9 +234,9 @@ class DurableObjectNamespace: public jsg::Object { JSG_STRUCT(locationHint, routingMode, version); - // DurableObjectLocationHint values from https://developers.cloudflare.com/workers/runtime-apis/durable-objects/#providing-a-location-hint + // DurableObjectLocationHint values from https://developers.cloudflare.com/durable-objects/reference/data-location/#provide-a-location-hint JSG_STRUCT_TS_DEFINE( - type DurableObjectLocationHint = "wnam" | "enam" | "sam" | "weur" | "eeur" | "apac" | "oc" | "afr" | "me"; + type DurableObjectLocationHint = "wnam" | "enam" | "sam" | "weur" | "eeur" | "apac" | "apac-ne" | "apac-se" | "oc" | "afr" | "me"; type DurableObjectRoutingMode = "primary-only"); JSG_STRUCT_TS_OVERRIDE_DYNAMIC(CompatibilityFlags::Reader flags) { @@ -310,8 +333,12 @@ class GlobalActorOutgoingFactory final: public Fetcher::OutgoingFactory { version(kj::mv(version)) {} kj::Own newSingleUseClient(kj::Maybe cfStr) override; + kj::Own getSubrequestChannel() override; private: + IoChannelFactory::ActorChannel& getOrCreateActorChannel( + IoContext& context, SpanParent parentSpan); + ChannelIdOrFactory channelIdOrFactory; jsg::Ref id; kj::Maybe locationHint; @@ -335,8 +362,12 @@ class LocalActorOutgoingFactory final: public Fetcher::OutgoingFactory { actorId(kj::mv(actorId)) {} kj::Own newSingleUseClient(kj::Maybe cfStr) override; + kj::Own getSubrequestChannel() override; private: + IoChannelFactory::ActorChannel& getOrCreateActorChannel( + IoContext& context, SpanParent parentSpan); + uint channelId; kj::String actorId; kj::Maybe> actorChannel; @@ 
-356,6 +387,7 @@ class ReplicaActorOutgoingFactory final: public Fetcher::OutgoingFactory { actorId(kj::mv(actorId)) {} kj::Own newSingleUseClient(kj::Maybe cfStr) override; + kj::Own getSubrequestChannel() override; private: kj::Own actorChannel; diff --git a/src/workerd/api/blob.c++ b/src/workerd/api/blob.c++ index a1388f556af..a2b614dca1d 100644 --- a/src/workerd/api/blob.c++ +++ b/src/workerd/api/blob.c++ @@ -88,25 +88,22 @@ kj::Maybe concat(jsg::Lock& js, jsg::Optional m size_t toCopy = kj::min(bytes.size(), cachedSize); if (toCopy > 0) { KJ_ASSERT(view.size() >= toCopy); - view.first(toCopy).copyFrom(bytes.asArrayPtr().first(toCopy)); + view.write(bytes.asArrayPtr().first(toCopy)); } - view = view.slice(toCopy); } KJ_CASE_ONEOF(text, kj::String) { auto byteLength = text.asBytes().size(); KJ_ASSERT(byteLength == cachedPartSizes[index++]); if (byteLength == 0) continue; KJ_ASSERT(view.size() >= byteLength); - view.first(byteLength).copyFrom(text.asBytes()); - view = view.slice(byteLength); + view.write(text.asBytes()); } KJ_CASE_ONEOF(blob, jsg::Ref) { auto data = blob->getData(); KJ_ASSERT(data.size() == cachedPartSizes[index++]); if (data.size() == 0) continue; KJ_ASSERT(view.size() >= data.size()); - view.first(data.size()).copyFrom(data); - view = view.slice(data.size()); + view.write(data); } } } diff --git a/src/workerd/api/container.c++ b/src/workerd/api/container.c++ index 6c3cbfe3782..5117f5b6c7d 100644 --- a/src/workerd/api/container.c++ +++ b/src/workerd/api/container.c++ @@ -154,8 +154,8 @@ jsg::Promise> ExecProcess::output(jsg::Lock& js) { stdoutPromise = stream->getController() .readAllBytes(js, IoContext::current().getLimitEnforcer().getBufferingLimit()) - .then(js, [](jsg::Lock&, jsg::BufferSource bytes) { - return kj::heapArray(bytes.asArrayPtr()); + .then(js, [](jsg::Lock& js, jsg::JsRef bytes) { + return bytes.getHandle(js).copy(); }); } @@ -165,8 +165,8 @@ jsg::Promise> ExecProcess::output(jsg::Lock& js) { "Cannot call output() after stderr 
has started being consumed."); stderrPromise = stream->getController() .readAllBytes(js, kj::maxValue) - .then(js, [](jsg::Lock&, jsg::BufferSource bytes) { - return kj::heapArray(bytes.asArrayPtr()); + .then(js, [](jsg::Lock& js, jsg::JsRef bytes) { + return bytes.getHandle(js).copy(); }); } @@ -211,6 +211,9 @@ void Container::start(jsg::Lock& js, jsg::Optional maybeOptions) StartupOptions options = kj::mv(maybeOptions).orDefault({}); auto req = rpcClient->startRequest(); + KJ_IF_SOME(spanContext, IoContext::current().getCurrentTraceSpan().toSpanContext()) { + spanContext.toCapnp(req.initSpanContext()); + } KJ_IF_SOME(entrypoint, options.entrypoint) { auto list = req.initEntrypoint(entrypoint.size()); for (auto i: kj::indices(entrypoint)) { @@ -481,6 +484,9 @@ jsg::Promise> Container::exec( auto params = req.initParams(); params.setCombinedOutput(combinedOutput); + KJ_IF_SOME(spanContext, ioContext.getCurrentTraceSpan().toSpanContext()) { + spanContext.toCapnp(params.initSpanContext()); + } // Some basic validation... KJ_IF_SOME(cwd, options.cwd) { @@ -599,7 +605,11 @@ jsg::Promise Container::monitor(jsg::Lock& js) { return IoContext::current() .awaitIo(js, rpcClient->monitorRequest(capnp::MessageSize{4, 0}).send()) - .then(js, [this](jsg::Lock& js, capnp::Response results) { + // Note: `self` (jsg::Ref) is captured to prevent GC from collecting this object while + // the promise continuation is pending. Without it, the bare `this` pointer dangles. 
+ .then(js, + [this, self = JSG_THIS]( + jsg::Lock& js, capnp::Response results) { running = false; auto exitCode = results.getExitCode(); KJ_IF_SOME(d, destroyReason) { @@ -613,7 +623,8 @@ jsg::Promise Container::monitor(jsg::Lock& js) { KJ_ASSERT_NONNULL(err.tryCast()).set(js, "exitCode", js.num(exitCode)); js.throwException(err); } - }, [this](jsg::Lock& js, jsg::Value&& error) { + }, + [this, self = JSG_THIS](jsg::Lock& js, jsg::Value&& error) { running = false; destroyReason = kj::none; js.throwException(kj::mv(error)); @@ -824,10 +835,14 @@ class Container::TcpPortOutgoingFactory final: public Fetcher::OutgoingFactory { jsg::Ref Container::getTcpPort(jsg::Lock& js, int port) { JSG_REQUIRE(port > 0 && port < 65536, TypeError, "Invalid port number: ", port); - auto req = rpcClient->getTcpPortRequest(capnp::MessageSize{4, 0}); + auto req = rpcClient->getTcpPortRequest( + capnp::MessageSize{4 + capnp::sizeInWords(), 0}); req.setPort(port); auto& ioctx = IoContext::current(); + KJ_IF_SOME(spanContext, ioctx.getCurrentTraceSpan().toSpanContext()) { + spanContext.toCapnp(req.initSpanContext()); + } kj::Own factory = kj::heap(ioctx.getByteStreamFactory(), ioctx.getEntropySource(), diff --git a/src/workerd/api/container.h b/src/workerd/api/container.h index c3d4a26695e..98346826d39 100644 --- a/src/workerd/api/container.h +++ b/src/workerd/api/container.h @@ -274,8 +274,8 @@ class Container: public jsg::Object { JSG_METHOD(snapshotDirectory); JSG_METHOD(snapshotContainer); JSG_METHOD(interceptOutboundHttps); + JSG_METHOD(exec); if (flags.getWorkerdExperimental()) { - JSG_METHOD(exec); JSG_METHOD(interceptOutboundTcp); JSG_METHOD(inspect); } diff --git a/src/workerd/api/crypto/aes-test.c++ b/src/workerd/api/crypto/aes-test.c++ index cd7ee28e5d8..7ef39ed548b 100644 --- a/src/workerd/api/crypto/aes-test.c++ +++ b/src/workerd/api/crypto/aes-test.c++ @@ -42,21 +42,27 @@ KJ_TEST("AES-KW key wrap") { // AES-KW 256 }); - auto aesKeys = KJ_MAP(rawKey, kj::mv(rawWrappingKeys)) 
{ - SubtleCrypto::ImportKeyAlgorithm algorithm = { - .name = kj::str("AES-KW"), - }; - bool extractable = false; - - return CryptoKey::Impl::importAes(isolateLock, "AES-KW", "raw", kj::mv(rawKey), - kj::mv(algorithm), extractable, {kj::str("wrapKey"), kj::str("unwrapKey")}); - }; - auto keyMaterial = kj::heapArray( {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}); + auto getKeys = [&](jsg::Lock& js) { + return KJ_MAP(rawKey, kj::mv(rawWrappingKeys)) { + SubtleCrypto::ImportKeyAlgorithm algorithm = { + .name = kj::str("AES-KW"), + }; + bool extractable = false; + + auto u8 = jsg::JsBufferSource(jsg::JsUint8Array::create(isolateLock, rawKey)); + + return CryptoKey::Impl::importAes(isolateLock, "AES-KW", "raw", u8.addRef(isolateLock), + kj::mv(algorithm), extractable, {kj::str("wrapKey"), kj::str("unwrapKey")}); + }; + }; + JSG_WITHIN_CONTEXT_SCOPE(isolateLock, isolateLock.newContext().getHandle(isolateLock), [&](jsg::Lock& js) { + auto aesKeys = getKeys(js); + for (const auto& aesKey: aesKeys) { SubtleCrypto::EncryptAlgorithm params; params.name = kj::str("AES-KW"); @@ -102,7 +108,8 @@ KJ_TEST("AES-CTR key wrap") { SubtleCrypto subtle; static constexpr auto getWrappingKey = [](jsg::Lock& js, SubtleCrypto& subtle) { - return subtle.importKeySync(js, "raw", kj::heapArray(KEY_DATA), + auto keyData = jsg::JsBufferSource(jsg::JsUint8Array::create(js, KEY_DATA)); + return subtle.importKeySync(js, "raw", keyData.addRef(js), SubtleCrypto::ImportKeyAlgorithm{.name = kj::str("AES-CTR")}, false /* extractable */, {kj::str("wrapKey"), kj::str("unwrapKey")}); }; @@ -133,8 +140,9 @@ KJ_TEST("AES-CTR key wrap") { JSG_WITHIN_CONTEXT_SCOPE(isolateLock, isolateLock.newContext().getHandle(isolateLock), [&](jsg::Lock& js) { auto wrappingKey = getWrappingKey(js, subtle); + auto keyData = jsg::JsBufferSource(jsg::JsUint8Array::create(js, KEY_DATA)); subtle - .importKey(js, kj::str("raw"), kj::heapArray(KEY_DATA), getImportKeyAlg(), true, + 
.importKey(js, kj::str("raw"), keyData.addRef(js), getImportKeyAlg(), true, kj::arr(kj::str("decrypt"))) .then(js, [&](jsg::Lock&, jsg::Ref toWrap) { @@ -142,8 +150,8 @@ KJ_TEST("AES-CTR key wrap") { }) .then(js, [&](jsg::Lock& js, jsg::JsRef wrapped) { - auto data = wrapped.getHandle(js).copy(); - return subtle.unwrapKey(js, kj::str("raw"), kj::mv(data), *wrappingKey, getEnc(js), + auto data = jsg::JsBufferSource(wrapped.getHandle(js)); + return subtle.unwrapKey(js, kj::str("raw"), data, *wrappingKey, getEnc(js), getImportKeyAlg(), true, kj::arr(kj::str("encrypt")), *jwkHandler); }) .then(js, [&](jsg::Lock& js, jsg::Ref unwrapped) { diff --git a/src/workerd/api/crypto/aes.c++ b/src/workerd/api/crypto/aes.c++ index 200ba57f6d6..8a059d9e1a7 100644 --- a/src/workerd/api/crypto/aes.c++ +++ b/src/workerd/api/crypto/aes.c++ @@ -788,8 +788,9 @@ kj::Own CryptoKey::Impl::importAes(jsg::Lock& js, if (format == "raw") { // NOTE: Checked in SubtleCrypto::importKey(). - keyDataArray = kj::mv(keyData.get>()); - switch (keyDataArray.size() * 8) { + auto& source = keyData.get>(); + auto handle = source.getHandle(js); + switch (handle.size() * 8) { case 128: case 192: case 256: @@ -797,8 +798,9 @@ kj::Own CryptoKey::Impl::importAes(jsg::Lock& js, default: JSG_FAIL_REQUIRE(DOMDataError, "Imported AES key length must be 128, 192, or 256 bits but provided ", - keyDataArray.size() * 8, "."); + handle.size() * 8, "."); } + keyDataArray = handle.copy(); } else if (format == "jwk") { auto aesMode = normalizedName.slice(4); diff --git a/src/workerd/api/crypto/crypto.c++ b/src/workerd/api/crypto/crypto.c++ index 6cf3456554f..7495a5c65dd 100644 --- a/src/workerd/api/crypto/crypto.c++ +++ b/src/workerd/api/crypto/crypto.c++ @@ -25,6 +25,15 @@ #include namespace workerd::api { +namespace { +// BoringSSL does not tolerate null pointers even when the length is zero. 
+// JsBufferSource::asArrayPtr() can return {nullptr, 0} for empty buffers, +// so we ensure a non-null pointer before passing to OpenSSL. +kj::ArrayPtr nonNullBytes(kj::ArrayPtr ptr) { + static const kj::byte DUMMY = 0; + return ptr == nullptr ? kj::arrayPtr(&DUMMY, 0) : ptr; +} +} // namespace kj::StringPtr CryptoKeyUsageSet::name() const { if (*this == encrypt()) return "encrypt"; @@ -328,63 +337,65 @@ void CryptoKey::visitForGc(jsg::GcVisitor& visitor) { jsg::Promise> SubtleCrypto::encrypt(jsg::Lock& js, kj::OneOf algorithmParam, const CryptoKey& key, - kj::Array plainText) { + jsg::JsBufferSource plainText) { auto algorithm = interpretAlgorithmParam(kj::mv(algorithmParam)); auto checkErrorsOnFinish = webCryptoOperationBegin(__func__, algorithm); return js.evalNow([&] { validateOperation(key, algorithm.name, CryptoKeyUsageSet::encrypt()); - return key.impl->encrypt(js, kj::mv(algorithm), plainText).addRef(js); + return key.impl->encrypt(js, kj::mv(algorithm), nonNullBytes(plainText.asArrayPtr())) + .addRef(js); }); } jsg::Promise> SubtleCrypto::decrypt(jsg::Lock& js, kj::OneOf algorithmParam, const CryptoKey& key, - kj::Array cipherText) { + jsg::JsBufferSource cipherText) { auto algorithm = interpretAlgorithmParam(kj::mv(algorithmParam)); auto checkErrorsOnFinish = webCryptoOperationBegin(__func__, algorithm); return js.evalNow([&] { validateOperation(key, algorithm.name, CryptoKeyUsageSet::decrypt()); - return key.impl->decrypt(js, kj::mv(algorithm), cipherText).addRef(js); + return key.impl->decrypt(js, kj::mv(algorithm), nonNullBytes(cipherText.asArrayPtr())) + .addRef(js); }); } jsg::Promise> SubtleCrypto::sign(jsg::Lock& js, kj::OneOf algorithmParam, const CryptoKey& key, - kj::Array data) { + jsg::JsBufferSource data) { auto algorithm = interpretAlgorithmParam(kj::mv(algorithmParam)); auto checkErrorsOnFinish = webCryptoOperationBegin(__func__, algorithm); return js.evalNow([&] { validateOperation(key, algorithm.name, CryptoKeyUsageSet::sign()); - return 
key.impl->sign(js, kj::mv(algorithm), data).addRef(js); + return key.impl->sign(js, kj::mv(algorithm), nonNullBytes(data.asArrayPtr())).addRef(js); }); } jsg::Promise SubtleCrypto::verify(jsg::Lock& js, kj::OneOf algorithmParam, const CryptoKey& key, - kj::Array signature, - kj::Array data) { + jsg::JsBufferSource signature, + jsg::JsBufferSource data) { auto algorithm = interpretAlgorithmParam(kj::mv(algorithmParam)); auto checkErrorsOnFinish = webCryptoOperationBegin(__func__, algorithm); return js.evalNow([&] { validateOperation(key, algorithm.name, CryptoKeyUsageSet::verify()); - return key.impl->verify(js, kj::mv(algorithm), signature, data); + return key.impl->verify(js, kj::mv(algorithm), nonNullBytes(signature.asArrayPtr()), + nonNullBytes(data.asArrayPtr())); }); } -jsg::Promise> SubtleCrypto::digest(jsg::Lock& js, - kj::OneOf algorithmParam, - kj::Array data) { +jsg::Promise> SubtleCrypto::digest( + jsg::Lock& js, kj::OneOf algorithmParam, jsg::JsBufferSource data) { auto algorithm = interpretAlgorithmParam(kj::mv(algorithmParam)); auto checkErrorsOnFinish = webCryptoOperationBegin(__func__, algorithm); @@ -395,8 +406,9 @@ jsg::Promise> SubtleCrypto::digest(jsg::Lock& js, auto digestCtx = kj::disposeWith(EVP_MD_CTX_new()); KJ_ASSERT(digestCtx.get() != nullptr); + auto ptr = nonNullBytes(data.asArrayPtr()); OSSLCALL(EVP_DigestInit_ex(digestCtx.get(), type, nullptr)); - OSSLCALL(EVP_DigestUpdate(digestCtx.get(), data.begin(), data.size())); + OSSLCALL(EVP_DigestUpdate(digestCtx.get(), ptr.begin(), ptr.size())); auto buf = jsg::JsArrayBuffer::create(js, EVP_MD_CTX_size(digestCtx.get())); uint messageDigestSize = 0; @@ -456,13 +468,13 @@ jsg::Promise> SubtleCrypto::deriveKey(jsg::Lock& js, auto length = getKeyLength(derivedKeyAlgorithm); - auto secret = baseKey.impl->deriveBits(js, kj::mv(algorithm), length); + auto secret = jsg::JsBufferSource(baseKey.impl->deriveBits(js, kj::mv(algorithm), length)); // TODO(perf): For conformance, importKey() makes a copy 
of `secret`. In this case we really // don't need to, but rather we ought to call the appropriate CryptoKey::Impl::import*() // function directly. return importKeySync( - js, "raw", secret.copy(), kj::mv(derivedKeyAlgorithm), extractable, kj::mv(keyUsages)); + js, "raw", secret.addRef(js), kj::mv(derivedKeyAlgorithm), extractable, kj::mv(keyUsages)); }); } @@ -531,7 +543,7 @@ jsg::Promise> SubtleCrypto::wrapKey(jsg::Lock& js jsg::Promise> SubtleCrypto::unwrapKey(jsg::Lock& js, kj::String format, - kj::Array wrappedKey, + jsg::JsBufferSource wrappedKey, const CryptoKey& unwrappingKey, kj::OneOf unwrapAlgorithm, kj::OneOf unwrappedKeyAlgorithm, @@ -550,7 +562,8 @@ jsg::Promise> SubtleCrypto::unwrapKey(jsg::Lock& js, validateOperation(unwrappingKey, normalizedAlgorithm.name, CryptoKeyUsageSet::unwrapKey()); - auto bytes = unwrappingKey.impl->unwrapKey(js, kj::mv(normalizedAlgorithm), wrappedKey); + auto bytes = unwrappingKey.impl->unwrapKey( + js, kj::mv(normalizedAlgorithm), nonNullBytes(wrappedKey.asArrayPtr())); ImportKeyData importData; @@ -560,7 +573,7 @@ jsg::Promise> SubtleCrypto::unwrapKey(jsg::Lock& js, importData = JSG_REQUIRE_NONNULL(jwkHandler.tryUnwrap(js, jwkDict.getHandle(js)), DOMDataError, "Missing \"kty\" field or corrupt JSON unwrapping key?"); } else { - importData = bytes.copy(); + importData = jsg::JsBufferSource(bytes).addRef(js); } auto imported = importKeySync(js, format, kj::mv(importData), kj::mv(normalizedUnwrapAlgorithm), @@ -597,12 +610,14 @@ jsg::Ref SubtleCrypto::importKeySync(jsg::Lock& js, bool extractable, kj::ArrayPtr keyUsages) { if (format == "raw" || format == "pkcs8" || format == "spki") { - auto& key = JSG_REQUIRE_NONNULL(keyData.tryGet>(), TypeError, + auto& key = JSG_REQUIRE_NONNULL(keyData.tryGet>(), TypeError, "Import data provided for \"raw\", \"pkcs8\", or \"spki\" import formats must be a buffer " "source."); + auto keyHandle = key.getHandle(js); // Make a copy of the key import data. 
- keyData = kj::heapArray(key.asPtr()); + auto copy = jsg::JsUint8Array::create(js, keyHandle.asArrayPtr()); + keyData = jsg::JsBufferSource(copy).addRef(js); } else if (format == "jwk") { JSG_REQUIRE(keyData.is(), TypeError, "Import data provided for \"jwk\" import format must be a JsonWebKey."); @@ -800,7 +815,7 @@ void DigestStream::dispose(jsg::Lock& js) { KJ_IF_SOME(ready, state.tryGet()) { auto reason = js.typeError("The DigestStream was disposed."); ready.resolver.reject(js, reason); - state.init(js.v8Ref(reason)); + state.init(reason.addRef(js)); } } JSG_CATCH(exception) { @@ -859,7 +874,7 @@ void DigestStream::abort(jsg::Lock& js, jsg::JsValue reason) { // If the state is already closed or errored, then this is a non-op KJ_IF_SOME(ready, state.tryGet()) { ready.resolver.reject(js, reason); - state.init(js.v8Ref(reason)); + state.init(reason.addRef(js)); } } diff --git a/src/workerd/api/crypto/crypto.h b/src/workerd/api/crypto/crypto.h index ad2fff86e83..da0073b7ee8 100644 --- a/src/workerd/api/crypto/crypto.h +++ b/src/workerd/api/crypto/crypto.h @@ -519,31 +519,30 @@ class SubtleCrypto: public jsg::Object { JSG_STRUCT_TS_OVERRIDE(JsonWebKey); // Rename from SubtleCryptoJsonWebKey }; - using ImportKeyData = kj::OneOf, JsonWebKey>; + using ImportKeyData = kj::OneOf, JsonWebKey>; using ExportKeyData = kj::OneOf, JsonWebKey>; jsg::Promise> encrypt(jsg::Lock& js, kj::OneOf algorithm, const CryptoKey& key, - kj::Array plainText); + jsg::JsBufferSource plainText); jsg::Promise> decrypt(jsg::Lock& js, kj::OneOf algorithm, const CryptoKey& key, - kj::Array cipherText); + jsg::JsBufferSource cipherText); jsg::Promise> sign(jsg::Lock& js, kj::OneOf algorithm, const CryptoKey& key, - kj::Array data); + jsg::JsBufferSource data); jsg::Promise verify(jsg::Lock& js, kj::OneOf algorithm, const CryptoKey& key, - kj::Array signature, - kj::Array data); + jsg::JsBufferSource signature, + jsg::JsBufferSource data); - jsg::Promise> digest(jsg::Lock& js, - kj::OneOf algorithm, 
- kj::Array data); + jsg::Promise> digest( + jsg::Lock& js, kj::OneOf algorithm, jsg::JsBufferSource data); jsg::Promise, CryptoKeyPair>> generateKey(jsg::Lock& js, kj::OneOf algorithm, @@ -591,7 +590,7 @@ class SubtleCrypto: public jsg::Object { const jsg::TypeHandler& jwkHandler); jsg::Promise> unwrapKey(jsg::Lock& js, kj::String format, - kj::Array wrappedKey, + jsg::JsBufferSource wrappedKey, const CryptoKey& unwrappingKey, kj::OneOf unwrapAlgorithm, kj::OneOf unwrappedKeyAlgorithm, diff --git a/src/workerd/api/crypto/dh.c++ b/src/workerd/api/crypto/dh.c++ index 2aca01b6a3c..02530718dc3 100644 --- a/src/workerd/api/crypto/dh.c++ +++ b/src/workerd/api/crypto/dh.c++ @@ -68,8 +68,8 @@ kj::Own initDhGroup(kj::StringPtr name) { return kj::mv(dh); } -kj::Own initDh(kj::OneOf, int>& sizeOrKey, - kj::OneOf, int>& generator) { +kj::Own initDh(kj::OneOf, int>& sizeOrKey, + kj::OneOf, int>& generator) { KJ_SWITCH_ONEOF(sizeOrKey) { KJ_CASE_ONEOF(size, int) { KJ_SWITCH_ONEOF(generator) { @@ -121,14 +121,14 @@ kj::Own initDh(kj::OneOf, int>& sizeOrKey, "DiffieHellman init failed: Invalid DH prime generated"); return kj::mv(dh); } - KJ_CASE_ONEOF(gen, kj::Array) { + KJ_CASE_ONEOF(gen, kj::ArrayPtr) { // Node.js does not support generating Diffie-Hellman keys from an int prime // and byte-array generator. This could change in the future. JSG_FAIL_REQUIRE(Error, "DiffieHellman init failed: invalid parameters"); } } } - KJ_CASE_ONEOF(key, kj::Array) { + KJ_CASE_ONEOF(key, kj::ArrayPtr) { // Operations on an "egregiously large" prime will throw with BoringSSL. 
JSG_REQUIRE(key.size() <= OPENSSL_DH_MAX_MODULUS_BITS / CHAR_BIT, RangeError, "DiffieHellman init failed: key is too large"); @@ -149,7 +149,7 @@ kj::Own initDh(kj::OneOf, int>& sizeOrKey, JSG_FAIL_REQUIRE(Error, "DiffieHellman init failed: could not set keys"); } } - KJ_CASE_ONEOF(gen, kj::Array) { + KJ_CASE_ONEOF(gen, kj::ArrayPtr) { JSG_REQUIRE(gen.size() <= OPENSSL_DH_MAX_MODULUS_BITS / CHAR_BIT, RangeError, "DiffieHellman init failed: generator is too large"); JSG_REQUIRE(gen.size() > 0, Error, "DiffieHellman init failed: invalid generator"); @@ -193,8 +193,8 @@ void zeroPadDiffieHellmanSecret(size_t remainder_size, unsigned char* data, size DiffieHellman::DiffieHellman(kj::StringPtr group): dh(initDhGroup(group)) {} -DiffieHellman::DiffieHellman( - kj::OneOf, int>& sizeOrKey, kj::OneOf, int>& generator) +DiffieHellman::DiffieHellman(kj::OneOf, int>& sizeOrKey, + kj::OneOf, int>& generator) : dh(initDh(sizeOrKey, generator)) {} kj::Maybe DiffieHellman::check() { diff --git a/src/workerd/api/crypto/dh.h b/src/workerd/api/crypto/dh.h index 07a91d5985f..2104f610baf 100644 --- a/src/workerd/api/crypto/dh.h +++ b/src/workerd/api/crypto/dh.h @@ -11,8 +11,8 @@ namespace workerd::api { class DiffieHellman final { public: DiffieHellman(kj::StringPtr group); - DiffieHellman(kj::OneOf, int>& sizeOrKey, - kj::OneOf, int>& generator); + DiffieHellman(kj::OneOf, int>& sizeOrKey, + kj::OneOf, int>& generator); DiffieHellman(DiffieHellman&&) = default; DiffieHellman& operator=(DiffieHellman&&) = default; KJ_DISALLOW_COPY(DiffieHellman); diff --git a/src/workerd/api/crypto/digest.c++ b/src/workerd/api/crypto/digest.c++ index 79e85e54907..6c2c888270a 100644 --- a/src/workerd/api/crypto/digest.c++ +++ b/src/workerd/api/crypto/digest.c++ @@ -121,7 +121,7 @@ class HmacKey final: public CryptoKey::Impl { CryptoKey::HmacKeyAlgorithm keyAlgorithm; }; -void zeroOutTrailingKeyBits(kj::Array& keyDataArray, int keyBitLength) { +void zeroOutTrailingKeyBits(kj::ArrayPtr keyDataArray, int 
keyBitLength) { // We zero out the least-significant bits of the last byte, matching Chrome's // big-endian behavior when generating keys. int arrayBitLength = keyDataArray.size() * 8; @@ -268,7 +268,9 @@ kj::Own CryptoKey::Impl::importHmac(jsg::Lock& js, if (format == "raw") { // NOTE: Checked in SubtleCrypto::importKey(). - keyDataArray = kj::mv(keyData.get>()); + auto& source = keyData.get>(); + auto handle = source.getHandle(js); + keyDataArray = handle.copy(); } else if (format == "jwk") { auto& keyDataJwk = keyData.get(); JSG_REQUIRE(keyDataJwk.kty == "oct", DOMDataError, diff --git a/src/workerd/api/crypto/ec.c++ b/src/workerd/api/crypto/ec.c++ index 9b5117c4c8e..3c39d8a530a 100644 --- a/src/workerd/api/crypto/ec.c++ +++ b/src/workerd/api/crypto/ec.c++ @@ -501,16 +501,18 @@ kj::OneOf, CryptoKeyPair> EllipticKey::generateElliptic(jsg: return CryptoKeyPair{.publicKey = kj::mv(publicKey), .privateKey = kj::mv(privateKey)}; } -AsymmetricKeyData importEllipticRaw(SubtleCrypto::ImportKeyData keyData, +AsymmetricKeyData importEllipticRaw(jsg::Lock& js, + SubtleCrypto::ImportKeyData keyData, int curveId, kj::StringPtr normalizedName, kj::ArrayPtr keyUsages, CryptoKeyUsageSet allowedUsages) { // Import an elliptic key represented by raw data, only public keys are supported. 
- JSG_REQUIRE(keyData.is>(), DOMDataError, - "Expected raw EC key but instead got a Json Web Key."); - const auto& raw = keyData.get>(); + auto& source = JSG_REQUIRE_NONNULL(keyData.tryGet>(), + DOMDataError, "Expected raw EC key but instead got a JSON Web Key."); + auto handle = source.getHandle(js); + auto raw = handle.asArrayPtr(); auto usages = CryptoKeyUsageSet::validate( normalizedName, CryptoKeyUsageSet::Context::importPublic, keyUsages, allowedUsages); @@ -714,7 +716,7 @@ kj::Own CryptoKey::Impl::importEcdsa(jsg::Lock& js, CryptoKeyUsageSet::sign() | CryptoKeyUsageSet::verify()); } else { return importEllipticRaw( - kj::mv(keyData), curveId, normalizedName, keyUsages, CryptoKeyUsageSet::verify()); + js, kj::mv(keyData), curveId, normalizedName, keyUsages, CryptoKeyUsageSet::verify()); } }(); @@ -775,7 +777,7 @@ kj::Own CryptoKey::Impl::importEcdh(jsg::Lock& js, CryptoKeyUsageSet::derivationKeyMask()); } else { // The usage set is required to be empty for public ECDH keys, including raw keys. - return importEllipticRaw(kj::mv(keyData), curveId, normalizedName, keyUsages, usageSet); + return importEllipticRaw(js, kj::mv(keyData), curveId, normalizedName, keyUsages, usageSet); } }(); @@ -1179,7 +1181,7 @@ kj::Own CryptoKey::Impl::importEddsa(jsg::Lock& js, normalizedName == "X25519" ? CryptoKeyUsageSet::derivationKeyMask() : CryptoKeyUsageSet::sign() | CryptoKeyUsageSet::verify()); } else { - return importEllipticRaw(kj::mv(keyData), nid, normalizedName, keyUsages, + return importEllipticRaw(js, kj::mv(keyData), nid, normalizedName, keyUsages, normalizedName == "X25519" ? CryptoKeyUsageSet() : CryptoKeyUsageSet::verify()); } }(); diff --git a/src/workerd/api/crypto/hkdf.c++ b/src/workerd/api/crypto/hkdf.c++ index 1e169c070a5..948f3abff58 100644 --- a/src/workerd/api/crypto/hkdf.c++ +++ b/src/workerd/api/crypto/hkdf.c++ @@ -121,10 +121,11 @@ kj::Own CryptoKey::Impl::importHkdf(jsg::Lock& js, format, "\")"); // NOTE: Checked in SubtleCrypto::importKey(). 
- auto keyDataArray = kj::mv(keyData.get>()); + auto& source = keyData.get>(); + auto handle = source.getHandle(js); auto keyAlgorithm = CryptoKey::KeyAlgorithm{normalizedName}; - return kj::heap(kj::mv(keyDataArray), kj::mv(keyAlgorithm), extractable, usages); + return kj::heap(handle.copy(), kj::mv(keyAlgorithm), extractable, usages); } } // namespace workerd::api diff --git a/src/workerd/api/crypto/impl.c++ b/src/workerd/api/crypto/impl.c++ index a33015c523c..730e8805540 100644 --- a/src/workerd/api/crypto/impl.c++ +++ b/src/workerd/api/crypto/impl.c++ @@ -254,6 +254,13 @@ void checkPbkdfLimits(jsg::Lock& js, size_t iterations) { } } +void checkScryptLimits(jsg::Lock& js, uint32_t N, uint32_t r, uint32_t p) { + auto& limits = Worker::Isolate::from(js).getLimitEnforcer(); + KJ_IF_SOME(max, limits.checkScryptCost(js, N, r, p)) { + JSG_FAIL_REQUIRE(RangeError, kj::str("Scrypt failed: cost exceeds maximum (", max, ").")); + } +} + kj::Maybe> toBignum(kj::ArrayPtr data) { BIGNUM* result = BN_bin2bn(data.begin(), data.size(), nullptr); if (result == nullptr) return kj::none; diff --git a/src/workerd/api/crypto/impl.h b/src/workerd/api/crypto/impl.h index b9477be2b20..1a25fa8f6b9 100644 --- a/src/workerd/api/crypto/impl.h +++ b/src/workerd/api/crypto/impl.h @@ -424,7 +424,11 @@ class ZeroOnFree { // Check that the requested number of iterations for a key-derivation function // is acceptable. If the requested iterations is not acceptable, a JS error will // be thrown. Otherwise the method will return normally. +// Largest standard DH group (modp18) is 8192 bits. +constexpr uint32_t kMaxPrimeBits = 8192; + void checkPbkdfLimits(jsg::Lock& js, size_t iterations); +void checkScryptLimits(jsg::Lock& js, uint32_t N, uint32_t r, uint32_t p); // Either succeeds with exactly |length| bytes of cryptographically // strong pseudo-random data, or fails. This function may block. 
diff --git a/src/workerd/api/crypto/keys.c++ b/src/workerd/api/crypto/keys.c++ index 03a20891e6d..2c6d7d40357 100644 --- a/src/workerd/api/crypto/keys.c++ +++ b/src/workerd/api/crypto/keys.c++ @@ -453,9 +453,10 @@ AsymmetricKeyData importAsymmetricForWebCrypto(jsg::Lock& js, return {readJwk(kj::mv(keyDataJwk)), keyType, usages}; } else if (format == "spki") { - kj::ArrayPtr keyBytes = - JSG_REQUIRE_NONNULL(keyData.tryGet>(), DOMDataError, - "SPKI import requires an ArrayBuffer."); + auto& source = JSG_REQUIRE_NONNULL(keyData.tryGet>(), + DOMDataError, "SPKI import requires an ArrayBuffer."); + auto handle = source.getHandle(js); + kj::ArrayPtr keyBytes = handle.asArrayPtr(); const kj::byte* ptr = keyBytes.begin(); auto evpPkey = OSSLCALL_OWN( EVP_PKEY, d2i_PUBKEY(nullptr, &ptr, keyBytes.size()), DOMDataError, "Invalid SPKI input."); @@ -472,9 +473,10 @@ AsymmetricKeyData importAsymmetricForWebCrypto(jsg::Lock& js, (normalizedName == "ECDH" ? CryptoKeyUsageSet() : CryptoKeyUsageSet::publicKeyMask())); return {kj::mv(evpPkey), KeyType::PUBLIC, usages}; } else if (format == "pkcs8") { - kj::ArrayPtr keyBytes = - JSG_REQUIRE_NONNULL(keyData.tryGet>(), DOMDataError, - "PKCS8 import requires an ArrayBuffer."); + auto& source = JSG_REQUIRE_NONNULL(keyData.tryGet>(), + DOMDataError, "PKCS8 import requires an ArrayBuffer."); + auto handle = source.getHandle(js); + kj::ArrayPtr keyBytes = handle.asArrayPtr(); const kj::byte* ptr = keyBytes.begin(); auto evpPkey = OSSLCALL_OWN(EVP_PKEY, d2i_AutoPrivateKey(nullptr, &ptr, keyBytes.size()), DOMDataError, "Invalid PKCS8 input."); diff --git a/src/workerd/api/crypto/pbkdf2.c++ b/src/workerd/api/crypto/pbkdf2.c++ index 845696e552f..d7f82da7ced 100644 --- a/src/workerd/api/crypto/pbkdf2.c++ +++ b/src/workerd/api/crypto/pbkdf2.c++ @@ -64,9 +64,12 @@ class Pbkdf2Key final: public CryptoKey::Impl { // check for v8::Isolate::IsExecutionTerminating() in the loop, but for now a hard cap seems // wisest. 
checkPbkdfLimits(js, iterations); + auto derivedLengthBytes = length / 8; + JSG_REQUIRE(ncrypto::checkHkdfLength(hashType, derivedLengthBytes), DOMOperationError, + "Pbkdf2 failed: derived key length exceeds maximum for this hash"); - return JSG_REQUIRE_NONNULL(pbkdf2(js, length / 8, iterations, hashType, keyData, salt), Error, - "PBKDF2 deriveBits failed."); + return JSG_REQUIRE_NONNULL(pbkdf2(js, derivedLengthBytes, iterations, hashType, keyData, salt), + Error, "PBKDF2 deriveBits failed."); } // TODO(bug): Possibly by mistake, PBKDF2 was historically not on the allow list of @@ -133,10 +136,11 @@ kj::Own CryptoKey::Impl::importPbkdf2(jsg::Lock& js, "PBKDF2 key must be imported in \"raw\" format (requested \"", format, "\")."); // NOTE: Checked in SubtleCrypto::importKey(). - auto keyDataArray = kj::mv(keyData.get>()); + auto& source = keyData.get>(); + auto handle = source.getHandle(js); auto keyAlgorithm = CryptoKey::KeyAlgorithm{normalizedName}; - return kj::heap(kj::mv(keyDataArray), kj::mv(keyAlgorithm), extractable, usages); + return kj::heap(handle.copy(), kj::mv(keyAlgorithm), extractable, usages); } } // namespace workerd::api diff --git a/src/workerd/api/crypto/prime-test.c++ b/src/workerd/api/crypto/prime-test.c++ new file mode 100644 index 00000000000..6a73567e871 --- /dev/null +++ b/src/workerd/api/crypto/prime-test.c++ @@ -0,0 +1,31 @@ +// Copyright (c) 2026 Cloudflare, Inc. 
+// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +#include "prime.h" + +#include + +namespace workerd::api { +namespace { + +KJ_TEST("checkPrime rejects excessive num_checks") { + uint8_t buf[] = {0x07}; + KJ_EXPECT_THROW_MESSAGE("Invalid number of checks", checkPrime(kj::arrayPtr(buf, 1u), 65)); +} + +KJ_TEST("checkPrime rejects oversized candidate") { + auto bigBuf = kj::heapArray(2000); + memset(bigBuf.begin(), 0xFF, bigBuf.size()); + KJ_EXPECT_THROW_MESSAGE("exceeds maximum size", checkPrime(bigBuf.asPtr(), 1)); +} + +KJ_TEST("checkPrime accepts valid inputs") { + uint8_t buf7[] = {0x07}; + uint8_t buf9[] = {0x09}; + KJ_EXPECT(checkPrime(kj::arrayPtr(buf7, 1u), 10) == true); + KJ_EXPECT(checkPrime(kj::arrayPtr(buf9, 1u), 10) == false); +} + +} // namespace +} // namespace workerd::api diff --git a/src/workerd/api/crypto/prime.c++ b/src/workerd/api/crypto/prime.c++ index 37c31a0057f..212af65df87 100644 --- a/src/workerd/api/crypto/prime.c++ +++ b/src/workerd/api/crypto/prime.c++ @@ -2,11 +2,24 @@ #include "impl.h" +#include #include #include +#include + namespace workerd::api { +namespace { + +bool checkLimitEnforcer(int, int) { + KJ_IF_SOME(ioContext, IoContext::tryCurrent()) { + return ioContext.getLimitEnforcer().getLimitsExceeded() == kj::none; + } + return true; +} + +} // namespace jsg::JsArrayBuffer randomPrime(jsg::Lock& js, uint32_t size, @@ -15,6 +28,9 @@ jsg::JsArrayBuffer randomPrime(jsg::Lock& js, kj::Maybe> rem_buf) { ncrypto::ClearErrorOnReturn clearErrorOnReturn; + JSG_REQUIRE(size <= kMaxPrimeBits, RangeError, "generatePrime size exceeds maximum (", + kMaxPrimeBits, " bits)"); + // Use mapping to have kj::Own work with optional buffer static const auto toBignum = [](kj::Maybe>& maybeBignum) -> ncrypto::BignumPointer { @@ -30,6 +46,7 @@ jsg::JsArrayBuffer randomPrime(jsg::Lock& js, auto add = toBignum(add_buf); auto rem = toBignum(rem_buf); + // The JS interface already 
ensures that the (positive) size fits into an int. int bits = static_cast(size); @@ -72,12 +89,14 @@ jsg::JsArrayBuffer randomPrime(jsg::Lock& js, JSG_REQUIRE( workerd::api::CSPRNG(nullptr), Error, "Error while generating prime (bad random state)"); - if (auto prime = ncrypto::BignumPointer::NewPrime({ - .bits = bits, - .safe = safe, - .add = kj::mv(add), - .rem = kj::mv(rem), - })) { + if (auto prime = ncrypto::BignumPointer::NewPrime( + { + .bits = bits, + .safe = safe, + .add = kj::mv(add), + .rem = kj::mv(rem), + }, + checkLimitEnforcer)) { auto buf = JSG_REQUIRE_NONNULL( bignumToArrayPadded(js, *prime.get()), Error, "Error while generating prime"); return jsg::JsArrayBuffer::create(js, buf.asArrayPtr()); @@ -88,14 +107,15 @@ jsg::JsArrayBuffer randomPrime(jsg::Lock& js, bool checkPrime(kj::ArrayPtr bufferView, uint32_t num_checks) { ncrypto::ClearErrorOnReturn clearErrorOnReturn; - static constexpr int32_t kMaxChecks = kj::maxValue; - // Strictly upper bound the number of checks. If this proves to be too expensive - // then we may need to consider lowering this limit further. + // Maximum BoringSSL recommends for any use case. 
+ static constexpr uint32_t kMaxChecks = 64; JSG_REQUIRE(num_checks <= kMaxChecks, RangeError, "Invalid number of checks"); + JSG_REQUIRE(bufferView.size() <= kMaxPrimeBits / CHAR_BIT, RangeError, + "checkPrime candidate exceeds maximum size"); auto candidate = ncrypto::BignumPointer(bufferView.begin(), bufferView.size()); JSG_REQUIRE(candidate, Error, "Error while checking prime"); - return candidate.isPrime(num_checks); + return candidate.isPrime(num_checks, checkLimitEnforcer); } } // namespace workerd::api diff --git a/src/workerd/api/crypto/x509.c++ b/src/workerd/api/crypto/x509.c++ index 050bdc788bf..5db2dc9b0d0 100644 --- a/src/workerd/api/crypto/x509.c++ +++ b/src/workerd/api/crypto/x509.c++ @@ -44,8 +44,9 @@ kj::String toString(BIO* bio) { BIO_get_mem_ptr(bio, &mem); auto result = kj::heapArray(mem->length + 1); kj::ArrayPtr data(mem->data, mem->length); - result.first(data.size()).copyFrom(data); - result[result.size() - 1] = '\0'; // NUL-terminate. + auto remaining = result.asPtr(); + remaining.write(data); + remaining[0] = '\0'; // NUL-terminate. return kj::String(kj::mv(result)); } diff --git a/src/workerd/api/fetch-body-rewindable-test.c++ b/src/workerd/api/fetch-body-rewindable-test.c++ new file mode 100644 index 00000000000..287ed46b7fd --- /dev/null +++ b/src/workerd/api/fetch-body-rewindable-test.c++ @@ -0,0 +1,125 @@ +// Copyright (c) 2017-2022 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +#include "global-scope.h" + +#include +#include +#include +#include + +#include + +namespace workerd::api { +namespace { + +// Records, in call order, every value passed to setNextSubrequestBodyRewindable(). 
+class RecordingRequestObserver final: public RequestObserver { + public: + RecordingRequestObserver(kj::Vector& calls): calls(calls) {} + + void setNextSubrequestBodyRewindable(SubrequestBodyRewindable bodyRewindable) override { + calls.add(bodyRewindable.toBool()); + } + + private: + kj::Vector& calls; +}; + +// Minimal WorkerInterface that answers every outgoing request() with an empty 200, draining the +// request body first so a streaming sender doesn't block on backpressure. +class MockFetchTarget final: public WorkerInterface { + public: + kj::Promise request(kj::HttpMethod method, + kj::StringPtr url, + const kj::HttpHeaders& headers, + kj::AsyncInputStream& requestBody, + kj::HttpService::Response& response) override { + co_await requestBody.readAllBytes(); + // Build the response headers on the same HttpHeaderTable as the request headers; the runtime + // reads the response with its own registered header IDs, so a fresh table would mismatch. + auto responseHeaders = headers.cloneShallow(); + responseHeaders.clear(); + response.send(200, "OK"_kj, responseHeaders, static_cast(0)); + } + + kj::Promise connect(kj::StringPtr host, + const kj::HttpHeaders& headers, + kj::AsyncIoStream& connection, + ConnectResponse& response, + kj::HttpConnectSettings settings) override { + KJ_UNIMPLEMENTED("not used in this test"); + } + kj::Promise prewarm(kj::StringPtr url) override { + KJ_UNIMPLEMENTED("not used in this test"); + } + kj::Promise runScheduled(kj::Date scheduledTime, kj::StringPtr cron) override { + KJ_UNIMPLEMENTED("not used in this test"); + } + kj::Promise runAlarm(kj::Date scheduledTime, uint32_t retryCount) override { + KJ_UNIMPLEMENTED("not used in this test"); + } + kj::Promise customEvent(kj::Own event) override { + return event->notSupported(); + } +}; + +struct FetchTargetIoChannelFactory final: public TestFixture::DummyIoChannelFactory { + FetchTargetIoChannelFactory(TimerChannel& timer): DummyIoChannelFactory(timer) {} + + kj::Own 
startSubrequest(uint channel, SubrequestMetadata metadata) override { + return kj::heap(); + } +}; + +// fetchImplNoOutputLock forwards Request::canRewindBody() to RequestObserver so that, downstream, +// edgeworker can classify retry eligibility for disconnected outgoing actor calls. The subtle +// property here is that the stashed signal is per-call, not sticky: a single RequestObserver is +// shared across every outgoing subrequest in an IoContext, so the value set for one call must not +// carry over into the next. We issue two fetches in one invocation -- a rewindable (buffered) body +// then a non-rewindable (stream) body -- to exercise that shared observer across consecutive calls +// and verify the per-body mapping, the per-call sequencing, and the absence of stale attribution all +// at once (the no-staleness behaviour can only be observed across more than one fetch). +KJ_TEST("fetch reports each outgoing body's rewindability per-call without staleness") { + kj::Vector bodyRewindableCalls; + + TestFixture fixture(TestFixture::SetupParams{ + .mainModuleSource = R"SCRIPT( + export default { + async fetch(request) { + // Buffered (string) body: rewindable. + await fetch("http://example.com/buffered", { method: "POST", body: "hello" }); + + // The incoming request body is a (non-buffer-backed) stream, so forwarding it yields a + // non-rewindable body. 
+ await fetch("http://example.com/stream", + { method: "POST", body: request.body, duplex: "half" }); + + return new Response("OK"); + }, + }; + )SCRIPT"_kj, + .ioChannelFactory = kj::Function(TimerChannel&)>( + [&](TimerChannel& timer) -> kj::Own { + return kj::heap(timer); + }), + .requestObserverFactory = kj::Function()>( + [&]() -> kj::Own { + return kj::refcounted(bodyRewindableCalls); + }), + }); + + auto result = + fixture.runRequest(kj::HttpMethod::POST, "http://www.example.com"_kj, "incoming-body"_kj); + KJ_EXPECT(result.statusCode == 200); + + KJ_ASSERT(bodyRewindableCalls.size() == 2, + "expected exactly one rewindability signal per outgoing fetch"); + KJ_EXPECT(bodyRewindableCalls[0] == true, "buffered request body should be rewindable"); + KJ_EXPECT( + bodyRewindableCalls[1] == false, "streamed request body should not be rewindable (no carryover)"); +} + +} // namespace +} // namespace workerd::api diff --git a/src/workerd/api/filesystem.c++ b/src/workerd/api/filesystem.c++ index 42006199e2f..b35358fc6f0 100644 --- a/src/workerd/api/filesystem.c++ +++ b/src/workerd/api/filesystem.c++ @@ -490,7 +490,7 @@ void FileSystemModule::close(jsg::Lock& js, int fd) { } uint32_t FileSystemModule::write( - jsg::Lock& js, int fd, kj::Array data, WriteOptions options) { + jsg::Lock& js, int fd, kj::Array> data, WriteOptions options) { auto& vfs = workerd::VirtualFileSystem::current(js); KJ_IF_SOME(opened, vfs.tryGetFd(js, fd)) { @@ -513,7 +513,8 @@ uint32_t FileSystemModule::write( auto pos = getPosition(js, opened.addRef(), file.addRef(), options); uint32_t total = 0; for (auto& buffer: data) { - KJ_SWITCH_ONEOF(file->write(js, pos, buffer)) { + auto handle = buffer.getHandle(js); + KJ_SWITCH_ONEOF(file->write(js, pos, handle.asArrayPtr())) { KJ_CASE_ONEOF(written, uint32_t) { pos += written; total += written; @@ -546,7 +547,7 @@ uint32_t FileSystemModule::write( } uint32_t FileSystemModule::read( - jsg::Lock& js, int fd, kj::Array data, WriteOptions options) { + 
jsg::Lock& js, int fd, kj::Array> data, WriteOptions options) { auto& vfs = workerd::VirtualFileSystem::current(js); KJ_IF_SOME(opened, vfs.tryGetFd(js, fd)) { if (!opened->read) { @@ -561,11 +562,12 @@ uint32_t FileSystemModule::read( } uint32_t total = 0; for (auto& buffer: data) { - auto read = file->read(js, pos, buffer); + auto handle = buffer.getHandle(js); + auto read = file->read(js, pos, handle.asArrayPtr()); // if read is less than the size of the buffer, we are at EOF. pos += read; total += read; - if (read < buffer.size()) break; + if (read < handle.size()) break; } // We only update the position if the options.position is not set. if (options.position == kj::none) { @@ -588,7 +590,8 @@ uint32_t FileSystemModule::read( } } -jsg::BufferSource FileSystemModule::readAll(jsg::Lock& js, kj::OneOf pathOrFd) { +jsg::JsRef FileSystemModule::readAll( + jsg::Lock& js, kj::OneOf pathOrFd) { auto& vfs = workerd::VirtualFileSystem::current(js); KJ_SWITCH_ONEOF(pathOrFd) { KJ_CASE_ONEOF(path, FilePath) { @@ -597,7 +600,7 @@ jsg::BufferSource FileSystemModule::readAll(jsg::Lock& js, kj::OneOf) { KJ_SWITCH_ONEOF(file->readAllBytes(js)) { - KJ_CASE_ONEOF(data, jsg::BufferSource) { + KJ_CASE_ONEOF(data, jsg::JsRef) { return kj::mv(data); } KJ_CASE_ONEOF(err, workerd::FsError) { @@ -635,7 +638,7 @@ jsg::BufferSource FileSystemModule::readAll(jsg::Lock& js, kj::OneOfreadAllBytes(js)) { - KJ_CASE_ONEOF(data, jsg::BufferSource) { + KJ_CASE_ONEOF(data, jsg::JsRef) { return kj::mv(data); } KJ_CASE_ONEOF(err, workerd::FsError) { @@ -656,7 +659,7 @@ jsg::BufferSource FileSystemModule::readAll(jsg::Lock& js, kj::OneOf pathOrFd, - jsg::BufferSource data, + jsg::JsBufferSource data, WriteAllOptions options) { auto& vfs = workerd::VirtualFileSystem::current(js); @@ -684,7 +687,7 @@ uint32_t FileSystemModule::writeAll(jsg::Lock& js, // If the append option is set, we will write to the end of the file // instead of overwriting it. 
if (options.append) { - KJ_SWITCH_ONEOF(file->write(js, stat.size, data)) { + KJ_SWITCH_ONEOF(file->write(js, stat.size, data.asArrayPtr())) { KJ_CASE_ONEOF(written, uint32_t) { return written; } @@ -696,7 +699,7 @@ uint32_t FileSystemModule::writeAll(jsg::Lock& js, } // Otherwise, we overwrite the entire file. - KJ_SWITCH_ONEOF(file->writeAll(js, data)) { + KJ_SWITCH_ONEOF(file->writeAll(js, data.asArrayPtr())) { KJ_CASE_ONEOF(written, uint32_t) { return written; } @@ -737,7 +740,7 @@ uint32_t FileSystemModule::writeAll(jsg::Lock& js, node::THROW_ERR_UV_EPERM(js, "writeAll"_kj); } auto file = workerd::File::newWritable(js, static_cast(data.size())); - KJ_SWITCH_ONEOF(file->writeAll(js, data)) { + KJ_SWITCH_ONEOF(file->writeAll(js, data.asArrayPtr())) { KJ_CASE_ONEOF(written, uint32_t) { KJ_IF_SOME(err, dir->add(js, relative.name, kj::mv(file))) { throwFsError(js, err, "writeAll"_kj); @@ -788,14 +791,14 @@ uint32_t FileSystemModule::writeAll(jsg::Lock& js, // If the file descriptor was opened in append mode, or if the append option // is set, then we'll use write instead to append to the end of the file. if (opened->append || options.append) { - return write(js, fd, kj::arr(kj::mv(data)), + return write(js, fd, kj::arr(data.addRef(js)), { .position = stat.size, }); } // Otherwise, we overwrite the entire file. - KJ_SWITCH_ONEOF(file->writeAll(js, data)) { + KJ_SWITCH_ONEOF(file->writeAll(js, data.asArrayPtr())) { KJ_CASE_ONEOF(written, uint32_t) { return written; } @@ -1147,7 +1150,7 @@ void readdirImpl(jsg::Lock& js, const kj::Path& path, const FileSystemModule::ReadDirOptions& options, kj::Vector& entries) { - for (auto& entry: *dir.get()) { + for (auto& entry: *dir) { auto name = options.recursive ? 
path.append(entry.key).toString(false) : kj::str(entry.key); KJ_SWITCH_ONEOF(entry.value) { KJ_CASE_ONEOF(file, kj::Rc) { @@ -1421,7 +1424,7 @@ void handleCpDir(jsg::Lock& js, // Here, we iterate through each of the entries in the source directory, // recursively copying them to the destination directory. - for (auto& entry: *src.get()) { + for (auto& entry: *src) { kj::StringPtr name = entry.key; KJ_SWITCH_ONEOF(entry.value) { KJ_CASE_ONEOF(file, kj::Rc) { @@ -1890,9 +1893,9 @@ jsg::Ref FileSystemModule::openAsBlob( } KJ_CASE_ONEOF(file, kj::Rc) { KJ_SWITCH_ONEOF(file->readAllBytes(js)) { - KJ_CASE_ONEOF(bytes, jsg::BufferSource) { - return js.alloc( - js, bytes.getJsHandle(js), kj::mv(options.type).orDefault(kj::String())); + KJ_CASE_ONEOF(bytes, jsg::JsRef) { + return js.alloc(js, jsg::JsBufferSource(bytes.getHandle(js)), + kj::mv(options.type).orDefault(kj::String())); } KJ_CASE_ONEOF(err, workerd::FsError) { throwFsError(js, err, "open"_kj); @@ -2125,7 +2128,7 @@ bool FileSystemHandle::canBeModifiedCurrently(jsg::Lock& js) const { auto pathname = getLocator().getPathname(); if (pathname.endsWith("/"_kj)) { auto cloned = getLocator().clone(); - cloned.setPathname(pathname.slice(0, pathname.size() - 1)); + cloned.setPathname(pathname.first(pathname.size() - 1)); return !getVfs().isLocked(js, cloned); } return !getVfs().isLocked(js, getLocator()); @@ -2357,7 +2360,7 @@ kj::Array> collectEntries(const workerd::VirtualFileS kj::Rc inner, const jsg::Url& parentLocator) { kj::Vector> entries; - for (auto& entry: *inner.get()) { + for (auto& entry: *inner) { KJ_SWITCH_ONEOF(entry.value) { KJ_CASE_ONEOF(file, kj::Rc) { auto locator = KJ_ASSERT_NONNULL(parentLocator.tryResolve(entry.key)); @@ -2557,10 +2560,10 @@ jsg::Promise> FileSystemFileHandle::getFile( KJ_CASE_ONEOF(file, kj::Rc) { auto stat = file->stat(js); KJ_SWITCH_ONEOF(file->readAllBytes(js)) { - KJ_CASE_ONEOF(bytes, jsg::BufferSource) { - return js.resolvedPromise( - js.alloc(js, bytes.getJsHandle(js), 
jsg::USVString(kj::str(getName(js))), - kj::String(), (stat.lastModified - kj::UNIX_EPOCH) / kj::MILLISECONDS)); + KJ_CASE_ONEOF(bytes, jsg::JsRef) { + return js.resolvedPromise(js.alloc(js, jsg::JsBufferSource(bytes.getHandle(js)), + jsg::USVString(kj::str(getName(js))), kj::String(), + (stat.lastModified - kj::UNIX_EPOCH) / kj::MILLISECONDS)); } KJ_CASE_ONEOF(err, workerd::FsError) { return js.rejectedPromise>( @@ -2659,8 +2662,7 @@ jsg::Promise> FileSystemFileHandle::creat jsg::Lock& js, v8::Local chunk, auto c) mutable { return js.tryCatch([&] { KJ_IF_SOME(unwrapped, dataHandler.tryUnwrap(js, chunk)) { - return FileSystemWritableFileStream::writeImpl( - js, kj::mv(unwrapped), *state.get(), deHandler); + return FileSystemWritableFileStream::writeImpl(js, kj::mv(unwrapped), *state, deHandler); } return js.rejectedPromise( js.typeError("WritableStream received a value that is not writable")); @@ -2724,13 +2726,13 @@ FileSystemWritableFileStream::FileSystemWritableFileStream( sharedState(kj::mv(sharedState)) {} jsg::Promise FileSystemWritableFileStream::write(jsg::Lock& js, - kj::OneOf, jsg::BufferSource, kj::String, WriteParams> data, + FileSystemWritableData data, const jsg::TypeHandler>& deHandler) { JSG_REQUIRE(!getController().isLockedToWriter(), TypeError, "Cannot write to a stream that is locked to a reader"); auto writer = getWriter(js); KJ_DEFER(writer->releaseLock(js)); - return writeImpl(js, kj::mv(data), *sharedState.get(), deHandler); + return writeImpl(js, kj::mv(data), *sharedState, deHandler); } jsg::Promise FileSystemWritableFileStream::writeImpl(jsg::Lock& js, @@ -2750,8 +2752,9 @@ jsg::Promise FileSystemWritableFileStream::writeImpl(jsg::Lock& js, } } } - KJ_CASE_ONEOF(buffer, jsg::BufferSource) { - KJ_SWITCH_ONEOF(inner->write(js, state.position, buffer)) { + KJ_CASE_ONEOF(buffer, jsg::JsRef) { + auto handle = buffer.getHandle(js); + KJ_SWITCH_ONEOF(inner->write(js, state.position, handle.asArrayPtr())) { KJ_CASE_ONEOF(written, uint32_t) { 
state.position += written; } @@ -2799,8 +2802,9 @@ jsg::Promise FileSystemWritableFileStream::writeImpl(jsg::Lock& js, } KJ_UNREACHABLE; } - KJ_CASE_ONEOF(buffer, jsg::BufferSource) { - KJ_SWITCH_ONEOF(inner->write(js, offset, buffer)) { + KJ_CASE_ONEOF(buffer, jsg::JsRef) { + auto handle = buffer.getHandle(js); + KJ_SWITCH_ONEOF(inner->write(js, offset, handle.asArrayPtr())) { KJ_CASE_ONEOF(written, uint32_t) { state.position = offset + written; return js.resolvedPromise(); diff --git a/src/workerd/api/filesystem.h b/src/workerd/api/filesystem.h index b774fb45b79..2810c1e1c58 100644 --- a/src/workerd/api/filesystem.h +++ b/src/workerd/api/filesystem.h @@ -103,10 +103,10 @@ class FileSystemModule final: public jsg::Object { JSG_STRUCT(position); }; - uint32_t write(jsg::Lock& js, int fd, kj::Array data, WriteOptions options); - uint32_t read(jsg::Lock& js, int fd, kj::Array data, WriteOptions options); + uint32_t write(jsg::Lock& js, int fd, kj::Array> data, WriteOptions options); + uint32_t read(jsg::Lock& js, int fd, kj::Array> data, WriteOptions options); - jsg::BufferSource readAll(jsg::Lock& js, kj::OneOf pathOrFd); + jsg::JsRef readAll(jsg::Lock& js, kj::OneOf pathOrFd); struct WriteAllOptions { bool exclusive; @@ -116,7 +116,7 @@ class FileSystemModule final: public jsg::Object { uint32_t writeAll(jsg::Lock& js, kj::OneOf pathOrFd, - jsg::BufferSource data, + jsg::JsBufferSource data, WriteAllOptions options); struct RenameOrCopyOptions { @@ -298,12 +298,12 @@ struct FileSystemFileWriteParams { jsg::Optional position; // Yes, wrapping the kj::Maybe with a jsg::Optional is intentional here. We need to // be able to accept null or undefined values and handle them per the spec. 
- jsg::Optional, jsg::BufferSource, kj::String>>> data; + jsg::Optional, jsg::JsRef, kj::String>>> data; JSG_STRUCT(type, size, position, data); }; using FileSystemWritableData = - kj::OneOf, jsg::BufferSource, kj::String, FileSystemFileWriteParams>; + kj::OneOf, jsg::JsRef, kj::String, FileSystemFileWriteParams>; class FileSystemFileHandle final: public FileSystemHandle { public: diff --git a/src/workerd/api/form-data.c++ b/src/workerd/api/form-data.c++ index f85b98b6f1e..592e649fd6b 100644 --- a/src/workerd/api/form-data.c++ +++ b/src/workerd/api/form-data.c++ @@ -290,13 +290,14 @@ void FormData::parse(jsg::Lock& js, // // TODO(conform): Transcode to UTF-8, like the spec tells us to. assertUtf8(params); + auto& adjustment = externalMemoryAdjustment.emplace(js.getExternalMemoryAdjustment()); kj::Vector query; - parseQueryString(query, kj::mv(rawText)); + parseQueryString(query, kj::mv(rawText), adjustment); data.reserve(query.size()); for (auto& param: query) { data.add(Entry{ - .name = kj::str(param.name), - .value = kj::str(param.value), + .name = kj::mv(param.name), + .value = kj::mv(param.value), }); } return; diff --git a/src/workerd/api/form-data.h b/src/workerd/api/form-data.h index 060855c44ba..d254a36980d 100644 --- a/src/workerd/api/form-data.h +++ b/src/workerd/api/form-data.h @@ -188,6 +188,11 @@ class FormData: public jsg::Object { private: kj::Vector data; + // Tracks heap used by parsed URL-encoded entries so V8's per-isolate memory limit can + // account for it. Accumulated during parseQueryString() and retained for the lifetime + // of this FormData object. 
+ kj::Maybe externalMemoryAdjustment; + static EntryType clone(jsg::Lock& js, EntryType& value); template diff --git a/src/workerd/api/global-scope.c++ b/src/workerd/api/global-scope.c++ index 21ff2274275..57fa969ba00 100644 --- a/src/workerd/api/global-scope.c++ +++ b/src/workerd/api/global-scope.c++ @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -84,10 +85,7 @@ jsg::Promise CacheContext::purge(jsg::Lock& js, JSG_FAIL_REQUIRE(Error, "Cache purge is not available in this context."); } -jsg::Optional> ExecutionContext::getTracing(jsg::Lock& js) { - if (!FeatureFlags::get(js).getWorkerdExperimental()) { - return kj::none; - } +jsg::Ref ExecutionContext::getTracing(jsg::Lock& js) { // A new Tracing handle is allocated on first access only - `JSG_LAZY_INSTANCE_PROPERTY` // uses V8's SetLazyDataProperty, which caches the getter result on the instance after the // first call. So `ctx.tracing === ctx.tracing` and only one allocation per @@ -96,6 +94,31 @@ jsg::Optional> ExecutionContext::getTracing(jsg::Lock& js) { return js.alloc(); } +kj::StringPtr AccessContext::getAud() { + return info->getAudience(); +} + +jsg::Promise> AccessContext::getIdentity(jsg::Lock& js) { + auto& ioctx = IoContext::current(); + return ioctx.awaitIo(js, info->getIdentity(), + [](jsg::Lock& js, kj::Maybe json) -> jsg::Optional { + KJ_IF_SOME(j, json) { + return jsg::JsValue(js.parseJson(j).getHandle(js)); + } + return kj::none; + }); +} + +jsg::Optional> ExecutionContext::getAccess(jsg::Lock& js) { + // Pull the per-request AccessInfo (if any) off the current IncomingRequest. Standalone workerd + // never supplies one; production embedders construct one before calling newWorkerEntrypoint(). 
+ if (!IoContext::hasCurrent()) return kj::none; + auto& ioctx = IoContext::current(); + KJ_IF_SOME(info, ioctx.getAccessInfo()) { + return js.alloc(ioctx.addObject(kj::addRef(info))); + } + return kj::none; +} void ExecutionContext::abort(jsg::Lock& js, jsg::Optional reason) { KJ_IF_SOME(r, reason) { IoContext::current().abort(js.exceptionToKj(kj::mv(r))); @@ -502,7 +525,7 @@ namespace { // Returns true if an alarm failure should count against the user's retry limit. // A failure is user-generated if any of: // - The exception was explicitly tagged with EXCEPTION_IS_USER_ERROR at construction time -// (e.g. state.abort(), exceededCpu, exceededMemory, overload queue). +// (e.g. state.abort(), exceededCpu, exceededMemory, exceededWallTime, overload queue). // - The exception originated from user code throwing inside blockConcurrencyWhile, which // breaks the input gate as a secondary side-effect. // - The exception is a plain jsg.* error without broken.* or jsg-internal.* prefixes, @@ -572,7 +595,7 @@ kj::Promise ServiceWorkerGlobalScope::runAlarm(kj: auto e = KJ_EXCEPTION(OVERLOADED, "broken.dropped; worker_do_not_log; jsg.Error: Alarm exceeded its allowed execution time"); e.setDetail(jsg::EXCEPTION_IS_USER_ERROR, kj::heapArray(0)); - e.setDetail(CPU_LIMIT_DETAIL_ID, kj::heapArray(0)); + e.setDetail(WALL_TIME_LIMIT_DETAIL_ID, kj::heapArray(0)); context.getMetrics().reportFailure(e); // We don't want the handler to keep running after timeout. @@ -581,8 +604,9 @@ kj::Promise ServiceWorkerGlobalScope::runAlarm(kj: // retriable, and we'll count the retries against the alarm retries limit. This will ensure // that the handler will attempt to run for a number of times before giving up and deleting // the alarm. 
- return WorkerInterface::AlarmResult{ - .retry = true, .retryCountsAgainstLimit = true, .outcome = EventOutcome::EXCEEDED_CPU}; + return WorkerInterface::AlarmResult{.retry = true, + .retryCountsAgainstLimit = true, + .outcome = EventOutcome::EXCEEDED_WALL_TIME}; }); return alarm(lock, js.alloc(scheduledTime, retryCount)) diff --git a/src/workerd/api/global-scope.h b/src/workerd/api/global-scope.h index 0abc00c27c1..89a26e89bb6 100644 --- a/src/workerd/api/global-scope.h +++ b/src/workerd/api/global-scope.h @@ -25,6 +25,10 @@ namespace workerd::jsg { class DOMException; } // namespace workerd::jsg +namespace workerd { +class AccessInfo; +} // namespace workerd + namespace workerd::api { class Tracing; @@ -240,6 +244,48 @@ class CacheContext: public jsg::Object { } }; +// Concrete wrapper exposing per-request Cloudflare Access authentication info to JavaScript +// as `ctx.access`. The actual auth data is supplied by the embedding application via +// `workerd::AccessInfo`, which is plumbed through `newWorkerEntrypoint()` onto +// `IoContext::IncomingRequest`. +// +// Standalone workerd never constructs one of these (no `AccessInfo` is supplied), so +// `ctx.access` is `undefined`. Embedders construct a concrete `AccessInfo` subclass and pass it +// through the entrypoint; `ExecutionContext::getAccess()` lazily wraps it in this class. +class AccessContext: public jsg::Object { + public: + explicit AccessContext(IoOwn info): info(kj::mv(info)) {} + + // Returns the audience claim from the Access JWT. + kj::StringPtr getAud(); + + // Fetches the full identity information for the authenticated user. Resolves to `undefined` + // if no identity is associated with the request (e.g. service-token authentication). 
+ jsg::Promise> getIdentity(jsg::Lock& js); + + JSG_RESOURCE_TYPE(AccessContext) { + JSG_READONLY_INSTANCE_PROPERTY(aud, getAud); + JSG_METHOD(getIdentity); + JSG_TS_OVERRIDE(CloudflareAccessContext { + /** + * The audience tag (AUD) of the Access application protecting this Worker, + * taken from the validated Access JWT. + */ + readonly aud: string; + /** + * Fetches the authenticated user's identity information from Cloudflare + * Access, equivalent to calling `/cdn-cgi/access/get-identity`. + * Resolves to `undefined` when no identity is associated with the request + * (e.g. service-token authentication). + */ + getIdentity(): Promise; + }); + } + + private: + IoOwn info; +}; + class ExecutionContext: public jsg::Object { public: ExecutionContext(jsg::Lock& js, jsg::JsValue exports) @@ -293,7 +339,11 @@ class ExecutionContext: public jsg::Object { return js.undefined(); } - jsg::Optional> getTracing(jsg::Lock& js); + jsg::Ref getTracing(jsg::Lock& js); + + // Returns an AccessContext for the current request, or empty jsg::Optional otherwise. + // Called by the runtime to provide Cloudflare Access authentication context. + jsg::Optional> getAccess(jsg::Lock& js); JSG_RESOURCE_TYPE(ExecutionContext, CompatibilityFlags::Reader flags) { JSG_METHOD(waitUntil); @@ -306,12 +356,10 @@ class ExecutionContext: public jsg::Object { if (flags.getEnableVersionApi()) { JSG_LAZY_INSTANCE_PROPERTY(version, getVersion); } + JSG_LAZY_INSTANCE_PROPERTY(access, getAccess); - // ctx.tracing - user tracing API. The *type* is always visible (so the generated - // `Tracing` / `Span` types exist in every compat-date snapshot, not only the - // experimental one). The *value* is `undefined` outside the `workerdExperimental` - // compat flag - the gate lives in `getTracing()` in global-scope.c++. - // TODO: Remove this comment once the feature is stable. + // ctx.tracing - user tracing API. 
Always available; the Tracing object is stateless + // and enterSpan() is a no-op when called outside a traced request. JSG_LAZY_INSTANCE_PROPERTY(tracing, getTracing); if (flags.getWorkerdExperimental()) { @@ -340,11 +388,13 @@ class ExecutionContext: public jsg::Object { readonly key?: string; readonly override?: string; }; + readonly access?: CloudflareAccessContext; }); } else { JSG_TS_OVERRIDE( { readonly props: Props; readonly exports: Cloudflare.Exports; + readonly access?: CloudflareAccessContext; }); } } else { @@ -357,10 +407,12 @@ class ExecutionContext: public jsg::Object { readonly key?: string; readonly override?: string; }; + readonly access?: CloudflareAccessContext; }); } else { JSG_TS_OVERRIDE( { readonly props: Props; + readonly access?: CloudflareAccessContext; }); } } @@ -1094,6 +1146,6 @@ class ServiceWorkerGlobalScope: public WorkerGlobalScope { api::ExecutionContext, api::ExportedHandler, \ api::ServiceWorkerGlobalScope::StructuredCloneOptions, api::Navigator, \ api::AlarmInvocationInfo, api::Immediate, api::Cloudflare, api::CachePurgeError, \ - api::CachePurgeResult, api::CachePurgeOptions, api::CacheContext + api::CachePurgeResult, api::CachePurgeOptions, api::CacheContext, api::AccessContext // The list of global-scope.h types that are added to worker.c++'s JSG_DECLARE_ISOLATE_TYPE } // namespace workerd::api diff --git a/src/workerd/api/hibernatable-web-socket.c++ b/src/workerd/api/hibernatable-web-socket.c++ index b7606c61c43..960b4276608 100644 --- a/src/workerd/api/hibernatable-web-socket.c++ +++ b/src/workerd/api/hibernatable-web-socket.c++ @@ -68,7 +68,7 @@ kj::Promise HibernatableWebSocketCustomEve auto& context = incomingRequest->getContext(); incomingRequest->delivered(); - KJ_DEFER({ waitUntilTasks.add(incomingRequest->drain().attach(kj::mv(incomingRequest))); }); + KJ_DEFER({ incomingRequest->drain(waitUntilTasks, kj::mv(incomingRequest)); }); EventOutcome outcome = EventOutcome::OK; diff --git 
a/src/workerd/api/html-rewriter.c++ b/src/workerd/api/html-rewriter.c++ index 3a8c2f8fabf..0d2556b391a 100644 --- a/src/workerd/api/html-rewriter.c++ +++ b/src/workerd/api/html-rewriter.c++ @@ -443,7 +443,7 @@ const kj::FiberPool& getFiberPool() { kj::Promise Rewriter::write(kj::ArrayPtr buffer) { KJ_ASSERT(maybeWaitScope == kj::none); // Defer fiber creation until the event loop runs. If this promise is dropped synchronously - // (e.g. by Canceler::cancel() during PumpToReader destruction), no fiber is created, avoiding + // (e.g. by stream pump cancellation), no fiber is created, avoiding // a KJ assertion failure when destroying an unfired fiber. Once the event loop processes this, // the fiber is created and immediately fires (armDepthFirst), so cancellation works normally. return kj::evalLater([this, buffer]() { diff --git a/src/workerd/api/http.c++ b/src/workerd/api/http.c++ index 1f5c3a16476..4ff3708a29a 100644 --- a/src/workerd/api/http.c++ +++ b/src/workerd/api/http.c++ @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -242,7 +243,7 @@ bool Body::getBodyUsed() { } return false; } -jsg::Promise Body::arrayBuffer(jsg::Lock& js) { +jsg::Promise> Body::arrayBuffer(jsg::Lock& js) { KJ_IF_SOME(i, impl) { return js.evalNow([&] { JSG_REQUIRE(!i.stream->isDisturbed(), TypeError, @@ -255,13 +256,15 @@ jsg::Promise Body::arrayBuffer(jsg::Lock& js) { // If there's no body, we just return an empty array. 
// See https://fetch.spec.whatwg.org/#concept-body-consume-body - auto backing = jsg::BackingStore::alloc(js, 0); - return js.resolvedPromise(jsg::BufferSource(js, kj::mv(backing))); + auto ab = jsg::JsArrayBuffer::create(js, 0); + return js.resolvedPromise(ab.addRef(js)); } -jsg::Promise Body::bytes(jsg::Lock& js) { - return arrayBuffer(js).then(js, - [](jsg::Lock& js, jsg::BufferSource data) { return data.getTypedView(js); }); +jsg::Promise> Body::bytes(jsg::Lock& js) { + return arrayBuffer(js).then(js, [](jsg::Lock& js, jsg::JsRef data) { + auto handle = data.getHandle(js); + return jsg::JsUint8Array::create(js, handle).addRef(js); + }); } jsg::Promise Body::text(jsg::Lock& js) { @@ -331,7 +334,10 @@ jsg::Promise Body::json(jsg::Lock& js) { } jsg::Promise> Body::blob(jsg::Lock& js) { - return arrayBuffer(js).then(js, [this](jsg::Lock& js, jsg::BufferSource buffer) { + // Note: `self` (jsg::Ref) is captured to prevent GC from collecting this object while + // the promise continuation is pending. Without it, the bare `this` pointer dangles. 
+ return arrayBuffer(js).then( + js, [this, self = JSG_THIS](jsg::Lock& js, jsg::JsRef buffer) { kj::String contentType = headersRef.getCommon(js, capnp::CommonHeaderName::CONTENT_TYPE) .map([](auto&& b) -> kj::String { return kj::mv(b); @@ -344,7 +350,7 @@ jsg::Promise> Body::blob(jsg::Lock& js) { }).orDefault(nullptr); } - return js.alloc(js, buffer.getJsHandle(js), kj::mv(contentType)); + return js.alloc(js, jsg::JsBufferSource(buffer.getHandle(js)), kj::mv(contentType)); }); } @@ -1333,7 +1339,7 @@ kj::Promise> Response::send(jsg::Lock& js, } auto clientSocket = outer.acceptWebSocket(outHeaders); - auto wsPromise = ws->couple(kj::mv(clientSocket), context.getMetrics()); + auto wsPromise = ws->couple(js, kj::mv(clientSocket), context.getMetrics()); KJ_IF_SOME(a, context.getActor()) { KJ_IF_SOME(hib, a.getHibernationManager()) { @@ -1547,6 +1553,14 @@ jsg::Promise> fetchImplNoOutputLock(jsg::Lock& js, } } + // Stash whether this request's body can be rewound (and so the request re-sent), before we lose + // access to the JS-level request. This is currently consumed only when the target is an actor + // (Durable Object), to classify retry eligibility for disconnected calls; for other fetches the + // value is simply overwritten by the next call and never read. The set->getClientWithTracing-> + // wrap*SubrequestClient sequence is synchronous, so there is no stale-attribution risk. 
+ ioContext.getMetrics().setNextSubrequestBodyRewindable( + SubrequestBodyRewindable(jsRequest->canRewindBody())); + // Get client and trace context (if needed) in one clean call auto clientWithTracing = fetcher->getClientWithTracing(ioContext, jsRequest->serializeCfBlobJson(js), "fetch"_kjc); auto traceContext = kj::mv(clientWithTracing.traceContext); @@ -2183,25 +2197,35 @@ void Fetcher::serialize(jsg::Lock& js, jsg::Serializer& serializer) { } } return; + } else KJ_IF_SOME(storedHandler, kj::tryDowncast(handler)) { + // The allow_irrevocable_stub_storage flag allows us to just embed the token inline. This + // format is temporary, anyone using this will lose their data later. + JSG_REQUIRE(FeatureFlags::get(js).getAllowIrrevocableStubStorage(), DOMDataCloneError, + "ServiceStub cannot be serialized in this context."); + KJ_SWITCH_ONEOF(channel->getTokenMaybeSync(IoChannelFactory::ChannelTokenUsage::STORAGE)) { + KJ_CASE_ONEOF(token, kj::Array) { + // Token is available synchronously. For backwards compatibility, write it directly into + // the serialized value. + // TODO(cleanup): As soon as all of production is updated to understand externals, stop + // writing inline tokens. + serializer.writeLengthDelimited(token); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { + storedHandler.writeChannel(kj::mv(channel), kj::mv(promise)); + + // Write an empty array to signal that we're using an external rather than an inline + // token. + serializer.writeLengthDelimited(kj::ArrayPtr()); + } + } + return; } + // TODO(someday): structuredClone() should have special handling that just reproduces the same // local object. At present we have no way to recognize structuredClone() here though. } - // The allow_irrevocable_stub_storage flag allows us to just embed the token inline. This format - // is temporary, anyone using this will lose their data later. 
- JSG_REQUIRE(FeatureFlags::get(js).getAllowIrrevocableStubStorage(), DOMDataCloneError, - "ServiceStub cannot be serialized in this context."); - KJ_SWITCH_ONEOF(channel->getTokenMaybeSync(IoChannelFactory::ChannelTokenUsage::STORAGE)) { - KJ_CASE_ONEOF(token, kj::Array) { - serializer.writeLengthDelimited(token); - } - KJ_CASE_ONEOF(promise, kj::Promise>) { - // TODO(stub-storage): Eventually we'll serialize by pointing to an external table. - KJ_UNIMPLEMENTED( - "tried to store SubrequestChannel whose token is not synchronously available"); - } - } + JSG_FAIL_REQUIRE(DOMDataCloneError, "ServiceStub cannot be serialized in this context."); } jsg::Ref Fetcher::deserialize(jsg::Lock& js, @@ -2244,18 +2268,29 @@ jsg::Ref Fetcher::deserialize(jsg::Lock& js, KJ_FAIL_REQUIRE("wrong external type for Fetcher", external.which()); } + return js.alloc(ioctx.addObject(kj::mv(channel))); + } else KJ_IF_SOME(storedHandler, + kj::tryDowncast(handler)) { + // The allow_irrevocable_stub_storage flag allows us to just embed the token inline. This + // format is temporary, anyone using this will lose their data later. + JSG_REQUIRE(FeatureFlags::get(js).getAllowIrrevocableStubStorage(), DOMDataCloneError, + "ServiceStub cannot be deserialized in this context."); + auto& ioctx = IoContext::current(); + auto token = deserializer.readLengthDelimitedBytes(); + kj::Own channel; + if (token.size() > 0) { + // Token embedded inline, just use it. + channel = ioctx.getIoChannelFactory().subrequestChannelFromToken( + IoChannelFactory::ChannelTokenUsage::STORAGE, token); + } else { + // Token stored out-of-line as an external. + channel = storedHandler.readSubrequestChannel(ioctx.getIoChannelFactory()); + } return js.alloc(ioctx.addObject(kj::mv(channel))); } } - // The allow_irrevocable_stub_storage flag allows us to just embed the token inline. This format - // is temporary, anyone using this will lose their data later. 
- JSG_REQUIRE(FeatureFlags::get(js).getAllowIrrevocableStubStorage(), DOMDataCloneError, - "ServiceStub cannot be deserialized in this context."); - auto& ioctx = IoContext::current(); - auto channel = ioctx.getIoChannelFactory().subrequestChannelFromToken( - IoChannelFactory::ChannelTokenUsage::STORAGE, deserializer.readLengthDelimitedBytes()); - return js.alloc(ioctx.addObject(kj::mv(channel))); + JSG_FAIL_REQUIRE(DOMDataCloneError, "ServiceStub cannot be deserialized in this context."); } static jsg::Promise throwOnError( diff --git a/src/workerd/api/http.h b/src/workerd/api/http.h index df9f1a4c4f9..8d0cd54b960 100644 --- a/src/workerd/api/http.h +++ b/src/workerd/api/http.h @@ -164,8 +164,8 @@ class Body: public jsg::Object { kj::Maybe> getBody(); bool getBodyUsed(); - jsg::Promise arrayBuffer(jsg::Lock& js); - jsg::Promise bytes(jsg::Lock& js); + jsg::Promise> arrayBuffer(jsg::Lock& js); + jsg::Promise> bytes(jsg::Lock& js); jsg::Promise text(jsg::Lock& js); jsg::Promise> formData(jsg::Lock& js); jsg::Promise json(jsg::Lock& js); @@ -362,7 +362,8 @@ class Fetcher: public JsRpcClientProvider { kj::OneOf, kj::String> requestOrUrl, jsg::Optional>> requestInit); - using GetResult = kj::OneOf, jsg::BufferSource, kj::String, jsg::Value>; + using GetResult = + kj::OneOf, jsg::JsRef, kj::String, jsg::Value>; jsg::Promise get(jsg::Lock& js, kj::String url, jsg::Optional type); diff --git a/src/workerd/api/messagechannel.c++ b/src/workerd/api/messagechannel.c++ index 515c8510773..4e199668bea 100644 --- a/src/workerd/api/messagechannel.c++ +++ b/src/workerd/api/messagechannel.c++ @@ -2,14 +2,10 @@ #include "events.h" -#include #include -#include namespace workerd::api { -MessagePort::MessagePort() - : weakThis(kj::refcounted>(kj::Badge{}, *this)), - state(Pending()) { +MessagePort::MessagePort(): state(Pending()) { // We set a callback on the underlying EventTarget to be notified when // a listener for the message event is added or removed. 
When there // are no listeners, we move back to the Pending state, otherwise we @@ -82,9 +78,10 @@ void MessagePort::deliver(jsg::Lock& js, const jsg::JsValue& value) { // Binds two ports to each other such that messages posted to one // are delivered on the other. -void MessagePort::entangle(MessagePort& port1, MessagePort& port2) { - port1.other = port2.addWeakRef(); - port2.other = port1.addWeakRef(); +void MessagePort::entangle( + jsg::Lock& js, jsg::Ref& port1, jsg::Ref& port2) { + port1->other = port2.getWeakRef(js); + port2->other = port1.getWeakRef(js); } // Post a message to the entangled port. @@ -108,32 +105,27 @@ void MessagePort::postMessage(jsg::Lock& js, } JSG_REQUIRE(!hasTransfer, Error, "Transfer list is not supported"); - // If the port is closed, other will be kj::none and we will just drop the message. - other->runIfAlive([&](MessagePort& o) { - // Take a strong reference to prevent GC from freeing the target port during - // serialization. Serialization can run arbitrary user code via custom getters - // on the message object. That code could close this port (which also closes - // the entangled port), and then force GC to free the target port — leaving - // the `o` reference dangling for the deliver() call below. - auto ref = o.addRef(); - - jsg::Serializer ser(js); - - KJ_IF_SOME(d, data) { - ser.write(js, d.getHandle(js)); - } else { - ser.write(js, js.undefined()); - } + // If the port is closed or the peer has been collected, just drop the message. 
+ KJ_IF_SOME(o, other) { + KJ_IF_SOME(ref, o.tryAddRef(js)) { + jsg::Serializer ser(js); - auto released = ser.release(); - JSG_REQUIRE(released.sharedArrayBuffers.size() == 0, TypeError, - "SharedArrayBuffer is unsupported with MessagePort"); + KJ_IF_SOME(d, data) { + ser.write(js, d.getHandle(js)); + } else { + ser.write(js, js.undefined()); + } - // Now, deserialize the message into a JsValue - jsg::Deserializer deserializer(js, released); - auto clonedData = deserializer.readValue(js); - o.deliver(js, clonedData); - }); + auto released = ser.release(); + JSG_REQUIRE(released.sharedArrayBuffers.size() == 0, TypeError, + "SharedArrayBuffer is unsupported with MessagePort"); + + // Now, deserialize the message into a JsValue + jsg::Deserializer deserializer(js, released); + auto clonedData = deserializer.readValue(js); + ref->deliver(js, clonedData); + } + } } void MessagePort::closeImpl() { @@ -141,15 +133,29 @@ void MessagePort::closeImpl() { // already scheduled for delivery in the `start()` or `deliver()` methods. if (state.is()) return; state = Closed{}; - weakThis->invalidate(); - other->runIfAlive([&](MessagePort& o) { o.closeImpl(); }); + KJ_IF_SOME(o, other) { + // Use of tryGet here rather than tryAddRef is intentional. closeImpl + // is called from the destructor, where we may or may not have the + // isolate lock. Materializing a strong reference to the other port + // requires the isolate lock. The other = kj::none line below will + // ensure that the jsg::WeakRef is cleaned up under lock either + // immediately or eventually. 
+ KJ_IF_SOME(ref, o.tryGet()) { + ref.closeImpl(); + } + other = kj::none; + } } void MessagePort::close(jsg::Lock& js) { if (state.is()) return; state = Closed{}; - weakThis->invalidate(); - other->runIfAlive([&](MessagePort& o) { o.close(js); }); + KJ_IF_SOME(o, other) { + KJ_IF_SOME(ref, o.tryAddRef(js)) { + ref->close(js); + } + other = kj::none; + } auto closeEvent = js.alloc(kj::str("close"), Event::Init{}, true); dispatchEventImpl(js, kj::mv(closeEvent)); } @@ -201,7 +207,7 @@ void MessagePort::setOnMessage(jsg::Lock& js, jsg::JsValue value) { jsg::Ref MessageChannel::constructor(jsg::Lock& js) { auto port1 = js.alloc(); auto port2 = js.alloc(); - MessagePort::entangle(*port1, *port2); + MessagePort::entangle(js, port1, port2); return js.alloc(kj::mv(port1), kj::mv(port2)); } diff --git a/src/workerd/api/messagechannel.h b/src/workerd/api/messagechannel.h index 62bdf119be4..88c0be2b635 100644 --- a/src/workerd/api/messagechannel.h +++ b/src/workerd/api/messagechannel.h @@ -6,7 +6,6 @@ #include #include #include -#include namespace workerd::api { @@ -106,11 +105,7 @@ class MessagePort final: public EventTarget { // Bind two message ports together such that messages posted to // one are delivered to the other. - static void entangle(MessagePort& port1, MessagePort& port2); - - kj::Maybe getOther() { - return other->tryGet().map([](MessagePort& o) -> MessagePort& { return o; }); - } + static void entangle(jsg::Lock& js, jsg::Ref& port1, jsg::Ref& port2); // TODO(soon): Support serialization/deserialization to use MessagePort // with JSRPC. We'll need to implement a rpc mechanism for passing the @@ -126,12 +121,6 @@ class MessagePort final: public EventTarget { void dispatchMessage(jsg::Lock& js, const jsg::JsValue& value); - kj::Own> addWeakRef() { - KJ_ASSERT(weakThis->isValid()); - return kj::addRef(*weakThis); - } - - kj::Own> weakThis; kj::OneOf state; // Two ports are entangled when they weakly reference each other. 
@@ -139,7 +128,7 @@ class MessagePort final: public EventTarget { // ports gets GC'd the other will will also end up being closed. // To keep them both alive, maintain strong references to both // ports! - kj::Own> other; + kj::Maybe> other; kj::Maybe> onmessageValue; void visitForGc(jsg::GcVisitor& visitor) { diff --git a/src/workerd/api/node/buffer.c++ b/src/workerd/api/node/buffer.c++ index 40d88fe7fcb..2ef588a4494 100644 --- a/src/workerd/api/node/buffer.c++ +++ b/src/workerd/api/node/buffer.c++ @@ -123,7 +123,7 @@ uint32_t writeInto(jsg::Lock& js, auto backing = decodeHexTruncated(js, buf, false); auto bytes = backing.asArrayPtr(); auto amountToCopy = kj::min(bytes.size(), dest.size()); - dest.first(amountToCopy).copyFrom(bytes.first(amountToCopy)); + dest.write(bytes.first(amountToCopy)); return amountToCopy; } default: @@ -249,8 +249,7 @@ jsg::JsUint8Array BufferUtil::concat( // The amount to copy is the lesser of the remaining space in the destination or // the size of the chunk we're copying. auto amountToCopy = kj::min(ptr.size(), view.size()); - view.first(amountToCopy).copyFrom(ptr.first(amountToCopy)); - view = view.slice(amountToCopy); + view.write(ptr.first(amountToCopy)); // If there's no more space in the destination, we're done. 
if (view == nullptr) { break; diff --git a/src/workerd/api/node/crypto-keys.c++ b/src/workerd/api/node/crypto-keys.c++ index b97779b9fc7..1690049cf4f 100644 --- a/src/workerd/api/node/crypto-keys.c++ +++ b/src/workerd/api/node/crypto-keys.c++ @@ -11,6 +11,7 @@ #include #include +#include #include // TODO(soon): This implements most of node:crypto key import, export, and @@ -405,8 +406,9 @@ kj::OneOf CryptoImpl:: } kj::StringPtr type = JSG_REQUIRE_NONNULL(opts.type, TypeError, "Missing type option"); - auto data = - key->impl->exportKeyExt(js, format, type, kj::mv(opts.cipher), kj::mv(opts.passphrase)); + auto maybePass = opts.passphrase.map( + [&](auto& pass) mutable -> kj::Array { return pass.getHandle(js).copy(); }); + auto data = key->impl->exportKeyExt(js, format, type, kj::mv(opts.cipher), kj::mv(maybePass)); if (format == "pem"_kj) { // TODO(perf): As a later performance optimization, change this so that it doesn't copy. return kj::str(data.asArrayPtr().asChars()); @@ -595,6 +597,11 @@ jsg::Ref CryptoImpl::createPublicKey(jsg::Lock& js, CreateAsymmetricK CryptoKeyPair CryptoImpl::generateRsaKeyPair(jsg::Lock& js, RsaKeyPairOptions options) { ncrypto::ClearErrorOnReturn clearErrorOnReturn; + // Matches the WebCrypto validateRsaParams bound. Consider lowering both to 8192. + static constexpr uint32_t kMaxRsaModulusLength = 16384; + JSG_REQUIRE(options.modulusLength <= kMaxRsaModulusLength, RangeError, + "RSA modulusLength exceeds maximum (", kMaxRsaModulusLength, ")"); + auto ctx = ncrypto::EVPKeyCtxPointer::NewFromID( options.type == "rsa-pss" ? 
EVP_PKEY_RSA_PSS : EVP_PKEY_RSA); @@ -803,6 +810,8 @@ CryptoKeyPair CryptoImpl::generateDhKeyPair(jsg::Lock& js, DhKeyPairOptions opti } KJ_CASE_ONEOF(prime, jsg::JsRef) { auto primePtr = prime.getHandle(js).asArrayPtr(); + JSG_REQUIRE( + primePtr.size() <= kMaxPrimeBits / CHAR_BIT, RangeError, "DH prime exceeds maximum size"); ncrypto::BignumPointer bn(primePtr.begin(), primePtr.size()); auto bn_g = ncrypto::BignumPointer::New(); diff --git a/src/workerd/api/node/crypto.c++ b/src/workerd/api/node/crypto.c++ index 467585b2d10..e867185f84f 100644 --- a/src/workerd/api/node/crypto.c++ +++ b/src/workerd/api/node/crypto.c++ @@ -17,15 +17,24 @@ using namespace std::string_view_literals; namespace workerd::api::node { +namespace { +// BoringSSL does not tolerate null pointers even when the length is zero. +// JsBufferSource::asArrayPtr() can return {nullptr, 0} for empty buffers, +// so we ensure a non-null pointer before passing to OpenSSL. +kj::ArrayPtr nonNullBytes(kj::ArrayPtr ptr) { + static kj::byte DUMMY = 0; + return ptr == nullptr ? kj::arrayPtr(&DUMMY, 0) : ptr; +} +} // namespace // ====================================================================================== #pragma region KDF jsg::JsArrayBuffer CryptoImpl::getHkdf(jsg::Lock& js, kj::String hash, - kj::Array key, - kj::Array salt, - kj::Array info, + jsg::JsBufferSource key, + jsg::JsBufferSource salt, + jsg::JsBufferSource info, uint32_t length) { // The Node.js version of the HKDF is a bit different from the Web Crypto API // version. 
For one, the length here specifies the number of bytes, whereas @@ -44,12 +53,14 @@ jsg::JsArrayBuffer CryptoImpl::getHkdf(jsg::Lock& js, JSG_REQUIRE(key.size() <= INT32_MAX, RangeError, "Hkdf failed: key is too large"); JSG_REQUIRE(ncrypto::checkHkdfLength(digest, length), RangeError, "Invalid Hkdf key length"); - return JSG_REQUIRE_NONNULL(api::hkdf(js, length, digest, key, salt, info), Error, "Hkdf failed"); + return JSG_REQUIRE_NONNULL(api::hkdf(js, length, digest, nonNullBytes(key.asArrayPtr()), + nonNullBytes(salt.asArrayPtr()), nonNullBytes(info.asArrayPtr())), + Error, "Hkdf failed"); } jsg::JsArrayBuffer CryptoImpl::getPbkdf(jsg::Lock& js, - kj::Array password, - kj::Array salt, + jsg::JsBufferSource password, + jsg::JsBufferSource salt, uint32_t num_iterations, uint32_t keylen, kj::String name) { @@ -67,14 +78,18 @@ jsg::JsArrayBuffer CryptoImpl::getPbkdf(jsg::Lock& js, // Note: The user could DoS us by selecting a very high iteration count. As with the Web Crypto // API, intentionally limit the maximum iteration count. 
checkPbkdfLimits(js, num_iterations); + JSG_REQUIRE(ncrypto::checkHkdfLength(digest, keylen), RangeError, + "Pbkdf2 failed: derived key length exceeds maximum for this hash"); return JSG_REQUIRE_NONNULL( - api::pbkdf2(js, keylen, num_iterations, digest, password, salt), Error, "Pbkdf2 failed"); + api::pbkdf2(js, keylen, num_iterations, digest, nonNullBytes(password.asArrayPtr()), + nonNullBytes(salt.asArrayPtr())), + Error, "Pbkdf2 failed"); } jsg::JsArrayBuffer CryptoImpl::getScrypt(jsg::Lock& js, - kj::Array password, - kj::Array salt, + jsg::JsBufferSource password, + jsg::JsBufferSource salt, uint32_t N, uint32_t r, uint32_t p, @@ -82,27 +97,28 @@ jsg::JsArrayBuffer CryptoImpl::getScrypt(jsg::Lock& js, uint32_t keylen) { JSG_REQUIRE(password.size() <= INT32_MAX, RangeError, "Scrypt failed: password is too large"); JSG_REQUIRE(salt.size() <= INT32_MAX, RangeError, "Scrypt failed: salt is too large"); + checkScryptLimits(js, N, r, p); return JSG_REQUIRE_NONNULL( - api::scrypt(js, keylen, N, r, p, maxmem, password, salt), Error, "Scrypt failed"); + api::scrypt(js, keylen, N, r, p, maxmem, nonNullBytes(password.asArrayPtr()), + nonNullBytes(salt.asArrayPtr())), + Error, "Scrypt failed"); } #pragma endregion // KDF // ====================================================================================== #pragma region SPKAC -bool CryptoImpl::verifySpkac(kj::Array input) { - return workerd::api::verifySpkac(input); +bool CryptoImpl::verifySpkac(jsg::Lock& js, jsg::JsBufferSource input) { + return workerd::api::verifySpkac(nonNullBytes(input.asArrayPtr())); } -kj::Maybe CryptoImpl::exportPublicKey( - jsg::Lock& js, kj::Array input) { - return workerd::api::exportPublicKey(js, input); +kj::Maybe CryptoImpl::exportPublicKey(jsg::Lock& js, jsg::JsBufferSource input) { + return workerd::api::exportPublicKey(js, nonNullBytes(input.asArrayPtr())); } -kj::Maybe CryptoImpl::exportChallenge( - jsg::Lock& js, kj::Array input) { - return workerd::api::exportChallenge(js, input); 
+kj::Maybe CryptoImpl::exportChallenge(jsg::Lock& js, jsg::JsBufferSource input) { + return workerd::api::exportChallenge(js, nonNullBytes(input.asArrayPtr())); } #pragma endregion // SPKAC @@ -112,25 +128,27 @@ kj::Maybe CryptoImpl::exportChallenge( jsg::JsArrayBuffer CryptoImpl::randomPrime(jsg::Lock& js, uint32_t size, bool safe, - jsg::Optional> add_buf, - jsg::Optional> rem_buf) { + jsg::Optional add_buf, + jsg::Optional rem_buf) { return workerd::api::randomPrime(js, size, safe, - add_buf.map([](kj::Array& buf) { return buf.asPtr(); }), - rem_buf.map([](kj::Array& buf) { return buf.asPtr(); })); + add_buf.map([&](jsg::JsBufferSource& buf) mutable { return nonNullBytes(buf.asArrayPtr()); }), + rem_buf.map( + [&](jsg::JsBufferSource& buf) mutable { return nonNullBytes(buf.asArrayPtr()); })); } -bool CryptoImpl::checkPrimeSync(kj::Array bufferView, uint32_t num_checks) { - return workerd::api::checkPrime(bufferView.asPtr(), num_checks); +bool CryptoImpl::checkPrimeSync( + jsg::Lock& js, jsg::JsBufferSource bufferView, uint32_t num_checks) { + return workerd::api::checkPrime(nonNullBytes(bufferView.asArrayPtr()), num_checks); } #pragma endregion // Primes // ====================================================================================== #pragma region Hmac jsg::Ref CryptoImpl::HmacHandle::constructor( - jsg::Lock& js, kj::String algorithm, kj::OneOf, jsg::Ref> key) { + jsg::Lock& js, kj::String algorithm, KeyParam key) { KJ_SWITCH_ONEOF(key) { - KJ_CASE_ONEOF(key_data, kj::Array) { - return js.alloc(HmacContext(js, algorithm, key_data.asPtr())); + KJ_CASE_ONEOF(key_data, jsg::JsBufferSource) { + return js.alloc(HmacContext(js, algorithm, nonNullBytes(key_data.asArrayPtr()))); } KJ_CASE_ONEOF(key, jsg::Ref) { return js.alloc(HmacContext(js, algorithm, key->impl.get())); @@ -139,8 +157,8 @@ jsg::Ref CryptoImpl::HmacHandle::constructor( KJ_UNREACHABLE; } -int CryptoImpl::HmacHandle::update(kj::Array data) { - ctx.update(data); +int 
CryptoImpl::HmacHandle::update(jsg::Lock& js, jsg::JsBufferSource data) { + ctx.update(nonNullBytes(data.asArrayPtr())); return 1; // This just always returns 1 no matter what. } @@ -151,16 +169,16 @@ jsg::JsUint8Array CryptoImpl::HmacHandle::digest(jsg::Lock& js) { jsg::JsUint8Array CryptoImpl::HmacHandle::oneshot(jsg::Lock& js, kj::String algorithm, CryptoImpl::HmacHandle::KeyParam key, - kj::Array data) { + jsg::JsBufferSource data) { KJ_SWITCH_ONEOF(key) { - KJ_CASE_ONEOF(key_data, kj::Array) { - HmacContext ctx(js, algorithm, key_data.asPtr()); - ctx.update(data); + KJ_CASE_ONEOF(key_data, jsg::JsBufferSource) { + HmacContext ctx(js, algorithm, nonNullBytes(key_data.asArrayPtr())); + ctx.update(nonNullBytes(data.asArrayPtr())); return ctx.digest(js); } KJ_CASE_ONEOF(key, jsg::Ref) { HmacContext ctx(js, algorithm, key->impl.get()); - ctx.update(data); + ctx.update(nonNullBytes(data.asArrayPtr())); return ctx.digest(js); } } @@ -179,8 +197,8 @@ jsg::Ref CryptoImpl::HashHandle::constructor( return js.alloc(HashContext(algorithm, xofLen)); } -int CryptoImpl::HashHandle::update(kj::Array data) { - ctx.update(data); +int CryptoImpl::HashHandle::update(jsg::Lock& js, jsg::JsBufferSource data) { + ctx.update(nonNullBytes(data.asArrayPtr())); return 1; } @@ -198,9 +216,9 @@ void CryptoImpl::HashHandle::visitForMemoryInfo(jsg::MemoryTracker& tracker) con } jsg::JsUint8Array CryptoImpl::HashHandle::oneshot( - jsg::Lock& js, kj::String algorithm, kj::Array data, kj::Maybe xofLen) { + jsg::Lock& js, kj::String algorithm, jsg::JsBufferSource data, kj::Maybe xofLen) { HashContext ctx(algorithm, xofLen); - ctx.update(data); + ctx.update(nonNullBytes(data.asArrayPtr())); return ctx.digest(js); } #pragma endregion Hash @@ -215,21 +233,43 @@ jsg::Ref CryptoImpl::DiffieHellmanGroupHandle( jsg::Ref CryptoImpl::DiffieHellmanHandle::constructor( jsg::Lock& js, - kj::OneOf, int> sizeOrKey, - kj::OneOf, int> generator) { - return js.alloc(DiffieHellman(sizeOrKey, generator)); + 
kj::OneOf sizeOrKey, + kj::OneOf generator) { + auto sizeOrKeyParam = [&]() -> kj::OneOf, int> { + KJ_SWITCH_ONEOF(sizeOrKey) { + KJ_CASE_ONEOF(size, int) { + return size; + } + KJ_CASE_ONEOF(key, jsg::JsBufferSource) { + return nonNullBytes(key.asArrayPtr()); + } + } + KJ_UNREACHABLE; + }(); + auto generatorParam = [&]() -> kj::OneOf, int> { + KJ_SWITCH_ONEOF(generator) { + KJ_CASE_ONEOF(gen, int) { + return gen; + } + KJ_CASE_ONEOF(gen, jsg::JsBufferSource) { + return nonNullBytes(gen.asArrayPtr()); + } + } + KJ_UNREACHABLE; + }(); + return js.alloc(DiffieHellman(sizeOrKeyParam, generatorParam)); } CryptoImpl::DiffieHellmanHandle::DiffieHellmanHandle(DiffieHellman dh): dh(kj::mv(dh)) { verifyError = JSG_REQUIRE_NONNULL(this->dh.check(), Error, "DiffieHellman init failed"); }; -void CryptoImpl::DiffieHellmanHandle::setPrivateKey(kj::Array key) { - dh.setPrivateKey(key); +void CryptoImpl::DiffieHellmanHandle::setPrivateKey(jsg::Lock& js, jsg::JsBufferSource key) { + dh.setPrivateKey(nonNullBytes(key.asArrayPtr())); } -void CryptoImpl::DiffieHellmanHandle::setPublicKey(kj::Array key) { - dh.setPublicKey(key); +void CryptoImpl::DiffieHellmanHandle::setPublicKey(jsg::Lock& js, jsg::JsBufferSource key) { + dh.setPublicKey(nonNullBytes(key.asArrayPtr())); } jsg::JsUint8Array CryptoImpl::DiffieHellmanHandle::getPublicKey(jsg::Lock& js) { @@ -249,8 +289,8 @@ jsg::JsUint8Array CryptoImpl::DiffieHellmanHandle::getPrime(jsg::Lock& js) { } jsg::JsUint8Array CryptoImpl::DiffieHellmanHandle::computeSecret( - jsg::Lock& js, kj::Array key) { - return dh.computeSecret(js, key); + jsg::Lock& js, jsg::JsBufferSource key) { + return dh.computeSecret(js, nonNullBytes(key.asArrayPtr())); } jsg::JsUint8Array CryptoImpl::DiffieHellmanHandle::generateKeys(jsg::Lock& js) { @@ -753,9 +793,7 @@ jsg::Ref CryptoImpl::CipherHandle::construct(jsg::Lock // Copy the IV into C++-owned memory so that later modifications to the JS buffer // cannot affect the cipher. 
This matches Node.js, which copies the IV into OpenSSL // at init time. - auto ivCopy = kj::heapArray(iv.asArrayPtr()); - return js.alloc( - mode, kj::mv(ctx), kj::mv(key), kj::mv(ivCopy), kj::mv(maybeAuthInfo)); + return js.alloc(mode, kj::mv(ctx), kj::mv(key), iv.copy(), kj::mv(maybeAuthInfo)); } jsg::JsUint8Array CryptoImpl::CipherHandle::update(jsg::Lock& js, jsg::JsBufferSource data) { @@ -1278,14 +1316,17 @@ jsg::JsUint8Array Cipher(jsg::Lock& js, KJ_IF_SOME(labelRef, options.oaepLabel) { auto label = labelRef.getHandle(js); - // The ctx takes ownership of the data buffer so we have to copy. + // EVP_PKEY_CTX_set0_rsa_oaep_label takes ownership of the buffer only on + // success (return 1). Keep RAII ownership until the call succeeds so the + // DataPointer destructor frees the buffer on the error path (e.g. when + // padding is not RSA_PKCS1_OAEP_PADDING). auto data = ncrypto::DataPointer::Alloc(label.size()); kj::ArrayPtr dataPtr(data.get(), data.size()); dataPtr.copyFrom(label.asArrayPtr()); - auto released = data.release(); - JSG_REQUIRE(EVP_PKEY_CTX_set0_rsa_oaep_label( - ctx.get(), static_cast(released.data), released.len) == 1, + JSG_REQUIRE(EVP_PKEY_CTX_set0_rsa_oaep_label(ctx.get(), data.get(), data.size()) == 1, Error, "Failed to set the OAEP label"); + // Ownership has been transferred to ctx; prevent double-free. 
+ data.release(); } size_t len; diff --git a/src/workerd/api/node/crypto.h b/src/workerd/api/node/crypto.h index fd753c626dc..945e386b6f3 100644 --- a/src/workerd/api/node/crypto.h +++ b/src/workerd/api/node/crypto.h @@ -22,16 +22,16 @@ class CryptoImpl final: public jsg::Object { DiffieHellmanHandle(DiffieHellman dh); static jsg::Ref constructor(jsg::Lock& js, - kj::OneOf, int> sizeOrKey, - kj::OneOf, int> generator); + kj::OneOf sizeOrKey, + kj::OneOf generator); - void setPrivateKey(kj::Array key); - void setPublicKey(kj::Array key); + void setPrivateKey(jsg::Lock& js, jsg::JsBufferSource key); + void setPublicKey(jsg::Lock& js, jsg::JsBufferSource key); jsg::JsUint8Array getPublicKey(jsg::Lock& js); jsg::JsUint8Array getPrivateKey(jsg::Lock& js); jsg::JsUint8Array getGenerator(jsg::Lock& js); jsg::JsUint8Array getPrime(jsg::Lock& js); - jsg::JsUint8Array computeSecret(jsg::Lock& js, kj::Array key); + jsg::JsUint8Array computeSecret(jsg::Lock& js, jsg::JsBufferSource key); jsg::JsUint8Array generateKeys(jsg::Lock& js); int getVerifyError(); @@ -61,9 +61,9 @@ class CryptoImpl final: public jsg::Object { jsg::JsArrayBuffer randomPrime(jsg::Lock& js, uint32_t size, bool safe, - jsg::Optional> add, - jsg::Optional> rem); - bool checkPrimeSync(kj::Array bufferView, uint32_t num_checks); + jsg::Optional add, + jsg::Optional rem); + bool checkPrimeSync(jsg::Lock& js, jsg::JsBufferSource bufferView, uint32_t num_checks); // Hash class HashHandle final: public jsg::Object { @@ -73,10 +73,10 @@ class CryptoImpl final: public jsg::Object { static jsg::Ref constructor( jsg::Lock& js, kj::String algorithm, kj::Maybe xofLen); static jsg::JsUint8Array oneshot( - jsg::Lock&, kj::String algorithm, kj::Array data, kj::Maybe xofLen); + jsg::Lock&, kj::String algorithm, jsg::JsBufferSource data, kj::Maybe xofLen); jsg::Ref copy(jsg::Lock& js, kj::Maybe xofLen); - int update(kj::Array data); + int update(jsg::Lock& js, jsg::JsBufferSource data); jsg::JsUint8Array digest(jsg::Lock& 
js); JSG_RESOURCE_TYPE(HashHandle) { @@ -95,7 +95,7 @@ class CryptoImpl final: public jsg::Object { // Hmac class HmacHandle final: public jsg::Object { public: - using KeyParam = kj::OneOf, jsg::Ref>; + using KeyParam = kj::OneOf>; HmacHandle(HmacContext ctx): ctx(kj::mv(ctx)) {}; @@ -104,9 +104,9 @@ class CryptoImpl final: public jsg::Object { // Efficiently implement one-shot HMAC that avoids multiple calls // across the C++/JS boundary. static jsg::JsUint8Array oneshot( - jsg::Lock& js, kj::String algorithm, KeyParam key, kj::Array data); + jsg::Lock& js, kj::String algorithm, KeyParam key, jsg::JsBufferSource data); - int update(kj::Array data); + int update(jsg::Lock& js, jsg::JsBufferSource data); jsg::JsUint8Array digest(jsg::Lock& js); JSG_RESOURCE_TYPE(HmacHandle) { @@ -124,23 +124,23 @@ class CryptoImpl final: public jsg::Object { // Hkdf jsg::JsArrayBuffer getHkdf(jsg::Lock& js, kj::String hash, - kj::Array key, - kj::Array salt, - kj::Array info, + jsg::JsBufferSource key, + jsg::JsBufferSource salt, + jsg::JsBufferSource info, uint32_t length); // Pbkdf2 jsg::JsArrayBuffer getPbkdf(jsg::Lock& js, - kj::Array password, - kj::Array salt, + jsg::JsBufferSource password, + jsg::JsBufferSource salt, uint32_t num_iterations, uint32_t keylen, kj::String name); // Scrypt jsg::JsArrayBuffer getScrypt(jsg::Lock& js, - kj::Array password, - kj::Array salt, + jsg::JsBufferSource password, + jsg::JsBufferSource salt, uint32_t N, uint32_t r, uint32_t p, @@ -152,7 +152,7 @@ class CryptoImpl final: public jsg::Object { jsg::Optional type; jsg::Optional format; jsg::Optional cipher; - jsg::Optional> passphrase; + jsg::Optional> passphrase; JSG_STRUCT(type, format, cipher, passphrase); }; @@ -164,7 +164,7 @@ class CryptoImpl final: public jsg::Object { jsg::Optional saltLength; jsg::Optional divisorLength; jsg::Optional namedCurve; - jsg::Optional> prime; + jsg::Optional> prime; jsg::Optional primeLength; jsg::Optional generator; jsg::Optional groupName; @@ -195,7 
+195,7 @@ class CryptoImpl final: public jsg::Object { jsg::Optional type; jsg::Optional> passphrase; // The passphrase is only used for private keys. The format, type, and passphrase - // options are only used if the key is a kj::Array. + // options are only used if the key is set. JSG_STRUCT(key, format, type, passphrase); }; @@ -506,9 +506,9 @@ class CryptoImpl final: public jsg::Object { kj::ArrayPtr getCiphers(); // SPKAC - bool verifySpkac(kj::Array input); - kj::Maybe exportPublicKey(jsg::Lock& js, kj::Array input); - kj::Maybe exportChallenge(jsg::Lock& js, kj::Array input); + bool verifySpkac(jsg::Lock& js, jsg::JsBufferSource input); + kj::Maybe exportPublicKey(jsg::Lock& js, jsg::JsBufferSource input); + kj::Maybe exportChallenge(jsg::Lock& js, jsg::JsBufferSource input); // ECDH class ECDHHandle final: public jsg::Object { diff --git a/src/workerd/api/node/process.c++ b/src/workerd/api/node/process.c++ index 8c11d71e79c..222b2928a0c 100644 --- a/src/workerd/api/node/process.c++ +++ b/src/workerd/api/node/process.c++ @@ -257,4 +257,8 @@ void ProcessModule::setCwd(jsg::Lock& js, kj::String path) { } } +bool ProcessModule::shouldThrowOnNotImplementedTlsOption(jsg::Lock& js) { + return FeatureFlags::get(js).getThrowOnNotImplementedTlsOptions(); +} + } // namespace workerd::api::node diff --git a/src/workerd/api/node/process.h b/src/workerd/api/node/process.h index bb1dcf9b30a..085c4d153b9 100644 --- a/src/workerd/api/node/process.h +++ b/src/workerd/api/node/process.h @@ -50,6 +50,10 @@ class ProcessModule final: public jsg::Object { void setCwd(jsg::Lock& js, kj::String path); + // Checks the throw_on_not_implemented_tls_options compat flag. If enabled, returns true + // (caller should throw). Otherwise returns false (caller should silently continue). 
+ bool shouldThrowOnNotImplementedTlsOption(jsg::Lock& js); + JSG_RESOURCE_TYPE(ProcessModule) { JSG_METHOD(getEnvObject); JSG_METHOD(getBuiltinModule); @@ -58,6 +62,7 @@ class ProcessModule final: public jsg::Object { JSG_METHOD(setCwd); JSG_LAZY_READONLY_INSTANCE_PROPERTY(versions, getVersions); JSG_LAZY_READONLY_INSTANCE_PROPERTY(platform, getPlatform); + JSG_METHOD(shouldThrowOnNotImplementedTlsOption); } }; diff --git a/src/workerd/api/node/tests/BUILD.bazel b/src/workerd/api/node/tests/BUILD.bazel index 8963d5c8c30..4235a5b99a3 100644 --- a/src/workerd/api/node/tests/BUILD.bazel +++ b/src/workerd/api/node/tests/BUILD.bazel @@ -137,6 +137,12 @@ wd_test( data = ["crypto_scrypt-test.js"], ) +wd_test( + src = "crypto_scrypt-limits-test.wd-test", + args = ["--experimental"], + data = ["crypto_scrypt-limits-test.js"], +) + wd_test( src = "crypto_spkac-test.wd-test", args = ["--experimental"], @@ -242,6 +248,36 @@ wd_test( data = ["zlib-zstd-nodejs-test.js"], ) +wd_test( + src = "zlib-resizable-ab-test.wd-test", + args = ["--experimental"], + data = ["zlib-resizable-ab-test.js"], +) + +wd_test( + src = "zlib-dictionary-resizable-test.wd-test", + args = ["--experimental"], + data = ["zlib-dictionary-resizable-test.js"], +) + +wd_test( + src = "zlib-invalid-mode-test.wd-test", + args = ["--experimental"], + data = ["zlib-invalid-mode-test.js"], +) + +wd_test( + src = "zlib-resizable-buffer-test.wd-test", + args = ["--experimental"], + data = ["zlib-resizable-buffer-test.js"], +) + +wd_test( + src = "zlib-allocator-mismatch-test.wd-test", + args = ["--experimental"], + data = ["zlib-allocator-mismatch-test.js"], +) + wd_test( src = "module-nodejs-test.wd-test", args = ["--experimental"], @@ -464,6 +500,12 @@ wd_test( sidecar_randomize_ip = False, ) +wd_test( + src = "tls-check-server-identity-test.wd-test", + args = ["--experimental"], + data = ["tls-check-server-identity-test.js"], +) + wd_test( src = "streams-nodejs-test.wd-test", args = ["--experimental"], @@ -618,6 
+660,12 @@ wd_test( data = ["fs-misc-test.js"], ) +wd_test( + src = "fs-readsync-byteoffset-test.wd-test", + args = ["--experimental"], + data = ["fs-readsync-byteoffset-test.js"], +) + wd_test( src = "fs-link-test.wd-test", args = ["--experimental"], diff --git a/src/workerd/api/node/tests/crypto_cipher-test.js b/src/workerd/api/node/tests/crypto_cipher-test.js index e0272f852fd..ca724cc3edd 100644 --- a/src/workerd/api/node/tests/crypto_cipher-test.js +++ b/src/workerd/api/node/tests/crypto_cipher-test.js @@ -611,6 +611,35 @@ export const transferredAuthTagDecrypt = { }, }; +// Regression: AUTOVULN-CLOUDFLARE-WORKERD-76 +// publicEncrypt/privateDecrypt with non-OAEP padding + oaepLabel must +// throw without leaking the label buffer. +export const oaepLabelWithNonOaepPaddingThrows = { + test(_, env) { + const pub = createPublicKey(env['rsa_public.pem']); + pub.padding = 1; // RSA_PKCS1_PADDING (not OAEP=4) + pub.oaepLabel = Buffer.alloc(1024); + pub.encoding = 'utf8'; + + for (let i = 0; i < 5; i++) { + throws(() => publicEncrypt(pub, Buffer.from('test')), { + message: /Failed to set the OAEP label/, + }); + } + + const pvt = createPrivateKey(env['rsa_private.pem']); + pvt.padding = 1; // RSA_PKCS1_PADDING (not OAEP=4) + pvt.oaepLabel = Buffer.alloc(1024); + pvt.encoding = 'utf8'; + + for (let i = 0; i < 5; i++) { + throws(() => privateDecrypt(pvt, Buffer.from('test')), { + message: /Failed to set the OAEP label/, + }); + } + }, +}; + export const testUnimplemented = { async test() { strictEqual(typeof Cipher, 'function'); diff --git a/src/workerd/api/node/tests/crypto_keys-test.js b/src/workerd/api/node/tests/crypto_keys-test.js index cc682989238..c98a637cc8d 100644 --- a/src/workerd/api/node/tests/crypto_keys-test.js +++ b/src/workerd/api/node/tests/crypto_keys-test.js @@ -1936,6 +1936,25 @@ export const generate_key_pair_arg_validation = { }, }; +export const generate_rsa_key_pair_modulus_cap = { + test() { + throws(() => generateKeyPairSync('rsa', { 
modulusLength: 16385 }), { + name: 'RangeError', + }); + const { publicKey } = generateKeyPairSync('rsa', { modulusLength: 2048 }); + strictEqual(publicKey.asymmetricKeyDetails.modulusLength, 2048); + }, +}; + +export const generate_dh_key_pair_prime_size_cap = { + test() { + // 1025 bytes > kMaxPrimeBits / 8 (1024 bytes) + throws(() => generateKeyPairSync('dh', { prime: Buffer.alloc(1025) }), { + name: 'RangeError', + }); + }, +}; + export const generate_rsa_key_pair = { test() { const { publicKey, privateKey } = generateKeyPairSync('rsa', { diff --git a/src/workerd/api/node/tests/crypto_pbkdf2-test.js b/src/workerd/api/node/tests/crypto_pbkdf2-test.js index f8100a0ab50..6326e505959 100644 --- a/src/workerd/api/node/tests/crypto_pbkdf2-test.js +++ b/src/workerd/api/node/tests/crypto_pbkdf2-test.js @@ -327,3 +327,21 @@ export const invalid_digest_tests = { } }, }; + +export const pbkdf2_keylen_cap = { + test() { + // SHA-1 digest = 20 bytes, max keylen = 255 * 20 = 5100 + assert.throws(() => crypto.pbkdf2Sync('p', 's', 1000, 5101, 'sha1'), { + name: 'RangeError', + }); + const r1 = crypto.pbkdf2Sync('p', 's', 1000, 5100, 'sha1'); + assert.strictEqual(r1.length, 5100); + + // SHA-256 digest = 32 bytes, max keylen = 255 * 32 = 8160 + assert.throws(() => crypto.pbkdf2Sync('p', 's', 1000, 8161, 'sha256'), { + name: 'RangeError', + }); + const r2 = crypto.pbkdf2Sync('p', 's', 1000, 8160, 'sha256'); + assert.strictEqual(r2.length, 8160); + }, +}; diff --git a/src/workerd/api/node/tests/crypto_random-test.js b/src/workerd/api/node/tests/crypto_random-test.js index daf06f8daa8..51ba3916597 100644 --- a/src/workerd/api/node/tests/crypto_random-test.js +++ b/src/workerd/api/node/tests/crypto_random-test.js @@ -438,6 +438,74 @@ export const randomFillSyncTest = { }, }; +// Regression: randomFillSync with DataView was silently filling zero bytes +// because DataView has no .length property (only .byteLength). 
+export const randomFillSyncDataView = { + async test() { + const { randomFillSync } = await import('node:crypto'); + + // DataView without explicit offset/size — should fill all 8 bytes. + const ab = new ArrayBuffer(16); + const dv = new DataView(ab, 4, 8); + randomFillSync(dv); + + // Verify the DataView region was actually filled (not left as zeros). + // With 8 random bytes, P(all zero) = 2^-64 — negligible false-positive. + const filled = new Uint8Array(ab, 4, 8); + let allZero = true; + for (let i = 0; i < filled.length; i++) { + if (filled[i] !== 0) { + allZero = false; + break; + } + } + if (allZero) + throw new Error('DataView region was not filled with random data'); + + // Verify bytes outside the DataView window were NOT touched. + const before = new Uint8Array(ab, 0, 4); + const after = new Uint8Array(ab, 12, 4); + for (let i = 0; i < 4; i++) { + strictEqual( + before[i], + 0, + `byte before DataView at ${i} should be untouched` + ); + strictEqual( + after[i], + 0, + `byte after DataView at ${i} should be untouched` + ); + } + + // DataView with explicit offset and size args. + const ab2 = new ArrayBuffer(16); + const dv2 = new DataView(ab2); + randomFillSync(dv2, 2, 4); + const slice = new Uint8Array(ab2, 2, 4); + allZero = true; + for (let i = 0; i < slice.length; i++) { + if (slice[i] !== 0) { + allZero = false; + break; + } + } + if (allZero) throw new Error('DataView with offset/size was not filled'); + + // Bytes outside the fill region should be untouched. 
+ strictEqual( + new Uint8Array(ab2, 0, 2).every((b) => b === 0), + true, + 'bytes before fill region should be zero' + ); + strictEqual( + new Uint8Array(ab2, 6, 10).every((b) => b === 0), + true, + 'bytes after fill region should be zero' + ); + }, +}; + // Ref: https://github.com/cloudflare/workerd/issues/2716 export const getRandomValuesIllegalInvocation = { async test() { @@ -461,3 +529,10 @@ export const getRandomValuesIllegalInvocation = { strictEqual(crypto.getRandomValues(new Uint8Array(6)).length, 6); }, }; + +export const generate_prime_size_cap = { + test() { + throws(() => generatePrimeSync(8193), { name: 'RangeError' }); + ok(generatePrimeSync(512).byteLength > 0); + }, +}; diff --git a/src/workerd/api/node/tests/crypto_scrypt-limits-test.js b/src/workerd/api/node/tests/crypto_scrypt-limits-test.js new file mode 100644 index 00000000000..756604c95c6 --- /dev/null +++ b/src/workerd/api/node/tests/crypto_scrypt-limits-test.js @@ -0,0 +1,39 @@ +// Copyright (c) 2026 Cloudflare, Inc. 
+// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +import { strictEqual, throws } from 'node:assert'; +import { scrypt, scryptSync } from 'node:crypto'; + +export const scrypt_cost_limit_sync = { + test() { + // N=1024, r=1, p=32768 → cost=33,554,432, exceeds 2^20 + throws(() => scryptSync('p', 's', 64, { N: 1024, r: 1, p: 32768 }), { + name: 'RangeError', + }); + + // N=2, r=1, p=1048577 → cost just over 2^20 + throws(() => scryptSync('p', 's', 64, { N: 2, r: 1, p: 1048577 }), { + name: 'RangeError', + }); + + // N=1024, r=1, p=1 → cost=1024, within limit + const result = scryptSync('p', 's', 64, { N: 1024, r: 1, p: 1 }); + strictEqual(result.length, 64); + }, +}; + +export const scrypt_cost_limit_async = { + async test() { + const { promise, resolve, reject } = Promise.withResolvers(); + scrypt('p', 's', 64, { N: 1024, r: 1, p: 32768 }, (err) => { + if (err) { + resolve(err); + } else { + reject(new Error('Expected error')); + } + }); + const err = await promise; + strictEqual(err.constructor.name, 'RangeError'); + }, +}; diff --git a/src/workerd/api/node/tests/crypto_scrypt-limits-test.wd-test b/src/workerd/api/node/tests/crypto_scrypt-limits-test.wd-test new file mode 100644 index 00000000000..36a90ce9357 --- /dev/null +++ b/src/workerd/api/node/tests/crypto_scrypt-limits-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "crypto_scrypt-limits-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "crypto_scrypt-limits-test.js") + ], + compatibilityFlags = ["nodejs_compat_v2", "experimental"] + ) + ), + ], +); diff --git a/src/workerd/api/node/tests/fs-readsync-byteoffset-test.js b/src/workerd/api/node/tests/fs-readsync-byteoffset-test.js new file mode 100644 index 00000000000..80a7514d392 --- /dev/null +++ b/src/workerd/api/node/tests/fs-readsync-byteoffset-test.js @@ -0,0 +1,97 
@@ +// Copyright (c) 2017-2022 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 +import { strictEqual } from 'node:assert'; +import { openSync, closeSync, readSync, writeFileSync } from 'node:fs'; + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-13 +export const readSyncByteOffsetRegressionTest = { + test() { + writeFileSync('/tmp/byteoffset-test.txt', 'ATTACK'); + const ab = new ArrayBuffer(13); + const full = new Uint8Array(ab); + full.set([83, 69, 67, 82, 69, 84, 124, 80, 85, 66, 76, 73, 67]); + const view = new Uint8Array(ab, 7, 6); + const fd = openSync('/tmp/byteoffset-test.txt', 'r'); + try { + readSync(fd, view, { offset: 0, length: 6, position: 0 }); + } finally { + closeSync(fd); + } + strictEqual(new TextDecoder().decode(view), 'ATTACK'); + strictEqual(full[0], 83); + strictEqual(full[6], 124); + }, +}; + +export const readSyncNumericOffsetTest = { + test() { + writeFileSync('/tmp/numoffset-test.txt', 'HELLO'); + const ab = new ArrayBuffer(16); + new Uint8Array(ab).fill(65); + const view = new Uint8Array(ab, 8, 5); + const fd = openSync('/tmp/numoffset-test.txt', 'r'); + try { + readSync(fd, view, 0, 5, 0); + } finally { + closeSync(fd); + } + strictEqual(new TextDecoder().decode(view), 'HELLO'); + strictEqual(new Uint8Array(ab, 0, 8)[0], 65); + }, +}; + +// Nonzero numeric offset argument + undefined length: length should default to +// (buffer.byteLength - offset) of the *view*, and the read should land within +// the view starting at the given offset, not corrupting bytes outside. 
+export const readSyncNonzeroOffsetUndefinedLengthTest = { + test() { + writeFileSync('/tmp/nz-offset-undef-len-test.txt', 'XYZ'); + const ab = new ArrayBuffer(16); + new Uint8Array(ab).fill(65); // 'A' everywhere + const view = new Uint8Array(ab, 4, 8); // view spans ab[4..12) + const fd = openSync('/tmp/nz-offset-undef-len-test.txt', 'r'); + try { + // offset=2 within the view, length omitted (undefined), position=0. + readSync(fd, view, 2, undefined, 0); + } finally { + closeSync(fd); + } + // Data should land at view[2..5) == ab[6..9). + const full = new Uint8Array(ab); + // Bytes outside the view must remain 'A'. + for (let i = 0; i < 4; i++) strictEqual(full[i], 65); + for (let i = 12; i < 16; i++) strictEqual(full[i], 65); + // Bytes inside the view before the offset must remain 'A'. + strictEqual(full[4], 65); + strictEqual(full[5], 65); + // Data 'XYZ' should appear at ab[6..9). + strictEqual(full[6], 0x58); // 'X' + strictEqual(full[7], 0x59); // 'Y' + strictEqual(full[8], 0x5a); // 'Z' + }, +}; + +// Same scenario via the options-object form: nonzero offset, length omitted. 
+export const readSyncNonzeroOffsetUndefinedLengthOptionsTest = { + test() { + writeFileSync('/tmp/nz-offset-undef-len-opts-test.txt', 'XYZ'); + const ab = new ArrayBuffer(16); + new Uint8Array(ab).fill(65); + const view = new Uint8Array(ab, 4, 8); + const fd = openSync('/tmp/nz-offset-undef-len-opts-test.txt', 'r'); + try { + readSync(fd, view, { offset: 2, position: 0 }); + } finally { + closeSync(fd); + } + const full = new Uint8Array(ab); + for (let i = 0; i < 4; i++) strictEqual(full[i], 65); + for (let i = 12; i < 16; i++) strictEqual(full[i], 65); + strictEqual(full[4], 65); + strictEqual(full[5], 65); + strictEqual(full[6], 0x58); + strictEqual(full[7], 0x59); + strictEqual(full[8], 0x5a); + }, +}; diff --git a/src/workerd/api/node/tests/fs-readsync-byteoffset-test.wd-test b/src/workerd/api/node/tests/fs-readsync-byteoffset-test.wd-test new file mode 100644 index 00000000000..eade14eb873 --- /dev/null +++ b/src/workerd/api/node/tests/fs-readsync-byteoffset-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "fs-readsync-byteoffset-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "fs-readsync-byteoffset-test.js") + ], + compatibilityFlags = ["nodejs_compat", "nodejs_compat_v2", "experimental", "enable_nodejs_fs_module"] + ) + ), + ], +); diff --git a/src/workerd/api/node/tests/http-client-nodejs-test.js b/src/workerd/api/node/tests/http-client-nodejs-test.js index f60509de99a..fcbdf16cd5b 100644 --- a/src/workerd/api/node/tests/http-client-nodejs-test.js +++ b/src/workerd/api/node/tests/http-client-nodejs-test.js @@ -572,39 +572,41 @@ export const testHostHeaderDoesNotOverrideTransportDestination = { async test(_ctrl, env) { const { promise, resolve, reject } = Promise.withResolvers(); const attackerHost = '169.254.169.254'; - http.get( - { - hostname: env.SIDECAR_HOSTNAME, - port: env.HOST_ECHO_SERVER_PORT, - path: '/safe-endpoint', - headers: 
{ Host: attackerHost }, - }, - (res) => { - let body = ''; - res.on('data', (chunk) => (body += chunk)); - res.on('end', () => { - try { - // The request must have reached the sidecar (not 169.254.169.254). - // If the Host header were used as the URL authority (the bug), - // the fetch would go to 169.254.169.254 and either fail or - // return a non-200 response from a different server. - strictEqual(res.statusCode, 200); - // The sidecar echoes back the Host header it received. Since - // fetch() derives the Host header from the URL (which now uses - // this.host, the transport destination), the echoed value will - // contain the sidecar's address, NOT the attacker-supplied value. - ok( - !body.includes(attackerHost), - `Host header must not contain the attacker-supplied value ` + - `"${attackerHost}"; got "${body}"` - ); - resolve(); - } catch (err) { - reject(err); - } - }); - } - ).on('error', reject); + http + .get( + { + hostname: env.SIDECAR_HOSTNAME, + port: env.HOST_ECHO_SERVER_PORT, + path: '/safe-endpoint', + headers: { Host: attackerHost }, + }, + (res) => { + let body = ''; + res.on('data', (chunk) => (body += chunk)); + res.on('end', () => { + try { + // The request must have reached the sidecar (not 169.254.169.254). + // If the Host header were used as the URL authority (the bug), + // the fetch would go to 169.254.169.254 and either fail or + // return a non-200 response from a different server. + strictEqual(res.statusCode, 200); + // The sidecar echoes back the Host header it received. Since + // fetch() derives the Host header from the URL (which now uses + // this.host, the transport destination), the echoed value will + // contain the sidecar's address, NOT the attacker-supplied value. 
+ ok( + !body.includes(attackerHost), + `Host header must not contain the attacker-supplied value ` + + `"${attackerHost}"; got "${body}"` + ); + resolve(); + } catch (err) { + reject(err); + } + }); + } + ) + .on('error', reject); await promise; }, }; diff --git a/src/workerd/api/node/tests/http-client-path-ssrf-test.js b/src/workerd/api/node/tests/http-client-path-ssrf-test.js index 3088ddda35e..262e92d8475 100644 --- a/src/workerd/api/node/tests/http-client-path-ssrf-test.js +++ b/src/workerd/api/node/tests/http-client-path-ssrf-test.js @@ -101,7 +101,11 @@ export const testRejectsMetadataNetworkPath = { // configured host without authority override. export const testBackslashPathsCannotOverrideAuthority = { test() { - const backslashPaths = ['\\\\evil.test/x', '\\/evil.test/x', '/\\evil.test/x']; + const backslashPaths = [ + '\\\\evil.test/x', + '\\/evil.test/x', + '/\\evil.test/x', + ]; for (const path of backslashPaths) { // If the parser normalises \ to /, our check rejects it (throws). // If it doesn't normalise, the path is safe. Either way, verify @@ -129,7 +133,7 @@ export const testBackslashPathsCannotOverrideAuthority = { if (resolved.host !== 'api.example.test') { throw new Error( `Backslash path "${path}" was allowed but URL parser resolved ` + - `host to "${resolved.host}" — authority override!` + `host to "${resolved.host}" — authority override!` ); } } diff --git a/src/workerd/api/node/tests/tls-check-server-identity-test.js b/src/workerd/api/node/tests/tls-check-server-identity-test.js new file mode 100644 index 00000000000..3e436b6fb7a --- /dev/null +++ b/src/workerd/api/node/tests/tls-check-server-identity-test.js @@ -0,0 +1,107 @@ +// Copyright (c) 2025 Cloudflare, Inc. 
+// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-33: +// node:tls silently accepted and ignored the checkServerIdentity option, +// creating a false sense of security for applications relying on certificate +// pinning or custom hostname verification. The fix rejects the option with +// ERR_OPTION_NOT_IMPLEMENTED until getPeerCertificate() is available to +// actually invoke the verifier. + +import tls from 'node:tls'; +import { throws, doesNotThrow } from 'node:assert'; + +// Verify that providing a custom checkServerIdentity to tls.connect() throws +// ERR_OPTION_NOT_IMPLEMENTED, rather than silently ignoring the verifier. +export const regressionCheckServerIdentityConnect = { + test() { + throws( + () => { + tls.connect({ + port: 443, + host: 'example.com', + checkServerIdentity(hostname, cert) { + return new Error('pin mismatch'); + }, + }); + }, + { + code: 'ERR_OPTION_NOT_IMPLEMENTED', + }, + 'tls.connect() with custom checkServerIdentity must throw ERR_OPTION_NOT_IMPLEMENTED' + ); + }, +}; + +// Verify that providing a custom checkServerIdentity to the TLSSocket +// constructor also throws ERR_OPTION_NOT_IMPLEMENTED. +export const regressionCheckServerIdentityTLSSocket = { + test() { + throws( + () => { + new tls.TLSSocket(undefined, { + checkServerIdentity(hostname, cert) { + return new Error('pin mismatch'); + }, + }); + }, + { + code: 'ERR_OPTION_NOT_IMPLEMENTED', + }, + 'new TLSSocket() with custom checkServerIdentity must throw ERR_OPTION_NOT_IMPLEMENTED' + ); + }, +}; + +// Verify that passing a non-function value (e.g. a number) as +// checkServerIdentity throws a TypeError from validateFunction, which is +// a distinct error path from the ERR_OPTION_NOT_IMPLEMENTED thrown for +// actual function values. 
+export const nonFunctionCheckServerIdentityConnect = { + test() { + throws( + () => { + tls.connect({ + port: 443, + host: 'example.com', + checkServerIdentity: 42, + }); + }, + { + code: 'ERR_INVALID_ARG_TYPE', + name: 'TypeError', + }, + 'tls.connect() with non-function checkServerIdentity must throw TypeError' + ); + }, +}; + +export const nonFunctionCheckServerIdentityTLSSocket = { + test() { + throws( + () => { + new tls.TLSSocket(undefined, { + checkServerIdentity: 42, + }); + }, + { + code: 'ERR_INVALID_ARG_TYPE', + name: 'TypeError', + }, + 'new TLSSocket() with non-function checkServerIdentity must throw TypeError' + ); + }, +}; + +// Verify that omitting checkServerIdentity (the common case) still works — +// the default built-in checkServerIdentity is used internally and must not +// trigger the rejection. +export const regressionCheckServerIdentityDefault = { + test() { + doesNotThrow(() => { + // connect with lookup stub so we don't actually open a connection + tls.connect({ port: 42, lookup() {} }); + }, 'tls.connect() without custom checkServerIdentity must not throw'); + }, +}; diff --git a/src/workerd/api/node/tests/tls-check-server-identity-test.wd-test b/src/workerd/api/node/tests/tls-check-server-identity-test.wd-test new file mode 100644 index 00000000000..6565078d95a --- /dev/null +++ b/src/workerd/api/node/tests/tls-check-server-identity-test.wd-test @@ -0,0 +1,15 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "tls-check-server-identity-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "tls-check-server-identity-test.js") + ], + compatibilityFlags = ["nodejs_compat", "nodejs_compat_v2", "experimental", + "throw_on_not_implemented_tls_options"], + ), + ), + ], +); diff --git a/src/workerd/api/node/tests/zlib-allocator-mismatch-test.js b/src/workerd/api/node/tests/zlib-allocator-mismatch-test.js new file mode 100644 index 00000000000..4874d387ab2 --- 
/dev/null +++ b/src/workerd/api/node/tests/zlib-allocator-mismatch-test.js @@ -0,0 +1,55 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-356: +// Process abort via allocator mismatch in node:zlib ZlibStream when +// params()/reset()/write() is called before initialize(). +// +// The bug: ZlibStream::initialize() was the only place that configures +// the z_stream to use CompressionAllocator, but params()/reset() could +// call initializeZlib() first, causing zlib to use its default malloc +// allocator. When initialize() later overwrote stream.zfree to FreeForZlib, +// the malloc'd allocations were not known by the allocator, resulting in a +// fatal JSG_REQUIRE failure in FreeForZlib during GC finalization. + +import zlib from 'node:zlib'; + +// Calling params() before initialize() must not crash the process. +export const paramsBeforeInitializeTest = { + test() { + const ZlibStream = zlib.createDeflate()._handle.constructor; + + (() => { + const h = new ZlibStream(2 /* INFLATE */); + // Call params() BEFORE initialize(). Pre-fix, this triggers initializeZlib() -> + // inflateInit2() with stream.zalloc==NULL, causing zlib to use its default malloc allocator. + h.params(6, 0); + // Now call initialize(). Pre-fix, this overwrites stream.zfree to FreeForZlib without the + // malloc'd allocations being tracked. + h.initialize(15, 6, 8, 0, new Uint32Array(2), () => {}); + })(); + + // The stream should be destroyed without aborting the process. The test harness will also check + // this. + gc(); + }, +}; + +// Calling reset() before initialize() must not crash the process. +export const resetBeforeInitializeTest = { + test() { + const ZlibStream = zlib.createDeflate()._handle.constructor; + + (() => { + const h = new ZlibStream(2 /* INFLATE */); + // reset() also calls initializeZlib() internally. 
+ h.reset(); + h.initialize(15, 6, 8, 0, new Uint32Array(2), () => {}); + })(); + + // The stream should be destroyed without aborting the process. The test harness will also check + // this. + gc(); + }, +}; diff --git a/src/workerd/api/node/tests/zlib-allocator-mismatch-test.wd-test b/src/workerd/api/node/tests/zlib-allocator-mismatch-test.wd-test new file mode 100644 index 00000000000..52d5f37af37 --- /dev/null +++ b/src/workerd/api/node/tests/zlib-allocator-mismatch-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "zlib-allocator-mismatch-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "zlib-allocator-mismatch-test.js") + ], + compatibilityFlags = ["experimental", "nodejs_compat", "nodejs_compat_v2", "nodejs_zlib"], + ) + ), + ], +); diff --git a/src/workerd/api/node/tests/zlib-dictionary-resizable-test.js b/src/workerd/api/node/tests/zlib-dictionary-resizable-test.js new file mode 100644 index 00000000000..dc4c124feee --- /dev/null +++ b/src/workerd/api/node/tests/zlib-dictionary-resizable-test.js @@ -0,0 +1,68 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 +// +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-82: +// ZlibContext::initialize stored a non-owning view into a resizable +// ArrayBuffer BackingStore as the dictionary. If the buffer was resized +// to 0 before the first write (which lazily calls setDictionary), zlib +// would read into PROT_NONE pages and SIGSEGV the process. +import assert from 'node:assert'; +import { Buffer } from 'node:buffer'; +import zlib from 'node:zlib'; + +export const regression_AUTOVULN_CLOUDFLARE_WORKERD_82 = { + async test() { + // Create a resizable ArrayBuffer and fill it with non-zero data so + // zlib adler32 actually reads the dictionary bytes. 
+ const ab = new ArrayBuffer(1024, { maxByteLength: 1024 }); + const view = new Uint8Array(ab); + for (let i = 0; i < view.length; i++) { + view[i] = i & 0xff || 1; + } + + // Pass the resizable-backed view as the dictionary to Deflate. + const d = zlib.createDeflate({ dictionary: view }); + + // Shrink the backing ArrayBuffer to 0 before the first write. + // Pre-patch this would cause setDictionary to read PROT_NONE pages. + ab.resize(0); + + // The first write triggers lazy zlib init and setDictionary. + // Post-patch this must succeed (the dictionary was deep-copied). + // Pre-patch this would SIGSEGV the process. + const compressed = await new Promise((resolve, reject) => { + const chunks = []; + d.on('data', (chunk) => chunks.push(chunk)); + d.on('end', () => resolve(Buffer.concat(chunks))); + d.on('error', (err) => reject(err)); + d.write(Buffer.from('hello world')); + d.end(); + }); + + assert.ok(compressed.length > 0, 'should produce compressed output'); + + // Verify the compressed data can be inflated back with the same + // dictionary (using a fresh, non-resizable copy of the original). 
+ const dictCopy = Buffer.alloc(1024); + for (let i = 0; i < 1024; i++) { + dictCopy[i] = i & 0xff || 1; + } + + const decompressed = await new Promise((resolve, reject) => { + const chunks = []; + const inf = zlib.createInflate({ dictionary: dictCopy }); + inf.on('data', (chunk) => chunks.push(chunk)); + inf.on('end', () => resolve(Buffer.concat(chunks))); + inf.on('error', (err) => reject(err)); + inf.write(compressed); + inf.end(); + }); + + assert.strictEqual( + decompressed.toString(), + 'hello world', + 'round-trip deflate+inflate with dictionary must preserve data' + ); + }, +}; diff --git a/src/workerd/api/node/tests/zlib-dictionary-resizable-test.wd-test b/src/workerd/api/node/tests/zlib-dictionary-resizable-test.wd-test new file mode 100644 index 00000000000..d48b48ee6f0 --- /dev/null +++ b/src/workerd/api/node/tests/zlib-dictionary-resizable-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "zlib-dictionary-resizable-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "zlib-dictionary-resizable-test.js") + ], + compatibilityFlags = ["experimental", "nodejs_compat", "nodejs_compat_v2", "nodejs_zlib"], + ) + ), + ], +); diff --git a/src/workerd/api/node/tests/zlib-invalid-mode-test.js b/src/workerd/api/node/tests/zlib-invalid-mode-test.js new file mode 100644 index 00000000000..16bd3eb1cce --- /dev/null +++ b/src/workerd/api/node/tests/zlib-invalid-mode-test.js @@ -0,0 +1,78 @@ +// Copyright (c) 2024 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-365: +// ZlibStream::constructor did not validate the mode byte. +// An invalid mode caused KJ_UNREACHABLE in initializeZlib() +// after writeStream() set writing=true, leaving the flag stuck. 
+// The destructor's JSG_ASSERT(!writing) then threw inside the +// noexcept cppgc finalizer, triggering std::terminate(). + +import assert from 'node:assert'; +import { createDeflate } from 'node:zlib'; + +export const zlibInvalidModeIsRejected = { + test() { + const ZlibStream = createDeflate()._handle.constructor; + + // Mode 0 (ZlibMode::NONE) is not a valid zlib mode. + assert.throws(() => new ZlibStream(0), { + name: 'TypeError', + message: /Invalid zlib mode/, + }); + + // Mode 255 is well outside the DEFLATE..UNZIP range. + assert.throws(() => new ZlibStream(255), { + name: 'TypeError', + message: /Invalid zlib mode/, + }); + + // Mode 8 (BROTLI_DECODE) is not valid for ZlibStream. + assert.throws(() => new ZlibStream(8), { + name: 'TypeError', + message: /Invalid zlib mode/, + }); + + // Valid modes (1=DEFLATE through 7=UNZIP) should work. + const validStream = new ZlibStream(1); // DEFLATE + assert.ok(validStream); + }, +}; + +export const zlibInvalidModeCrash = { + test() { + const ZlibStream = createDeflate()._handle.constructor; + + // Construct a ZlibStream with mode 0 (ZlibMode::NONE), which is invalid. + // With the constructor fix, this throws TypeError immediately — that's fine. + let stream; + try { + stream = new ZlibStream(0); + } catch (_e) { + // Constructor correctly rejected the invalid mode. Nothing left to test. + return; + } + + // If we reach here, the constructor did NOT validate the mode (pre-fix code). + // Exercise the write path to demonstrate the crash: + // writeSync sets writing=true, then context()->work() calls initializeZlib() + // which hits KJ_UNREACHABLE for mode NONE. Without KJ_ON_SCOPE_FAILURE, + // writing stays permanently true. When V8 GC later collects this object, + // ~CompressionStream hits JSG_ASSERT(!writing) inside noexcept ~CppgcShim + // -> std::terminate(). 
+ const writeState = new Uint32Array(2); + stream.initialize(15, 6, 8, 0, writeState, () => {}); + + try { + const input = new Uint8Array(1); + const output = new Uint8Array(1024); + stream.writeSync(0, input, 0, 1, output, 0, 1024); + } catch (_e) { + // Expected: KJ_UNREACHABLE throws, but writing is now stuck true. + } + + // Dropping all references and forcing GC should trigger the crash. + // The process should terminate here due to std::terminate(). + }, +}; diff --git a/src/workerd/api/node/tests/zlib-invalid-mode-test.wd-test b/src/workerd/api/node/tests/zlib-invalid-mode-test.wd-test new file mode 100644 index 00000000000..8df353e19ba --- /dev/null +++ b/src/workerd/api/node/tests/zlib-invalid-mode-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "zlib-invalid-mode-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "zlib-invalid-mode-test.js") + ], + compatibilityFlags = ["experimental", "nodejs_compat", "nodejs_compat_v2", "nodejs_zlib"], + ) + ), + ], +); diff --git a/src/workerd/api/node/tests/zlib-resizable-ab-test.js b/src/workerd/api/node/tests/zlib-resizable-ab-test.js new file mode 100644 index 00000000000..4317e4b112b --- /dev/null +++ b/src/workerd/api/node/tests/zlib-resizable-ab-test.js @@ -0,0 +1,60 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 +// +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-73: +// SIGSEGV via resizable ArrayBuffer shrunk during JSG +// options unwrap (TOCTOU) in brotli/zlib/zstd sync APIs. 
+import assert from 'node:assert'; +import zlib from 'node:zlib'; +import { promisify } from 'node:util'; + +const brotliCompressAsync = promisify(zlib.brotliCompress); + +function makeResizableInput(fillByte) { + const ab = new ArrayBuffer(1024, { maxByteLength: 1024 }); + const u8 = new Uint8Array(ab); + u8.fill(fillByte); + return { ab, u8 }; +} + +function makeShrinkingOpts(ab) { + const opts = {}; + Object.defineProperty(opts, 'flush', { + get() { + ab.resize(0); + return 0; + }, + }); + return opts; +} + +export const resizableAbBrotliTest = { + async test() { + const { ab, u8 } = makeResizableInput(0x41); + const opts = makeShrinkingOpts(ab); + const compressed = await brotliCompressAsync(u8, opts); + assert(compressed instanceof Buffer); + assert(compressed.length > 0); + }, +}; + +export const resizableAbZlibTest = { + test() { + const { ab, u8 } = makeResizableInput(0x42); + const opts = makeShrinkingOpts(ab); + const compressed = zlib.deflateSync(u8, opts); + assert(compressed instanceof Buffer); + assert(compressed.length > 0); + }, +}; + +export const resizableAbZstdTest = { + test() { + const { ab, u8 } = makeResizableInput(0x43); + const opts = makeShrinkingOpts(ab); + const compressed = zlib.zstdCompressSync(u8, opts); + assert(compressed instanceof Buffer); + assert(compressed.length > 0); + }, +}; diff --git a/src/workerd/api/node/tests/zlib-resizable-ab-test.wd-test b/src/workerd/api/node/tests/zlib-resizable-ab-test.wd-test new file mode 100644 index 00000000000..2fd994dfc32 --- /dev/null +++ b/src/workerd/api/node/tests/zlib-resizable-ab-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "zlib-resizable-ab-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "zlib-resizable-ab-test.js") + ], + compatibilityFlags = ["experimental", "nodejs_compat", "nodejs_compat_v2", "nodejs_zlib"], + ) + ), + ], +); diff --git 
a/src/workerd/api/node/tests/zlib-resizable-buffer-test.js b/src/workerd/api/node/tests/zlib-resizable-buffer-test.js new file mode 100644 index 00000000000..69f33e94a39 --- /dev/null +++ b/src/workerd/api/node/tests/zlib-resizable-buffer-test.js @@ -0,0 +1,153 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 +// +// TOCTOU between buffer snapshot and ToUint32 coercion in CompressionStream::write[Sync] allowed a +// guest to resize or detach the underlying storage between the buffer snapshot and the bounds +// check, causing zlib to write into PROT_NONE pages. +import { rejects, throws } from 'node:assert'; +import zlib from 'node:zlib'; + +const N = 1 << 16; + +function createHandle() { + return zlib.createDeflate()._handle; +} + +export const regression_AUTOVULN_CLOUDFLARE_WORKERD_295_writeSync_output = { + test() { + const handle = createHandle(); + + const input = new Uint8Array(N).fill(0x41); + const rab = new ArrayBuffer(N, { maxByteLength: N }); + const out = new Uint8Array(rab); + + // valueOf() shrinks the resizable ArrayBuffer to 0 bytes + // after JSG has already snapshotted the output buffer size. 
+ const evil = { + valueOf() { + rab.resize(0); + return N; + }, + }; + + throws( + () => { + handle.writeSync(4, input, 0, N, out, 0, evil); + }, + { + name: 'Error', + message: 'Output access is not within bounds', + } + ); + }, +}; + +export const regression_AUTOVULN_CLOUDFLARE_WORKERD_295_write_output = { + async test() { + const handle = createHandle(); + + const input = new Uint8Array(N).fill(0x41); + const rab = new ArrayBuffer(N, { maxByteLength: N }); + const out = new Uint8Array(rab); + + const evil = { + valueOf() { + rab.resize(0); + return N; + }, + }; + + await rejects( + async () => { + handle.write(4, input, 0, N, out, 0, evil); + }, + { + name: 'Error', + message: 'Output access is not within bounds', + } + ); + }, +}; + +export const regression_AUTOVULN_CLOUDFLARE_WORKERD_295_detached_input = { + test() { + const handle = createHandle(); + + const inputBuffer = new ArrayBuffer(N); + const input = new Uint8Array(inputBuffer).fill(0x41); + const out = new Uint8Array(N); + + const evil = { + valueOf() { + structuredClone(null, { transfer: [inputBuffer] }); + return N; + }, + }; + + throws( + () => { + handle.writeSync(4, input, 0, N, out, 0, evil); + }, + { + name: 'Error', + message: 'Input access is not within bounds', + } + ); + }, +}; + +export const regression_AUTOVULN_CLOUDFLARE_WORKERD_295_writeSync_input = { + test() { + const handle = createHandle(); + + const rab = new ArrayBuffer(N, { maxByteLength: N }); + const input = new Uint8Array(rab).fill(0x41); + const out = new Uint8Array(N); + + const evil = { + valueOf() { + rab.resize(0); + return N; + }, + }; + + throws( + () => { + handle.writeSync(4, input, 0, N, out, 0, evil); + }, + { + name: 'Error', + message: 'Input access is not within bounds', + } + ); + }, +}; + +export const regression_AUTOVULN_CLOUDFLARE_WORKERD_295_non_zero_byte_offset = { + test() { + const handle = createHandle(); + + const offset = N; + const input = new Uint8Array(N).fill(0x41); + const rab = new ArrayBuffer(N 
+ offset, { maxByteLength: N + offset }); + const out = new Uint8Array(rab, offset, N); + + const evil = { + valueOf() { + rab.resize(N); + return N; + }, + }; + + throws( + () => { + handle.writeSync(4, input, 0, N, out, 0, evil); + }, + { + name: 'Error', + message: 'Output access is not within bounds', + } + ); + }, +}; diff --git a/src/workerd/api/node/tests/zlib-resizable-buffer-test.wd-test b/src/workerd/api/node/tests/zlib-resizable-buffer-test.wd-test new file mode 100644 index 00000000000..b73ba921cd6 --- /dev/null +++ b/src/workerd/api/node/tests/zlib-resizable-buffer-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "zlib-resizable-buffer-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "zlib-resizable-buffer-test.js") + ], + compatibilityFlags = ["experimental", "nodejs_compat", "nodejs_compat_v2", "nodejs_zlib"], + ) + ), + ], +); diff --git a/src/workerd/api/node/zlib-util.c++ b/src/workerd/api/node/zlib-util.c++ index c757a639a6c..7bf303b7367 100644 --- a/src/workerd/api/node/zlib-util.c++ +++ b/src/workerd/api/node/zlib-util.c++ @@ -164,7 +164,12 @@ void ZlibContext::initialize(int _level, } KJ_IF_SOME(dict, _dictionary) { - dictionary = kj::mv(dict); + // Deep-copy the dictionary bytes into runtime-owned storage. The incoming + // kj::Array from jsg::asBytes() is a non-owning view into the + // V8 BackingStore; if the JS-side ArrayBuffer is resizable, the caller can + // shrink it to 0 before the deferred setDictionary() runs, leaving the + // stored pointer dangling into PROT_NONE pages (SIGSEGV). + dictionary = dict.clone(); } } @@ -224,6 +229,13 @@ bool ZlibContext::initializeZlib() { if (initialized) { return false; } + + // zlib's manual states: "The application must initialize zalloc, zfree and opaque before calling + // the init function." 
+ stream.zalloc = CompressionAllocator::AllocForZlib; + stream.zfree = CompressionAllocator::FreeForZlib; + stream.opaque = &allocator; + switch (mode) { case ZlibMode::DEFLATE: case ZlibMode::GZIP: @@ -453,7 +465,13 @@ jsg::Ref> ZlibUtil::CompressionS template ZlibUtil::CompressionStream::~CompressionStream() { - JSG_ASSERT(!writing, Error, "Writing to compression stream"_kj); + // This destructor runs from cppgc's noexcept finalizer (~CppgcShim); it + // MUST NOT throw. A throwing assertion here crosses the noexcept boundary + // and triggers std::terminate(), killing the entire workerd process. + if (writing) { + KJ_LOG(ERROR, "CompressionStream destroyed while writing=true; state machine bug"); + return; // Skip close() — the stream state is inconsistent. + } close(); } @@ -480,6 +498,12 @@ void ZlibUtil::CompressionStream::writeStream( JSG_REQUIRE(!pending_close, Error, "Pending close"_kj); writing = true; + // Ensure `writing` is reset on any exception path so that the destructor's + // check never fires due to a stuck flag. Without this, a throwing backend + // (e.g. KJ_UNREACHABLE in initializeZlib()) leaves `writing` permanently + // true, and the destructor's assertion crosses the noexcept ~CppgcShim() + // boundary during V8 GC, triggering std::terminate(). + KJ_ON_SCOPE_FAILURE({ writing = false; }); context()->setBuffers(input, output); context()->setFlush(flush); @@ -536,7 +560,14 @@ void ZlibUtil::CompressionStream::close() { return; } closed = true; - JSG_ASSERT(initialized, Error, "Closing before initialized"_kj); + // Guard against closing an uninitialized stream. This can happen when the + // destructor calls close() on a handle that was constructed but never had + // initialize() called (e.g. via _handle.constructor). Using a non-throwing + // early return instead of JSG_ASSERT avoids a fatal throw from the noexcept + // cppgc destructor chain. 
+ if (!initialized) { + return; + } // Drop JS-heap refs eagerly so callers that explicitly close don't have to // wait for the cycle collector. visitForGc handles the unclosed case. writeCallback = kj::none; @@ -576,10 +607,10 @@ template template void ZlibUtil::CompressionStream::write(jsg::Lock& js, int flush, - jsg::Optional> input, + jsg::Optional input, uint32_t inputOffset, uint32_t inputLength, - kj::Array output, + jsg::JsBufferSource output, uint32_t outputOffset, uint32_t outputLength) { if (flush != Z_NO_FLUSH && flush != Z_PARTIAL_FLUSH && flush != Z_SYNC_FLUSH && @@ -593,19 +624,23 @@ void ZlibUtil::CompressionStream::write(jsg::Lock& js, inputOffset = 0; } - auto input_ensured = input.map([](auto& val) { return val.asPtr(); }).orDefault({}); + auto outputBytes = output.asArrayPtr(); + kj::ArrayPtr inputBytes; + KJ_IF_SOME(i, input) { + inputBytes = i.asArrayPtr(); + } // Check for integer overflow... - JSG_REQUIRE(inputOffset + inputLength >= inputOffset, Error, "Input access it not within bounds"); + JSG_REQUIRE(inputOffset + inputLength >= inputOffset, Error, "Input access is not within bounds"); JSG_REQUIRE( - outputOffset + outputLength >= outputOffset, Error, "Input access it not within bounds"); - JSG_REQUIRE(IsWithinBounds(inputOffset, inputLength, input_ensured.size()), Error, + outputOffset + outputLength >= outputOffset, Error, "Output access is not within bounds"); + JSG_REQUIRE(IsWithinBounds(inputOffset, inputLength, inputBytes.size()), Error, "Input access is not within bounds"_kj); - JSG_REQUIRE(IsWithinBounds(outputOffset, outputLength, output.size()), Error, + JSG_REQUIRE(IsWithinBounds(outputOffset, outputLength, outputBytes.size()), Error, "Output access is not within bounds"_kj); - writeStream(js, flush, input_ensured.slice(inputOffset, inputOffset + inputLength), - output.slice(outputOffset, outputOffset + outputLength)); + writeStream(js, flush, inputBytes.slice(inputOffset, inputOffset + inputLength), + 
outputBytes.slice(outputOffset, outputOffset + outputLength)); } template @@ -617,7 +652,12 @@ void ZlibUtil::CompressionStream::reset(jsg::Lock& js) { jsg::Ref ZlibUtil::ZlibStream::constructor( jsg::Lock& js, ZlibModeValue mode) { - return js.alloc(static_cast(mode), js.getExternalMemoryTarget()); + auto m = static_cast(mode); + JSG_REQUIRE(m == ZlibMode::DEFLATE || m == ZlibMode::INFLATE || m == ZlibMode::GZIP || + m == ZlibMode::GUNZIP || m == ZlibMode::DEFLATERAW || m == ZlibMode::INFLATERAW || + m == ZlibMode::UNZIP, + TypeError, "Invalid zlib mode"_kj); + return js.alloc(m, js.getExternalMemoryTarget()); } void ZlibUtil::ZlibStream::initialize(jsg::Lock& js, @@ -629,7 +669,6 @@ void ZlibUtil::ZlibStream::initialize(jsg::Lock& js, jsg::Function writeCallback, jsg::Optional> dictionary) { initializeStream(js, writeState, kj::mv(writeCallback)); - allocator.configure(context()->getStream()); context()->initialize(level, windowBits, memLevel, strategy, kj::mv(dictionary)); } @@ -670,9 +709,12 @@ void BrotliContext::getAfterWriteResult(uint32_t* _availIn, uint32_t* _availOut) *_availOut = availOut; } -BrotliEncoderContext::BrotliEncoderContext(ZlibMode _mode): BrotliContext(_mode) { - auto instance = BrotliEncoderCreateInstance(alloc_brotli, free_brotli, alloc_opaque_brotli); - state = kj::disposeWith(instance); +BrotliEncoderContext::BrotliEncoderContext(CompressionAllocator& allocator, ZlibMode _mode) + : BrotliContext(allocator, _mode) { + // NOTE: Ignores any returned errors. + // TODO(soon): It's possible that initialization doesn't need to happen until `initialize` is + // called elsewhere. I'm keeping it like this to avoid changing the existing behaviour. 
+ auto _ = initialize(); } void BrotliEncoderContext::work() { @@ -687,13 +729,9 @@ void BrotliEncoderContext::work() { streamEnd = lastResult && BrotliEncoderIsFinished(state.get()); } -kj::Maybe BrotliEncoderContext::initialize( - brotli_alloc_func init_alloc_func, brotli_free_func init_free_func, void* init_opaque_func) { - alloc_brotli = init_alloc_func; - free_brotli = init_free_func; - alloc_opaque_brotli = init_opaque_func; - - auto instance = BrotliEncoderCreateInstance(alloc_brotli, free_brotli, alloc_opaque_brotli); +kj::Maybe BrotliEncoderContext::initialize() { + auto instance = BrotliEncoderCreateInstance( + CompressionAllocator::AllocForBrotli, CompressionAllocator::FreeForZlib, &allocator); state = kj::disposeWith(kj::mv(instance)); if (state.get() == nullptr) { @@ -705,7 +743,7 @@ kj::Maybe BrotliEncoderContext::initialize( } kj::Maybe BrotliEncoderContext::resetStream() { - return initialize(alloc_brotli, free_brotli, alloc_opaque_brotli); + return initialize(); } kj::Maybe BrotliEncoderContext::setParams(int key, uint32_t value) { @@ -728,18 +766,17 @@ bool BrotliEncoderContext::isStreamEnd() const { return streamEnd; } -BrotliDecoderContext::BrotliDecoderContext(ZlibMode _mode): BrotliContext(_mode) { - auto instance = BrotliDecoderCreateInstance(alloc_brotli, free_brotli, alloc_opaque_brotli); - state = kj::disposeWith(instance); +BrotliDecoderContext::BrotliDecoderContext(CompressionAllocator& allocator, ZlibMode _mode) + : BrotliContext(allocator, _mode) { + // NOTE: Ignores any returned errors. + // TODO(soon): It's possible that initialization doesn't need to happen until `initialize` is + // called elsewhere. I'm keeping it like this to avoid changing the existing behaviour. 
+ auto _ = initialize(); } -kj::Maybe BrotliDecoderContext::initialize( - brotli_alloc_func init_alloc_func, brotli_free_func init_free_func, void* init_opaque_func) { - alloc_brotli = init_alloc_func; - free_brotli = init_free_func; - alloc_opaque_brotli = init_opaque_func; - - auto instance = BrotliDecoderCreateInstance(alloc_brotli, free_brotli, alloc_opaque_brotli); +kj::Maybe BrotliDecoderContext::initialize() { + auto instance = BrotliDecoderCreateInstance( + CompressionAllocator::AllocForBrotli, CompressionAllocator::FreeForZlib, &allocator); state = kj::disposeWith(kj::mv(instance)); if (state.get() == nullptr) { @@ -765,7 +802,7 @@ void BrotliDecoderContext::work() { } kj::Maybe BrotliDecoderContext::resetStream() { - return initialize(alloc_brotli, free_brotli, alloc_opaque_brotli); + return initialize(); } kj::Maybe BrotliDecoderContext::setParams(int key, uint32_t value) { @@ -1048,8 +1085,7 @@ bool ZlibUtil::BrotliCompressionStream::initialize(jsg::Lock jsg::JsArrayBufferView writeResult, jsg::Function writeCallback) { this->initializeStream(js, writeResult, kj::mv(writeCallback)); - auto maybeError = this->context()->initialize( - CompressionAllocator::AllocForBrotli, CompressionAllocator::FreeForZlib, &this->allocator); + auto maybeError = this->context()->initialize(); KJ_IF_SOME(err, maybeError) { this->emitError(js, kj::mv(err)); @@ -1104,8 +1140,7 @@ kj::Array ZlibUtil::zlibSync( // Any use of zlib APIs constitutes an implicit dependency on Allocator which must // remain alive until the zlib stream is destroyed CompressionAllocator allocator(js.getExternalMemoryTarget()); - ZlibContext ctx(static_cast(mode)); - allocator.configure(ctx.getStream()); + ZlibContext ctx(allocator, static_cast(mode)); auto chunkSize = opts.chunkSize.orDefault(ZLIB_PERFORMANT_CHUNK_SIZE); auto maxOutputLength = opts.maxOutputLength.orDefault(Z_MAX_CHUNK); @@ -1160,7 +1195,7 @@ kj::Array ZlibUtil::brotliSync( // Any use of brotli APIs constitutes an implicit dependency 
on Allocator which must // remain alive until the brotli state is destroyed CompressionAllocator allocator(js.getExternalMemoryTarget()); - Context ctx(Context::Mode); + Context ctx(allocator, Context::Mode); auto chunkSize = opts.chunkSize.orDefault(ZLIB_PERFORMANT_CHUNK_SIZE); auto maxOutputLength = opts.maxOutputLength.orDefault(Z_MAX_CHUNK); @@ -1174,9 +1209,7 @@ kj::Array ZlibUtil::brotliSync( Z_MAX_CHUNK, ". Received ", maxOutputLength)); GrowableBuffer result(ZLIB_PERFORMANT_CHUNK_SIZE, maxOutputLength); - KJ_IF_SOME(err, - ctx.initialize( - CompressionAllocator::AllocForBrotli, CompressionAllocator::FreeForZlib, &allocator)) { + KJ_IF_SOME(err, ctx.initialize()) { JSG_FAIL_REQUIRE(Error, err.message); } @@ -1291,11 +1324,11 @@ void ZlibUtil::zstdWithCallback( #define CREATE_TEMPLATE(T) \ template class ZlibUtil::CompressionStream; \ template void ZlibUtil::CompressionStream::write(jsg::Lock & js, int flush, \ - jsg::Optional> input, uint32_t inputOffset, uint32_t inputLength, \ - kj::Array output, uint32_t outputOffset, uint32_t outputLength); \ + jsg::Optional input, uint32_t inputOffset, uint32_t inputLength, \ + jsg::JsBufferSource output, uint32_t outputOffset, uint32_t outputLength); \ template void ZlibUtil::CompressionStream::write(jsg::Lock & js, int flush, \ - jsg::Optional> input, uint32_t inputOffset, uint32_t inputLength, \ - kj::Array output, uint32_t outputOffset, uint32_t outputLength); + jsg::Optional input, uint32_t inputOffset, uint32_t inputLength, \ + jsg::JsBufferSource output, uint32_t outputOffset, uint32_t outputLength); CREATE_TEMPLATE(ZlibContext) CREATE_TEMPLATE(BrotliEncoderContext) diff --git a/src/workerd/api/node/zlib-util.h b/src/workerd/api/node/zlib-util.h index 9248368e7a5..c15cf253364 100644 --- a/src/workerd/api/node/zlib-util.h +++ b/src/workerd/api/node/zlib-util.h @@ -96,8 +96,9 @@ struct CompressionError { class ZlibContext final { public: - explicit ZlibContext(ZlibMode _mode): mode(_mode) {} - ZlibContext() = 
default; + explicit ZlibContext(CompressionAllocator& allocator, ZlibMode _mode) + : allocator(allocator), + mode(_mode) {} ~ZlibContext() noexcept(false); KJ_DISALLOW_COPY_AND_MOVE(ZlibContext); @@ -139,11 +140,6 @@ class ZlibContext final { }; kj::Maybe resetStream(); kj::Maybe getError() const; - void setAllocationFunctions(alloc_func alloc, free_func free, void* opaque) { - stream.zalloc = alloc; - stream.zfree = free; - stream.opaque = opaque; - } // Equivalent to Node.js' `DoThreadPoolWork` function. // Ref: https://github.com/nodejs/node/blob/9edf4a0856681a7665bd9dcf2ca7cac252784b98/src/node_zlib.cc#L760 @@ -212,6 +208,7 @@ class ZlibContext final { }; bool initialized = false; + CompressionAllocator& allocator; ZlibMode mode = ZlibMode::NONE; int flush = Z_NO_FLUSH; int windowBits = 0; @@ -232,7 +229,9 @@ using CompressionStreamErrorHandler = jsg::Function input, kj::ArrayPtr output); void setInputBuffer(kj::ArrayPtr input); @@ -261,32 +260,25 @@ class BrotliContext { }; protected: + CompressionAllocator& allocator; ZlibMode mode; const uint8_t* nextIn = nullptr; uint8_t* nextOut = nullptr; size_t availIn = 0; size_t availOut = 0; BrotliEncoderOperation flush = BROTLI_OPERATION_PROCESS; - - // TODO(addaleax): These should not need to be stored here. - // This is currently only done this way to make implementing ResetStream() - // easier. - brotli_alloc_func alloc_brotli = nullptr; - brotli_free_func free_brotli = nullptr; - void* alloc_opaque_brotli = nullptr; }; class BrotliEncoderContext final: public BrotliContext { public: static const ZlibMode Mode = ZlibMode::BROTLI_ENCODE; - explicit BrotliEncoderContext(ZlibMode _mode); + explicit BrotliEncoderContext(CompressionAllocator& allocator, ZlibMode _mode); KJ_DISALLOW_COPY_AND_MOVE(BrotliEncoderContext); // Equivalent to Node.js' `DoThreadPoolWork` implementation. 
void work(); - kj::Maybe initialize( - brotli_alloc_func init_alloc_func, brotli_free_func init_free_func, void* init_opaque_func); + kj::Maybe initialize(); kj::Maybe resetStream(); kj::Maybe setParams(int key, uint32_t value); kj::Maybe getError() const; @@ -301,14 +293,13 @@ class BrotliEncoderContext final: public BrotliContext { class BrotliDecoderContext final: public BrotliContext { public: static const ZlibMode Mode = ZlibMode::BROTLI_DECODE; - explicit BrotliDecoderContext(ZlibMode _mode); + explicit BrotliDecoderContext(CompressionAllocator& allocator, ZlibMode _mode); KJ_DISALLOW_COPY_AND_MOVE(BrotliDecoderContext); // Equivalent to Node.js' `DoThreadPoolWork` implementation. void work(); - kj::Maybe initialize( - brotli_alloc_func init_alloc_func, brotli_free_func init_free_func, void* init_opaque_func); + kj::Maybe initialize(); kj::Maybe resetStream(); kj::Maybe setParams(int key, uint32_t value); kj::Maybe getError() const; @@ -362,6 +353,8 @@ class ZstdEncoderContext final: public ZstdContext { public: static const ZlibMode Mode = ZlibMode::ZSTD_ENCODE; explicit ZstdEncoderContext(ZlibMode _mode); + explicit ZstdEncoderContext(CompressionAllocator& _allocator, ZlibMode _mode) + : ZstdEncoderContext(_mode) {} KJ_DISALLOW_COPY_AND_MOVE(ZstdEncoderContext); void work(); @@ -381,6 +374,8 @@ class ZstdDecoderContext final: public ZstdContext { public: static const ZlibMode Mode = ZlibMode::ZSTD_DECODE; explicit ZstdDecoderContext(ZlibMode _mode); + explicit ZstdDecoderContext(CompressionAllocator& _allocator, ZlibMode _mode) + : ZstdDecoderContext(_mode) {} KJ_DISALLOW_COPY_AND_MOVE(ZstdDecoderContext); void work(); @@ -409,7 +404,7 @@ class ZlibUtil final: public jsg::Object { explicit CompressionStream( ZlibMode _mode, kj::Arc&& externalMemoryTarget) : allocator(kj::mv(externalMemoryTarget)), - context_(_mode) {} + context_(allocator, _mode) {} // TODO(soon): Find a way to add noexcept(false) to this destructor. 
~CompressionStream(); KJ_DISALLOW_COPY_AND_MOVE(CompressionStream); @@ -431,10 +426,10 @@ class ZlibUtil final: public jsg::Object { template void write(jsg::Lock& js, int flush, - jsg::Optional> input, + jsg::Optional input, uint32_t inputOffset, uint32_t inputLength, - kj::Array output, + jsg::JsBufferSource output, uint32_t outputOffset, uint32_t outputLength); void reset(jsg::Lock& js); diff --git a/src/workerd/api/pyodide/pyodide-test.c++ b/src/workerd/api/pyodide/pyodide-test.c++ index 45689840f34..7b041e4c2eb 100644 --- a/src/workerd/api/pyodide/pyodide-test.c++ +++ b/src/workerd/api/pyodide/pyodide-test.c++ @@ -24,6 +24,7 @@ KJ_TEST("getPythonSnapshotRelease") { auto res = KJ_ASSERT_NONNULL(getPythonSnapshotRelease(featureFlags)); KJ_ASSERT(res.getPyodide() == "0.26.0a2"); KJ_ASSERT(res.getFlagName() == "pythonWorkers"); + // The bundle integrity checksum is plumbed through from python_metadata.bzl. } featureFlags.setPythonWorkersDevPyodide(true); @@ -55,195 +56,6 @@ KJ_TEST("getPythonSnapshotRelease") { } } -KJ_TEST("basic `import` tests") { - auto files = kj::heapArrayBuilder(2); - files.add(kj::str("import a\nimport z")); - files.add(kj::str("import b")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 3); - KJ_REQUIRE(result[0] == "a"); - KJ_REQUIRE(result[1] == "z"); - KJ_REQUIRE(result[2] == "b"); -} - -KJ_TEST("supports whitespace") { - auto files = kj::heapArrayBuilder(1); - files.add(kj::str("import a\nimport \n\tz")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 2); - KJ_REQUIRE(result[0] == "a"); - KJ_REQUIRE(result[1] == "z"); -} - -KJ_TEST("supports windows newlines") { - auto files = kj::heapArrayBuilder(1); - files.add(kj::str("import a\r\nimport \r\n\tz")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 2); - KJ_REQUIRE(result[0] == "a"); - 
KJ_REQUIRE(result[1] == "z"); -} - -KJ_TEST("basic `from` test") { - auto files = kj::heapArrayBuilder(1); - files.add(kj::str("from x import a,b\nfrom z import y")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 2); - KJ_REQUIRE(result[0] == "x"); - KJ_REQUIRE(result[1] == "z"); -} - -KJ_TEST("ignores indented blocks") { - auto files = kj::heapArrayBuilder(1); - files.add(kj::str("import a\nif True:\n import x\nimport y")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 2); - KJ_REQUIRE(result[0] == "a"); - KJ_REQUIRE(result[1] == "y"); -} - -KJ_TEST("supports nested imports") { - auto files = kj::heapArrayBuilder(1); - files.add(kj::str("import a.b\nimport z.x.y.i")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 2); - KJ_REQUIRE(result[0] == "a.b"); - KJ_REQUIRE(result[1] == "z.x.y.i"); -} - -KJ_TEST("nested `from` test") { - auto files = kj::heapArrayBuilder(1); - files.add(kj::str("from x.y.z import a,b\nfrom z import y")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 2); - KJ_REQUIRE(result[0] == "x.y.z"); - KJ_REQUIRE(result[1] == "z"); -} - -KJ_TEST("ignores trailing period") { - auto files = kj::heapArrayBuilder(1); - files.add(kj::str("import a.b.\nimport z.x.y.i.")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 0); -} - -KJ_TEST("ignores relative import") { - // This is where we diverge from the old AST-based approach. It would have returned `y` in the - // input below. 
- auto files = kj::heapArrayBuilder(1); - files.add(kj::str("import .a.b\nimport ..z.x\nfrom .y import x")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 0); -} - -KJ_TEST("supports commas") { - auto files = kj::heapArrayBuilder(1); - files.add(kj::str("import a,b")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 2); - KJ_REQUIRE(result[0] == "a"); - KJ_REQUIRE(result[1] == "b"); -} - -KJ_TEST("supports backslash") { - auto files = kj::heapArrayBuilder(4); - files.add(kj::str("import a\\\n,b")); - files.add(kj::str("import\\\n q,w")); - files.add(kj::str("from \\\nx import y")); - files.add(kj::str("from \\\n c import y")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 6); - KJ_REQUIRE(result[0] == "a"); - KJ_REQUIRE(result[1] == "b"); - KJ_REQUIRE(result[2] == "q"); - KJ_REQUIRE(result[3] == "w"); - KJ_REQUIRE(result[4] == "x"); - KJ_REQUIRE(result[5] == "c"); -} - -KJ_TEST("multiline-strings ignored") { - auto files = kj::heapArrayBuilder(4); - files.add(kj::str(R"SCRIPT( -FOO=""" -import x -from y import z -""" -)SCRIPT")); - files.add(kj::str(R"SCRIPT( -FOO=''' -import f -from g import z -''' -)SCRIPT")); - files.add(kj::str(R"SCRIPT(FOO = "\ -import b \ -")SCRIPT")); - files.add(kj::str("FOO=\"\"\" \n", R"SCRIPT(import x -from y import z -""")SCRIPT")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 0); -} - -KJ_TEST("multiline-strings with imports in-between") { - auto files = kj::heapArrayBuilder(1); - files.add(kj::str( - R"SCRIPT(FOO=""" -import x -from y import z -"""import q -import w -BAR=""" -import e -""" -from t import u)SCRIPT")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 2); - KJ_REQUIRE(result[0] == "w"); 
- KJ_REQUIRE(result[1] == "t"); -} - -KJ_TEST("import after string literal") { - auto files = kj::heapArrayBuilder(1); - files.add(kj::str(R"SCRIPT(import a -"import b)SCRIPT")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 1); - KJ_REQUIRE(result[0] == "a"); -} - -KJ_TEST("import after `i`") { - auto files = kj::heapArrayBuilder(1); - files.add(kj::str(R"SCRIPT(import a -iimport b)SCRIPT")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 1); - KJ_REQUIRE(result[0] == "a"); -} - -KJ_TEST("langchain import") { - auto files = kj::heapArrayBuilder(1); - files.add(kj::str(R"SCRIPT(from js import Response, console, URL -from langchain.chat_models import ChatOpenAI -import openai)SCRIPT")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 3); - KJ_REQUIRE(result[0] == "js"); - KJ_REQUIRE(result[1] == "langchain.chat_models"); - KJ_REQUIRE(result[2] == "openai"); -} - -KJ_TEST("quote in multiline string") { - auto files = kj::heapArrayBuilder(1); - files.add(kj::str(R"SCRIPT(temp = """ -w["h -""")SCRIPT")); - auto result = pyodide::PythonModuleInfo::parsePythonScriptImports(files.finish()); - KJ_REQUIRE(result.size() == 0); -} - -using pyodide::PythonModuleInfo; - template kj::Array strArray(Params&&... params) { return kj::arr(kj::str(params)...); @@ -264,107 +76,53 @@ kj::HashSet strSet(Params&&... 
params) { return set; } -KJ_TEST("basic test of getPackageSnapshotImports") { - auto a = pyodide::PythonModuleInfo(strArray("a.py"), - bytesArray("from js import Response\n" - "import asyncio\n" - "import numbers\n" - "def on_fetch(request):\n" - " return Response.new('Hello')\n")); - auto result = a.getPackageSnapshotImports("0.26.0a2"); - KJ_REQUIRE(result.size() == 1); - KJ_REQUIRE(result[0] == "numbers"); -}; - -KJ_TEST("basic test of getPackageSnapshotImports user module") { - auto a = pyodide::PythonModuleInfo(strArray("a.py", "numbers.py"), - bytesArray("from js import Response\n" - "import asyncio\n" - "import numbers\n" - "def on_fetch(request):\n" - " return Response.new('Hello')\n", - "")); - auto result = a.getPackageSnapshotImports("0.26.0a2"); - KJ_REQUIRE(result.size() == 0); -}; - -kj::Array filterPythonScriptImports( - kj::Array names, kj::ArrayPtr imports, kj::StringPtr version) { - auto contentsBuilder = kj::heapArrayBuilder>(names.size()); - for (auto _: kj::zeroTo(names.size())) { - (void)_; - contentsBuilder.add(kj::Array(nullptr)); - } - auto modInfo = pyodide::PythonModuleInfo(kj::mv(names), contentsBuilder.finish()); - auto modSet = modInfo.getWorkerModuleSet(); - return PythonModuleInfo::filterPythonScriptImports(kj::mv(modSet), kj::mv(imports), version); -} +KJ_TEST("computePyodideBundleIntegrity produces sha256 subresource-integrity strings") { + // Known-answer test: SHA-256 of the empty input. + KJ_EXPECT(pyodide::computePyodideBundleIntegrity(kj::ArrayPtr()) == + "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="); -KJ_TEST("Simple pass through") { - auto imports = strArray("b", "c"); - auto result = filterPythonScriptImports({}, kj::mv(imports), ""); - KJ_REQUIRE(result.size() == 2); - KJ_REQUIRE(result[0] == "b"); - KJ_REQUIRE(result[1] == "c"); + // SHA-256 of "abc". 
+ auto abc = "abc"_kj.asBytes(); + KJ_EXPECT(pyodide::computePyodideBundleIntegrity(abc) == + "sha256-ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0="); } -KJ_TEST("pyodide and submodules") { - auto imports = strArray("pyodide", "pyodide.ffi"); - auto result = filterPythonScriptImports({}, kj::mv(imports), "0.26.0a2"); - KJ_REQUIRE(result.size() == 0); -} +KJ_TEST("verifyPyodideBundleIntegrity accepts matching checksums") { + auto data = "hello pyodide"_kj.asBytes(); + auto integrity = pyodide::computePyodideBundleIntegrity(data); -KJ_TEST("js and submodules") { - auto imports = strArray("js", "js.crypto"); - auto result = filterPythonScriptImports({}, kj::mv(imports), "0.26.0a2"); - KJ_REQUIRE(result.size() == 0); + // Should not throw when the checksum matches. + pyodide::verifyPyodideBundleIntegrity("0.28.2"_kj, integrity, data); } -KJ_TEST("importlib and submodules") { - // importlib and importlib.metadata are imported into the baseline snapshot, but importlib.resources is not. - auto imports = strArray("importlib", "importlib.metadata", "importlib.resources"); - auto result = filterPythonScriptImports({}, kj::mv(imports), ""); - KJ_REQUIRE(result.size() == 1); - KJ_REQUIRE(result[0] == "importlib.resources"); -} +KJ_TEST("verifyPyodideBundleIntegrity rejects a missing checksum for released bundles") { + auto data = "hello pyodide"_kj.asBytes(); -KJ_TEST("Filter worker .py files") { - auto workerModules = strArray("b.py", "c.py"); - auto imports = strArray("b", "c", "d"); - auto result = filterPythonScriptImports(kj::mv(workerModules), kj::mv(imports), ""); - KJ_REQUIRE(result.size() == 1); - KJ_REQUIRE(result[0] == "d"); + // A non-dev bundle without a published checksum is an error. 
+ KJ_EXPECT_THROW_MESSAGE("missing an integrity checksum", + pyodide::verifyPyodideBundleIntegrity("0.28.2"_kj, nullptr, data)); + KJ_EXPECT_THROW_MESSAGE("missing an integrity checksum", + pyodide::verifyPyodideBundleIntegrity("0.28.2"_kj, ""_kj, data)); } -KJ_TEST("Filter worker module/__init__.py") { - auto workerModules = strArray("a/__init__.py", "b/__init__.py", "c/a.py"); - auto imports = strArray("a", "b", "c"); - auto result = filterPythonScriptImports(kj::mv(workerModules), kj::mv(imports), ""); - KJ_REQUIRE(result.size() == 0); -} +KJ_TEST("verifyPyodideBundleIntegrity skips the dev bundle") { + auto data = "hello pyodide"_kj.asBytes(); + auto tampered = "hello pyodide!"_kj.asBytes(); + auto integrity = pyodide::computePyodideBundleIntegrity(data); -KJ_TEST("Filters out subdir/submodule") { - auto workerModules = strArray("subdir/submodule.py"); - auto imports = strArray("subdir.submodule"); - auto result = filterPythonScriptImports(kj::mv(workerModules), kj::mv(imports), ""); - KJ_REQUIRE(result.size() == 0); + // The "dev" bundle is built locally and has no published checksum, so verification is skipped + // even when the supplied integrity does not match, and an empty integrity is allowed. 
+ pyodide::verifyPyodideBundleIntegrity("dev"_kj, integrity, tampered); + pyodide::verifyPyodideBundleIntegrity("dev"_kj, nullptr, tampered); } -KJ_TEST("Filters out so") { - auto workerModules = strArray("a.so", "b.txt"); - auto imports = strArray("a", "b"); - auto result = filterPythonScriptImports(kj::mv(workerModules), kj::mv(imports), ""); - KJ_REQUIRE(result.size() == 1); - KJ_REQUIRE(result[0] == "b"); -} +KJ_TEST("verifyPyodideBundleIntegrity rejects mismatching checksums") { + auto data = "hello pyodide"_kj.asBytes(); + auto tampered = "hello pyodide!"_kj.asBytes(); + auto integrity = pyodide::computePyodideBundleIntegrity(data); -KJ_TEST("Filters out vendor stuff") { - auto workerModules = strArray("python_modules/a.py", "python_modules/package/b.py", - "python_modules/c.so", "python_modules/x.txt"); - auto imports = strArray("a", "package", "x"); - auto result = filterPythonScriptImports(kj::mv(workerModules), kj::mv(imports), ""); - KJ_REQUIRE(result.size() == 1); - KJ_REQUIRE(result[0] == "x"); + KJ_EXPECT_THROW_MESSAGE("integrity check failed", + pyodide::verifyPyodideBundleIntegrity("0.28.2"_kj, integrity, tampered)); } } // namespace diff --git a/src/workerd/api/pyodide/pyodide.c++ b/src/workerd/api/pyodide/pyodide.c++ index ecd9fd18074..a0c894b5f4a 100644 --- a/src/workerd/api/pyodide/pyodide.c++ +++ b/src/workerd/api/pyodide/pyodide.c++ @@ -8,9 +8,9 @@ #include #include #include -#include #include +#include #include #include @@ -20,10 +20,9 @@ #include #include #include +#include #include -#include // for std::sort - namespace workerd::api::pyodide { // singleton that owns bundle @@ -48,16 +47,6 @@ void PyodideBundleManager::setPyodideBundleData( kj::mv(version), {.messageReader = kj::mv(messageReader), .bundle = bundle}); } -const kj::Maybe&> PyodidePackageManager::getPyodidePackage( - kj::StringPtr id) const { - return packages.lockShared()->find(id); -} - -void PyodidePackageManager::setPyodidePackageData( - kj::String id, kj::Array data) const 
{ - packages.lockExclusive()->insert(kj::mv(id), kj::mv(data)); -} - static int readToTarget( kj::ArrayPtr source, int offset, kj::ArrayPtr buf) { int size = source.size(); @@ -140,26 +129,6 @@ kj::HashSet PythonModuleInfo::getWorkerModuleSet() { return result; } -kj::Array PythonModuleInfo::getPackageSnapshotImports(kj::StringPtr version) { - auto workerFiles = this->getPythonFileContents(); - auto importedNames = parsePythonScriptImports(kj::mv(workerFiles)); - auto workerModules = getWorkerModuleSet(); - return PythonModuleInfo::filterPythonScriptImports( - kj::mv(workerModules), kj::mv(importedNames), version); -} - -kj::Array PyodideMetadataReader::getPackageSnapshotImports(kj::String version) { - return state->moduleInfo.getPackageSnapshotImports(version); -} - -kj::Array> PyodideMetadataReader::getRequirements(jsg::Lock& js) { - auto builder = kj::heapArrayBuilder>(state->requirements.size()); - for (auto i: kj::zeroTo(builder.capacity())) { - builder.add(js, js.str(state->requirements[i])); - } - return builder.finish(); -} - kj::Array PyodideMetadataReader::getSizes(jsg::Lock& js) { auto builder = kj::heapArrayBuilder(state->moduleInfo.names.size()); for (auto i: kj::zeroTo(builder.capacity())) { @@ -183,13 +152,6 @@ int PyodideMetadataReader::readMemorySnapshot(int offset, kj::Array bu return readToTarget(KJ_REQUIRE_NONNULL(state->memorySnapshot), offset, buf); } -kj::HashSet PyodideMetadataReader::getTransitiveRequirements() { - auto packages = parseLockFile(state->packagesLock); - auto depMap = getDepMapFromPackagesLock(*packages); - - return getPythonPackageNames(*packages, depMap, state->requirements, state->packagesVersion); -} - int ArtifactBundler::readMemorySnapshot(int offset, kj::Array buf) { if (inner->existingSnapshot == kj::none) { return 0; @@ -197,199 +159,6 @@ int ArtifactBundler::readMemorySnapshot(int offset, kj::Array buf) { return readToTarget(KJ_REQUIRE_NONNULL(inner->existingSnapshot), offset, buf); } -kj::Array 
PythonModuleInfo::parsePythonScriptImports(kj::Array files) { - auto result = kj::Vector(); - - for (auto& file: files) { - // Returns the number of characters skipped. When `oneOf` is not found, skips to the end of - // the string. - auto skipUntil = [](kj::StringPtr str, std::initializer_list oneOf, int start) -> int { - int result = 0; - while (start + result < str.size()) { - char c = str[start + result]; - for (char expected: oneOf) { - if (c == expected) { - return result; - } - } - - result++; - } - - return result; - }; - - // Skips while current character is in `oneOf`. Returns the number of characters skipped. - auto skipWhile = [](kj::StringPtr str, std::initializer_list oneOf, int start) -> int { - int result = 0; - while (start + result < str.size()) { - char c = str[start + result]; - bool found = false; - for (char expected: oneOf) { - if (c == expected) { - result++; - found = true; - break; - } - } - - if (!found) { - break; - } - } - - return result; - }; - - // Skips one of the characters (specified in `oneOf`) at the current position. Otherwise - // throws. Returns the number of characters skipped. - auto skipChar = [](kj::StringPtr str, std::initializer_list oneOf, int start) -> int { - for (char expected: oneOf) { - if (str[start] == expected) { - return 1; - } - } - - KJ_FAIL_REQUIRE("Expected ", oneOf, "but received", str[start]); - }; - - auto parseKeyword = [](kj::StringPtr str, kj::StringPtr ident, int start) -> bool { - int i = 0; - for (; i < ident.size() && start + i < str.size(); i++) { - if (str[start + i] != ident[i]) { - return false; - } - } - - return i == ident.size(); - }; - - // Returns the size of the import identifier or 0 if no identifier exists at `start`. - auto parseIdent = [](kj::StringPtr str, int start) -> int { - // https://docs.python.org/3/reference/lexical_analysis.html#identifiers - // - // We also accept `.` because import idents can contain it. 
- // TODO: We don't currently support unicode, but if we see packages that utilize it we will - // implement that support. - if (isDigit(str[start])) { - return 0; - } - int i = 0; - for (; start + i < str.size(); i++) { - char c = str[start + i]; - bool validIdentChar = isAlpha(c) || isDigit(c) || c == '_' || c == '.'; - if (!validIdentChar) { - return i; - } - } - - return i; - }; - - int i = 0; - while (i < file.size()) { - switch (file[i]) { - case 'i': - case 'f': { - auto keywordToParse = file[i] == 'i' ? "import"_kj : "from"_kj; - if (!parseKeyword(file, keywordToParse, i)) { - // We cannot simply skip the current char here, doing so would mean that - // `iimport x` would be parsed as a valid import. - i += skipUntil(file, {'\n', '\r', '"', '\''}, i); - continue; - } - i += keywordToParse.size(); // skip "import" or "from" - - while (i < file.size()) { - // Python expects a `\` to be paired with a newline, but we don't have to be as strict - // here because we rely on the fact that the script has gone through validation already. - i += skipWhile( - file, {'\r', '\n', ' ', '\t', '\\'}, i); // skip whitespace and backslash. - - if (file[i] == '.') { - // ignore relative imports - break; - } - - int identLen = parseIdent(file, i); - KJ_REQUIRE(identLen > 0); - - kj::String ident = kj::heapString(file.slice(i, i + identLen)); - if (ident[identLen - 1] != '.') { // trailing period means the import is invalid - result.add(kj::mv(ident)); - } - - i += identLen; - - // If "import" statement then look for comma. - if (keywordToParse == "import") { - i += skipWhile( - file, {'\r', '\n', ' ', '\t', '\\'}, i); // skip whitespace and backslash. - // Check if next char is a comma. - if (file[i] == ',') { - i += 1; // Skip comma. - // Allow while loop to continue - } else { - // No more idents, so break out of loop. - break; - } - } else { - // The "from" statement doesn't support commas. 
- break; - } - } - break; - } - case '"': - case '\'': { - char quote = file[i]; - // Detect multi-line string literals `"""` and skip until the corresponding ending `"""`. - if (i + 2 < file.size() && file[i + 1] == quote && file[i + 2] == quote) { - i += 3; // skip start quotes. - // skip until terminating quotes. - while (i + 2 < file.size() && file[i + 1] != quote && file[i + 2] != quote) { - if (file[i] == quote) { - i++; - } - i += skipUntil(file, {quote}, i); - } - i += 3; // skip terminating quotes. - } else if (i + 2 < file.size() && file[i + 1] == '\\' && - (file[i + 2] == '\n' || file[i + 2] == '\r')) { - // Detect string literal with backslash. - i += 3; // skip `"\` - // skip until quote, but ignore `\"`. - while (file[i] != quote && file[i - 1] != '\\') { - if (file[i] == quote) { - i++; - } - i += skipUntil(file, {quote}, i); - } - i += 1; // skip quote. - } else { - i += 1; // skip quote. - } - - // skip until EOL so that we don't mistakenly parse and capture `"import x`. - i += skipUntil(file, {'\n', '\r', '"', '\''}, i); - break; - } - default: - // Skip to the next line or " or ' - i += skipUntil(file, {'\n', '\r', '"', '\''}, i); - if (file[i] == '"' || file[i] == '\'') { - continue; // Allow the quotes to be handled above. - } - if (file[i] != '\0') { - i += skipChar(file, {'\n', '\r'}, i); // skip newline. 
- } - } - } - } - - return result.releaseAsArray(); -} - const kj::Array snapshotImports = kj::arr("_pyodide"_kj, "_pyodide.docstring"_kj, "_pyodide._core_docs"_kj, @@ -416,10 +185,6 @@ kj::Array PyodideMetadataReader::getBaselineSnapshotImports() { return kj::heapArray(snapshotImports.begin(), snapshotImports.size()); } -bool PyodideMetadataReader::shouldAbortIsolateOnFatalError() { - return util::Autogate::isEnabled(util::AutogateKey::PYTHON_ABORT_ISOLATE_ON_FATAL_ERROR); -} - jsg::JsObject PyodideMetadataReader::getCompatibilityFlags(jsg::Lock& js) { auto flags = FeatureFlags::get(js); auto obj = js.objNoProto(); @@ -445,7 +210,6 @@ jsg::JsObject PyodideMetadataReader::getCompatibilityFlags(jsg::Lock& js) { PyodideMetadataReader::State::State(const State& other) : mainModule(kj::str(other.mainModule)), moduleInfo(other.moduleInfo.clone()), - requirements(KJ_MAP(req, other.requirements) { return kj::str(req); }), pyodideVersion(kj::str(other.pyodideVersion)), packagesVersion(kj::str(other.packagesVersion)), packagesLock(kj::str(other.packagesLock)), @@ -465,7 +229,7 @@ void PyodideMetadataReader::State::verifyNoMainModuleInVendor() { // mainModule includes the .py extension, so we need to extract the base name kj::ArrayPtr mainModuleBase = mainModule; if (mainModule.endsWith(".py")) { - mainModuleBase = mainModuleBase.slice(0, mainModuleBase.size() - 3); + mainModuleBase = mainModuleBase.first(mainModuleBase.size() - 3); } for (auto& name: moduleInfo.names) { @@ -485,58 +249,6 @@ void PyodideMetadataReader::State::verifyNoMainModuleInVendor() { } } -kj::Array PythonModuleInfo::filterPythonScriptImports( - kj::HashSet workerModules, - kj::ArrayPtr imports, - kj::StringPtr version) { - auto baselineSnapshotImportsSet = kj::HashSet(); - for (auto& pkgImport: snapshotImports) { - baselineSnapshotImportsSet.upsert(kj::mv(pkgImport), [](auto&&, auto&&) {}); - } - - kj::HashSet filteredImportsSet; - filteredImportsSet.reserve(imports.size()); - for (auto& pkgImport: 
imports) { - auto firstDot = pkgImport.findFirst('.').orDefault(pkgImport.size()); - auto firstComponent = pkgImport.slice(0, firstDot); - // Skip duplicates - if (filteredImportsSet.contains(pkgImport)) [[unlikely]] { - continue; - } - - // don't include modules that we provide and that are likely to be imported by most - // workers. - if (firstComponent == "js"_kj.asArray() || firstComponent == "asgi"_kj.asArray() || - firstComponent == "workers"_kj.asArray()) { - continue; - } - if (version == "0.26.0a2") { - if (firstComponent == "pyodide"_kj.asArray() || firstComponent == "httpx"_kj.asArray() || - firstComponent == "openai"_kj.asArray() || firstComponent == "starlette"_kj.asArray() || - firstComponent == "urllib3"_kj.asArray()) { - continue; - } - } - - // Don't include anything that went into the baseline snapshot - if (baselineSnapshotImportsSet.contains(pkgImport)) { - continue; - } - - // Don't include imports from worker files - if (workerModules.contains(firstComponent)) { - continue; - } - filteredImportsSet.upsert(kj::mv(pkgImport), [](auto&&, auto&&) {}); - } - - auto filteredImportsBuilder = kj::heapArrayBuilder(filteredImportsSet.size()); - for (auto& pkgImport: filteredImportsSet) { - filteredImportsBuilder.add(kj::mv(pkgImport)); - } - return filteredImportsBuilder.finish(); -} - kj::Maybe getPyodideLock(PythonSnapshotRelease::Reader pythonSnapshotRelease) { for (auto pkgLock: *PACKAGE_LOCKS) { if (pkgLock.getPackageDate() == pythonSnapshotRelease.getPackages()) { @@ -544,6 +256,13 @@ kj::Maybe getPyodideLock(PythonSnapshotRelease::Reader pythonSnapsho } } + // From Pyodide 314 on, we don't use packages inside the lockfile. + // All packages used by the worker should come from PyPI and be bundled inside the worker. + // To avoid breaking existing workers, we return an empty lockfile if no packages are found. 
+ if (pythonSnapshotRelease.getPackages().size() == 0) { + return kj::str("{\"packages\":{}}"); + } + return kj::none; } @@ -594,6 +313,27 @@ void DiskCache::putSnapshot(jsg::Lock& js, kj::String key, kj::Array d } } +kj::String computePyodideBundleIntegrity(kj::ArrayPtr bytes) { + kj::byte hash[SHA256_DIGEST_LENGTH]{}; + SHA256(bytes.begin(), bytes.size(), hash); + return kj::str("sha256-", kj::encodeBase64(kj::arrayPtr(hash, SHA256_DIGEST_LENGTH))); +} + +void verifyPyodideBundleIntegrity( + kj::StringPtr version, kj::StringPtr expectedIntegrity, kj::ArrayPtr bytes) { + // The "dev" bundle is built locally from the current tree and has no published checksum. + if (version == "dev") { + return; + } + // Every released bundle must have a published checksum; refuse to use one without it. + KJ_REQUIRE(expectedIntegrity != nullptr && expectedIntegrity.size() > 0, + "Pyodide bundle is missing an integrity checksum; refusing to use it.", version); + auto actualIntegrity = computePyodideBundleIntegrity(bytes); + KJ_REQUIRE(actualIntegrity == expectedIntegrity, + "Pyodide bundle integrity check failed: the bundle does not match the expected checksum.", + version, expectedIntegrity, actualIntegrity); +} + } // namespace workerd::api::pyodide namespace workerd { @@ -675,57 +415,58 @@ kj::String getPythonBundleName(PythonSnapshotRelease::Reader pyodideRelease) { namespace api::pyodide { -// Returns a string containing the contents of the hashset, delimited by ", " -kj::String hashsetToString(const kj::HashSet& set) { - if (set.size() == 0) { - return kj::String(); - } - - kj::Vector elems; - for (const auto& e: set) { - elems.add(e); - } - - // Sort the elements for consistent output - auto array = elems.releaseAsArray(); - std::sort(array.begin(), array.end()); - - return kj::str(kj::delimited(array, ", "_kjc)); -} +// The Python stdlib packages are extracted at build time and embedded in the Pyodide bundle as a +// PythonPackages capnp message, carried in a data module 
whose name ends with this segment. +static constexpr kj::StringPtr PACKAGES_MODULE_SUFFIX = "python_packages.bin"_kj; -kj::Array getPythonPackageFiles(kj::StringPtr lockFileContents, - kj::ArrayPtr requirements, - kj::StringPtr packagesVersion) { - auto packages = parseLockFile(lockFileContents); - auto depMap = getDepMapFromPackagesLock(*packages); - - auto allRequirements = getPythonPackageNames(*packages, depMap, requirements, packagesVersion); - - // Add the file names of all the requirements to our result array. - kj::Vector res; - for (const auto& ent: *packages) { - auto name = ent.getName(); - auto obj = ent.getValue().getObject(); - auto fileName = kj::str(getField(obj, "file_name").getString()); - - auto maybeRow = allRequirements.find(name); - KJ_IF_SOME(row, maybeRow) { - allRequirements.erase(row); - res.add(kj::mv(fileName)); - } else if (packagesVersion == "20240829.4") { - auto packageType = getField(obj, "package_type").getString(); - if (packageType == "cpython_module") { - res.add(kj::mv(fileName)); - } +jsg::Ref EmbeddedPackagesReader::fromBundle( + jsg::Lock& js, jsg::Bundle::Reader bundle) { + for (auto module: bundle.getModules()) { + if (module.which() != jsg::Module::DATA) { + continue; + } + kj::StringPtr name = module.getName(); + if (!name.endsWith(PACKAGES_MODULE_SUFFIX)) { + continue; } - } - if (allRequirements.size() != 0) { - JSG_FAIL_REQUIRE(Error, - "Requested Python package(s) that are not supported: ", hashsetToString(allRequirements)); + // The data module holds a serialized PythonPackages message. Its bytes live in the + // process-wide bundle message (word-aligned, since capnp allocates Data on word boundaries), + // so we can read it in place without copying. 
+ auto data = module.getData().asBytes(); + auto words = kj::arrayPtr( + reinterpret_cast(data.begin()), data.size() / sizeof(capnp::word)); + // We're going to reuse this for every Python isolate, so set the traversal + // limit to infinity or else eventually a new Python isolate will fail. + auto messageReader = kj::heap( + words, capnp::ReaderOptions{.traversalLimitInWords = kj::maxValue}); + return js.alloc(kj::mv(messageReader)); } - return res.releaseAsArray(); + // No embedded packages (e.g. a newer Pyodide version that bundles the stdlib directly). + return js.alloc(kj::none); +} + +kj::Array EmbeddedPackagesReader::getFiles(jsg::Lock& js) { + KJ_IF_SOME(packages, files()) { + auto files = packages.getFiles(); + auto builder = kj::heapArrayBuilder(files.size()); + for (auto file: files) { + auto size = file.getContents().size(); + KJ_REQUIRE(size <= size_t(int(kj::maxValue)), + "embedded Python package file is too large to address with an int size", file.getPath(), + size); + // installDir/path are kj::StringPtr pointing into the message; only copied when marshaled. 
+ builder.add(PythonPackageFileMetadata{ + .installDir = file.getInstallDir(), + .path = file.getPath(), + .size = static_cast(size), + .reader = js.alloc(file.getContents().asBytes()), + }); + } + return builder.finish(); + } + return kj::Array(); } void WorkerFatalReporter::reportFatal(jsg::Lock& js, kj::String error) { diff --git a/src/workerd/api/pyodide/pyodide.h b/src/workerd/api/pyodide/pyodide.h index f039158674a..cfa988ba1bb 100644 --- a/src/workerd/api/pyodide/pyodide.h +++ b/src/workerd/api/pyodide/pyodide.h @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -29,7 +30,6 @@ WD_STRONG_BOOL(IsValidating); WD_STRONG_BOOL(IsWorkerd); WD_STRONG_BOOL(SnapshotToDisk); -const auto PYTHON_PACKAGES_URL = "https://pyodide-capnp-bin.edgeworker.net/"; class PyodideBundleManager { public: void setPyodideBundleData(kj::String version, kj::Array data) const; @@ -43,27 +43,17 @@ class PyodideBundleManager { const kj::MutexGuarded> bundles; }; -class PyodidePackageManager { - public: - void setPyodidePackageData(kj::String id, kj::Array data) const; - const kj::Maybe&> getPyodidePackage(kj::StringPtr id) const; - - private: - const kj::MutexGuarded>> packages; -}; - struct PythonConfig { kj::Maybe> packageDiskCacheRoot; kj::Maybe> pyodideDiskCacheRoot; kj::Maybe> snapshotDirectory; const PyodideBundleManager pyodideBundleManager; - const PyodidePackageManager pyodidePackageManager; bool createSnapshot; bool createBaselineSnapshot; kj::Maybe loadSnapshotFromDisk; }; -// A function to read a segment of the tar file into a buffer +// A function to read a segment of a buffer (e.g. an embedded package file) into a target buffer. // Set up this way to avoid copying files that aren't accessed. 
class ReadOnlyBuffer: public jsg::Object { kj::ArrayPtr source; @@ -78,6 +68,53 @@ class ReadOnlyBuffer: public jsg::Object { } }; +// Metadata for a single embedded Python package file, returned to the runtime so it can build the +// site-packages / dynlib filesystem (see src/pyodide/internal/loadPackage.ts). The string fields +// point directly into the (process-lifetime) bundle message to avoid copying; they are only copied +// when JSG marshals them into V8 strings. +struct PythonPackageFileMetadata { + // Mount root ("site"/"stdlib" -> site-packages, "dynlib" -> /usr/lib). + kj::StringPtr installDir; + // Path within `installDir`, e.g. "ssl/__init__.py". + kj::StringPtr path; + // Size of the file contents in bytes. + int size; + // Reader for the (already-decompressed) bytes of this file. + jsg::Ref reader; + JSG_STRUCT(installDir, path, size, reader); +}; + +// Exposes the Python stdlib package files that are extracted and embedded directly in the Pyodide +// bundle as a PythonPackages capnp message (see python_packages.capnp / pack_python_packages.py). +// The runtime reads `getFiles()` to learn the file layout; each returned entry carries a `reader` +// for the (already-decompressed) bytes of that file. This is a single bulk call (rather than a +// per-file accessor) to avoid a JS<->C++ round-trip per file. +class EmbeddedPackagesReader: public jsg::Object { + public: + EmbeddedPackagesReader(kj::Maybe> messageReader) + : messageReader(kj::mv(messageReader)) {} + + // Builds a reader from a Pyodide bundle, locating the embedded `python_packages` data module. If + // the bundle has no embedded packages, the returned reader exposes an empty file list. + static jsg::Ref fromBundle(jsg::Lock& js, jsg::Bundle::Reader bundle); + + kj::Array getFiles(jsg::Lock& js); + + JSG_RESOURCE_TYPE(EmbeddedPackagesReader) { + JSG_METHOD(getFiles); + } + + private: + // Owns the message backing `files`. `kj::none` when the bundle has no embedded packages. 
+ kj::Maybe> messageReader; + + kj::Maybe files() { + return messageReader.map([](kj::Own& reader) { + return reader->getRoot(); + }); + } +}; + class PythonModuleInfo { public: PythonModuleInfo(kj::Array names, kj::Array> contents) @@ -95,26 +132,8 @@ class PythonModuleInfo { return PythonModuleInfo(kj::mv(clonedNames), kj::mv(clonedContents)); } - // Return the list of names to import into a package snapshot. - kj::Array getPackageSnapshotImports(kj::StringPtr version); - // Takes in a list of Python files (their contents). Parses these files to find the import - // statements, then returns a list of modules imported via those statements. - // - // For example: - // import a, b, c - // from z import x - // import t.y.u - // from . import k - // - // -> ["a", "b", "c", "z", "t.y.u"] - // - // Package relative imports are ignored. - static kj::Array parsePythonScriptImports(kj::Array files); kj::HashSet getWorkerModuleSet(); kj::Array getPythonFileContents(); - static kj::Array filterPythonScriptImports(kj::HashSet workerModules, - kj::ArrayPtr imports, - kj::StringPtr version); }; // A class wrapping the information stored in a WorkerBundle, in particular the Python source files @@ -128,7 +147,6 @@ class PyodideMetadataReader: public jsg::Object { struct State { kj::String mainModule; PythonModuleInfo moduleInfo; - kj::Array requirements; kj::String pyodideVersion; kj::String packagesVersion; kj::String packagesLock; @@ -141,7 +159,6 @@ class PyodideMetadataReader: public jsg::Object { State(kj::String mainModule, kj::Array names, kj::Array> contents, - kj::Array requirements, kj::String pyodideVersion, kj::String packagesVersion, kj::String packagesLock, @@ -152,7 +169,6 @@ class PyodideMetadataReader: public jsg::Object { kj::Maybe> memorySnapshot) : mainModule(kj::mv(mainModule)), moduleInfo(kj::mv(names), kj::mv(contents)), - requirements(kj::mv(requirements)), pyodideVersion(kj::mv(pyodideVersion)), packagesVersion(kj::mv(packagesVersion)), 
packagesLock(kj::mv(packagesLock)), @@ -189,10 +205,6 @@ class PyodideMetadataReader: public jsg::Object { return state->createBaselineSnapshot; } - // Returns whether the python-abort-isolate-on-fatal-error autogate is enabled. When true, the - // Python on_fatal handler should call abortIsolate() to terminate the isolate after reporting. - bool shouldAbortIsolateOnFatalError(); - kj::StringPtr getMainModule() { return state->mainModule; } @@ -203,11 +215,6 @@ class PyodideMetadataReader: public jsg::Object { kj::Array getNames(jsg::Lock& js, jsg::Optional maybeExtFilter); kj::Array getSizes(jsg::Lock& js); - // Return the list of names to import into a package snapshot. - kj::Array getPackageSnapshotImports(kj::String version); - - kj::Array> getRequirements(jsg::Lock& js); - int read(jsg::Lock& js, int index, int offset, kj::Array buf); bool hasMemorySnapshot() { @@ -237,8 +244,6 @@ class PyodideMetadataReader: public jsg::Object { return state->packagesLock; } - kj::HashSet getTransitiveRequirements(); - static kj::Array getBaselineSnapshotImports(); // We call this during Python setup with the wasm memory and the addresses of the signal clock and @@ -258,10 +263,8 @@ class PyodideMetadataReader: public jsg::Object { JSG_METHOD(isWorkerd); JSG_METHOD(isTracing); JSG_METHOD(getMainModule); - JSG_METHOD(getRequirements); JSG_METHOD(getNames); JSG_METHOD(getSizes); - JSG_METHOD(getPackageSnapshotImports); JSG_METHOD(read); JSG_METHOD(hasMemorySnapshot); JSG_METHOD(getMemorySnapshotSize); @@ -272,8 +275,6 @@ class PyodideMetadataReader: public jsg::Object { JSG_METHOD(getPackagesVersion); JSG_METHOD(getPackagesLock); JSG_METHOD(isCreatingBaselineSnapshot); - JSG_METHOD(shouldAbortIsolateOnFatalError); - JSG_METHOD(getTransitiveRequirements); JSG_METHOD(getCompatibilityFlags); JSG_STATIC_METHOD(getBaselineSnapshotImports); JSG_METHOD(setCpuLimitNearlyExceededCallback); @@ -287,9 +288,6 @@ class PyodideMetadataReader: public jsg::Object { for (const auto& content: 
state->moduleInfo.contents) { tracker.trackField("content", content); } - for (const auto& requirement: state->requirements) { - tracker.trackField("requirement", requirement); - } } private: @@ -306,11 +304,6 @@ struct MemorySnapshotResult { // This used to be declared nested as ArtifactBundler::State, but then there was a need to // forward-declare it, so here we are. struct ArtifactBundler_State { - kj::Maybe packageManager; - // ^ lifetime should be contained by lifetime of ArtifactBundler since there is normally one worker set for the whole process. see worker-set.h - // In other words: - // WorkerSet lifetime = PackageManager lifetime and Worker lifetime = ArtifactBundler lifetime and WorkerSet owns and will outlive Worker, so PackageManager outlives ArtifactBundler - // The storedSnapshot is only used while isValidating is true. kj::Maybe storedSnapshot; @@ -326,18 +319,16 @@ struct ArtifactBundler_State { // snapshots yet, so the Python runtime uses this to skip snapshot type validation. bool isDynamicWorkerFlag; - ArtifactBundler_State(kj::Maybe packageManager, - kj::Maybe> existingSnapshot, + ArtifactBundler_State(kj::Maybe> existingSnapshot, bool isValidating = false, bool isDynamicWorker = false) - : packageManager(packageManager), - storedSnapshot(kj::none), + : storedSnapshot(kj::none), existingSnapshot(kj::mv(existingSnapshot)), isValidating(isValidating), isDynamicWorkerFlag(isDynamicWorker) {}; kj::Own clone() { - return kj::heap(packageManager, + return kj::heap( existingSnapshot.map( [](kj::Array& data) { return kj::heapArray(data); }), isValidating, isDynamicWorkerFlag); @@ -384,12 +375,7 @@ class ArtifactBundler: public jsg::Object { } static kj::Own makeDisabledBundler() { - return kj::heap(kj::none, kj::none); - } - - // Creates an ArtifactBundler that only grants access to packages, and not a memory snapshot. 
- static kj::Own makePackagesOnlyBundler(kj::Maybe manager) { - return kj::heap(manager, kj::none); + return kj::heap(kj::none); } void visitForMemoryInfo(jsg::MemoryTracker& tracker) const { @@ -402,16 +388,6 @@ class ArtifactBundler: public jsg::Object { return false; // TODO(later): Remove this function once we regenerate the bundle. } - kj::Maybe> getPackage(jsg::Lock& js, kj::String path) { - KJ_IF_SOME(pacman, inner->packageManager) { - KJ_IF_SOME(ptr, pacman.getPyodidePackage(path)) { - return js.alloc(ptr); - } - } - - return kj::none; - } - JSG_RESOURCE_TYPE(ArtifactBundler) { JSG_METHOD(hasMemorySnapshot); JSG_METHOD(getMemorySnapshotSize); @@ -421,7 +397,6 @@ class ArtifactBundler: public jsg::Object { JSG_METHOD(isDynamicWorker); JSG_METHOD(storeMemorySnapshot); JSG_METHOD(isEnabled); - JSG_METHOD(getPackage); } private: @@ -528,18 +503,19 @@ class SimplePythonLimiter: public jsg::Object { kj::Maybe getPyodideLock(PythonSnapshotRelease::Reader pythonSnapshotRelease); -// Returns a list of filenames we need to fetch according to the pyodide-lock.json file -// in addition to the requirements argument, we also must include all "stdlib" packages -// as well as any transitive dependencies needed -kj::Array getPythonPackageFiles(kj::StringPtr lockFileContents, - kj::ArrayPtr requirements, - kj::StringPtr packagesVersion); +// Computes the subresource-integrity-style checksum ("sha256-") of the given bytes. +kj::String computePyodideBundleIntegrity(kj::ArrayPtr bytes); -// Constructs the path to a Python package in the package repository -kj::String getPyodidePackagePath(kj::StringPtr packagesVersion, kj::StringPtr filename); +// Verifies that a fetched/downloaded Pyodide bundle matches the expected subresource-integrity +// checksum from the release metadata. Throws on mismatch. Verification is skipped only for the +// "dev" bundle (built locally, no published checksum); for any other bundle a blank +// `expectedIntegrity` is itself an error. 
+void verifyPyodideBundleIntegrity( + kj::StringPtr version, kj::StringPtr expectedIntegrity, kj::ArrayPtr bytes); #define EW_PYODIDE_ISOLATE_TYPES \ - api::pyodide::ReadOnlyBuffer, api::pyodide::PyodideMetadataReader, \ + api::pyodide::ReadOnlyBuffer, api::pyodide::PythonPackageFileMetadata, \ + api::pyodide::EmbeddedPackagesReader, api::pyodide::PyodideMetadataReader, \ api::pyodide::ArtifactBundler, api::pyodide::DiskCache, \ api::pyodide::DisabledInternalJaeger, api::pyodide::SimplePythonLimiter, \ api::pyodide::WorkerFatalReporter, api::pyodide::MemorySnapshotResult diff --git a/src/workerd/api/pyodide/requirements.c++ b/src/workerd/api/pyodide/requirements.c++ index 10f7224bab0..1242ed9acbd 100644 --- a/src/workerd/api/pyodide/requirements.c++ +++ b/src/workerd/api/pyodide/requirements.c++ @@ -7,8 +7,6 @@ #include #include -#include - namespace workerd::api::pyodide { // getField gets a field of a JSON object by key @@ -24,64 +22,6 @@ capnp::json::Value::Reader getField( KJ_FAIL_ASSERT("Expected key in JSON object", name); } -kj::String canonicalizePythonPackageName(kj::StringPtr name) { - kj::Vector res(name.size()); - - auto isSeparator = [](char c) { return c == '-' || c == '_' || c == '.'; }; - - for (int i = 0; i < name.size(); i++) { - if (isSeparator(name[i])) { - res.add('-'); - // make i point to the last separator in the sequence - while (isSeparator(name[i])) i++; - i--; - continue; - } - - res.add(std::tolower(name[i])); - } - - res.add(0); // NUL terminator - - return kj::String(res.releaseAsArray()); -} - -// getDepMapFromPackagesLock computes a dependency map (a mapping from requirement to list of dependencies) from the Pyodide lock file JSON -DepMap getDepMapFromPackagesLock( - capnp::List::Reader &packages) { - DepMap res; - - for (const auto &ent: packages) { - auto packageObj = ent.getValue().getObject(); - auto depends = getField(packageObj, "depends").getArray(); - - auto &[_, deps] = res.insert(kj::str(ent.getName()), 
kj::Vector(depends.size())); - - for (const auto &dep: depends) { - deps.add(kj::str(dep.getString())); - } - } - - return res; -} - -// addWithRecursiveDependencies adds a requirement along with all its dependencies (according to the dependency map) to the requirements set -void addWithRecursiveDependencies( - kj::StringPtr requirement, const DepMap &depMap, kj::HashSet &requirementsSet) { - auto normalizedName = canonicalizePythonPackageName(requirement); - if (requirementsSet.contains(normalizedName)) { - return; - } - - requirementsSet.insert(kj::str(normalizedName)); - - KJ_IF_SOME(deps, depMap.find(normalizedName)) { - for (const auto &dep: deps) { - addWithRecursiveDependencies(dep, depMap, requirementsSet); - } - } -} - kj::Own::Reader> parseLockFile( kj::StringPtr lockFileContents) { capnp::JsonCodec json; @@ -95,38 +35,4 @@ kj::Own::Reader> parseLockFile( return capnp::clone(packages); } -kj::HashSet getPythonPackageNames( - capnp::List::Reader packages, - const DepMap &depMap, - kj::ArrayPtr requirements, - kj::StringPtr packagesVersion) { - - kj::HashSet allRequirements; // Requirements including their recursive dependencies. - - // Potentially add the stdlib packages and their recursive dependencies. - // TODO: Loading stdlib and its dependencies breaks package snapshots on "20240829.4". - // Remove this version check once a new package/python release is made. - if (packagesVersion != "20240829.4") { - // We need to scan the packages list for any packages that need to be included because they - // are part of Python's stdlib (hashlib etc). These need to be implicitly treated as part of - // our `requirements`. 
- for (const auto &ent: packages) { - auto name = ent.getName(); - auto obj = ent.getValue().getObject(); - auto packageType = getField(obj, "package_type").getString(); - - if (packageType == "cpython_module"_kj) { - addWithRecursiveDependencies(name, depMap, allRequirements); - } - } - } - - // Add all recursive dependencies of each requirement. - for (const auto &req: requirements) { - addWithRecursiveDependencies(req, depMap, allRequirements); - } - - return allRequirements; -} - } // namespace workerd::api::pyodide diff --git a/src/workerd/api/pyodide/requirements.h b/src/workerd/api/pyodide/requirements.h index 6830e17e0e0..484db136612 100644 --- a/src/workerd/api/pyodide/requirements.h +++ b/src/workerd/api/pyodide/requirements.h @@ -6,7 +6,6 @@ #include #include #include -#include namespace workerd::api::pyodide { @@ -14,21 +13,7 @@ capnp::json::Value::Reader getField( capnp::List<::capnp::json::Value::Field, capnp::Kind::STRUCT>::Reader &object, kj::StringPtr name); -kj::String canonicalizePythonPackageName(kj::StringPtr name); - -// map from requirement to list of dependencies -using DepMap = kj::HashMap>; - -DepMap getDepMapFromPackagesLock( - capnp::List::Reader &packages); - kj::Own::Reader> parseLockFile( kj::StringPtr lockFileContents); -kj::HashSet getPythonPackageNames( - capnp::List::Reader packages, - const DepMap &depMap, - kj::ArrayPtr requirements, - kj::StringPtr packagesVersion); - } // namespace workerd::api::pyodide diff --git a/src/workerd/api/queue.c++ b/src/workerd/api/queue.c++ index 6f9012e5850..972dcb79b45 100644 --- a/src/workerd/api/queue.c++ +++ b/src/workerd/api/queue.c++ @@ -80,7 +80,7 @@ kj::StringPtr validateContentType(kj::StringPtr contentType) { } struct Serialized { - kj::Maybe, jsg::BufferSource, jsg::BackingStore>> own; + kj::Maybe, jsg::JsRef>> own; // Holds onto the owner of a given array of serialized data. 
kj::ArrayPtr data; // A pointer into that data that can be directly written into an outgoing queue send, regardless @@ -103,8 +103,17 @@ Serialized serializeV8(jsg::Lock& js, const jsg::JsValue& body) { return kj::mv(result); } -// Control whether the serialize() method makes a deep copy of provided ArrayBuffer types or if it -// just returns a shallow reference that is only valid until the given method returns. +// Control whether serialize() detaches/copies the ArrayBuffer or holds a shallow reference. +// +// send() uses DEEP_COPY, which detaches the buffer when possible (transferring ownership +// without copying). sendBatch() uses SHALLOW_REFERENCE, which avoids detaching so the +// caller can reuse the buffer after the call. Do not change sendBatch() to DEEP_COPY +// without a compat flag — users may depend on the buffer remaining usable. +// +// SHALLOW_REFERENCE holds a raw pointer into the BackingStore. This is safe for +// non-resizable buffers (the BackingStore shared_ptr prevents deallocation), but +// resizable buffers can have pages decommitted by resize(0) while the pointer is held. +// The SHALLOW_REFERENCE path deep-copies resizable buffers to prevent this. enum class SerializeArrayBufferBehavior { DEEP_COPY, SHALLOW_REFERENCE, @@ -125,26 +134,34 @@ Serialized serialize(jsg::Lock& js, result.own = kj::mv(s); return kj::mv(result); } else if (contentType == IncomingQueueMessage::ContentType::BYTES) { - JSG_REQUIRE(body.isArrayBufferView(), TypeError, + auto source = JSG_REQUIRE_NONNULL(body.tryCast(), TypeError, kj::str("Content Type \"", IncomingQueueMessage::ContentType::BYTES, "\" requires a value of type ArrayBufferView, but received: ", body.typeOf(js))); - jsg::BufferSource source(js, body); if (bufferBehavior == SerializeArrayBufferBehavior::SHALLOW_REFERENCE) { - // If we know the data will be consumed synchronously, we can avoid copying it. 
+ if (source.isResizable()) { + // Resizable buffers can have pages decommitted by resize(0) while + // the shallow reference is held. Deep-copy to prevent OOB read. + kj::Array bytes = jsg::JsBufferSource(source).copy(); + Serialized result; + result.data = bytes; + result.own = kj::mv(bytes); + return kj::mv(result); + } + // Non-resizable: safe to hold a shallow reference. Serialized result; result.data = source.asArrayPtr(); - result.own = kj::mv(source); + result.own = source.addRef(js); return kj::mv(result); - } else if (source.canDetach(js)) { + } else if (source.isDetachable()) { // Prefer detaching the input ArrayBuffer whenever possible to avoid needing to copy it. - auto backingSource = source.detach(js); + auto backingSource = source.detachAndTake(js); Serialized result; result.data = backingSource.asArrayPtr(); - result.own = kj::mv(backingSource); + result.own = backingSource.addRef(js); return kj::mv(result); } else { - kj::Array bytes = kj::heapArray(source.asArrayPtr()); + kj::Array bytes = jsg::JsBufferSource(source).copy(); Serialized result; result.data = bytes; result.own = kj::mv(bytes); @@ -176,7 +193,7 @@ jsg::JsValue deserialize( if (type == IncomingQueueMessage::ContentType::TEXT) { return js.str(body); } else if (type == IncomingQueueMessage::ContentType::BYTES) { - return jsg::JsValue(js.bytes(kj::mv(body)).getHandle(js)); + return jsg::JsUint8Array::create(js, body); } else if (type == IncomingQueueMessage::ContentType::JSON) { return jsg::JsValue::fromJson(js, body.asChars()); } else if (type == IncomingQueueMessage::ContentType::V8) { @@ -196,8 +213,7 @@ jsg::JsValue deserialize(jsg::Lock& js, rpc::QueueMessage::Reader message) { if (type == IncomingQueueMessage::ContentType::TEXT) { return js.str(message.getData().asChars()); } else if (type == IncomingQueueMessage::ContentType::BYTES) { - kj::Array bytes = kj::heapArray(message.getData().asBytes()); - return jsg::JsValue(js.bytes(kj::mv(bytes)).getHandle(js)); + return 
jsg::JsUint8Array::create(js, message.getData().asBytes()); } else if (type == IncomingQueueMessage::ContentType::JSON) { return jsg::JsValue::fromJson(js, message.getData().asChars()); } else if (type == IncomingQueueMessage::ContentType::V8) { @@ -412,22 +428,22 @@ jsg::Promise WorkerQueue::sendBatch(jsg::Lock& j } QueueMessage::QueueMessage( - jsg::Lock& js, rpc::QueueMessage::Reader message, IoPtr result) + jsg::Lock& js, rpc::QueueMessage::Reader message, IoOwn result) : id(kj::str(message.getId())), timestamp(message.getTimestampNs() * kj::NANOSECONDS + kj::UNIX_EPOCH), body(deserialize(js, message).addRef(js)), attempts(message.getAttempts()), - result(result) {} + result(kj::mv(result)) {} // Note that we must make deep copies of all data here since the incoming Reader may be // deallocated while JS's GC wrappers still exist. QueueMessage::QueueMessage( - jsg::Lock& js, IncomingQueueMessage message, IoPtr result) + jsg::Lock& js, IncomingQueueMessage message, IoOwn result) : id(kj::mv(message.id)), timestamp(message.timestamp), body(deserialize(js, kj::mv(message.body), message.contentType).addRef(js)), attempts(message.attempts), - result(result) {} + result(kj::mv(result)) {} jsg::JsValue QueueMessage::getBody(jsg::Lock& js) { return body.getHandle(js); @@ -482,16 +498,20 @@ void QueueMessage::ack() { } QueueEvent::QueueEvent( - jsg::Lock& js, rpc::EventDispatcher::QueueParams::Reader params, IoPtr result) + jsg::Lock& js, rpc::EventDispatcher::QueueParams::Reader params, IoOwn result) : ExtendableEvent("queue"), queueName(kj::heapString(params.getQueueName())), - result(result) { + result(kj::mv(result)) { // Note that we must make deep copies of all data here since the incoming Reader may be // deallocated while JS's GC wrappers still exist. 
auto incoming = params.getMessages(); + auto& context = IoContext::current(); auto messagesBuilder = kj::heapArrayBuilder>(incoming.size()); for (auto i: kj::indices(incoming)) { - messagesBuilder.add(js.alloc(js, incoming[i], result)); + // Each QueueMessage gets its own owning IoOwn via addRef so that + // QueueEventResult outlives all JS wrappers even if QueueCustomEvent is freed first. + auto msgResult = context.addObject(kj::addRef(*this->result)); + messagesBuilder.add(js.alloc(js, incoming[i], kj::mv(msgResult))); } messages = messagesBuilder.finish(); @@ -512,16 +532,20 @@ QueueEvent::QueueEvent( }; } -QueueEvent::QueueEvent(jsg::Lock& js, Params params, IoPtr result) +QueueEvent::QueueEvent(jsg::Lock& js, Params params, IoOwn result) : ExtendableEvent("queue"), queueName(kj::mv(params.queueName)), metadata(kj::mv(params.metadata)), - result(result) { + result(kj::mv(result)) { clearEpochSentinel(metadata.metrics.oldestMessageTimestamp); + auto& context = IoContext::current(); auto messagesBuilder = kj::heapArrayBuilder>(params.messages.size()); for (auto i: kj::indices(params.messages)) { - messagesBuilder.add(js.alloc(js, kj::mv(params.messages[i]), result)); + // Each QueueMessage gets its own owning IoOwn via addRef. 
+ auto msgResult = context.addObject(kj::addRef(*this->result)); + auto msg = kj::mv(params.messages[i]); + messagesBuilder.add(js.alloc(js, kj::mv(msg), kj::mv(msgResult))); } messages = messagesBuilder.finish(); } @@ -563,7 +587,7 @@ struct StartQueueEventResponse { StartQueueEventResponse startQueueEvent(EventTarget& globalEventTarget, IoContext& context, kj::OneOf params, - IoPtr result, + IoOwn result, Worker::Lock& lock, kj::Maybe exportedHandler, const jsg::TypeHandler& handlerHandler) { @@ -571,10 +595,10 @@ StartQueueEventResponse startQueueEvent(EventTarget& globalEventTarget, jsg::Ref event(nullptr); KJ_SWITCH_ONEOF(params) { KJ_CASE_ONEOF(p, rpc::EventDispatcher::QueueParams::Reader) { - event = js.alloc(js, p, result); + event = js.alloc(js, p, kj::mv(result)); } KJ_CASE_ONEOF(p, QueueEvent::Params) { - event = js.alloc(js, kj::mv(p), result); + event = js.alloc(js, kj::mv(p), kj::mv(result)); } } @@ -652,11 +676,9 @@ kj::Promise QueueCustomEvent::run( incomingRequest->delivered(); auto& context = incomingRequest->getContext(); - // Create a custom refcounted type for holding the queueEvent so that we can pass it to the - // waitUntil'ed callback safely without worrying about whether this coroutine gets canceled. + // This vestigial type used to hold more than just this bool. + // TODO(cleanup): There's probably a better way to pass this bool through. 
struct QueueEventHolder: public kj::Refcounted { - jsg::Ref event = nullptr; - kj::Maybe> exportedHandlerProm; bool isServiceWorkerHandler = false; }; auto queueEventHolder = kj::refcounted(); @@ -665,19 +687,28 @@ kj::Promise QueueCustomEvent::run( auto runProm = context.run( [this, entrypointName = entrypointName, &context, queueEvent = kj::addRef(*queueEventHolder), &metrics = incomingRequest->getMetrics(), versionInfo = kj::mv(versionInfo), - props = kj::mv(props), isDynamicDispatch](Worker::Lock& lock) mutable { + props = kj::mv(props), + isDynamicDispatch](Worker::Lock& lock) mutable -> kj::Promise { jsg::AsyncContextFrame::StorageScope traceScope = context.makeAsyncTraceScope(lock); jsg::AsyncContextFrame::StorageScope userTraceScope = context.makeUserAsyncTraceScope(lock); auto& typeHandler = lock.getWorker().getIsolate().getApi().getQueueTypeHandler(lock); + // Pass an owning IoOwn (via addRef) so that QueueEventResult stays + // alive as long as the JSG QueueEvent/QueueMessage wrappers exist, even after + // QueueCustomEvent is destroyed. This prevents a use-after-free under Durable Objects + // where the IoContext outlives individual queue dispatches. auto startResp = startQueueEvent(lock.getGlobalScope(), context, kj::mv(params), - context.addObject(result), lock, + context.addObject(kj::addRef(*result)), lock, lock.getExportedHandler(entrypointName, kj::mv(versionInfo), kj::mv(props), context.getActor(), isDynamicDispatch), typeHandler); - queueEvent->event = kj::mv(startResp.event); - queueEvent->exportedHandlerProm = kj::mv(startResp.exportedHandlerProm); queueEvent->isServiceWorkerHandler = startResp.isServiceWorkerHandler; + + KJ_IF_SOME(p, startResp.exportedHandlerProm) { + return kj::mv(p); + } else { + return kj::READY_NOW; + } }); // 3. 
Now that we've (asynchronously) called into the event handler, wait on all necessary async @@ -696,22 +727,14 @@ kj::Promise QueueCustomEvent::run( // finishScheduled, but only waiting on the promise returned by the event handler rather than on // all waitUntil'ed promises. auto outcome = co_await runProm - .then([queueEvent = kj::addRef( - *queueEventHolder)]() mutable -> kj::Promise { - // If the queue handler returned a promise, wait on the promise. - KJ_IF_SOME(handlerProm, queueEvent->exportedHandlerProm) { - return handlerProm.then([]() { return EventOutcome::OK; }); - } - // If not, we can consider the invocation complete. - return EventOutcome::OK; - }) + .then([]() mutable -> kj::Promise { return EventOutcome::OK; }) .catch_([](kj::Exception&& e) { // If any exceptions were thrown, mark the outcome accordingly. return EventOutcome::EXCEPTION; }) .exclusiveJoin(timeoutPromise.then([] { // Join everything against a timeout to ensure queue handlers can't run forever. - return EventOutcome::EXCEEDED_CPU; + return EventOutcome::EXCEEDED_WALL_TIME; })).exclusiveJoin(context.onAbort().then([] { // Also handle anything that might cause the worker to get aborted. // This is a change from the outcome we returned on abort before the compat flag, but better @@ -726,22 +749,26 @@ kj::Promise QueueCustomEvent::run( // It'd be nicer if we could fall through to the code below for the non-compat-flag logic in // this case, but we don't even know if the worker uses service worker syntax until after // runProm resolves, so we just copy the bare essentials here. - auto scheduledResult = co_await incomingRequest->finishScheduled(); - bool completed = scheduledResult == EventOutcome::OK; - outcome = completed ? 
context.waitUntilStatus() : scheduledResult; + auto scheduledResult = co_await incomingRequest->finishScheduled(kj::mv(incomingRequest)); + outcome = scheduledResult.outcome; } else { // We're responsible for calling drain() on the incomingRequest to ensure that waitUntil tasks // can continue to run in the backgound for a while even after we return a result to the // caller of this event. But this is only needed in this code path because in all other code // paths we call incomingRequest->finishScheduled(), which already takes care of waiting on // waitUntil tasks. - waitUntilTasks.add(incomingRequest->drain().attach( - kj::mv(incomingRequest), kj::addRef(*queueEventHolder), kj::addRef(*this))); - } + incomingRequest = incomingRequest.attach(kj::addRef(*queueEventHolder), kj::addRef(*this)); - KJ_IF_SOME(status, context.getLimitEnforcer().getLimitsExceeded()) { - outcome = status; + // If we happen to already know that a limit was exceeded, set the outcome here. If it + // happens later during the drain, that's just too late to report. Oh well. (Note that the + // `finishScheduled()` route already handles limit-exceeded outcomes internally.) + KJ_IF_SOME(status, context.getLimitEnforcer().getLimitsExceeded()) { + outcome = status; + } + + incomingRequest->drain(waitUntilTasks, kj::mv(incomingRequest)); } + co_return WorkerInterface::CustomEvent::Result{.outcome = outcome}; } else { // The user has not opted in to the new waitUntil behavior, so we need to add the queue() @@ -750,11 +777,9 @@ kj::Promise QueueCustomEvent::run( // We reuse the finishScheduled() method for convenience, since queues use the same wall clock // timeout as scheduled workers. - auto scheduledResult = co_await incomingRequest->finishScheduled(); - bool completed = scheduledResult == EventOutcome::OK; - + auto scheduledResult = co_await incomingRequest->finishScheduled(kj::mv(incomingRequest)); co_return WorkerInterface::CustomEvent::Result{ - .outcome = completed ? 
context.waitUntilStatus() : scheduledResult, + .outcome = scheduledResult.outcome, }; } } @@ -796,20 +821,20 @@ kj::Promise QueueCustomEvent::sendRpc( return req.send().then([this](auto resp) { auto respResult = resp.getResult(); - this->result.ackAll = respResult.getAckAll(); + this->result->ackAll = respResult.getAckAll(); auto retryBatch = respResult.getRetryBatch(); - this->result.retryBatch.retry = retryBatch.getRetry(); + this->result->retryBatch.retry = retryBatch.getRetry(); if (retryBatch.isDelaySeconds()) { - this->result.retryBatch.delaySeconds = retryBatch.getDelaySeconds(); + this->result->retryBatch.delaySeconds = retryBatch.getDelaySeconds(); } - this->result.explicitAcks.clear(); + this->result->explicitAcks.clear(); for (const auto& msgId: respResult.getExplicitAcks()) { - this->result.explicitAcks.insert(kj::heapString(msgId)); + this->result->explicitAcks.insert(kj::heapString(msgId)); } - this->result.retries.clear(); + this->result->retries.clear(); for (const auto& retry: respResult.getRetryMessages()) { - auto& entry = this->result.retries.upsert(kj::heapString(retry.getMsgId()), {}); + auto& entry = this->result->retries.upsert(kj::heapString(retry.getMsgId()), {}); if (retry.isDelaySeconds()) { entry.value.delaySeconds = retry.getDelaySeconds(); } @@ -822,8 +847,8 @@ kj::Promise QueueCustomEvent::sendRpc( } kj::Array QueueCustomEvent::getRetryMessages() const { - auto retryMsgs = kj::heapArrayBuilder(result.retries.size()); - for (const auto& entry: result.retries) { + auto retryMsgs = kj::heapArrayBuilder(result->retries.size()); + for (const auto& entry: result->retries) { retryMsgs.add(QueueRetryMessage{ .msgId = kj::heapString(entry.key), .delaySeconds = entry.value.delaySeconds}); } @@ -831,8 +856,8 @@ kj::Array QueueCustomEvent::getRetryMessages() const { } kj::Array QueueCustomEvent::getExplicitAcks() const { - auto ackArray = kj::heapArrayBuilder(result.explicitAcks.size()); - for (const auto& msgId: result.explicitAcks) { + auto 
ackArray = kj::heapArrayBuilder(result->explicitAcks.size()); + for (const auto& msgId: result->explicitAcks) { ackArray.add(kj::heapString(msgId)); } return ackArray.finish(); diff --git a/src/workerd/api/queue.h b/src/workerd/api/queue.h index 3391367089b..5ba2a8e5815 100644 --- a/src/workerd/api/queue.h +++ b/src/workerd/api/queue.h @@ -211,8 +211,11 @@ struct QueueResponse { }; // Internal-only representation used to accumulate the results of a queue event. +// Independently refcounted so that JSG wrappers (QueueEvent, QueueMessage) can keep it +// alive via IoOwn even after the per-request QueueCustomEvent is destroyed — critical for +// Durable Objects where the IoContext outlives individual queue dispatches. -struct QueueEventResult { +struct QueueEventResult: public kj::Refcounted { struct RetryOptions { jsg::Optional delaySeconds; }; @@ -233,8 +236,8 @@ struct QueueRetryOptions { class QueueMessage final: public jsg::Object { public: - QueueMessage(jsg::Lock& js, rpc::QueueMessage::Reader message, IoPtr result); - QueueMessage(jsg::Lock& js, IncomingQueueMessage message, IoPtr result); + QueueMessage(jsg::Lock& js, rpc::QueueMessage::Reader message, IoOwn result); + QueueMessage(jsg::Lock& js, IncomingQueueMessage message, IoOwn result); kj::StringPtr getId() { return id; @@ -268,7 +271,7 @@ class QueueMessage final: public jsg::Object { void visitForMemoryInfo(jsg::MemoryTracker& tracker) const { tracker.trackField("id", id); tracker.trackField("body", body); - tracker.trackFieldWithSize("IoPtr", sizeof(IoPtr)); + tracker.trackFieldWithSize("IoOwn", sizeof(IoOwn)); } private: @@ -276,7 +279,7 @@ class QueueMessage final: public jsg::Object { kj::Date timestamp; jsg::JsRef body; uint16_t attempts; - IoPtr result; + IoOwn result; void visitForGc(jsg::GcVisitor& visitor) { visitor.visit(body); @@ -295,8 +298,8 @@ class QueueEvent final: public ExtendableEvent { explicit QueueEvent(jsg::Lock& js, rpc::EventDispatcher::QueueParams::Reader params, - IoPtr 
result); - explicit QueueEvent(jsg::Lock& js, Params params, IoPtr result); + IoOwn result); + explicit QueueEvent(jsg::Lock& js, Params params, IoOwn result); static jsg::Ref constructor(kj::String type) = delete; @@ -337,7 +340,7 @@ class QueueEvent final: public ExtendableEvent { } tracker.trackField("queueName", queueName); tracker.trackFieldWithSize("metadata", sizeof(MessageBatchMetadata)); - tracker.trackFieldWithSize("IoPtr", sizeof(IoPtr)); + tracker.trackFieldWithSize("IoOwn", sizeof(IoOwn)); } struct Incomplete {}; @@ -361,7 +364,7 @@ class QueueEvent final: public ExtendableEvent { kj::Array> messages; kj::String queueName; MessageBatchMetadata metadata; - IoPtr result; + IoOwn result; CompletionStatus completionStatus = Incomplete{}; void visitForGc(jsg::GcVisitor& visitor) { @@ -452,10 +455,10 @@ class QueueCustomEvent final: public WorkerInterface::CustomEvent, public kj::Re tracing::EventInfo getEventInfo() const override; QueueRetryBatch getRetryBatch() const { - return {.retry = result.retryBatch.retry, .delaySeconds = result.retryBatch.delaySeconds}; + return {.retry = result->retryBatch.retry, .delaySeconds = result->retryBatch.delaySeconds}; } bool getAckAll() const { - return result.ackAll; + return result->ackAll; } kj::Array getRetryMessages() const; kj::Array getExplicitAcks() const; @@ -466,7 +469,7 @@ class QueueCustomEvent final: public WorkerInterface::CustomEvent, public kj::Re private: kj::OneOf params; - QueueEventResult result; + kj::Own result = kj::refcounted(); }; #define EW_QUEUE_ISOLATE_TYPES \ diff --git a/src/workerd/api/r2-bucket.c++ b/src/workerd/api/r2-bucket.c++ index 11dd775a28a..76993cb40df 100644 --- a/src/workerd/api/r2-bucket.c++ +++ b/src/workerd/api/r2-bucket.c++ @@ -572,7 +572,7 @@ jsg::Promise>> R2Bucket::put(jsg::Lock& KJ_SWITCH_ONEOF(v) { KJ_CASE_ONEOF(v, jsg::Ref) { (*v).cancel(js, - js.v8Error( + js.error( "Stream cancelled because the associated put operation encountered an error.")); } KJ_CASE_ONEOF_DEFAULT 
{} @@ -1367,7 +1367,7 @@ void R2Bucket::HeadResult::writeHttpMetadata(jsg::Lock& js, Headers& headers) { } } -jsg::Promise R2Bucket::GetResult::arrayBuffer(jsg::Lock& js) { +jsg::Promise> R2Bucket::GetResult::arrayBuffer(jsg::Lock& js) { return js.evalNow([&] { JSG_REQUIRE(!body->isDisturbed(), TypeError, "Body has already been used. " @@ -1378,7 +1378,7 @@ jsg::Promise R2Bucket::GetResult::arrayBuffer(jsg::Lock& js) }); } -jsg::Promise R2Bucket::GetResult::bytes(jsg::Lock& js) { +jsg::Promise> R2Bucket::GetResult::bytes(jsg::Lock& js) { return js.evalNow([&] { JSG_REQUIRE(!body->isDisturbed(), TypeError, "Body has already been used. " @@ -1387,8 +1387,9 @@ jsg::Promise R2Bucket::GetResult::bytes(jsg::Lock& js) { auto& context = IoContext::current(); return body->getController() .readAllBytes(js, context.getLimitEnforcer().getBufferingLimit()) - .then(js, [](jsg::Lock& js, jsg::BufferSource data) { - return data.getTypedView(js); + .then(js, [](jsg::Lock& js, jsg::JsRef data) { + auto handle = data.getHandle(js); + return jsg::JsUint8Array::create(js, handle).addRef(js); }); }); } @@ -1422,11 +1423,14 @@ jsg::Promise R2Bucket::GetResult::json(jsg::Lock& js) { jsg::Promise> R2Bucket::GetResult::blob(jsg::Lock& js) { // Copy-pasted from http.c++ - return arrayBuffer(js).then(js, [this](jsg::Lock& js, jsg::BufferSource buffer) { + return arrayBuffer(js).then( + js, [this, self = JSG_THIS](jsg::Lock& js, jsg::JsRef buffer) { // httpMetadata can't be null because GetResult always populates it. + // Note: `self` (jsg::Ref) is captured to prevent GC from collecting this object while + // the promise continuation is pending. Without it, the bare `this` pointer dangles. 
kj::String contentType = mapCopyString(KJ_REQUIRE_NONNULL(httpMetadata).contentType).orDefault(nullptr); - return js.alloc(js, buffer.getJsHandle(js), kj::mv(contentType)); + return js.alloc(js, jsg::JsBufferSource(buffer.getHandle(js)), kj::mv(contentType)); }); } diff --git a/src/workerd/api/r2-bucket.h b/src/workerd/api/r2-bucket.h index 6c0fecb80d2..ced46d20f0a 100644 --- a/src/workerd/api/r2-bucket.h +++ b/src/workerd/api/r2-bucket.h @@ -392,8 +392,8 @@ class R2Bucket: public jsg::Object { return body->isDisturbed(); } - jsg::Promise arrayBuffer(jsg::Lock& js); - jsg::Promise bytes(jsg::Lock& js); + jsg::Promise> arrayBuffer(jsg::Lock& js); + jsg::Promise> bytes(jsg::Lock& js); jsg::Promise text(jsg::Lock& js); jsg::Promise json(jsg::Lock& js); jsg::Promise> blob(jsg::Lock& js); diff --git a/src/workerd/api/rtti.c++ b/src/workerd/api/rtti.c++ index 50295ac5d83..da29d73ee1d 100644 --- a/src/workerd/api/rtti.c++ +++ b/src/workerd/api/rtti.c++ @@ -179,7 +179,7 @@ CompatibilityFlags::Reader compileFlags(capnp::MessageBuilder &message, SimpleWorkerErrorReporter errorReporter; compileCompatibilityFlags(compatDate, flagList.asReader(), output, errorReporter, experimental, - CompatibilityDateValidation::FUTURE_FOR_TEST); + CompatibilityDateValidation::FUTURE_FOR_TEST, nullptr); if (!errorReporter.errors.empty()) { // TODO(someday): throw an `AggregateError` containing all errors diff --git a/src/workerd/api/sockets-test.c++ b/src/workerd/api/sockets-test.c++ index 1649f250202..61efdfb2942 100644 --- a/src/workerd/api/sockets-test.c++ +++ b/src/workerd/api/sockets-test.c++ @@ -5,7 +5,6 @@ #include #include #include -#include #include @@ -114,20 +113,19 @@ KJ_TEST("socket writes are blocked by output gate") { [&](const TestFixture::Environment& env) -> kj::Promise { auto& actor = env.context.getActorOrThrow(); - // Step 1: Connect without gate lock so the pipe is established. 
- auto socket = connectImplNoOutputLock(env.js, kj::none, kj::str("localhost:1234"), kj::none); + // Step 1: Connect before locking the gate so the pipe is established. + auto socket = connectImpl(env.js, kj::none, kj::str("localhost:1234"), kj::none); env.js.runMicrotasks(); // Prepare write data and lock gate BEFORE any co_await (Worker lock still held). auto paf = kj::newPromiseAndFulfiller(); auto blocker = actor.getOutputGate().lockWhile(kj::mv(paf.promise), nullptr); auto writable = socket->getWritable(); - auto data = kj::heapArray({'h', 'i'}); - auto jsBuffer = env.js.bytes(kj::mv(data)).getHandle(env.js); + jsg::JsValue jsBuffer = jsg::JsUint8Array::create(env.js, "hi"_kjb); writable->getController().write(env.js, jsBuffer).markAsHandled(env.js); - // With autogate (@all-autogates), connect is deferred. Wait for it. - // After co_await, Worker lock is released — no V8 calls allowed. + // Connect can be deferred by other pending output locks. Wait for it. + // After co_await, Worker lock is released -- no V8 calls allowed. for (int i = 0; i < 10 && pipeEnd == kj::none; i++) { co_await kj::evalLater([]() {}); } @@ -153,8 +151,8 @@ KJ_TEST("socket writes are blocked by output gate") { errorsToIgnore); } -// Connect deferral test runs last — its drain errors fire during process exit. -KJ_TEST("connectImplNoOutputLock defers connect until output gate clears") { +// Connect deferral test runs last -- its drain errors fire during process exit. 
+KJ_TEST("connectImpl defers connect until output gate clears") { bool connectCalled = false; kj::HttpHeaderTable headerTable; kj::Maybe pipeEnd; @@ -169,8 +167,6 @@ KJ_TEST("connectImplNoOutputLock defers connect until output gate clears") { }), }); - bool autogateOn = util::Autogate::isEnabled(util::AutogateKey::TCP_SOCKET_CONNECT_OUTPUT_GATE); - static constexpr kj::StringPtr errorsToIgnore[] = { "failed to invoke drain()"_kj, "no subrequests"_kj, @@ -182,19 +178,13 @@ KJ_TEST("connectImplNoOutputLock defers connect until output gate clears") { auto paf = kj::newPromiseAndFulfiller(); auto blocker = actor.getOutputGate().lockWhile(kj::mv(paf.promise), nullptr); - auto socket = connectImplNoOutputLock(env.js, kj::none, kj::str("localhost:1234"), kj::none); + auto socket = connectImpl(env.js, kj::none, kj::str("localhost:1234"), kj::none); - if (autogateOn) { - co_await kj::evalLater([]() {}); - KJ_EXPECT(!connectCalled, "connect must not happen while output gate is locked"); - paf.fulfiller->fulfill(); - co_await kj::evalLater([]() {}); - KJ_EXPECT(connectCalled, "connect must happen after output gate releases"); - } else { - KJ_EXPECT(connectCalled, "without autogate, connect must happen synchronously"); - paf.fulfiller->fulfill(); - co_await kj::evalLater([]() {}); - } + co_await kj::evalLater([]() {}); + KJ_EXPECT(!connectCalled, "connect must not happen while output gate is locked"); + paf.fulfiller->fulfill(); + co_await kj::evalLater([]() {}); + KJ_EXPECT(connectCalled, "connect must happen after output gate releases"); }), errorsToIgnore); } diff --git a/src/workerd/api/sockets.c++ b/src/workerd/api/sockets.c++ index c361b2a6144..3a39c03b242 100644 --- a/src/workerd/api/sockets.c++ +++ b/src/workerd/api/sockets.c++ @@ -184,7 +184,7 @@ jsg::Ref setupSocket(jsg::Lock& js, return result; } -jsg::Ref connectImplNoOutputLock(jsg::Lock& js, +jsg::Ref connectImpl(jsg::Lock& js, kj::Maybe> fetcher, AnySocketAddress address, jsg::Optional options) { @@ -258,10 
+258,7 @@ jsg::Ref connectImplNoOutputLock(jsg::Lock& js, kj::Own tlsStarter = kj::heap(); httpConnectSettings.tlsStarter = tlsStarter; - KJ_IF_SOME(promise, - util::Autogate::isEnabled(util::AutogateKey::TCP_SOCKET_CONNECT_OUTPUT_GATE) - ? ioContext.waitForOutputLocksIfNecessary() - : kj::none) { + KJ_IF_SOME(promise, ioContext.waitForOutputLocksIfNecessary()) { // Wrap the real WorkerInterface in a promised interface that defers connect // until the DO output gate clears. client = newPromisedWorkerInterface( @@ -282,16 +279,6 @@ jsg::Ref connectImplNoOutputLock(jsg::Lock& js, return result; } -jsg::Ref connectImpl(jsg::Lock& js, - kj::Maybe> fetcher, - AnySocketAddress address, - jsg::Optional options) { - // When the TCP_SOCKET_CONNECT_OUTPUT_GATE autogate is enabled, the output gate wait is - // handled inside connectImplNoOutputLock via a deferred connect task, so no separate wait - // is needed here. TODO(cleanup): rename connectImplNoOutputLock once the autogate is removed. - return connectImplNoOutputLock(js, kj::mv(fetcher), kj::mv(address), kj::mv(options)); -} - jsg::Promise Socket::close(jsg::Lock& js) { if (isClosing) { return closedPromiseCopy.whenResolved(js); @@ -302,9 +289,11 @@ jsg::Promise Socket::close(jsg::Lock& js) { readable->getController().setPendingClosure(); // Wait until the socket connects (successfully or otherwise) + // Note: `self` (jsg::Ref) is captured in each continuation to prevent GC from collecting + // this object while the promise chain is pending. Without it, the bare `this` pointer dangles. return openedPromiseCopy.whenResolved(js) .then(js, - [this](jsg::Lock& js) { + [this, self = JSG_THIS](jsg::Lock& js) { if (!writable->getController().isClosedOrClosing()) { return writable->getController().flush(js); } else { @@ -312,7 +301,7 @@ jsg::Promise Socket::close(jsg::Lock& js) { } }) .then(js, - [this](jsg::Lock& js) { + [this, self = JSG_THIS](jsg::Lock& js) { // Forcibly abort the readable/writable streams. 
auto cancelPromise = readable->getController().cancel(js, kj::none); auto abortPromise = writable->getController().abort(js, kj::none); @@ -322,14 +311,16 @@ jsg::Promise Socket::close(jsg::Lock& js) { return kj::mv(abortPromise); }); }) - .then(js, [this](jsg::Lock& js) { + .then(js, [this, self = JSG_THIS](jsg::Lock& js) { // Destroy the connection stream to close the connection. { auto _ = kj::mv(connectionData); } connectionData = kj::none; resolveFulfiller(js, kj::none); return js.resolvedPromise(); - }).catch_(js, [this](jsg::Lock& js, jsg::Value err) { errorHandler(js, kj::mv(err)); }); + }).catch_(js, [this, self = JSG_THIS](jsg::Lock& js, jsg::Value err) { + errorHandler(js, kj::mv(err)); + }); } jsg::Ref Socket::startTls(jsg::Lock& js, jsg::Optional tlsOptions) { @@ -342,6 +333,18 @@ jsg::Ref Socket::startTls(jsg::Lock& js, jsg::Optional tlsOp JSG_REQUIRE(secureTransport == SecureTransportKind::STARTTLS, TypeError, invalidOptKindMsg); JSG_REQUIRE(domain != kj::none, TypeError, "startTls can only be called once."); + KJ_IF_SOME(opts, tlsOptions) { + if (opts.expectedServerHostname != kj::none) { + if (util::Autogate::isEnabled(util::AutogateKey::STARTTLS_REJECT_EXPECTED_SERVER_HOSTNAME)) { + JSG_FAIL_REQUIRE( + TypeError, "The expectedServerHostname option is not currently supported in startTls."); + } else { + LOG_ERROR_PERIODICALLY( + "NOSENTRY startTls called with unsupported expectedServerHostname option"); + } + } + } + // The current socket's writable buffers need to be flushed. The socket's WritableStream is backed // by an AsyncIoStream which doesn't implement any buffering, so we don't need to worry about // flushing. But the JS WritableStream holds a queue so some data may still be buffered. 
This diff --git a/src/workerd/api/sockets.h b/src/workerd/api/sockets.h index ebf79371f79..98d6933f06d 100644 --- a/src/workerd/api/sockets.h +++ b/src/workerd/api/sockets.h @@ -260,11 +260,6 @@ jsg::Ref setupSocket(jsg::Lock& js, bool isDefaultFetchPort, kj::Maybe> maybeOpenedPrPair); -jsg::Ref connectImplNoOutputLock(jsg::Lock& js, - kj::Maybe> fetcher, - AnySocketAddress address, - jsg::Optional options); - jsg::Ref connectImpl(jsg::Lock& js, kj::Maybe> fetcher, AnySocketAddress address, diff --git a/src/workerd/api/sql.c++ b/src/workerd/api/sql.c++ index 602ece8621c..77e59f8e693 100644 --- a/src/workerd/api/sql.c++ +++ b/src/workerd/api/sql.c++ @@ -7,8 +7,6 @@ #include "actor-state.h" #include -#include -#include #if _WIN32 #define strncasecmp _strnicmp @@ -57,9 +55,9 @@ jsg::Ref SqlStorage::exec( // Move cached statement to end of LRU queue. if (slot->lruLink.isLinked()) { - statementCache.lru.remove(*slot.get()); + statementCache.lru.remove(*slot); } - statementCache.lru.add(*slot.get()); + statementCache.lru.add(*slot); // In order to get accurate statistics, we have to keep the spans around until the query is // actually done, which for read queries that iterate over a cursor won't be until later. 
@@ -141,13 +139,7 @@ double SqlStorage::getDatabaseSize(jsg::Lock& js) { } bool SqlStorageRegulator::isAllowedName(kj::StringPtr name) const { - if (util::Autogate::isEnabled(util::AutogateKey::SQL_RESTRICT_RESERVED_NAMES)) { - return strncasecmp(name.begin(), "_cf_", 4) != 0; - } - if (name.size() >= 4 && strncasecmp(name.begin(), "_cf_", 4) == 0) { - LOG_WARNING_PERIODICALLY("SQL identifier matches reserved _cf_ prefix case-insensitively"); - } - return !name.startsWith("_cf_"); + return name.size() < 4 || strncasecmp(name.begin(), "_cf_", 4) != 0; } bool SqlStorageRegulator::isAllowedTrigger(kj::StringPtr name) const { diff --git a/src/workerd/api/streams-test.c++ b/src/workerd/api/streams-test.c++ index 8f87442dd7f..257f7377613 100644 --- a/src/workerd/api/streams-test.c++ +++ b/src/workerd/api/streams-test.c++ @@ -58,12 +58,12 @@ KJ_TEST("Reading from default reader") { KJ_ASSERT(!readResult.done); auto& value = KJ_REQUIRE_NONNULL(readResult.value); auto handle = value.getHandle(js); - KJ_ASSERT(handle->IsUint8Array()); + auto u8 = KJ_ASSERT_NONNULL(handle.tryCast()); if (util::Autogate::isEnabled(util::AutogateKey::UPDATED_AUTO_ALLOCATE_CHUNK_SIZE)) { // With 16KB buffer, the entire 10KB stream fits in one read. 
- KJ_ASSERT(streamLength == handle.As()->ByteLength()); + KJ_ASSERT(streamLength == u8.size()); } else { - KJ_ASSERT(4 * 1024 == handle.As()->ByteLength()); + KJ_ASSERT(4 * 1024 == u8.size()); } }))); }); @@ -106,22 +106,22 @@ KJ_TEST("Reading from byob reader") { auto& value = KJ_REQUIRE_NONNULL(readResult.value); auto handle = value.getHandle(js); - KJ_ASSERT(handle->IsUint8Array()); - auto view = handle.As(); - KJ_ASSERT(kj::min(test.streamLength, test.bufferSize) == view->ByteLength()); - KJ_ASSERT(test.bufferSize == view->Buffer()->ByteLength()); + KJ_ASSERT(handle.isUint8Array()); + v8::Local u8 = KJ_ASSERT_NONNULL(handle.tryCast()); + KJ_ASSERT(kj::min(test.streamLength, test.bufferSize) == u8->ByteLength()); + KJ_ASSERT(test.bufferSize == u8->Buffer()->ByteLength()); }))); return kj::READY_NOW; }); } } -KJ_TEST("PumpToReader regression") { - // If the promise holding the PumpToReader is dropped while the inner - // write to the sink is pending, the PumpToReader can free the sink. - // In some cases, this means that the sink can error because shutdownWrite - // is called while there is still a pending write promise. This test verifies - // that PumpToReader cancels any pending write promise when it is destroyed. +KJ_TEST("ReadableStream pumpTo pending write cancellation regression") { + // If the promise holding pumpTo's implementation is dropped while the inner + // write to the sink is pending, the sink can be freed. In some cases, this + // means that the sink can error because shutdownWrite is called while there + // is still a pending write promise. This test verifies that destruction of + // the pump operation cancels any pending write promise. 
struct TestSink final: public WritableStreamSink { kj::TwoWayPipe pipe; @@ -179,7 +179,7 @@ KJ_TEST("PumpToReader regression") { [](jsg::Lock& js, auto controller) { auto& c = KJ_REQUIRE_NONNULL( controller.template tryGet>()); - c->enqueue(js, v8::ArrayBuffer::New(js.v8Isolate, 10)); + c->enqueue(js, jsg::JsValue(v8::ArrayBuffer::New(js.v8Isolate, 10))); c->close(js); return js.resolvedPromise(); }}, diff --git a/src/workerd/api/streams/README.md b/src/workerd/api/streams/README.md index 2791e649fea..f01007f4393 100644 --- a/src/workerd/api/streams/README.md +++ b/src/workerd/api/streams/README.md @@ -319,14 +319,12 @@ for (auto consumer: consumers) { ### Pattern: WeakRef for User-Held Handles -- **When**: Handles user code may hold longer than underlying object (`ByobRequest`, - `PumpToReader`) +- **When**: Handles user code may hold longer than underlying object (`ByobRequest`) - **How**: Check liveness before use ```cpp -KJ_IF_SOME(reader, pumpToReader->tryGet()) { - reader.pumpLoop(js, ...); // Safe -- still alive -} +impl.controller->runIfAlive( + [](ReadableByteStreamController& controller) { controller.maybeByobRequest = kj::none; }); ``` ### Pattern: `Rc` for Shared Queue Data @@ -350,12 +348,12 @@ class Entry: public kj::Refcounted { re-acquire. ```cpp -auto onSuccess = JSG_VISITABLE_LAMBDA((this, ref = addRef(), ...), ..., (...) { +auto onSuccess = [this, ref = addRef(), ...](...) 
mutable { auto maybePipeLock = lock.tryGetPipe(); if (maybePipeLock == kj::none) return js.resolvedPromise(); auto& pipeLock = KJ_REQUIRE_NONNULL(maybePipeLock); // Now safe to use pipeLock -}); +}; ``` ### Pattern: StateListener Self-Destruction Guard @@ -373,21 +371,12 @@ void onConsumerClose(jsg::Lock& js) override { } ``` -### Pattern: Refcounted Pipe State +### Pattern: Weak-ref'd Pipe State - **When**: Internal stream pipe operations with async continuations -- **How**: `Pipe::State` is `kj::Refcounted`; lambdas capture `kj::addRef(*state)`; - `~Pipe()` sets `state->aborted = true`; continuations check before proceeding - -```cpp -struct Pipe { - struct State: public kj::Refcounted { - bool aborted = false; - }; - kj::Own state; - ~Pipe() noexcept(false) { state->aborted = true; } -}; -``` +- **How**: `Pipe::State` holds a weak ref to `Pipe`. Rather than holding bare + references to `Pipe` in the queue, this ensures continuations remain safe if the + pipe is somehow destroyed while operations are pending.
### Pattern: Generation Counter diff --git a/src/workerd/api/streams/common.c++ b/src/workerd/api/streams/common.c++ index 09339cd4bf5..9ba516d8c35 100644 --- a/src/workerd/api/streams/common.c++ +++ b/src/workerd/api/streams/common.c++ @@ -7,14 +7,14 @@ namespace workerd::api { WritableStreamController::PendingAbort::PendingAbort( - jsg::Lock& js, jsg::PromiseResolverPair prp, v8::Local reason, bool reject) + jsg::Lock& js, jsg::PromiseResolverPair prp, jsg::JsValue reason, bool reject) : resolver(kj::mv(prp.resolver)), promise(kj::mv(prp.promise)), - reason(js.v8Ref(reason)), + reason(reason.addRef(js)), reject(reject) {} WritableStreamController::PendingAbort::PendingAbort( - jsg::Lock& js, v8::Local reason, bool reject) + jsg::Lock& js, jsg::JsValue reason, bool reject) : WritableStreamController::PendingAbort(js, js.newPromiseAndResolver(), reason, reject) { } @@ -26,13 +26,8 @@ void WritableStreamController::PendingAbort::complete(jsg::Lock& js) { } } -void WritableStreamController::PendingAbort::fail(jsg::Lock& js, v8::Local reason) { +void WritableStreamController::PendingAbort::fail(jsg::Lock& js, jsg::JsValue reason) { maybeRejectPromise(js, resolver, reason); } -kj::Maybe> WritableStreamController::PendingAbort:: - dequeue(kj::Maybe>& maybePendingAbort) { - return kj::mv(maybePendingAbort); -} - } // namespace workerd::api diff --git a/src/workerd/api/streams/common.h b/src/workerd/api/streams/common.h index a129b575685..758e10df2e6 100644 --- a/src/workerd/api/streams/common.h +++ b/src/workerd/api/streams/common.h @@ -57,7 +57,7 @@ inline bool hasUtf8Bom(kj::ArrayPtr data) { } struct ReadResult { - jsg::Optional value; + jsg::Optional> value; bool done; JSG_STRUCT(value, done); @@ -80,7 +80,7 @@ struct DrainingReadResult { }; struct StreamQueuingStrategy { - using SizeAlgorithm = uint64_t(v8::Local); + using SizeAlgorithm = uint64_t(jsg::JsValue); jsg::Optional highWaterMark; jsg::Optional> size; @@ -96,7 +96,7 @@ struct UnderlyingSource { kj::OneOf, 
jsg::Ref>; using StartAlgorithm = jsg::Promise(Controller); using PullAlgorithm = jsg::Promise(Controller); - using CancelAlgorithm = jsg::Promise(v8::Local reason); + using CancelAlgorithm = jsg::Promise(jsg::JsValue reason); // The autoAllocateChunkSize mechanism allows byte streams to operate as if a BYOB // reader is being used even if it is just a default reader. Support is optional @@ -152,8 +152,8 @@ struct UnderlyingSource { struct UnderlyingSink { using Controller = jsg::Ref; using StartAlgorithm = jsg::Promise(Controller); - using WriteAlgorithm = jsg::Promise(v8::Local, Controller); - using AbortAlgorithm = jsg::Promise(v8::Local reason); + using WriteAlgorithm = jsg::Promise(jsg::JsValue, Controller); + using AbortAlgorithm = jsg::Promise(jsg::JsValue reason); using CloseAlgorithm = jsg::Promise(); // Per the spec, the type property for the UnderlyingSink should always be either @@ -179,7 +179,7 @@ struct UnderlyingSink { struct Transformer { using Controller = jsg::Ref; using StartAlgorithm = jsg::Promise(Controller); - using TransformAlgorithm = jsg::Promise(v8::Local, Controller); + using TransformAlgorithm = jsg::Promise(jsg::JsValue, Controller); using FlushAlgorithm = jsg::Promise(Controller); using CancelAlgorithm = jsg::Promise(jsg::JsValue reason); @@ -319,12 +319,12 @@ namespace StreamStates { struct Closed { static constexpr kj::StringPtr NAME KJ_UNUSED = "closed"_kj; }; -using Errored = jsg::Value; +using Errored = jsg::JsRef; struct Erroring { static constexpr kj::StringPtr NAME KJ_UNUSED = "erroring"_kj; - jsg::Value reason; + jsg::JsRef reason; - Erroring(jsg::Value reason): reason(kj::mv(reason)) {} + Erroring(jsg::JsRef reason): reason(kj::mv(reason)) {} void visitForGc(jsg::GcVisitor& visitor) { visitor.visit(reason); @@ -428,7 +428,7 @@ class ReadableStreamController { virtual ~Branch() noexcept(false) {} virtual void doClose(jsg::Lock& js) = 0; - virtual void doError(jsg::Lock& js, v8::Local reason) = 0; + virtual void 
doError(jsg::Lock& js, jsg::JsValue reason) = 0; virtual void handleData(jsg::Lock& js, ReadResult result) = 0; }; @@ -445,7 +445,7 @@ class ReadableStreamController { inner->doClose(js); } - inline void doError(jsg::Lock& js, v8::Local reason) { + inline void doError(jsg::Lock& js, jsg::JsValue reason) { inner->doError(js, reason); } @@ -470,7 +470,7 @@ class ReadableStreamController { virtual void close(jsg::Lock& js) = 0; - virtual void error(jsg::Lock& js, v8::Local reason) = 0; + virtual void error(jsg::Lock& js, jsg::JsValue reason) = 0; virtual void ensurePulling(jsg::Lock& js) = 0; @@ -486,11 +486,11 @@ class ReadableStreamController { public: virtual ~PipeController() noexcept(false) {} virtual bool isClosed() = 0; - virtual kj::Maybe> tryGetErrored(jsg::Lock& js) = 0; - virtual void cancel(jsg::Lock& js, v8::Local reason) = 0; + virtual kj::Maybe tryGetErrored(jsg::Lock& js) = 0; + virtual void cancel(jsg::Lock& js, jsg::JsValue reason) = 0; virtual void close(jsg::Lock& js) = 0; - virtual void error(jsg::Lock& js, v8::Local reason) = 0; - virtual void release(jsg::Lock& js, kj::Maybe> maybeError = kj::none) = 0; + virtual void error(jsg::Lock& js, jsg::JsValue reason) = 0; + virtual void release(jsg::Lock& js, kj::Maybe maybeError = kj::none) = 0; virtual kj::Maybe> tryPumpTo(WritableStreamSink& sink, bool end) = 0; virtual jsg::Promise read(jsg::Lock& js) = 0; }; @@ -537,7 +537,7 @@ class ReadableStreamController { jsg::Lock& js, WritableStreamController& destination, PipeToOptions options) = 0; // Indicates that the consumer no longer has any interest in the streams data. - virtual jsg::Promise cancel(jsg::Lock& js, jsg::Optional> reason) = 0; + virtual jsg::Promise cancel(jsg::Lock& js, jsg::Optional reason) = 0; // Branches the ReadableStreamController into two ReadableStream instances that will receive // this streams data. 
The specific details of how the branching occurs is entirely up to the @@ -573,7 +573,8 @@ class ReadableStreamController { // // limit specifies an upper maximum bound on the number of bytes permitted to be read. // The promise will reject if the read will produce more bytes than the limit. - virtual jsg::Promise readAllBytes(jsg::Lock& js, uint64_t limit) = 0; + virtual jsg::Promise> readAllBytes( + jsg::Lock& js, uint64_t limit) = 0; // Fully consumes the ReadableStream. If the stream is already locked to a reader or // errored, the returned JS promise will reject. If the stream is already closed, the @@ -670,22 +671,25 @@ class WritableStreamController { virtual void replaceReadyPromise(jsg::Lock& js, jsg::Promise readyPromise) = 0; }; + // PendingAbort is a GC traced struct. Do not hold it with a kj::Own or + // kj::Rc as that will cause GC tracing issues. Once traced, the held + // resolver and reason become weak. Moving the own does not change their + // status and GC can reclaim them even while the PendingAbort is still + // alive. 
struct PendingAbort { kj::Maybe::Resolver> resolver; jsg::Promise promise; - jsg::Value reason; + jsg::JsRef reason; bool reject = false; - PendingAbort(jsg::Lock& js, - jsg::PromiseResolverPair prp, - v8::Local reason, - bool reject); + PendingAbort( + jsg::Lock& js, jsg::PromiseResolverPair prp, jsg::JsValue reason, bool reject); - PendingAbort(jsg::Lock& js, v8::Local reason, bool reject); + PendingAbort(jsg::Lock& js, jsg::JsValue reason, bool reject); void complete(jsg::Lock& js); - void fail(jsg::Lock& js, v8::Local reason); + void fail(jsg::Lock& js, jsg::JsValue reason); inline jsg::Promise whenResolved(jsg::Lock& js) { return promise.whenResolved(js); @@ -703,9 +707,6 @@ class WritableStreamController { visitor.visit(resolver, promise, reason); } - static kj::Maybe> dequeue( - kj::Maybe>& maybePendingAbort); - JSG_MEMORY_INFO(PendingAbort) { tracker.trackField("resolver", resolver); tracker.trackField("promise", promise); @@ -722,7 +723,7 @@ class WritableStreamController { // The controller implementation will determine what kind of JavaScript data // it is capable of writing, returning a rejected promise if the written // data type is not supported. - virtual jsg::Promise write(jsg::Lock& js, jsg::Optional> value) = 0; + virtual jsg::Promise write(jsg::Lock& js, jsg::Optional value) = 0; // Indicates that no additional data will be written to the controller. All // existing pending writes should be allowed to complete. @@ -733,7 +734,7 @@ class WritableStreamController { virtual jsg::Promise flush(jsg::Lock& js, bool markAsHandled = false) = 0; // Immediately interrupts existing pending writes and errors the stream. - virtual jsg::Promise abort(jsg::Lock& js, jsg::Optional> reason) = 0; + virtual jsg::Promise abort(jsg::Lock& js, jsg::Optional reason) = 0; // The tryPipeFrom attempts to establish a data pipe where source's data // is delivered to this WritableStreamController as efficiently as possible. 
@@ -765,7 +766,7 @@ class WritableStreamController { // If maybeJs is set, the writer's closed and ready promises will be resolved. virtual void releaseWriter(Writer& writer, kj::Maybe maybeJs) = 0; - virtual kj::Maybe> isErroring(jsg::Lock& js) = 0; + virtual kj::Maybe isErroring(jsg::Lock& js) = 0; virtual void visitForGc(jsg::GcVisitor& visitor) {}; @@ -935,7 +936,7 @@ inline void maybeResolvePromise( template void maybeRejectPromise(jsg::Lock& js, kj::Maybe::Resolver>& maybeResolver, - v8::Local reason) { + jsg::JsValue reason) { KJ_IF_SOME(resolver, maybeResolver) { resolver.reject(js, reason); maybeResolver = kj::none; @@ -943,8 +944,7 @@ void maybeRejectPromise(jsg::Lock& js, } template -jsg::Promise rejectedMaybeHandledPromise( - jsg::Lock& js, v8::Local reason, bool handled) { +jsg::Promise rejectedMaybeHandledPromise(jsg::Lock& js, jsg::JsValue reason, bool handled) { auto prp = js.newPromiseAndResolver(); if (handled) { prp.promise.markAsHandled(js); diff --git a/src/workerd/api/streams/compression.c++ b/src/workerd/api/streams/compression.c++ index 0633f8ee15c..bd59669f886 100644 --- a/src/workerd/api/streams/compression.c++ +++ b/src/workerd/api/streams/compression.c++ @@ -17,12 +17,6 @@ CompressionAllocator::CompressionAllocator( kj::Arc&& externalMemoryTarget) : externalMemoryTarget(kj::mv(externalMemoryTarget)) {} -void CompressionAllocator::configure(z_stream* stream) { - stream->zalloc = AllocForZlib; - stream->zfree = FreeForZlib; - stream->opaque = this; -} - void* CompressionAllocator::AllocForZlib(void* data, uInt items, uInt size) { size_t real_size = nbytes::MultiplyWithOverflowCheck(static_cast(items), static_cast(size)); @@ -78,7 +72,10 @@ class Context { { // Configure allocator before any stream operations. 
- allocator.configure(&ctx); + ctx.zalloc = CompressionAllocator::AllocForZlib; + ctx.zfree = CompressionAllocator::FreeForZlib; + ctx.opaque = &allocator; + int result = Z_OK; switch (mode) { case Mode::COMPRESS: @@ -331,7 +328,7 @@ class CompressionStreamBase: public kj::Refcounted, kj::Promise tryReadInternal(kj::ArrayPtr dest, size_t minBytes) { const auto copyIntoBuffer = [this](kj::ArrayPtr dest) { auto maxBytesToCopy = kj::min(dest.size(), output.size()); - dest.first(maxBytesToCopy).copyFrom(output.take(maxBytesToCopy)); + dest.write(output.take(maxBytesToCopy)); output.maybeShift(); return maxBytesToCopy; }; diff --git a/src/workerd/api/streams/compression.h b/src/workerd/api/streams/compression.h index 5b93142b706..6309179d385 100644 --- a/src/workerd/api/streams/compression.h +++ b/src/workerd/api/streams/compression.h @@ -18,7 +18,6 @@ namespace workerd::api { class CompressionAllocator final { public: CompressionAllocator(kj::Arc&& externalMemoryTarget); - void configure(z_stream* stream); static void* AllocForZlib(void* data, uInt items, uInt size); static void* AllocForBrotli(void* data, size_t size); diff --git a/src/workerd/api/streams/draining-read-uaf-test.c++ b/src/workerd/api/streams/draining-read-uaf-test.c++ new file mode 100644 index 00000000000..21c3544dd33 --- /dev/null +++ b/src/workerd/api/streams/draining-read-uaf-test.c++ @@ -0,0 +1,173 @@ +// Copyright (c) 2017-2022 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for a use-after-free in wrapDrainingRead. +// +// The bug: ReadableStreamJsController::drainingRead() wraps the inner promise from +// Consumer::drainingRead() with .then() callbacks that call endOperation() on the +// controller. These callbacks captured a raw `this` pointer to the controller with +// no strong reference keeping it alive. 
If the DrainingReader (which holds the only +// jsg::Ref) was destroyed while the promise was pending — e.g., due +// to coroutine cancellation in pumpToImpl — the controller was freed, and the .then() +// callbacks would access dangling memory. +// +// The fix adds `self = addRef()` captures to the wrapDrainingRead callbacks, keeping +// the stream (and controller) alive until the callbacks complete. +// +// This test reproduces the scenario: +// 1. Create a stream with an async pull (no immediate data). +// 2. Start a draining read → pending promise. +// 3. Enqueue data → resolves the inner promise, enqueueing microtasks. +// 4. Drop ALL external refs to the stream (reader + rs). +// 5. Run microtasks — the .then() callbacks fire. +// +// Without the fix, step 5 is a use-after-free on the controller's state member. +// With the fix, the self ref in the callbacks keeps the controller alive. +// ASAN catches the pre-fix version. + +#include "readable.h" +#include "standard.h" + +#include +#include +#include + +namespace workerd::api { +namespace { + +void preamble(auto callback) { + TestFixture fixture; + fixture.runInIoContext([&](const TestFixture::Environment& env) { callback(env.js); }); +} + +jsg::JsValue toBytes(jsg::Lock& js, kj::StringPtr str) { + return jsg::JsUint8Array::create(js, str.asBytes()); +} + +// Regression test: dropping the DrainingReader while a draining read promise is +// pending must not cause a use-after-free when the promise callbacks fire. +KJ_TEST("wrapDrainingRead ref prevents UAF when DrainingReader is dropped (value stream)") { + preamble([](jsg::Lock& js) { + // The pull callback saves a controller ref so we can enqueue data after + // the draining read is pending. It deliberately does NOT enqueue data, + // forcing drainingRead() into its async path. 
+ kj::Maybe> savedCtrl; + + auto rs = js.alloc(newReadableStreamJsController()); + // clang-format off + rs->getController().setup(js, UnderlyingSource{ + .pull = [&](jsg::Lock& js, UnderlyingSource::Controller controller) { + KJ_SWITCH_ONEOF(controller) { + KJ_CASE_ONEOF(c, jsg::Ref) { + if (savedCtrl == kj::none) { + savedCtrl = c.addRef(); + } + // Return resolved but do NOT enqueue data — this makes + // drainingRead fall into the async path. + return js.resolvedPromise(); + } + KJ_CASE_ONEOF(c, jsg::Ref) {} + } + KJ_UNREACHABLE; + } + }, StreamQueuingStrategy{.highWaterMark = 0}); + // clang-format on + + // Create a DrainingReader and start a read. The pull doesn't provide data, + // so drainingRead() queues a ReadRequest and returns a pending promise. + auto reader = KJ_ASSERT_NONNULL(DrainingReader::create(js, *rs)); + + // Drop the stream. Since js.alloc() never created a CppGC shim (the stream + // was only used from C++, never passed to JS), this is the last external + // strong ref. Without the fix, maybeDeferDestruction (which runs immediately + // under the lock) frees the ReadableStream and its ReadableStreamJsController. + // With the fix, the self = addRef() captured in wrapDrainingRead's .then() + // callbacks keeps the refcount > 0. + // The reader still holds a jsg::Ref as long as it is active. + { auto drop = kj::mv(rs); } + + bool readCompleted = false; + auto promise = reader->read(js).then(js, [&](jsg::Lock& js, DrainingReadResult&& result) { + KJ_ASSERT(!result.done); + KJ_ASSERT(result.chunks.size() == 1); + KJ_ASSERT(kj::str(result.chunks[0].asChars()) == "test"); + readCompleted = true; + }); + + // The pull should have been called, giving us a controller ref. + auto& ctrl = KJ_ASSERT_NONNULL(savedCtrl); + + // Enqueue data. This resolves the pending ReadRequest inside the consumer, + // which resolves the inner promise in the drainingRead chain. The .then() + // microtasks are enqueued but NOT yet processed. 
+ ctrl->enqueue(js, toBytes(js, "test")); + + // Drop the saved controller ref — we no longer need it. + savedCtrl = kj::none; + + // Drop the reader. ~DrainingReader releases the reader lock and drops its + // jsg::Ref, which should be the last external ref to the + // stream. + { auto drop = kj::mv(reader); } + + // Process microtasks. The promise chain fires: + // inner .then() (Consumer level) → outer .then() (wrapDrainingRead) → our .then() + // + // Without fix: the outer .then() accesses this->state on the freed controller → UAF. + // With fix: self ref keeps the controller alive through the callbacks. + js.runMicrotasks(); + + KJ_ASSERT(readCompleted, "draining read promise should have resolved with data"); + }); +} + +// Same test but for byte streams. +KJ_TEST("wrapDrainingRead ref prevents UAF when DrainingReader is dropped (byte stream)") { + preamble([](jsg::Lock& js) { + kj::Maybe> savedCtrl; + + auto rs = js.alloc(newReadableStreamJsController()); + // clang-format off + rs->getController().setup(js, UnderlyingSource{ + .type = kj::str("bytes"), + .pull = [&](jsg::Lock& js, UnderlyingSource::Controller controller) { + KJ_SWITCH_ONEOF(controller) { + KJ_CASE_ONEOF(c, jsg::Ref) {} + KJ_CASE_ONEOF(c, jsg::Ref) { + if (savedCtrl == kj::none) { + savedCtrl = c.addRef(); + } + return js.resolvedPromise(); + } + } + KJ_UNREACHABLE; + } + }, StreamQueuingStrategy{.highWaterMark = 0}); + // clang-format on + + auto reader = KJ_ASSERT_NONNULL(DrainingReader::create(js, *rs)); + + bool readCompleted = false; + auto promise = reader->read(js).then(js, [&](jsg::Lock& js, DrainingReadResult&& result) { + KJ_ASSERT(!result.done); + KJ_ASSERT(result.chunks.size() == 1); + KJ_ASSERT(kj::str(result.chunks[0].asChars()) == "test"); + readCompleted = true; + }); + + auto& ctrl = KJ_ASSERT_NONNULL(savedCtrl); + ctrl->enqueue(js, jsg::BufferSource(js, jsg::JsBufferSource(toBytes(js, "test")))); + savedCtrl = kj::none; + + { auto drop = kj::mv(reader); } + { auto drop = 
kj::mv(rs); } + + js.runMicrotasks(); + + KJ_ASSERT(readCompleted, "draining read promise should have resolved with data"); + }); +} + +} // namespace +} // namespace workerd::api diff --git a/src/workerd/api/streams/encoding.c++ b/src/workerd/api/streams/encoding.c++ index 105ff2593e2..d39aaa62493 100644 --- a/src/workerd/api/streams/encoding.c++ +++ b/src/workerd/api/streams/encoding.c++ @@ -42,9 +42,9 @@ struct Holder: public kj::Refcounted { jsg::Ref TextEncoderStream::constructor(jsg::Lock& js) { auto state = kj::rc(); - auto transform = [holder = state.addRef()](jsg::Lock& js, v8::Local chunk, + auto transform = [holder = state.addRef()](jsg::Lock& js, jsg::JsValue chunk, jsg::Ref controller) mutable { - auto str = jsg::check(chunk->ToString(js.v8Context())); + v8::Local str = chunk.toJsString(js); size_t length = str->Length(); if (length == 0) return js.resolvedPromise(); @@ -147,10 +147,10 @@ jsg::Ref TextDecoderStream::constructor( Transformer{.transform = jsg::Function( JSG_VISITABLE_LAMBDA( (decoder = decoder.addRef()), (decoder), (jsg::Lock& js, auto chunk, auto controller) { - JSG_REQUIRE(chunk->IsArrayBuffer() || chunk->IsArrayBufferView(), TypeError, + JSG_REQUIRE(chunk.isArrayBuffer() || chunk.isArrayBufferView(), TypeError, "This TransformStream is being used as a byte stream, " "but received a value that is not a BufferSource."); - jsg::BufferSource source(js, chunk); + jsg::JsBufferSource source(chunk); auto decoded = JSG_REQUIRE_NONNULL(decoder->decodePtr(js, source.asArrayPtr(), false), TypeError, "Failed to decode input."); diff --git a/src/workerd/api/streams/internal-test.c++ b/src/workerd/api/streams/internal-test.c++ index d6baca04935..c2f9cc8f817 100644 --- a/src/workerd/api/streams/internal-test.c++ +++ b/src/workerd/api/streams/internal-test.c++ @@ -2,6 +2,7 @@ // Licensed under the Apache 2.0 license found in the LICENSE file or at: // https://opensource.org/licenses/Apache-2.0 +#include "identity-transform-stream.h" #include 
"internal.h" #include "readable.h" #include "standard.h" @@ -280,12 +281,12 @@ KJ_TEST("WritableStreamInternalController queue size assertion") { "is currently locked to a writer."); } - auto buffersource = env.js.bytes(kj::heapArray(10)); + jsg::JsValue buffersource = jsg::JsUint8Array::create(env.js, 10); bool writeFailed = false; auto write = sink->getController() - .write(env.js, buffersource.getHandle(env.js)) + .write(env.js, buffersource) .catch_(env.js, [&](jsg::Lock& js, jsg::Value value) { writeFailed = true; auto ex = js.exceptionToKj(kj::mv(value)); @@ -376,9 +377,9 @@ KJ_TEST("WritableStreamInternalController observability") { stream = env.js.alloc(env.context, kj::heap(), kj::mv(myObserver)); auto write = [&](size_t size) { - auto buffersource = env.js.bytes(kj::heapArray(size)); - return env.context.awaitJs(env.js, - KJ_ASSERT_NONNULL(stream)->getController().write(env.js, buffersource.getHandle(env.js))); + jsg::JsValue buffersource = jsg::JsUint8Array::create(env.js, size); + return env.context.awaitJs( + env.js, KJ_ASSERT_NONNULL(stream)->getController().write(env.js, buffersource)); }; KJ_ASSERT(observer.queueSize == 0); @@ -427,8 +428,8 @@ KJ_TEST("WritableStreamInternalController pipeLoop abort during pending read") { auto& c = KJ_ASSERT_NONNULL(controller.tryGet>()); if (pullCount == 1) { // First pull: enqueue some data so the pipe loop can make progress - auto data = js.bytes(kj::heapArray({1, 2, 3, 4})); - c->enqueue(js, data.getHandle(js)); + jsg::JsValue data = jsg::JsUint8Array::create(js, {1, 2, 3, 4}); + c->enqueue(js, data); } // Second pull onwards: don't enqueue anything, leaving the read pending. // This simulates an async data source that hasn't received data yet. 
@@ -445,7 +446,7 @@ KJ_TEST("WritableStreamInternalController pipeLoop abort during pending read") { env.js.runMicrotasks(); // Abort while pipeLoop is waiting for a pending read - auto abortPromise = sink->getController().abort(env.js, env.js.v8TypeError("Test abort"_kj)); + auto abortPromise = sink->getController().abort(env.js, env.js.typeError("Test abort"_kj)); abortPromise.markAsHandled(env.js); env.js.runMicrotasks(); @@ -946,5 +947,28 @@ KJ_TEST("ReadableStreamBYOBReader rejects read after releaseLock") { }); } +KJ_TEST("Writing strings works") { + auto fixture = makeStreamTestFixture(); + fixture.runInIoContext([&](const TestFixture::Environment& env) { + auto sink = env.js.alloc(env.context, kj::heap(), kj::none); + auto writer = sink->getWriter(env.js); + // Previously this would throw synchronously when a string was passed. + auto writePromise = writer->write(env.js, env.js.str("works"_kj)); + env.js.runMicrotasks(); + }); +} + +KJ_TEST("Writing SharedArrayBuffer works") { + auto fixture = makeStreamTestFixture(); + fixture.runInIoContext([&](const TestFixture::Environment& env) { + auto sink = env.js.alloc(env.context, kj::heap(), kj::none); + auto writer = sink->getWriter(env.js); + // Previously this would throw synchronously when a SAB was passed. 
+ auto sab = v8::SharedArrayBuffer::New(env.js.v8Isolate, 5); + auto writePromise = writer->write(env.js, jsg::JsSharedArrayBuffer(sab)); + env.js.runMicrotasks(); + }); +} + } // namespace } // namespace workerd::api diff --git a/src/workerd/api/streams/internal.c++ b/src/workerd/api/streams/internal.c++ index 82a355d20b2..9f38e7fee91 100644 --- a/src/workerd/api/streams/internal.c++ +++ b/src/workerd/api/streams/internal.c++ @@ -16,6 +16,8 @@ #include +#include + namespace workerd::api { namespace { @@ -246,17 +248,16 @@ class AllReader final { continue; } KJ_DASSERT(slicedPart.size() <= out.size()); - out.first(slicedPart.size()).copyFrom(slicedPart); - out = out.slice(slicedPart.size()); + out.write(slicedPart); } } }; kj::Exception reasonToException(jsg::Lock& js, - jsg::Optional> maybeReason, + jsg::Optional maybeReason, kj::String defaultDescription = kj::str(JSG_EXCEPTION(Error) ": Stream was cancelled.")) { KJ_IF_SOME(reason, maybeReason) { - return js.exceptionToKj(js.v8Ref(reason)); + return js.exceptionToKj(reason); } else { // We get here if the caller is something like `r.cancel()` (or `r.cancel(undefined)`). 
return kj::Exception( @@ -444,7 +445,7 @@ kj::Maybe> ReadableStreamInternalController::read( if (isPendingClosure) { return js.rejectedPromise( - js.v8TypeError("This ReadableStream belongs to an object that is closing."_kj)); + js.typeError("This ReadableStream belongs to an object that is closing."_kj)); } v8::Local store; @@ -460,7 +461,7 @@ kj::Maybe> ReadableStreamInternalController::read( if (byobOptions.detachBuffer) { if (!store->IsDetachable()) { return js.rejectedPromise( - js.v8TypeError("Unable to use non-detachable ArrayBuffer"_kj)); + js.typeError("Unable to use non-detachable ArrayBuffer"_kj)); } auto backing = store->GetBackingStore(); jsg::check(store->Detach(v8::Local())); @@ -495,17 +496,18 @@ kj::Maybe> ReadableStreamInternalController::read( auto theStore = getOrInitStore(true); if (theStore.IsEmpty()) { return js.rejectedPromise( - js.v8TypeError("Unable to allocate memory for read"_kj)); + js.typeError("Unable to allocate memory for read"_kj)); } + auto u8 = v8::Uint8Array::New(theStore, 0, 0); return js.resolvedPromise(ReadResult{ - .value = js.v8Ref(v8::Uint8Array::New(theStore, 0, 0).As()), + .value = jsg::JsValue(u8).addRef(js), .done = true, }); } return js.resolvedPromise(ReadResult{.done = true}); } KJ_CASE_ONEOF(errored, StreamStates::Errored) { - return js.rejectedPromise(errored.addRef(js)); + return js.rejectedPromise(errored.getHandle(js)); } KJ_CASE_ONEOF(readable, Readable) { // TODO(conform): Requiring serialized read requests is non-conformant, but we've never had a @@ -515,7 +517,7 @@ kj::Maybe> ReadableStreamInternalController::read( // TransformStream implementation is primarily (only?) used for constructing manually // streamed Responses, and no teed ReadableStream has ever supported them. 
if (readPending) { - return js.rejectedPromise(js.v8TypeError( + return js.rejectedPromise(js.typeError( "This ReadableStream only supports a single pending read request at a time."_kj)); } readPending = true; @@ -523,7 +525,7 @@ kj::Maybe> ReadableStreamInternalController::read( auto theStore = getOrInitStore(); if (theStore.IsEmpty()) { return js.rejectedPromise( - js.v8TypeError("Unable to allocate memory for read"_kj)); + js.typeError("Unable to allocate memory for read"_kj)); } // In the case the ArrayBuffer is detached/transfered while the read is pending, we @@ -550,8 +552,9 @@ kj::Maybe> ReadableStreamInternalController::read( auto currentByteLength = theStore->ByteLength(); if (byteOffset >= currentByteLength) { readPending = false; + auto u8 = v8::Uint8Array::New(theStore, 0, 0); return js.resolvedPromise(ReadResult{ - .value = js.v8Ref(v8::Uint8Array::New(theStore, 0, 0).As()), + .value = jsg::JsValue(u8).addRef(js), .done = false, }); } @@ -588,27 +591,28 @@ kj::Maybe> ReadableStreamInternalController::read( // That's a larger refactor, though. 
auto& ioContext = IoContext::current(); return ioContext.awaitIoLegacy(js, kj::mv(promise)) - .then(js, ioContext.addFunctor(JSG_VISITABLE_LAMBDA( - (this, ref = addRef(), store = js.v8Ref(store), - byteOffset, byteLength, isByob = maybeByobOptions != kj::none, - isResizable, readPtr, tempBuffer = kj::mv(tempBuffer)), - (ref), - (jsg::Lock& js, size_t amount) mutable -> jsg::Promise { - readPending = false; + .then(js, + ioContext.addFunctor( + [ref = addRef(), store = js.v8Ref(store), byteOffset, byteLength, + isByob = maybeByobOptions != kj::none, isResizable, readPtr, + tempBuffer = kj::mv(tempBuffer)]( + jsg::Lock& js, size_t amount) mutable -> jsg::Promise { + auto& controller = static_cast(ref->getController()); + controller.readPending = false; KJ_ASSERT(amount <= byteLength); if (amount == 0) { - if (!state.is()) { - doClose(js); + if (!controller.state.is()) { + controller.doClose(js); } - KJ_IF_SOME(o, owner) { + KJ_IF_SOME(o, controller.owner) { o.signalEof(js); - } else {} + } if (isByob && FeatureFlags::get(js).getInternalStreamByobReturn()) { // When using the BYOB reader, we must return a sized-0 Uint8Array that is backed // by the ArrayBuffer passed in the options. 
auto u8 = v8::Uint8Array::New(store.getHandle(js), 0, 0); return js.resolvedPromise(ReadResult{ - .value = js.v8Ref(u8.As()), + .value = jsg::JsValue(u8).addRef(js), .done = true, }); } @@ -632,8 +636,9 @@ kj::Maybe> ReadableStreamInternalController::read( "flag, to prevent this from happening."_kj); auto buffer = v8::ArrayBuffer::New(js.v8Isolate, 0); + auto u8 = v8::Uint8Array::New(buffer, 0, 0); return js.resolvedPromise(ReadResult{ - .value = js.v8Ref(v8::Uint8Array::New(buffer, 0, 0).As()), + .value = jsg::JsValue(u8).addRef(js), .done = false, }); } @@ -650,8 +655,9 @@ kj::Maybe> ReadableStreamInternalController::read( "happening."_kj); if (byteOffset >= handle->ByteLength()) { + auto u8 = v8::Uint8Array::New(store.getHandle(js), 0, 0); return js.resolvedPromise(ReadResult{ - .value = js.v8Ref(v8::Uint8Array::New(store.getHandle(js), 0, 0).As()), + .value = jsg::JsValue(u8).addRef(js), .done = false, }); } @@ -665,22 +671,23 @@ kj::Maybe> ReadableStreamInternalController::read( memcpy(destPtr + byteOffset, readPtr, amount); } + auto u8 = v8::Uint8Array::New(store.getHandle(js), byteOffset, amount); return js.resolvedPromise(ReadResult{ - .value = js.v8Ref( - v8::Uint8Array::New(store.getHandle(js), byteOffset, amount).As()), + .value = jsg::JsValue(u8).addRef(js), .done = false, }); - })), - ioContext.addFunctor(JSG_VISITABLE_LAMBDA( - (this, ref = addRef()), - (ref), - (jsg::Lock& js, jsg::Value reason) -> jsg::Promise { - readPending = false; - if (!state.is()) { - doError(js, reason.getHandle(js)); + }), + ioContext.addFunctor([ref = addRef()](jsg::Lock& js, + jsg::Value reason) mutable -> jsg::Promise { + auto& controller = static_cast(ref->getController()); + controller.readPending = false; + auto error = jsg::JsValue(reason.getHandle(js)); + if (!controller.state.is()) { + controller.doError(js, error); } - return js.rejectedPromise(kj::mv(reason)); - }))); + + return js.rejectedPromise(error); + })); } } KJ_UNREACHABLE; @@ -699,7 +706,7 @@ kj::Maybe> 
ReadableStreamInternalController::dr if (isPendingClosure) { return js.rejectedPromise( - js.v8TypeError("This ReadableStream belongs to an object that is closing."_kj)); + js.typeError("This ReadableStream belongs to an object that is closing."_kj)); } static constexpr size_t kAtLeast = 1; @@ -715,7 +722,7 @@ kj::Maybe> ReadableStreamInternalController::dr } KJ_CASE_ONEOF(readable, Readable) { if (readPending) { - return js.rejectedPromise(js.v8TypeError( + return js.rejectedPromise(js.typeError( "This ReadableStream only supports a single pending read request at a time."_kj)); } readPending = true; @@ -749,35 +756,37 @@ kj::Maybe> ReadableStreamInternalController::dr auto& ioContext = IoContext::current(); return ioContext.awaitIoLegacy(js, kj::mv(promise)) - .then(js, ioContext.addFunctor(JSG_VISITABLE_LAMBDA( - (this, ref = addRef(), store = kj::mv(store)), - (ref), - (jsg::Lock& js, size_t amount) mutable -> jsg::Promise { - readPending = false; + .then(js, + ioContext.addFunctor([ref = addRef(), store = kj::mv(store)](jsg::Lock& js, + size_t amount) mutable -> jsg::Promise { + auto& controller = static_cast(ref->getController()); + controller.readPending = false; KJ_ASSERT(amount <= store.size()); if (amount == 0) { - if (!state.is()) { - doClose(js); + if (!controller.state.is()) { + controller.doClose(js); } - KJ_IF_SOME(o, owner) { + KJ_IF_SOME(o, controller.owner) { o.signalEof(js); - } else {} + } return js.resolvedPromise(DrainingReadResult{.done = true}); } // Return a slice so the script can see how many bytes were read. 
return js.resolvedPromise(DrainingReadResult{ .chunks = kj::arr(store.slice(0, amount).attach(kj::mv(store))), .done = false}); - })), - ioContext.addFunctor(JSG_VISITABLE_LAMBDA( - (this, ref = addRef()), - (ref), - (jsg::Lock& js, jsg::Value reason) -> jsg::Promise { - readPending = false; - if (!state.is()) { - doError(js, reason.getHandle(js)); + }), + ioContext.addFunctor( + [ref = addRef()](jsg::Lock& js, + jsg::Value reason) mutable -> jsg::Promise { + auto& controller = static_cast(ref->getController()); + controller.readPending = false; + auto error = jsg::JsValue(reason.getHandle(js)); + if (!controller.state.is()) { + controller.doError(js, error); } - return js.rejectedPromise(kj::mv(reason)); - }))); + + return js.rejectedPromise(error); + })); } } KJ_UNREACHABLE; @@ -791,7 +800,7 @@ jsg::Promise ReadableStreamInternalController::pipeTo( if (isPendingClosure) { return js.rejectedPromise( - js.v8TypeError("This ReadableStream belongs to an object that is closing."_kj)); + js.typeError("This ReadableStream belongs to an object that is closing."_kj)); } disturbed = true; @@ -801,11 +810,11 @@ jsg::Promise ReadableStreamInternalController::pipeTo( } return js.rejectedPromise( - js.v8TypeError("This ReadableStream cannot be piped to this WritableStream."_kj)); + js.typeError("This ReadableStream cannot be piped to this WritableStream."_kj)); } jsg::Promise ReadableStreamInternalController::cancel( - jsg::Lock& js, jsg::Optional> maybeReason) { + jsg::Lock& js, jsg::Optional maybeReason) { disturbed = true; KJ_IF_SOME(errored, state.tryGetUnsafe()) { @@ -818,7 +827,7 @@ jsg::Promise ReadableStreamInternalController::cancel( } void ReadableStreamInternalController::doCancel( - jsg::Lock& js, jsg::Optional> maybeReason) { + jsg::Lock& js, jsg::Optional maybeReason) { auto exception = reasonToException(js, maybeReason); KJ_IF_SOME(locked, readState.tryGetUnsafe()) { KJ_IF_SOME(canceler, locked.getCanceler()) { @@ -843,11 +852,11 @@ void 
ReadableStreamInternalController::doClose(jsg::Lock& js) { } } -void ReadableStreamInternalController::doError(jsg::Lock& js, v8::Local reason) { +void ReadableStreamInternalController::doError(jsg::Lock& js, jsg::JsValue reason) { // If already in a terminal state, nothing to do. if (state.isTerminal()) return; - state.transitionTo(js.v8Ref(reason)); + state.transitionTo(reason.addRef(js)); KJ_IF_SOME(locked, readState.tryGetUnsafe()) { maybeRejectPromise(js, locked.getClosedFulfiller(), reason); } else { @@ -982,7 +991,7 @@ void ReadableStreamInternalController::releaseReader( "Cannot call releaseLock() on a reader with outstanding read promises."); } maybeRejectPromise(js, locked.getClosedFulfiller(), - js.v8TypeError("This ReadableStream reader has been released."_kj)); + js.typeError("This ReadableStream reader has been released."_kj)); } locked.clear(); @@ -1013,18 +1022,50 @@ jsg::Ref WritableStreamInternalController::addRef() { } jsg::Promise WritableStreamInternalController::write( - jsg::Lock& js, jsg::Optional> value) { + jsg::Lock& js, jsg::Optional value) { if (isPendingClosure) { return js.rejectedPromise( - js.v8TypeError("This WritableStream belongs to an object that is closing."_kj)); + js.typeError("This WritableStream belongs to an object that is closing."_kj)); } if (isClosedOrClosing()) { - return js.rejectedPromise(js.v8TypeError("This WritableStream has been closed."_kj)); + return js.rejectedPromise(js.typeError("This WritableStream has been closed."_kj)); } if (isPiping()) { return js.rejectedPromise( - js.v8TypeError("This WritableStream is currently being piped to."_kj)); - } + js.typeError("This WritableStream is currently being piped to."_kj)); + } + + static const auto processChunk = + [](jsg::Lock& js, kj::Maybe value) -> kj::Maybe> { + KJ_IF_SOME(chunk, value) { + KJ_IF_SOME(ab, chunk.tryCast()) { + if (ab.size() > 0) return ab.copy(); + return kj::none; + } else KJ_IF_SOME(sab, chunk.tryCast()) { + if (sab.size() > 0) return 
sab.copy(); + return kj::none; + } else KJ_IF_SOME(view, chunk.tryCast()) { + if (view.size() > 0) return jsg::JsBufferSource(view).copy(); + return kj::none; + } else if (chunk.isString()) { + // While slightly outside of spec, we can allow writing strings by converting those + // to UTF-8 bytes. This is an ergonomic improvement to avoid forcing users to create + // a TextEncoder just to write strings to a stream, which is an exceedingly common + // use case. + auto str = chunk.toString(js); + auto ptr = str.asBytes(); + return ptr.attach(kj::mv(str)); + } else { + // TODO(later): This really ought to return a rejected promise and not a sync throw. + throwTypeErrorAndConsoleWarn( + "This TransformStream is being used as a byte stream, but received an object of " + "non-ArrayBuffer/ArrayBufferView type on its writable side."); + // The throwTypeErrorAndConsoleWarn is marked [[noreturn]] + } + KJ_UNREACHABLE; + } + return kj::none; + }; KJ_SWITCH_ONEOF(state) { KJ_CASE_ONEOF(closed, StreamStates::Closed) { @@ -1032,66 +1073,38 @@ jsg::Promise WritableStreamInternalController::write( KJ_UNREACHABLE; } KJ_CASE_ONEOF(errored, StreamStates::Errored) { - return js.rejectedPromise(errored.addRef(js)); + return js.rejectedPromise(errored.getHandle(js)); } KJ_CASE_ONEOF(writable, IoOwn) { - if (value == kj::none) { - return js.resolvedPromise(); - } - auto chunk = KJ_ASSERT_NONNULL(value); - - std::shared_ptr store; - size_t byteLength = 0; - size_t byteOffset = 0; - if (chunk->IsArrayBuffer()) { - auto buffer = chunk.As(); - store = buffer->GetBackingStore(); - byteLength = buffer->ByteLength(); - } else if (chunk->IsArrayBufferView()) { - auto view = chunk.As(); - store = view->Buffer()->GetBackingStore(); - byteLength = view->ByteLength(); - byteOffset = view->ByteOffset(); - } else if (chunk->IsString()) { - // TODO(later): This really ought to return a rejected promise and not a sync throw. 
- // This case caused me a moment of confusion during testing, so I think it's worth - // a specific error message. - throwTypeErrorAndConsoleWarn( - "This TransformStream is being used as a byte stream, but received a string on its " - "writable side. If you wish to write a string, you'll probably want to explicitly " - "UTF-8-encode it with TextEncoder."); - } else { - // TODO(later): This really ought to return a rejected promise and not a sync throw. - throwTypeErrorAndConsoleWarn( - "This TransformStream is being used as a byte stream, but received an object of " - "non-ArrayBuffer/ArrayBufferView type on its writable side."); - } + // Because writes happen outside of the isolate lock, and because ArrayBuffers + // might not be detached by the Writer, or might be detached after being written, + // or might be resizable and resized after being written, processChunk always + // creates a safe copy of the data to be written in a kj::Array + KJ_IF_SOME(data, processChunk(js, value)) { + size_t len = data.size(); + auto ptr = data.asPtr(); + + auto prp = js.newPromiseAndResolver(); + adjustWriteBufferSize(js, len); + KJ_IF_SOME(o, observer) { + o->onChunkEnqueued(len); + } + queue.push_back(WriteEvent{ + .outputLock = IoContext::current().waitForOutputLocksIfNecessaryIoOwn(), + .event = + Write{ + .promise = kj::mv(prp.resolver), + .totalBytes = len, + .ownBytes = kj::mv(data), + .bytes = ptr, + }, + }); - if (byteLength == 0) { + ensureWriting(js); + return kj::mv(prp.promise); + } else { return js.resolvedPromise(); } - - auto prp = js.newPromiseAndResolver(); - adjustWriteBufferSize(js, byteLength); - KJ_IF_SOME(o, observer) { - o->onChunkEnqueued(byteLength); - } - - auto src = kj::arrayPtr(static_cast(store->Data()) + byteOffset, byteLength); - auto data = kj::heapArray(src.size()); - data.asPtr().copyFrom(src); - auto ptr = data.asPtr(); - queue.push_back( - WriteEvent{.outputLock = IoContext::current().waitForOutputLocksIfNecessaryIoOwn(), - .event = 
kj::heap({ - .promise = kj::mv(prp.resolver), - .totalBytes = store->ByteLength(), - .ownBytes = kj::mv(data), - .bytes = ptr, - })}); - - ensureWriting(js); - return kj::mv(prp.promise); } } @@ -1133,7 +1146,7 @@ jsg::Promise WritableStreamInternalController::closeImpl(jsg::Lock& js, bo return js.resolvedPromise(); } if (isPiping()) { - auto reason = js.v8TypeError("This WritableStream is currently being piped to."_kj); + auto reason = js.typeError("This WritableStream is currently being piped to."_kj); return rejectedMaybeHandledPromise(js, reason, markAsHandled); } @@ -1153,7 +1166,7 @@ jsg::Promise WritableStreamInternalController::closeImpl(jsg::Lock& js, bo } queue.push_back( WriteEvent{.outputLock = IoContext::current().waitForOutputLocksIfNecessaryIoOwn(), - .event = kj::heap({.promise = kj::mv(prp.resolver)})}); + .event = Close{.promise = kj::mv(prp.resolver)}}); ensureWriting(js); return kj::mv(prp.promise); } @@ -1170,8 +1183,9 @@ jsg::Promise WritableStreamInternalController::close(jsg::Lock& js, bool m return closureWaitable.whenResolved(js); } waitingOnClosureWritableAlready = true; - auto promise = closureWaitable.then(js, [markAsHandled, this](jsg::Lock& js) { - return closeImpl(js, markAsHandled); + auto promise = closureWaitable.then(js, [ref = addRef(), markAsHandled](jsg::Lock& js) mutable { + auto& controller = static_cast(ref->getController()); + return controller.closeImpl(js, markAsHandled); }, [](jsg::Lock& js, jsg::Value) { // Ignore rejection as it will be reported in the Socket's `closed`/`opened` promises // instead. 
@@ -1186,11 +1200,11 @@ jsg::Promise WritableStreamInternalController::close(jsg::Lock& js, bool m jsg::Promise WritableStreamInternalController::flush(jsg::Lock& js, bool markAsHandled) { if (isClosedOrClosing()) { - auto reason = js.v8TypeError("This WritableStream has been closed."_kj); + auto reason = js.typeError("This WritableStream has been closed."_kj); return rejectedMaybeHandledPromise(js, reason, markAsHandled); } if (isPiping()) { - auto reason = js.v8TypeError("This WritableStream is currently being piped to."_kj); + auto reason = js.typeError("This WritableStream is currently being piped to."_kj); return rejectedMaybeHandledPromise(js, reason, markAsHandled); } @@ -1210,7 +1224,7 @@ jsg::Promise WritableStreamInternalController::flush(jsg::Lock& js, bool m } queue.push_back( WriteEvent{.outputLock = IoContext::current().waitForOutputLocksIfNecessaryIoOwn(), - .event = kj::heap({.promise = kj::mv(prp.resolver)})}); + .event = Flush{.promise = kj::mv(prp.resolver)}}); ensureWriting(js); return kj::mv(prp.promise); } @@ -1220,15 +1234,15 @@ jsg::Promise WritableStreamInternalController::flush(jsg::Lock& js, bool m } jsg::Promise WritableStreamInternalController::abort( - jsg::Lock& js, jsg::Optional> maybeReason) { + jsg::Lock& js, jsg::Optional maybeReason) { // While it may be confusing to users to throw `undefined` rather than a more helpful Error here, // doing so is required by the relevant spec: // https://streams.spec.whatwg.org/#writable-stream-abort - return doAbort(js, maybeReason.orDefault(js.v8Undefined())); + return doAbort(js, maybeReason.orDefault(js.undefined())); } jsg::Promise WritableStreamInternalController::doAbort( - jsg::Lock& js, v8::Local reason, AbortOptions options) { + jsg::Lock& js, jsg::JsValue reason, AbortOptions options) { // If maybePendingAbort is set, then the returned abort promise will be rejected // with the specified error once the abort is completed, otherwise the promise will // be resolved with undefined. 
@@ -1236,8 +1250,8 @@ jsg::Promise WritableStreamInternalController::doAbort( // If there is already an abort pending, return that pending promise // instead of trying to schedule another. KJ_IF_SOME(pendingAbort, maybePendingAbort) { - pendingAbort->reject = options.reject; - auto promise = pendingAbort->whenResolved(js); + pendingAbort.reject = options.reject; + auto promise = pendingAbort.whenResolved(js); if (options.handled) { promise.markAsHandled(js); } @@ -1245,7 +1259,7 @@ jsg::Promise WritableStreamInternalController::doAbort( } KJ_IF_SOME(writable, state.tryGetUnsafe>()) { - auto exception = js.exceptionToKj(js.v8Ref(reason)); + auto exception = js.exceptionToKj(reason); if (FeatureFlags::get(js).getInternalWritableStreamAbortClearsQueue()) { // If this flag is set, we will clear the queue proactively and immediately @@ -1266,8 +1280,8 @@ jsg::Promise WritableStreamInternalController::doAbort( : js.resolvedPromise(); } - maybePendingAbort = kj::heap(js, reason, options.reject); - auto promise = KJ_ASSERT_NONNULL(maybePendingAbort)->whenResolved(js); + auto& pending = maybePendingAbort.emplace(js, reason, options.reject); + auto promise = pending.whenResolved(js); if (options.handled) { promise.markAsHandled(js); } @@ -1294,7 +1308,7 @@ kj::Maybe> WritableStreamInternalController::tryPipeFrom( auto pipeThrough = options.pipeThrough; if (isPiping()) { - auto reason = js.v8TypeError("This WritableStream is currently being piped to."_kj); + auto reason = js.typeError("This WritableStream is currently being piped to."_kj); return rejectedMaybeHandledPromise(js, reason, pipeThrough); } @@ -1365,7 +1379,7 @@ kj::Maybe> WritableStreamInternalController::tryPipeFrom( // If the destination has closed, the spec requires us to close the source if // preventCancel is false (Propagate closing backward). 
if (isClosedOrClosing()) { - auto destClosed = js.v8TypeError("This destination writable stream is closed."_kj); + auto destClosed = js.typeError("This destination writable stream is closed."_kj); writeState.transitionTo(); if (!preventCancel) { @@ -1389,7 +1403,7 @@ kj::Maybe> WritableStreamInternalController::tryPipeFrom( } queue.push_back(WriteEvent{ .outputLock = IoContext::current().waitForOutputLocksIfNecessaryIoOwn(), - .event = kj::heap(*this, sourceLock, kj::mv(prp.resolver), preventAbort, preventClose, + .event = Pipe(*this, sourceLock, kj::mv(prp.resolver), preventAbort, preventClose, preventCancel, kj::mv(options.signal)), }); ensureWriting(js); @@ -1502,7 +1516,7 @@ void WritableStreamInternalController::releaseWriter( KJ_ASSERT(&locked.getWriter() == &writer); KJ_IF_SOME(js, maybeJs) { maybeRejectPromise(js, locked.getClosedFulfiller(), - js.v8TypeError("This WritableStream writer has been released."_kj)); + js.typeError("This WritableStream writer has been released."_kj)); } locked.clear(); @@ -1519,13 +1533,12 @@ void WritableStreamInternalController::releaseWriter( bool WritableStreamInternalController::isClosedOrClosing() { - bool isClosing = !queue.empty() && queue.back().event.is>(); - bool isFlushing = !queue.empty() && queue.back().event.is>(); - return state.is() || isClosing || isFlushing; + bool isCloseOrFlush = !queue.empty() && queue.back().isCloseOrFlush(); + return state.is() || isCloseOrFlush; } bool WritableStreamInternalController::isPiping() { - return state.is>() && !queue.empty() && queue.back().event.is>(); + return state.is>() && !queue.empty() && queue.back().isPipe(); } bool WritableStreamInternalController::isErrored() { @@ -1544,14 +1557,14 @@ void WritableStreamInternalController::doClose(jsg::Lock& js) { } else { (void)writeState.transitionFromTo(); } - PendingAbort::dequeue(maybePendingAbort); + maybePendingAbort = kj::none; } -void WritableStreamInternalController::doError(jsg::Lock& js, v8::Local reason) { +void 
WritableStreamInternalController::doError(jsg::Lock& js, jsg::JsValue reason) { // If already in a terminal state, nothing to do. if (state.isTerminal()) return; - state.transitionTo(js.v8Ref(reason)); + state.transitionTo(reason.addRef(js)); KJ_IF_SOME(locked, writeState.tryGetUnsafe()) { maybeRejectPromise(js, locked.getClosedFulfiller(), reason); maybeResolvePromise(js, locked.getReadyFulfiller()); @@ -1559,7 +1572,7 @@ void WritableStreamInternalController::doError(jsg::Lock& js, v8::Local(); } - PendingAbort::dequeue(maybePendingAbort); + maybePendingAbort = kj::none; } void WritableStreamInternalController::ensureWriting(jsg::Lock& js) { @@ -1574,27 +1587,30 @@ jsg::Promise WritableStreamInternalController::writeLoop( if (queue.empty()) { return js.resolvedPromise(); } else KJ_IF_SOME(promise, queue.front().outputLock) { - return ioContext.awaitIo(js, kj::mv(*promise), - [this](jsg::Lock& js) -> jsg::Promise { return writeLoopAfterFrontOutputLock(js); }); + return ioContext.awaitIo( + js, kj::mv(*promise), [ref = addRef()](jsg::Lock& js) mutable -> jsg::Promise { + auto& controller = static_cast(ref->getController()); + return controller.writeLoopAfterFrontOutputLock(js); + }); } else { return writeLoopAfterFrontOutputLock(js); } } void WritableStreamInternalController::finishClose(jsg::Lock& js) { - KJ_IF_SOME(pendingAbort, PendingAbort::dequeue(maybePendingAbort)) { - pendingAbort->complete(js); + KJ_IF_SOME(pendingAbort, kj::mv(maybePendingAbort)) { + pendingAbort.complete(js); } doClose(js); } -void WritableStreamInternalController::finishError(jsg::Lock& js, v8::Local reason) { - KJ_IF_SOME(pendingAbort, PendingAbort::dequeue(maybePendingAbort)) { +void WritableStreamInternalController::finishError(jsg::Lock& js, jsg::JsValue reason) { + KJ_IF_SOME(pendingAbort, kj::mv(maybePendingAbort)) { // In this case, and only this case, we ignore any pending rejection // that may be stored in the pendingAbort. The current exception takes // precedence. 
- pendingAbort->fail(js, reason); + pendingAbort.fail(js, reason); } doError(js, reason); @@ -1605,21 +1621,21 @@ jsg::Promise WritableStreamInternalController::writeLoopAfterFrontOutputLo // This helper function is just used to enhance the assert logging when checking // that the request in flight is the one we expect. - static constexpr auto inspectQueue = [](auto& queue, kj::StringPtr name) { + static constexpr auto inspectQueue = [](auto& queue) { if (queue.size() > 1) { kj::Vector events; for (auto& event: queue) { KJ_SWITCH_ONEOF(event.event) { - KJ_CASE_ONEOF(write, kj::Own) { + KJ_CASE_ONEOF(write, Write) { events.add(kj::str("Write")); } - KJ_CASE_ONEOF(flush, kj::Own) { + KJ_CASE_ONEOF(flush, Flush) { events.add(kj::str("Flush")); } - KJ_CASE_ONEOF(close, kj::Own) { + KJ_CASE_ONEOF(close, Close) { events.add(kj::str("Close")); } - KJ_CASE_ONEOF(pipe, kj::Own) { + KJ_CASE_ONEOF(pipe, Pipe) { events.add(kj::str("Pipe")); } } @@ -1630,7 +1646,7 @@ jsg::Promise WritableStreamInternalController::writeLoopAfterFrontOutputLo return kj::String(); }; - const auto makeChecker = [this]() { + const auto makeChecker = [](auto& controller) { // Make a helper function that asserts that the queue did not change state during a write/close // operation. We normally only pop/drain the queue after write/close completion. We drain the // queue concurrently during finalization, but finalization would also have canceled our @@ -1640,32 +1656,30 @@ jsg::Promise WritableStreamInternalController::writeLoopAfterFrontOutputLo // We capture the current generation and verify it hasn't changed, rather than using pointer // comparison, because RingBuffer may relocate elements when it grows. - return [this, expectedGeneration = queue.currentGeneration()]() -> Request& { - if constexpr (kj::isSameType() || kj::isSameType()) { - // Write and flush requests can have any number of requests backed up after them. 
- KJ_ASSERT(!queue.empty()); - } else if constexpr (kj::isSameType()) { + return [expectedGeneration = controller.queue.currentGeneration()]( + auto& controller) -> Request& { + auto& queue = controller.queue; + KJ_ASSERT(!queue.empty()); + KJ_ASSERT(queue.currentGeneration() == expectedGeneration); + if constexpr (kj::isSameType()) { // Pipe and Close requests are always the last one in the queue. - KJ_ASSERT(queue.size() == 1, queue.size(), inspectQueue(queue, "Pipe")); + KJ_ASSERT(queue.size() == 1, queue.size(), inspectQueue(queue)); } else if constexpr (kj::isSameType()) { // Pipe and Close requests are always the last one in the queue. - KJ_ASSERT(queue.size() == 1, queue.size(), inspectQueue(queue, "Pipe")); + KJ_ASSERT(queue.size() == 1, queue.size(), inspectQueue(queue)); } - // Verify nothing was popped from the queue while we were waiting. - KJ_ASSERT(queue.currentGeneration() == expectedGeneration); - - return *queue.front().event.get>(); + return queue.front().event.template get(); }; }; - const auto maybeAbort = [this](jsg::Lock& js) -> bool { - auto& writable = KJ_ASSERT_NONNULL(state.tryGetUnsafe>()); - KJ_IF_SOME(pendingAbort, WritableStreamController::PendingAbort::dequeue(maybePendingAbort)) { - auto ex = js.exceptionToKj(pendingAbort->reason.addRef(js)); - writable->abort(kj::mv(ex)); - drain(js, pendingAbort->reason.getHandle(js)); - pendingAbort->complete(js); + const auto maybeAbort = [](jsg::Lock& js, auto& controller) -> bool { + KJ_IF_SOME(pendingAbort, kj::mv(controller.maybePendingAbort)) { + controller.state.whenActive([&](IoOwn& writable) { + writable->abort(js.exceptionToKj(pendingAbort.reason.addRef(js))); + }); + controller.drain(js, pendingAbort.reason.getHandle(js)); + pendingAbort.complete(js); return true; } return false; @@ -1675,12 +1689,13 @@ jsg::Promise WritableStreamInternalController::writeLoopAfterFrontOutputLo if (queue.empty()) return js.resolvedPromise(); KJ_SWITCH_ONEOF(queue.front().event) { - KJ_CASE_ONEOF(request, 
kj::Own) { - if (request->bytes.size() == 0) { + KJ_CASE_ONEOF(request, Write) { + auto amountToWrite = request.bytes.size(); + if (amountToWrite == 0) { // Zero-length writes are no-ops with a pending event. If we allowed them, we'd have a hard // time distinguishing between disconnections and zero-length reads on the other end of the // TransformStream. - maybeResolvePromise(js, request->promise); + maybeResolvePromise(js, request.promise); queue.pop_front(); // Note: we don't bother checking for an abort() here because either this write was just @@ -1689,13 +1704,11 @@ jsg::Promise WritableStreamInternalController::writeLoopAfterFrontOutputLo return writeLoop(js, ioContext); } - // writeLoop() is only called with the sink in the Writable state. - auto& writable = state.getUnsafe>(); - auto check = makeChecker(); - - auto amountToWrite = request->bytes.size(); - - auto promise = writable->sink->write(request->bytes).attach(kj::mv(request->ownBytes)); + auto check = makeChecker(*this); + auto promise = KJ_ASSERT_NONNULL(state.whenActive([&request](IoOwn& writable) { + return writable->canceler.wrap( + writable->sink->write(request.bytes).attach(kj::mv(request.ownBytes))); + })); // TODO(soon): We use awaitIoLegacy() here because if the stream terminates in JavaScript in // this same isolate, then the promise may actually be waiting on JavaScript to do something, @@ -1705,75 +1718,96 @@ jsg::Promise WritableStreamInternalController::writeLoopAfterFrontOutputLo // jsg::Promises and not kj::Promises, so that it doesn't look like I/O at all, and there's // no need to drop the isolate lock and take it again every time some data is read/written. // That's a larger refactor, though. 
- return ioContext.awaitIoLegacy(js, writable->canceler.wrap(kj::mv(promise))) + return ioContext.awaitIoLegacy(js, kj::mv(promise)) .then(js, - ioContext.addFunctor( - [this, check, maybeAbort, amountToWrite](jsg::Lock& js) -> jsg::Promise { + ioContext.addFunctor([self = addRef(), check, maybeAbort, amountToWrite]( + jsg::Lock& js) mutable -> jsg::Promise { + auto& controller = static_cast(self->getController()); // Under some conditions, the clean up has already happened. - if (queue.empty()) return js.resolvedPromise(); - auto& request = check.template operator()(); - maybeResolvePromise(js, request.promise); - adjustWriteBufferSize(js, -amountToWrite); - KJ_IF_SOME(o, observer) { + if (controller.queue.empty()) return js.resolvedPromise(); + auto& request = check.template operator()(controller); + auto fulfiller = kj::mv(request.promise); + maybeResolvePromise(js, fulfiller); + controller.adjustWriteBufferSize(js, -amountToWrite); + KJ_IF_SOME(o, controller.observer) { o->onChunkDequeued(amountToWrite); } - queue.pop_front(); - maybeAbort(js); - return writeLoop(js, IoContext::current()); + controller.queue.pop_front(); + maybeAbort(js, controller); + return controller.writeLoop(js, IoContext::current()); }), - ioContext.addFunctor([this, check, maybeAbort, amountToWrite]( - jsg::Lock& js, jsg::Value reason) -> jsg::Promise { + ioContext.addFunctor( + [self = addRef(), check, maybeAbort, amountToWrite]( + jsg::Lock& js, jsg::Value reason) mutable -> jsg::Promise { // Under some conditions, the clean up has already happened. 
- if (queue.empty()) return js.resolvedPromise(); - auto handle = reason.getHandle(js); - auto& request = check.template operator()(); - auto& writable = state.getUnsafe>(); - adjustWriteBufferSize(js, -amountToWrite); - KJ_IF_SOME(o, observer) { + auto& controller = static_cast(self->getController()); + if (controller.queue.empty()) return js.resolvedPromise(); + auto handle = jsg::JsValue(reason.getHandle(js)); + auto& request = check.template operator()(controller); + controller.adjustWriteBufferSize(js, -amountToWrite); + KJ_IF_SOME(o, controller.observer) { o->onChunkDequeued(amountToWrite); } - maybeRejectPromise(js, request.promise, handle); - queue.pop_front(); - if (!maybeAbort(js)) { + + auto fulfiller = kj::mv(request.promise); + maybeRejectPromise(js, fulfiller, handle); + controller.queue.pop_front(); + if (!maybeAbort(js, controller)) { auto ex = js.exceptionToKj(reason.addRef(js)); - writable->abort(kj::mv(ex)); - drain(js, handle); + controller.state.whenActive( + [&](IoOwn& writable) { writable->abort(kj::mv(ex)); }); + + controller.drain(js, handle); } return js.resolvedPromise(); })); } - KJ_CASE_ONEOF(request, kj::Own) { - // The destination should still be Writable, because the only way to transition to an - // errored state would have been if a write request in the queue ahead of us encountered an - // error. But in that case, the queue would already have been drained and we wouldn't be here. - auto& writable = state.getUnsafe>(); - - if (request->checkSignal(js)) { + KJ_CASE_ONEOF(request, Pipe) { + if (request.checkSignal(js)) { // If the signal is triggered, checkSignal will handle erroring the source and destination. return js.resolvedPromise(); } // The readable side should *should* still be readable here but let's double check, just - // to be safe, both for closed state and errored states. 
- if (request->source().isClosed()) { - request->source().release(js); - // If the source is closed, the spec requires us to close the destination unless the - // preventClose option is true. - if (!request->preventClose() && !isClosedOrClosing()) { - doClose(js); - } else { - writeState.transitionTo(); + // to be safe, both for closed state and errored states. We just constructed the Pipe + // and haven't yet entered pipeLoop, so source is guaranteed non-null. + auto& sourceRef = KJ_ASSERT_NONNULL(request.source); + auto preventClose = request.flags.preventClose; + auto preventAbort = request.flags.preventAbort; + + if (sourceRef.isClosed()) { + // Resolve the pipe promise before pop_front destroys the Pipe event. + auto promise = request.takePromise(); + maybeResolvePromise(js, promise); + request.releaseSource(js); + // Pop the Pipe from the queue before calling close() — isPiping() + // checks the queue, and close() rejects if isPiping() is true. + queue.pop_front(); + // Unlock writeState — doClose() no longer transitions PipeLocked → + // Unlocked (vtable poison safety), and the KJ pump path has no pipe + // loop iteration to do it. + writeState.transitionTo(); + // If the source is closed, the spec requires us to close the destination + // unless the preventClose option is true. + if (!preventClose && !isClosedOrClosing()) { + return close(js, true); } return js.resolvedPromise(); } - KJ_IF_SOME(errored, request->source().tryGetErrored(js)) { - request->source().release(js); + KJ_IF_SOME(errored, sourceRef.tryGetErrored(js)) { + // Reject the pipe promise before pop_front destroys the Pipe event. + auto promise = request.takePromise(); + maybeRejectPromise(js, promise, errored); + request.releaseSource(js); + // Pop the Pipe from the queue before further processing — the source + // has been released, so the Pipe entry is stale. 
+ queue.pop_front(); // If the source is errored, the spec requires us to error the destination unless the // preventAbort option is true. - if (!request->preventAbort()) { - auto ex = js.exceptionToKj(js.v8Ref(errored)); - writable->abort(kj::mv(ex)); + if (!preventAbort) { + auto ex = js.exceptionToKj(errored); + state.whenActive([&](IoOwn& writable) mutable { writable->abort(kj::mv(ex)); }); drain(js, errored); } else { writeState.transitionTo(); @@ -1789,114 +1823,175 @@ jsg::Promise WritableStreamInternalController::writeLoopAfterFrontOutputLo // ReadableStream is JavaScript-backed and we need to setup a JavaScript-promise read/write // loop to pass the data into the destination. - const auto handlePromise = [this, &ioContext, check = makeChecker(), - preventAbort = request->preventAbort()]( - jsg::Lock& js, auto promise) { - return promise.then(js, ioContext.addFunctor([this, check](jsg::Lock& js) mutable { + // Capturing `this` in the handlePromise lambda is safe. Handle promise is only + // invoked synchronously and `this` is not propagated into the promise continuations. + const auto handlePromise = [this, &ioContext, check = makeChecker(*this), preventAbort, + preventClose](jsg::Lock& js, auto promise) { + return promise.then(js, + ioContext.addFunctor( + [self = addRef(), check, preventAbort, preventClose](jsg::Lock& js) mutable { + auto& controller = static_cast(self->getController()); // Under some conditions, the clean up has already happened. - if (queue.empty()) return js.resolvedPromise(); - - auto& request = check.template operator()(); - - // It's possible we got here because the source errored but preventAbort was set. - // In that case, we need to treat preventAbort the same as preventClose. Be - // sure to check this before calling sourceLock.close() or the error detail will - // be lost. - // Capture preventClose now so we can modify it locally if needed. 
- bool preventClose = request.preventClose(); - KJ_IF_SOME(errored, request.source().tryGetErrored(js)) { - if (request.preventAbort()) preventClose = true; - // Even through we're not going to close the destination, we still want the - // pipe promise itself to be rejected in this case. - maybeRejectPromise(js, request.promise(), errored); - } else KJ_IF_SOME(errored, state.tryGetUnsafe()) { - maybeRejectPromise(js, request.promise(), errored.getHandle(js)); + if (controller.queue.empty()) return js.resolvedPromise(); + + auto& request = check.template operator()(controller); + + // KJ_IF_SOME on request.source(): if pipeLoop already released the + // source (via Pipe::State::releaseSource()), source is now + // kj::none and we MUST NOT attempt a deref. Use the stashed + // capturedSourceError in that case. + KJ_IF_SOME(sourceRef, request.source) { + auto fulfiller = request.takePromise(); + KJ_IF_SOME(errored, sourceRef.tryGetErrored(js)) { + if (preventAbort) preventClose = true; + // Even through we're not going to close the destination, we still want the + // pipe promise itself to be rejected in this case. + maybeRejectPromise(js, fulfiller, errored); + } else KJ_IF_SOME(errored, controller.state.tryGetUnsafe()) { + auto error = errored.getHandle(js); + maybeRejectPromise(js, fulfiller, error); + } else { + maybeResolvePromise(js, fulfiller); + } + + // Always transition the readable side to the closed state, because we read until EOF. + // Note that preventClose (below) means "don't close the writable side", i.e. don't + // call end(). + sourceRef.close(js); + // Release the readable's pipe lock. doClose() no longer transitions + // PipeLocked → Unlocked (to prevent vtable-poison crashes from stale + // PipeController& refs held by the pipe loop). For the JS pipeLoop + // path, the loop detects isClosed() and releases on its next iteration. 
+ // But the KJ tryPumpTo path has no loop — handlePromise is the terminal + // handler — so we must release explicitly here. + request.releaseSource(js); } else { - maybeResolvePromise(js, request.promise()); + // pipeLoop already released the source; consult the stashed + // error value (if any) rather than dereferencing source. + auto promise = request.takePromise(); + KJ_IF_SOME(err, request.capturedSourceError) { + if (preventAbort) preventClose = true; + maybeRejectPromise(js, promise, err.getHandle(js)); + } else KJ_IF_SOME(errored, controller.state.tryGetUnsafe()) { + auto error = errored.getHandle(js); + maybeRejectPromise(js, promise, error); + } else { + maybeResolvePromise(js, promise); + } } - - // Always transition the readable side to the closed state, because we read until EOF. - // Note that preventClose (below) means "don't close the writable side", i.e. don't - // call end(). - request.source().close(js); - queue.pop_front(); + controller.queue.pop_front(); + // Unlock writeState — doClose() no longer transitions PipeLocked → + // Unlocked (vtable poison safety). Must happen before close() so the + // writable appears unlocked after the pipe completes. + controller.writeState.transitionTo(); if (!preventClose) { // Note: unlike a real Close request, it's not possible for us to have been aborted. - return close(js, true); - } else { - writeState.transitionTo(); + return controller.close(js, true); } return js.resolvedPromise(); }), ioContext.addFunctor( - [this, check, preventAbort](jsg::Lock& js, jsg::Value reason) mutable { - auto handle = reason.getHandle(js); - auto& request = check.template operator()(); - maybeRejectPromise(js, request.promise(), handle); - // TODO(conform): Remember all those checks we performed in ReadableStream::pipeTo()? - // We're supposed to perform the same checks continually, e.g., errored writes should - // cancel the readable side unless preventCancel is truthy... 
This would require - // deeper integration with the implementation of pumpTo(). Oh well. One consequence - // of this is that if there is an error on the writable side, we error the readable - // side, rather than close (cancel) it, which is what the spec would have us do. - // TODO(now): Warn on the console about this. - request.source().error(js, handle); - queue.pop_front(); + [self = addRef(), check, preventAbort](jsg::Lock& js, jsg::Value reason) mutable { + auto& controller = static_cast(self->getController()); + // Under some conditions, the clean up has already happened — either + // because checkSignal popped the Pipe before rejecting, or because + // doAbort/drain ran externally between pipeLoop's rejection and + // this microtask. Mirror the success continuation's empty-queue + // guard to avoid the fatal check() assertion on an empty queue. + if (controller.queue.empty()) return js.resolvedPromise(); + + auto handle = jsg::JsValue(reason.getHandle(js)); + + auto& request = check.template operator()(controller); + + auto fulfiller = request.takePromise(); + maybeRejectPromise(js, fulfiller, handle); + // KJ_IF_SOME on request.source(): if pipeLoop already released the + // source, skip — the underlying PipeController is gone. + KJ_IF_SOME(sourceRef, request.source) { + // TODO(conform): Remember all those checks we performed in ReadableStream::pipeTo()? + // We're supposed to perform the same checks continually, e.g., errored writes should + // cancel the readable side unless preventCancel is truthy... This would require + // deeper integration with the implementation of pumpTo(). Oh well. One consequence + // of this is that if there is an error on the writable side, we error the readable + // side, rather than close (cancel) it, which is what the spec would have us do. + // TODO(now): Warn on the console about this. 
+ sourceRef.error(js, handle); + // Release the readable's pipe lock — same rationale as the success + // path: the KJ tryPumpTo path has no loop iteration to detect the + // error and release. + request.releaseSource(js); + } + controller.queue.pop_front(); if (!preventAbort) { - return abort(js, handle); + return controller.abort(js, handle); } - doError(js, handle); + // preventAbort path: unlock writeState explicitly. + controller.writeState.transitionTo(); return js.resolvedPromise(); })); }; - KJ_IF_SOME(promise, request->source().tryPumpTo(*writable->sink, !request->preventClose())) { - return handlePromise(js, - ioContext.awaitIo(js, - writable->canceler.wrap( - AbortSignal::maybeCancelWrap(js, request->maybeSignal(), kj::mv(promise))))); - } - - // The ReadableStream is JavaScript-backed. We can still pipe the data but it's going to be - // a bit slower because we will be relying on JavaScript promises when reading the data - // from the ReadableStream, then waiting on kj::Promises to write the data. We will keep - // reading until either the source or destination errors or until the source signals that - // it is done. - return handlePromise(js, request->pipeLoop(js)); + // The destination should still be Writable, because the only way to transition to an + // errored state would have been if a write request in the queue ahead of us encountered an + // error. But in that case, the queue would already have been drained and we wouldn't be here. + return KJ_ASSERT_NONNULL( + state.whenActive([&](IoOwn& writable) mutable -> jsg::Promise { + KJ_IF_SOME(promise, sourceRef.tryPumpTo(*writable->sink, !preventClose)) { + return handlePromise(js, + ioContext.awaitIo(js, + writable->canceler.wrap( + AbortSignal::maybeCancelWrap(js, request.maybeSignal, kj::mv(promise))))); + } else { + // The ReadableStream is JavaScript-backed. 
We can still pipe the data but it's going to be + // a bit slower because we will be relying on JavaScript promises when reading the data + // from the ReadableStream, then waiting on kj::Promises to write the data. We will keep + // reading until either the source or destination errors or until the source signals that + // it is done. + return handlePromise(js, request.pipeLoop(js)); + } + })); } - KJ_CASE_ONEOF(request, kj::Own) { - // writeLoop() is only called with the sink in the Writable state. - auto& writable = state.getUnsafe>(); - auto check = makeChecker(); + KJ_CASE_ONEOF(request, Close) { + return KJ_ASSERT_NONNULL(state.whenActive([&](IoOwn& writable) { + return ioContext.awaitIo(js, writable->canceler.wrap(writable->sink->end())) + .then(js, + ioContext.addFunctor( + [self = addRef(), check = makeChecker(*this)](jsg::Lock& js) mutable { + auto& controller = static_cast(self->getController()); + // Under some conditions, the clean up has already happened. - return ioContext.awaitIo(js, writable->canceler.wrap(writable->sink->end())) - .then(js, ioContext.addFunctor([this, check](jsg::Lock& js) { - // Under some conditions, the clean up has already happened. - if (queue.empty()) return; - auto& request = check.template operator()(); - maybeResolvePromise(js, request.promise); - queue.pop_front(); - finishClose(js); - }), - ioContext.addFunctor([this, check](jsg::Lock& js, jsg::Value reason) { - // Under some conditions, the clean up has already happened. 
- if (queue.empty()) return; - auto handle = reason.getHandle(js); - auto& request = check.template operator()(); - maybeRejectPromise(js, request.promise, handle); - queue.pop_front(); - finishError(js, handle); + if (controller.queue.empty()) return; + auto& request = check.template operator()(controller); + auto fulfiller = kj::mv(request.promise); + maybeResolvePromise(js, fulfiller); + controller.queue.pop_front(); + controller.finishClose(js); + }), + ioContext.addFunctor([self = addRef(), check = makeChecker(*this)]( + jsg::Lock& js, jsg::Value reason) mutable { + auto& controller = static_cast(self->getController()); + // Under some conditions, the clean up has already happened. + if (controller.queue.empty()) return; + auto handle = jsg::JsValue(reason.getHandle(js)); + + auto& request = check.template operator()(controller); + auto fulfiller = kj::mv(request.promise); + maybeRejectPromise(js, fulfiller, handle); + controller.queue.pop_front(); + controller.finishError(js, handle); + })); })); } - KJ_CASE_ONEOF(request, kj::Own) { + KJ_CASE_ONEOF(request, Flush) { // This is not a standards-defined state for a WritableStream and is only used internally // for Socket's startTls call. // - // Flushing is similar to closing the stream, the main difference is that `finishClose` - // and `writable->end()` are never called. + // Flushing is essentially just a signal that the write loop has reached this point. // Note: For Flush, we don't need makeChecker since we process immediately without async I/O. - maybeResolvePromise(js, request->promise); + auto fulfiller = kj::mv(request.promise); + maybeResolvePromise(js, fulfiller); queue.pop_front(); return js.resolvedPromise(); @@ -1907,40 +2002,58 @@ jsg::Promise WritableStreamInternalController::writeLoopAfterFrontOutputLo } bool WritableStreamInternalController::Pipe::State::checkSignal(jsg::Lock& js) { - // Returns true if the caller should bail out and stop processing. This happens in two cases: - // 1. 
The State was aborted (e.g., by drain()) - the Pipe is being torn down - // 2. The AbortSignal was triggered - we handle the abort and return true - // In both cases, the caller should return a resolved promise and not continue the pipe loop. - if (aborted) return true; + // If the weakRef is not alive, we'll return true to indicate aborted. + bool answer = true; + weakRef->runIfAlive([&](Pipe& ref) { answer = ref.checkSignal(js); }); + return answer; +} +bool WritableStreamInternalController::Pipe::checkSignal(jsg::Lock& js) { + // Returns true if the caller should bail out and stop processing. + // The caller should return a resolved promise and not continue the pipe loop. KJ_IF_SOME(signal, maybeSignal) { if (signal->getAborted(js)) { auto reason = signal->getReason(js); // abort process might call parent.drain which will delete this, // move/copy everything we need after into temps. - auto& parentRef = this->parent; - auto& sourceRef = this->source; - auto preventCancelCopy = this->preventCancel; - auto promiseCopy = kj::mv(this->promise); + auto& parentRef = parent; + auto preventCancel = flags.preventCancel; + auto preventAbort = flags.preventAbort; + auto promiseCopy = kj::mv(promise); + auto weakRef = kj::mv(selfRef); + + // Before the drain, keep the readable alive so sourceRef stays valid + auto readableRef = [&]() -> kj::Maybe> { + kj::Maybe> maybeRef; + parentRef.writeState.whenState( + [&](PipeLocked& locked) { maybeRef = locked.ref.addRef(); }); + return kj::mv(maybeRef); + }(); + + if (!preventCancel) { + releaseSource(js, reason); + } else { + releaseSource(js); + } if (!preventAbort) { - KJ_IF_SOME(writable, parent.state.tryGetUnsafe>()) { - auto ex = js.exceptionToKj(reason); - writable->abort(kj::mv(ex)); + if (parentRef.state.isActive()) { + parentRef.state.whenActive( + [&](IoOwn& writable) { writable->abort(js.exceptionToKj(reason)); }); parentRef.drain(js, reason); + // Note that drain deletes `this`. Do not touch any members after this. 
} else { - parent.writeState.transitionTo(); + parentRef.writeState.transitionTo(); + parentRef.queue.pop_front(); } } else { - parent.writeState.transitionTo(); - } - if (!preventCancelCopy) { - sourceRef.release(js, v8::Local(reason)); - } else { - sourceRef.release(js); + parentRef.writeState.transitionTo(); + parentRef.queue.pop_front(); } + maybeRejectPromise(js, promiseCopy, reason); + KJ_ASSERT_NONNULL(weakRef)->invalidate(); return true; } } @@ -1948,27 +2061,19 @@ bool WritableStreamInternalController::Pipe::State::checkSignal(jsg::Lock& js) { } jsg::Promise WritableStreamInternalController::Pipe::State::write( - v8::Local handle) { - auto& writable = parent.state.getUnsafe>(); - // TODO(soon): Once jsg::BufferSource lands and we're able to use it, this can be simplified. - KJ_ASSERT(handle->IsArrayBuffer() || handle->IsArrayBufferView()); - std::shared_ptr store; - size_t byteLength = 0; - size_t byteOffset = 0; - if (handle->IsArrayBuffer()) { - auto buffer = handle.template As(); - store = buffer->GetBackingStore(); - byteLength = buffer->ByteLength(); - } else { - auto view = handle.template As(); - store = view->Buffer()->GetBackingStore(); - byteLength = view->ByteLength(); - byteOffset = view->ByteOffset(); + jsg::Lock& js, jsg::JsValue handle) { + kj::Maybe> promise; + weakRef->runIfAlive([&](auto& ref) { promise = ref.write(js, handle); }); + KJ_IF_SOME(p, promise) { + return kj::mv(p); } - kj::byte* data = reinterpret_cast(store->Data()) + byteOffset; - // TODO(cleanup): Have this method accept a jsg::Lock& from the caller instead of using - // v8::Isolate::GetCurrent(); - auto& js = jsg::Lock::current(); + return js.rejectedPromise(js.typeError("The pipe operation was aborted."_kj)); +} + +jsg::Promise WritableStreamInternalController::Pipe::write( + jsg::Lock& js, jsg::JsValue handle) { + KJ_ASSERT(handle.isArrayBuffer() || handle.isSharedArrayBuffer() || handle.isArrayBufferView() || + handle.isString()); // For resizable ArrayBuffers or 
shared backing stores, we must eagerly copy // the data. A resizable ArrayBuffer's logical byte length can be changed by user @@ -1977,14 +2082,77 @@ jsg::Promise WritableStreamInternalController::Pipe::State::write( // But also just beacuse of V8 Sandbox requirements, we really should be copying // the data from the ArrayBuffer anyway... We incur an allocation and copy cost // here but that's to be expected. - auto backing = kj::heapArray(byteLength); - backing.asPtr().copyFrom(kj::arrayPtr(data, byteLength)); - return IoContext::current().awaitIo(js, - writable->canceler.wrap(writable->sink->write(backing)).attach(kj::mv(backing)), - [](jsg::Lock&) {}); + // + auto writeBytes = [&](kj::Array data) { + auto& ioContext = IoContext::current(); + return KJ_ASSERT_NONNULL(parent.state.whenActive([&](IoOwn& writable) { + return ioContext.awaitIo(js, + writable->canceler.wrap(writable->sink->write(data).attach(kj::mv(data))), + [](jsg::Lock&) {}); + })); + }; + + if (handle.isString()) { + auto str = handle.toString(js); + return writeBytes(str.asBytes().attach(kj::mv(str))); + } + + return writeBytes(jsg::JsBufferSource(handle).copy()); +} + +bool WritableStreamInternalController::Pipe::State::isSourceReleased() { + bool answer = true; + weakRef->runIfAlive([&](auto& ref) { answer = ref.isSourceReleased(); }); + return answer; +} + +void WritableStreamInternalController::Pipe::State::tryErrorParent( + jsg::Lock& js, jsg::JsValue reason) { + weakRef->runIfAlive([&](auto& ref) { ref.errorParent(js, reason); }); +} + +void WritableStreamInternalController::Pipe::errorParent(jsg::Lock& js, jsg::JsValue reason) { + parent.doError(js, reason); +} + +void WritableStreamInternalController::Pipe::State::tryFinishCloseParent(jsg::Lock& js) { + weakRef->runIfAlive([&](auto& ref) { ref.finishCloseParent(js); }); +} + +void WritableStreamInternalController::Pipe::State::tryFinishErrorParent( + jsg::Lock& js, jsg::JsValue reason) { + weakRef->runIfAlive([&](auto& ref) { 
ref.finishErrorParent(js, reason); }); +} + +void WritableStreamInternalController::Pipe::finishCloseParent(jsg::Lock& js) { + parent.finishClose(js); +} + +void WritableStreamInternalController::Pipe::finishErrorParent(jsg::Lock& js, jsg::JsValue reason) { + parent.finishError(js, reason); +} + +void WritableStreamInternalController::Pipe::State::tryNoBytesError(jsg::Lock& js) { + weakRef->runIfAlive([&](auto& ref) { ref.noBytesError(js); }); +} + +void WritableStreamInternalController::Pipe::noBytesError(jsg::Lock& js) { + parent.state.whenActive([&js](IoOwn& writable) { + auto error = js.typeError("This WritableStream only supports writing byte types."_kj); + writable->abort(js.exceptionToKj(error)); + }); } jsg::Promise WritableStreamInternalController::Pipe::State::pipeLoop(jsg::Lock& js) { + kj::Maybe> promise; + weakRef->runIfAlive([&](auto& ref) { promise = ref.pipeLoop(js); }); + KJ_IF_SOME(p, promise) { + return kj::mv(p); + } + return js.resolvedPromise(); +} + +jsg::Promise WritableStreamInternalController::Pipe::pipeLoop(jsg::Lock& js) { // This is a bit of dance. We got here because the source ReadableStream does not support // the internal, more efficient kj pipe (which means it is a JavaScript-backed ReadableStream). // We need to call read() on the source which returns a JavaScript Promise, wait on it to resolve, @@ -1995,12 +2163,13 @@ jsg::Promise WritableStreamInternalController::Pipe::State::pipeLoop(jsg:: auto& ioContext = IoContext::current(); - if (aborted) { + if (source == kj::none) { return js.resolvedPromise(); } if (checkSignal(js)) { // If the signal is triggered, checkSignal will handle erroring the source and destination. + // It also handles popping the Pipe request from the queue. return js.resolvedPromise(); } @@ -2011,74 +2180,103 @@ jsg::Promise WritableStreamInternalController::Pipe::State::pipeLoop(jsg:: // TODO(soon): These are the same checks made before we entered the loop. Try to // unify the code to reduce duplication. 
+ // source is guaranteed non-null at this point — we checked above. + // Bind a local reference for ergonomic access through the checks below. + // After releaseSource() is called, this local reference becomes dangling + // and MUST NOT be used; each branch returns immediately after + // releaseSource() so this is enforced by control flow. + auto& source = KJ_ASSERT_NONNULL(this->source); + auto& parentRef = parent; + bool preventAbort = flags.preventAbort; + bool preventCancel = flags.preventCancel; + bool preventClose = flags.preventClose; + + // Each branch below calls releaseSource(), which both destroys the + // source's PipeController AND nulls state->source. handlePromise's + // success/failure continuations check state->source via KJ_IF_SOME and + // skip the source-derefs they would otherwise have done. We also stash + // the captured source error so the success continuation can settle the + // pipe promise with the right reason. + KJ_IF_SOME(errored, source.tryGetErrored(js)) { - source.release(js); - if (!preventAbort) { - KJ_IF_SOME(writable, parent.state.tryGetUnsafe>()) { - auto ex = js.exceptionToKj(js.v8Ref(errored)); - writable->abort(kj::mv(ex)); - return js.rejectedPromise(errored); - } + capturedSourceError = errored.addRef(js); + releaseSource(js); + if (!preventAbort && parentRef.state.isActive()) { + parentRef.state.whenActive( + [&](IoOwn& writable) { writable->abort(js.exceptionToKj(errored)); }); + return js.rejectedPromise(errored); } // If preventAbort was true, we're going to unlock the destination now. - // We are not going to propagate the error here tho. 
- parent.writeState.transitionTo(); - return js.resolvedPromise(); + parentRef.writeState.transitionTo(); + return js.rejectedPromise(errored); } - KJ_IF_SOME(errored, parent.state.tryGetUnsafe()) { - parent.writeState.transitionTo(); + auto getReadableRef = [&]() -> kj::Maybe> { + kj::Maybe> maybeRef; + parentRef.writeState.whenState( + [&](PipeLocked& locked) { maybeRef = locked.ref.addRef(); }); + return kj::mv(maybeRef); + }; + + KJ_IF_SOME(errored, parentRef.state.tryGetUnsafe()) { + auto reason = errored.getHandle(js); + auto readableRef = getReadableRef(); + parentRef.writeState.transitionTo(); if (!preventCancel) { - auto reason = errored.getHandle(js); - source.release(js, reason); + releaseSource(js, reason); return js.rejectedPromise(reason); } - source.release(js); + releaseSource(js); return js.resolvedPromise(); } if (source.isClosed()) { - source.release(js); + releaseSource(js); if (!preventClose) { - KJ_ASSERT(!parent.state.is()); - if (!parent.isClosedOrClosing()) { + KJ_ASSERT(!parentRef.state.is()); + if (!parentRef.isClosedOrClosing()) { // We'll only be here if the sink is in the Writable state. auto& ioContext = IoContext::current(); // Capture a ref to the state to keep it alive during async operations. 
- return ioContext - .awaitIo(js, parent.state.getUnsafe>()->sink->end(), [](jsg::Lock&) {}) - .then(js, ioContext.addFunctor([state = kj::addRef(*this)](jsg::Lock& js) { - if (state->aborted) return; - state->parent.finishClose(js); + auto pipeState = getState(); + auto promise = KJ_ASSERT_NONNULL(parentRef.state.whenActive( + [&](IoOwn& writable) { return writable->sink->end(); })); + return ioContext.awaitIo(js, kj::mv(promise), [](jsg::Lock&) {}) + .then(js, ioContext.addFunctor([state = pipeState.addRef()](jsg::Lock& js) mutable { + if (state->isAborted()) return; + state->tryFinishCloseParent(js); }), - ioContext.addFunctor([state = kj::addRef(*this)](jsg::Lock& js, jsg::Value reason) { - if (state->aborted) return; - state->parent.finishError(js, reason.getHandle(js)); + ioContext.addFunctor( + [state = pipeState.addRef()](jsg::Lock& js, jsg::Value reason) mutable { + if (state->isAborted()) return; + auto error = jsg::JsValue(reason.getHandle(js)); + state->tryFinishErrorParent(js, error); })); } - parent.writeState.transitionTo(); + parentRef.writeState.transitionTo(); } return js.resolvedPromise(); } - if (parent.isClosedOrClosing()) { - auto destClosed = js.v8TypeError("This destination writable stream is closed."_kj); - parent.writeState.transitionTo(); + if (parentRef.isClosedOrClosing()) { + auto readableRef = getReadableRef(); + auto destClosed = js.typeError("This destination writable stream is closed."_kj); + parentRef.writeState.transitionTo(); if (!preventCancel) { - source.release(js, destClosed); + releaseSource(js, destClosed); } else { - source.release(js); + releaseSource(js); } return js.rejectedPromise(destClosed); } return source.read(js).then(js, - ioContext.addFunctor([state = kj::addRef(*this)]( - jsg::Lock& js, ReadResult result) mutable -> jsg::Promise { - if (state->aborted || state->checkSignal(js) || result.done) { + ioContext.addFunctor( + [state = getState()](jsg::Lock& js, ReadResult result) mutable -> jsg::Promise { + if 
(state->isAborted() || state->checkSignal(js) || result.done) { return js.resolvedPromise(); } @@ -2087,37 +2285,33 @@ jsg::Promise WritableStreamInternalController::Pipe::State::pipeLoop(jsg:: // we sent those bytes on to the WritableStreamSink. KJ_IF_SOME(value, result.value) { auto handle = value.getHandle(js); - if (handle->IsArrayBuffer() || handle->IsArrayBufferView()) { - return state->write(handle).then(js, - [state = kj::addRef(*state)](jsg::Lock& js) mutable -> jsg::Promise { - if (state->aborted) { - return js.resolvedPromise(); - } - // The signal will be checked again at the start of the next loop iteration. - return state->pipeLoop(js); - }, - [state = kj::addRef(*state)]( - jsg::Lock& js, jsg::Value reason) mutable -> jsg::Promise { - if (state->aborted) { + + if (handle.isArrayBuffer() || handle.isSharedArrayBuffer() || handle.isArrayBufferView() || + handle.isString()) { + return state->write(js, handle) + .then(js, + [state = state.addRef()]( + jsg::Lock& js) mutable -> jsg::Promise { return state->pipeLoop(js); }, + [state = state.addRef()]( + jsg::Lock& js, jsg::Value reason) mutable -> jsg::Promise { + if (state->isAborted() || state->isSourceReleased()) { return js.resolvedPromise(); } - state->parent.doError(js, reason.getHandle(js)); + auto error = jsg::JsValue(reason.getHandle(js)); + state->tryErrorParent(js, error); return state->pipeLoop(js); }); } } // Undefined and null are perfectly valid values to pass through a ReadableStream, // but we can't interpret them as bytes so if we get them here, we error the pipe. - auto error = js.v8TypeError("This WritableStream only supports writing byte types."_kj); - auto& writable = state->parent.state.getUnsafe>(); - auto ex = js.exceptionToKj(js.v8Ref(error)); - writable->abort(kj::mv(ex)); + state->tryNoBytesError(js); // The error condition will be handled at the start of the next iteration. 
return state->pipeLoop(js); }), - ioContext.addFunctor([state = kj::addRef(*this)]( - jsg::Lock& js, jsg::Value reason) mutable -> jsg::Promise { - if (state->aborted) { + ioContext.addFunctor( + [state = getState()](jsg::Lock& js, jsg::Value reason) mutable -> jsg::Promise { + if (state->isAborted() || state->isSourceReleased()) { return js.resolvedPromise(); } // The error will be processed and propagated in the next iteration. @@ -2125,24 +2319,54 @@ jsg::Promise WritableStreamInternalController::Pipe::State::pipeLoop(jsg:: })); } -void WritableStreamInternalController::drain(jsg::Lock& js, v8::Local reason) { +void WritableStreamInternalController::Pipe::State::releaseSource( + jsg::Lock& js, kj::Maybe maybeError) { + weakRef->runIfAlive([&](auto& ref) { ref.releaseSource(js, kj::mv(maybeError)); }); +} + +void WritableStreamInternalController::Pipe::releaseSource( + jsg::Lock& js, kj::Maybe maybeError) { + // Read the source into a local Maybe<&> (copying the pointer) so the + // method body can null state->source BEFORE the underlying + // PipeController::release() call. That way, no one — including ourselves + // through a stale `this->source` access — can use the freed reference + // after release; the field is observably kj::none on every code path + // following this call. 
+ KJ_IF_SOME(s, source) { + auto& sourceRef = s; + source = kj::none; + KJ_IF_SOME(error, maybeError) { + sourceRef.release(js, error); + } else { + sourceRef.release(js); + } + } +} + +void WritableStreamInternalController::drain(jsg::Lock& js, jsg::JsValue reason) { doError(js, reason); while (!queue.empty()) { KJ_SWITCH_ONEOF(queue.front().event) { - KJ_CASE_ONEOF(writeRequest, kj::Own) { - maybeRejectPromise(js, writeRequest->promise, reason); + KJ_CASE_ONEOF(writeRequest, Write) { + auto promise = kj::mv(writeRequest.promise); + maybeRejectPromise(js, promise, reason); } - KJ_CASE_ONEOF(pipeRequest, kj::Own) { - if (!pipeRequest->preventCancel()) { - pipeRequest->source().cancel(js, reason); + KJ_CASE_ONEOF(pipeRequest, Pipe) { + if (!pipeRequest.flags.preventCancel) { + KJ_IF_SOME(sourceRef, pipeRequest.source) { + sourceRef.cancel(js, reason); + } } - maybeRejectPromise(js, pipeRequest->promise(), reason); + auto promise = pipeRequest.takePromise(); + maybeRejectPromise(js, promise, reason); } - KJ_CASE_ONEOF(closeRequest, kj::Own) { - maybeRejectPromise(js, closeRequest->promise, reason); + KJ_CASE_ONEOF(closeRequest, Close) { + auto promise = kj::mv(closeRequest.promise); + maybeRejectPromise(js, promise, reason); } - KJ_CASE_ONEOF(flushRequest, kj::Own) { - maybeRejectPromise(js, flushRequest->promise, reason); + KJ_CASE_ONEOF(flushRequest, Flush) { + auto promise = kj::mv(flushRequest.promise); + maybeRejectPromise(js, promise, reason); } } queue.pop_front(); @@ -2152,27 +2376,24 @@ void WritableStreamInternalController::drain(jsg::Lock& js, v8::Local void WritableStreamInternalController::visitForGc(jsg::GcVisitor& visitor) { for (auto& event: queue) { KJ_SWITCH_ONEOF(event.event) { - KJ_CASE_ONEOF(write, kj::Own) { - visitor.visit(write->promise); + KJ_CASE_ONEOF(write, Write) { + visitor.visit(write.promise); } - KJ_CASE_ONEOF(close, kj::Own) { - visitor.visit(close->promise); + KJ_CASE_ONEOF(close, Close) { + visitor.visit(close.promise); } - 
KJ_CASE_ONEOF(flush, kj::Own) { - visitor.visit(flush->promise); + KJ_CASE_ONEOF(flush, Flush) { + visitor.visit(flush.promise); } - KJ_CASE_ONEOF(pipe, kj::Own) { - visitor.visit(pipe->maybeSignal(), pipe->promise()); + KJ_CASE_ONEOF(pipe, Pipe) { + pipe.visitForGc(visitor); } } } KJ_IF_SOME(locked, writeState.tryGetUnsafe()) { visitor.visit(locked); } - KJ_IF_SOME(pendingAbort, maybePendingAbort) { - visitor.visit(*pendingAbort); - } - visitor.visit(maybeClosureWaitable); + visitor.visit(maybeClosureWaitable, maybePendingAbort); } void ReadableStreamInternalController::visitForGc(jsg::GcVisitor& visitor) { @@ -2193,16 +2414,14 @@ bool ReadableStreamInternalController::PipeLocked::isClosed() { return inner.state.is(); } -kj::Maybe> ReadableStreamInternalController::PipeLocked::tryGetErrored( - jsg::Lock& js) { +kj::Maybe ReadableStreamInternalController::PipeLocked::tryGetErrored(jsg::Lock& js) { KJ_IF_SOME(errored, inner.state.tryGetUnsafe()) { return errored.getHandle(js); } return kj::none; } -void ReadableStreamInternalController::PipeLocked::cancel( - jsg::Lock& js, v8::Local reason) { +void ReadableStreamInternalController::PipeLocked::cancel(jsg::Lock& js, jsg::JsValue reason) { if (inner.state.is()) { inner.doCancel(js, reason); } @@ -2212,13 +2431,12 @@ void ReadableStreamInternalController::PipeLocked::close(jsg::Lock& js) { inner.doClose(js); } -void ReadableStreamInternalController::PipeLocked::error( - jsg::Lock& js, v8::Local reason) { +void ReadableStreamInternalController::PipeLocked::error(jsg::Lock& js, jsg::JsValue reason) { inner.doError(js, reason); } void ReadableStreamInternalController::PipeLocked::release( - jsg::Lock& js, kj::Maybe> maybeError) { + jsg::Lock& js, kj::Maybe maybeError) { KJ_IF_SOME(error, maybeError) { cancel(js, error); } @@ -2237,35 +2455,31 @@ jsg::Promise ReadableStreamInternalController::PipeLocked::read(jsg: return KJ_ASSERT_NONNULL(inner.read(js, kj::none)); } -jsg::Promise 
ReadableStreamInternalController::readAllBytes( +jsg::Promise> ReadableStreamInternalController::readAllBytes( jsg::Lock& js, uint64_t limit) { if (isLockedToReader()) { - return js.rejectedPromise(KJ_EXCEPTION( - FAILED, "jsg.TypeError: This ReadableStream is currently locked to a reader.")); + return js.rejectedPromise>( + js.typeError("This ReadableStream is currently locked to a reader."_kj)); } if (isPendingClosure) { - return js.rejectedPromise( - js.v8TypeError("This ReadableStream belongs to an object that is closing."_kj)); + return js.rejectedPromise>( + js.typeError("This ReadableStream belongs to an object that is closing."_kj)); } KJ_SWITCH_ONEOF(state) { KJ_CASE_ONEOF(closed, StreamStates::Closed) { - auto backing = jsg::BackingStore::alloc(js, 0); - return js.resolvedPromise(jsg::BufferSource(js, kj::mv(backing))); + auto ab = jsg::JsArrayBuffer::create(js, 0); + return js.resolvedPromise(ab.addRef(js)); } KJ_CASE_ONEOF(errored, StreamStates::Errored) { - return js.rejectedPromise(errored.addRef(js)); + return js.rejectedPromise>(errored.getHandle(js)); } KJ_CASE_ONEOF(readable, Readable) { auto source = KJ_ASSERT_NONNULL(removeSource(js)); auto& context = IoContext::current(); - // TODO(perf): v8 sandboxing will require that backing stores are allocated within - // the sandbox. This will require a change to the API of ReadableStreamSource::readAllBytes. - // For now, we'll read and allocate into a proper backing store. 
return context.awaitIoLegacy(js, source->readAllBytes(limit).attach(kj::mv(source))) - .then(js, [](jsg::Lock& js, kj::Array bytes) -> jsg::BufferSource { - auto backing = jsg::BackingStore::alloc(js, bytes.size()); - backing.asArrayPtr().copyFrom(bytes); - return jsg::BufferSource(js, kj::mv(backing)); + .then(js, [](jsg::Lock& js, kj::Array bytes) -> jsg::JsRef { + auto ab = jsg::JsArrayBuffer::create(js, bytes); + return ab.addRef(js); }); } } @@ -2275,19 +2489,19 @@ jsg::Promise ReadableStreamInternalController::readAllBytes( jsg::Promise ReadableStreamInternalController::readAllText( jsg::Lock& js, uint64_t limit) { if (isLockedToReader()) { - return js.rejectedPromise(KJ_EXCEPTION( - FAILED, "jsg.TypeError: This ReadableStream is currently locked to a reader.")); + return js.rejectedPromise( + js.typeError("This ReadableStream is currently locked to a reader."_kj)); } if (isPendingClosure) { return js.rejectedPromise( - js.v8TypeError("This ReadableStream belongs to an object that is closing."_kj)); + js.typeError("This ReadableStream belongs to an object that is closing."_kj)); } KJ_SWITCH_ONEOF(state) { KJ_CASE_ONEOF(closed, StreamStates::Closed) { return js.resolvedPromise(kj::String()); } KJ_CASE_ONEOF(errored, StreamStates::Errored) { - return js.rejectedPromise(errored.addRef(js)); + return js.rejectedPromise(errored.getHandle(js)); } KJ_CASE_ONEOF(readable, Readable) { auto source = KJ_ASSERT_NONNULL(removeSource(js)); diff --git a/src/workerd/api/streams/internal.h b/src/workerd/api/streams/internal.h index 5580db65292..787967dc4d8 100644 --- a/src/workerd/api/streams/internal.h +++ b/src/workerd/api/streams/internal.h @@ -28,7 +28,7 @@ namespace workerd::api { // The ReadableStreamInternalController is always in one of three states: Readable, Closed, // or Errored. When the state is Readable, the controller has an associated ReadableStreamSource. 
// When the state is Errored, the ReadableStreamSource has been released and the controller -// stores a jsg::Value with whatever value was used to error. When Closed, the +// stores a js Value with whatever value was used to error. When Closed, the // ReadableStreamSource has been released. // Likewise, the WritableStreamInternalController is always either Writable, Closed, or Errored. @@ -71,7 +71,7 @@ class ReadableStreamInternalController: public ReadableStreamController { jsg::Promise pipeTo( jsg::Lock& js, WritableStreamController& destination, PipeToOptions options) override; - jsg::Promise cancel(jsg::Lock& js, jsg::Optional> reason) override; + jsg::Promise cancel(jsg::Lock& js, jsg::Optional reason) override; Tee tee(jsg::Lock& js) override; @@ -103,7 +103,7 @@ class ReadableStreamInternalController: public ReadableStreamController { void visitForGc(jsg::GcVisitor& visitor) override; - jsg::Promise readAllBytes(jsg::Lock& js, uint64_t limit) override; + jsg::Promise> readAllBytes(jsg::Lock& js, uint64_t limit) override; jsg::Promise readAllText(jsg::Lock& js, uint64_t limit) override; kj::Maybe tryGetLength(StreamEncoding encoding) override; @@ -124,9 +124,9 @@ class ReadableStreamInternalController: public ReadableStreamController { void jsgGetMemoryInfo(jsg::MemoryTracker& info) const override; private: - void doCancel(jsg::Lock& js, jsg::Optional> reason); + void doCancel(jsg::Lock& js, jsg::Optional reason); void doClose(jsg::Lock& js); - void doError(jsg::Lock& js, v8::Local reason); + void doError(jsg::Lock& js, jsg::JsValue reason); class PipeLocked: public PipeController { public: @@ -135,15 +135,15 @@ class ReadableStreamInternalController: public ReadableStreamController { bool isClosed() override; - kj::Maybe> tryGetErrored(jsg::Lock& js) override; + kj::Maybe tryGetErrored(jsg::Lock& js) override; - void cancel(jsg::Lock& js, v8::Local reason) override; + void cancel(jsg::Lock& js, jsg::JsValue reason) override; void close(jsg::Lock& js) 
override; - void error(jsg::Lock& js, v8::Local reason) override; + void error(jsg::Lock& js, jsg::JsValue reason) override; - void release(jsg::Lock& js, kj::Maybe> maybeError = kj::none) override; + void release(jsg::Lock& js, kj::Maybe maybeError = kj::none) override; kj::Maybe> tryPumpTo(WritableStreamSink& sink, bool end) override; @@ -222,13 +222,13 @@ class WritableStreamInternalController: public WritableStreamController { jsg::Ref addRef() override; - jsg::Promise write(jsg::Lock& js, jsg::Optional> value) override; + jsg::Promise write(jsg::Lock& js, jsg::Optional value) override; jsg::Promise close(jsg::Lock& js, bool markAsHandled = false) override; jsg::Promise flush(jsg::Lock& js, bool markAsHandled = false) override; - jsg::Promise abort(jsg::Lock& js, jsg::Optional> reason) override; + jsg::Promise abort(jsg::Lock& js, jsg::Optional reason) override; kj::Maybe> tryPipeFrom( jsg::Lock& js, jsg::Ref source, PipeToOptions options) override; @@ -247,7 +247,7 @@ class WritableStreamInternalController: public WritableStreamController { void releaseWriter(Writer& writer, kj::Maybe maybeJs) override; // See the comment for releaseWriter in common.h for details on the use of maybeJs - kj::Maybe> isErroring(jsg::Lock& js) override { + kj::Maybe isErroring(jsg::Lock& js) override { // TODO(later): The internal controller has no concept of an "erroring" // state, so for now we just return kj::none here. 
return kj::none; @@ -280,17 +280,17 @@ class WritableStreamInternalController: public WritableStreamController { }; jsg::Promise doAbort(jsg::Lock& js, - v8::Local reason, + jsg::JsValue reason, AbortOptions options = {.reject = false, .handled = false}); void doClose(jsg::Lock& js); - void doError(jsg::Lock& js, v8::Local reason); + void doError(jsg::Lock& js, jsg::JsValue reason); void ensureWriting(jsg::Lock& js); jsg::Promise writeLoop(jsg::Lock& js, IoContext& ioContext); jsg::Promise writeLoopAfterFrontOutputLock(jsg::Lock& js); - void drain(jsg::Lock& js, v8::Local reason); + void drain(jsg::Lock& js, jsg::JsValue reason); void finishClose(jsg::Lock& js); - void finishError(jsg::Lock& js, v8::Local reason); + void finishError(jsg::Lock& js, jsg::JsValue reason); jsg::Promise closeImpl(jsg::Lock& js, bool markAsHandled); struct PipeLocked { @@ -324,7 +324,7 @@ class WritableStreamInternalController: public WritableStreamController { kj::Maybe> observer; - kj::Maybe> maybePendingAbort; + kj::Maybe maybePendingAbort; uint64_t currentWriteBufferSize = 0; @@ -349,7 +349,7 @@ class WritableStreamInternalController: public WritableStreamController { struct Write { kj::Maybe::Resolver> promise; size_t totalBytes; - kj::Array ownBytes; + kj::Array ownBytes; kj::ArrayPtr bytes; JSG_MEMORY_INFO(Write) { @@ -372,123 +372,138 @@ class WritableStreamInternalController: public WritableStreamController { } }; struct Pipe { - // PipeState is ref-counted so that it can be safely captured by lambdas in pipeLoop(). - // When drain() destroys the Pipe, the state survives as long as pending callbacks need it. - // The `aborted` flag is set when the Pipe is destroyed. 
struct State: public kj::Refcounted { - WritableStreamInternalController& parent; - ReadableStreamController::PipeController& source; - kj::Maybe::Resolver> promise; - kj::Maybe> maybeSignal; - - bool preventAbort; - bool preventClose; - bool preventCancel; - - // True when the Pipe is being destroyed - bool aborted = false; - - State(WritableStreamInternalController& parent, - ReadableStreamController::PipeController& source, - kj::Maybe::Resolver> promise, - bool preventAbort, - bool preventClose, - bool preventCancel, - kj::Maybe> maybeSignal) - : parent(parent), - source(source), - promise(kj::mv(promise)), - maybeSignal(kj::mv(maybeSignal)), - preventAbort(preventAbort), - preventClose(preventClose), - preventCancel(preventCancel) {} + jsg::Ref owner; + kj::Rc> weakRef; - bool checkSignal(jsg::Lock& js); - jsg::Promise pipeLoop(jsg::Lock& js); - jsg::Promise write(v8::Local value); + State(jsg::Ref owner, kj::Rc> weakRef) + : owner(kj::mv(owner)), + weakRef(kj::mv(weakRef)) {} - JSG_MEMORY_INFO(State) { - tracker.trackField("resolver", promise); - tracker.trackField("signal", maybeSignal); + inline bool isAborted() const { + return !weakRef->isValid(); } + bool checkSignal(jsg::Lock& js); + jsg::Promise pipeLoop(jsg::Lock& js); + jsg::Promise write(jsg::Lock& js, jsg::JsValue value); + void releaseSource(jsg::Lock& js, kj::Maybe maybeError = kj::none); + bool isSourceReleased(); + void tryErrorParent(jsg::Lock& js, jsg::JsValue reason); + void tryFinishCloseParent(jsg::Lock& js); + void tryFinishErrorParent(jsg::Lock& js, jsg::JsValue reason); + void tryNoBytesError(jsg::Lock& js); }; - kj::Own state; + WritableStreamInternalController& parent; + kj::Maybe source; + kj::Maybe::Resolver> promise; + struct Flags { + uint8_t preventAbort : 1; + uint8_t preventClose : 1; + uint8_t preventCancel : 1; + }; + Flags flags{}; + kj::Maybe> maybeSignal; + kj::Maybe> capturedSourceError; + kj::Maybe>> selfRef; Pipe(WritableStreamInternalController& parent, 
ReadableStreamController::PipeController& source, - kj::Maybe::Resolver> promise, + jsg::Promise::Resolver promise, bool preventAbort, bool preventClose, bool preventCancel, kj::Maybe> maybeSignal) - : state(kj::refcounted(parent, - source, - kj::mv(promise), - preventAbort, - preventClose, - preventCancel, - kj::mv(maybeSignal))) {} - - ~Pipe() noexcept(false) { - state->aborted = true; + : parent(parent), + source(source), + promise(kj::mv(promise)), + maybeSignal(kj::mv(maybeSignal)), + selfRef(kj::rc>(kj::Badge(), *this)) { + flags.preventAbort = preventAbort; + flags.preventClose = preventClose; + flags.preventCancel = preventCancel; } - WritableStreamInternalController& parent() { - return state->parent; - } - ReadableStreamController::PipeController& source() { - return state->source; - } - kj::Maybe::Resolver>& promise() { - return state->promise; - } - bool preventAbort() const { - return state->preventAbort; - } - bool preventClose() const { - return state->preventClose; + Pipe(Pipe&& other) noexcept(false) + : parent(other.parent), + source(kj::mv(other.source)), + promise(kj::mv(other.promise)), + flags(other.flags), + maybeSignal(kj::mv(other.maybeSignal)), + capturedSourceError(kj::mv(other.capturedSourceError)), + selfRef(kj::rc>(kj::Badge(), *this)) { + // Invalidate the old Pipe's weak ref — any State objects pointing to it + // will see isAborted() = true. 
+ KJ_IF_SOME(ref, other.selfRef) { + ref->invalidate(); + other.selfRef = kj::none; + } } - bool preventCancel() const { - return state->preventCancel; + + ~Pipe() noexcept(false) { + KJ_IF_SOME(ref, selfRef) { + ref->invalidate(); + } } - kj::Maybe>& maybeSignal() { - return state->maybeSignal; + + KJ_DISALLOW_COPY(Pipe); + + kj::Rc getState() { + return kj::rc(parent.addRef(), KJ_ASSERT_NONNULL(selfRef).addRef()); } - bool checkSignal(jsg::Lock& js) { - return state->checkSignal(js); + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(promise, maybeSignal, capturedSourceError); } - jsg::Promise pipeLoop(jsg::Lock& js) { - return state->pipeLoop(js); + + void releaseSource(jsg::Lock& js, kj::Maybe maybeError = kj::none); + bool checkSignal(jsg::Lock& js); + jsg::Promise pipeLoop(jsg::Lock& js); + jsg::Promise write(jsg::Lock& js, jsg::JsValue value); + bool isSourceReleased() const { + return source == kj::none; } - jsg::Promise write(v8::Local value) { - return state->write(value); + void errorParent(jsg::Lock& js, jsg::JsValue reason); + void finishCloseParent(jsg::Lock& js); + void finishErrorParent(jsg::Lock& js, jsg::JsValue reason); + void noBytesError(jsg::Lock& js); + kj::Maybe::Resolver> takePromise() { + return kj::mv(promise); } JSG_MEMORY_INFO(Pipe) { - tracker.trackField("state", state); + tracker.trackField("promise", promise); + tracker.trackField("signal", maybeSignal); + tracker.trackField("capturedSourceError", capturedSourceError); } }; struct WriteEvent { kj::Maybe>> outputLock; // must wait for this before actually writing - kj::OneOf, kj::Own, kj::Own, kj::Own> event; + kj::OneOf event; + + bool isCloseOrFlush() const { + return event.is() || event.is(); + } + + bool isPipe() const { + return event.is(); + } JSG_MEMORY_INFO(WriteEvent) { if (outputLock != kj::none) { tracker.trackFieldWithSize("outputLock", sizeof(IoOwn>)); } KJ_SWITCH_ONEOF(event) { - KJ_CASE_ONEOF(w, kj::Own) { + KJ_CASE_ONEOF(w, Write) { 
tracker.trackField("inner", w); } - KJ_CASE_ONEOF(p, kj::Own) { + KJ_CASE_ONEOF(p, Pipe) { tracker.trackField("inner", p); } - KJ_CASE_ONEOF(c, kj::Own) { + KJ_CASE_ONEOF(c, Close) { tracker.trackField("inner", c); } - KJ_CASE_ONEOF(f, kj::Own) { + KJ_CASE_ONEOF(f, Flush) { tracker.trackField("inner", f); } } diff --git a/src/workerd/api/streams/pendingabort-gc-uaf-test.c++ b/src/workerd/api/streams/pendingabort-gc-uaf-test.c++ new file mode 100644 index 00000000000..eff29c8d227 --- /dev/null +++ b/src/workerd/api/streams/pendingabort-gc-uaf-test.c++ @@ -0,0 +1,85 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Tests for re-entrancy edge cases during draining reads. +// These exercise scenarios where synchronous JS callbacks during +// drainingRead's internal pump loop trigger state changes that +// could cause use-after-free or hangs if not properly guarded. + +#include "readable.h" +#include "standard.h" + +#include +#include + +namespace workerd::api { +namespace { + +jsg::V8System v8System({"--expose-gc"_kj}); + +using PendingAbort = WritableStreamController::PendingAbort; + +class Foo: public jsg::Object { + public: + Foo(jsg::Lock& js) + : pendingAbort(PendingAbort(js, js.newPromiseAndResolver(), js.obj(), false)) {} + + void triggerTest(jsg::Lock& js) { + // Calling gc once should force a trace. This makes the PendingAbort's + // members weak and eligible for collection if the next trace doesn't + // find them. + js.requestGcForTesting(); + KJ_ASSERT(traced); + // Moving, then calling GC again should not cause anything to be freed, + // since the PendingAbort was moved, it's traced members are made strong + // again. The move makes it so the PendingAbort's members are not found + // during the next trace; but since they are strong now, they won't be + // collected. 
+ KJ_IF_SOME(deq, kj::mv(pendingAbort)) { + js.requestGcForTesting(); + // Should not UAF + deq.complete(js); + KJ_ASSERT(deq.promise.getState(js) == jsg::Promise::State::FULFILLED); + } + } + + JSG_RESOURCE_TYPE(Foo) { + JSG_METHOD(triggerTest); + } + + private: + kj::Maybe pendingAbort; + bool traced = false; + + void visitForGc(jsg::GcVisitor& visitor) { + traced = true; + visitor.visit(pendingAbort); + } +}; + +class ContextGlobalObject: public jsg::Object, public jsg::ContextGlobal { + public: + jsg::Ref makeAFoo(jsg::Lock& js) { + return js.alloc(js); + } + JSG_RESOURCE_TYPE(ContextGlobalObject) { + JSG_METHOD(makeAFoo); + } +}; + +JSG_DECLARE_ISOLATE_TYPE(ContextGlobalIsolate, ContextGlobalObject, Foo); + +KJ_TEST("DrainingReader: concurrent draining reads are rejected (value stream)") { + setPredictableModeForTest(); + jsg::test::Evaluator e( + v8System); + e.expectEval(R"FOO( + const foo = makeAFoo(); + foo.triggerTest(); + )FOO", + "undefined", "undefined"); +} + +} // namespace +} // namespace workerd::api diff --git a/src/workerd/api/streams/queue-test.c++ b/src/workerd/api/streams/queue-test.c++ index 0babee6f993..451d749132c 100644 --- a/src/workerd/api/streams/queue-test.c++ +++ b/src/workerd/api/streams/queue-test.c++ @@ -91,7 +91,8 @@ auto byobRead(jsg::Lock& js, auto& consumer, int size) { }; auto getEntry(jsg::Lock& js, auto size) { - return kj::rc(js.v8Ref(v8::True(js.v8Isolate).As()), size); + jsg::JsValue b = js.boolean(true); + return kj::rc(b.addRef(js), size); } #pragma region ValueQueue Tests @@ -129,7 +130,7 @@ KJ_TEST("ValueQueue erroring works") { preamble([](jsg::Lock& js) { ValueQueue queue(2); - queue.error(js, js.v8Ref(js.v8Error("boom"_kj))); + queue.error(js, js.error("boom"_kj)); KJ_ASSERT(queue.desiredSize() == 0); @@ -165,7 +166,7 @@ KJ_TEST("ValueQueue with single consumer") { MustCall readContinuation([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - 
KJ_ASSERT(value.getHandle(js)->IsTrue()); + KJ_ASSERT(value.getHandle(js).isTrue()); KJ_ASSERT(consumer.size() == 0); KJ_ASSERT(queue.size() == 0); @@ -202,7 +203,7 @@ KJ_TEST("ValueQueue with multiple consumers") { MustCall read1Continuation([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - KJ_ASSERT(value.getHandle(js)->IsTrue()); + KJ_ASSERT(value.getHandle(js).isTrue()); KJ_ASSERT(consumer1.size() == 0); KJ_ASSERT(consumer2.size() == 2); @@ -217,7 +218,7 @@ KJ_TEST("ValueQueue with multiple consumers") { MustCall read2Continuation([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - KJ_ASSERT(value.getHandle(js)->IsTrue()); + KJ_ASSERT(value.getHandle(js).isTrue()); KJ_ASSERT(consumer2.size() == 0); @@ -265,7 +266,7 @@ KJ_TEST("ValueQueue consumer with multiple-reads") { MustCall read1Continuation([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - KJ_ASSERT(value.getHandle(js)->IsTrue()); + KJ_ASSERT(value.getHandle(js).isTrue()); return js.resolvedPromise(kj::mv(result)); }); read(js, consumer).then(js, read1Continuation); @@ -307,7 +308,7 @@ KJ_TEST("ValueQueue errors consumer with multiple-reads") { read(js, consumer).then(js, readContinuation, errorContinuation); read(js, consumer).then(js, readContinuation, errorContinuation); - queue.error(js, js.v8Ref(js.v8Error("boom"_kj))); + queue.error(js, js.error("boom"_kj)); js.runMicrotasks(); }); @@ -325,7 +326,7 @@ KJ_TEST("ValueQueue with multiple consumers with pending reads") { MustCall readContinuation([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - KJ_ASSERT(value.getHandle(js)->IsTrue()); + KJ_ASSERT(value.getHandle(js).isTrue()); // Both reads were fulfilled immediately without buffering. 
KJ_ASSERT(consumer1.size() == 0); @@ -388,7 +389,7 @@ KJ_TEST("ByteQueue erroring works") { preamble([](jsg::Lock& js) { ByteQueue queue(2); - queue.error(js, js.v8Ref(js.v8Error("boom"_kj))); + queue.error(js, js.error("boom"_kj)); KJ_ASSERT(queue.desiredSize() == 0); @@ -433,8 +434,9 @@ KJ_TEST("ByteQueue with single consumer") { MustCall readContinuation([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - KJ_ASSERT(value.getHandle(js)->IsArrayBufferView()); - jsg::BufferSource source(js, value.getHandle(js)); + auto handle = value.getHandle(js); + KJ_ASSERT(handle.isArrayBufferView()); + jsg::BufferSource source(js, handle); KJ_ASSERT(source.size() == 4); KJ_ASSERT(source.asArrayPtr()[0] == 'a'); KJ_ASSERT(source.asArrayPtr()[1] == 'a'); @@ -471,8 +473,9 @@ KJ_TEST("ByteQueue with single byob consumer") { MustCall readContinuation([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - KJ_ASSERT(value.getHandle(js)->IsArrayBufferView()); - jsg::BufferSource source(js, value.getHandle(js)); + auto handle = value.getHandle(js); + KJ_ASSERT(handle.isArrayBufferView()); + jsg::BufferSource source(js, handle); auto ptr = source.asArrayPtr(); KJ_ASSERT(source.size() == 3); KJ_ASSERT(ptr[0] == 'b'); @@ -525,8 +528,9 @@ KJ_TEST("ByteQueue with byob consumer and default consumer") { MustCall readContinuation([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - KJ_ASSERT(value.getHandle(js)->IsArrayBufferView()); - jsg::BufferSource source(js, value.getHandle(js)); + auto handle = value.getHandle(js); + KJ_ASSERT(handle.isArrayBufferView()); + jsg::BufferSource source(js, handle); auto ptr = source.asArrayPtr(); KJ_ASSERT(source.size() == 3); KJ_ASSERT(ptr[0] == 'b'); @@ -564,8 +568,9 @@ KJ_TEST("ByteQueue with byob consumer and default consumer") { MustCall 
read2Continuation([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - KJ_ASSERT(value.getHandle(js)->IsArrayBufferView()); - jsg::BufferSource source(js, value.getHandle(js)); + auto handle = value.getHandle(js); + KJ_ASSERT(handle.isArrayBufferView()); + jsg::BufferSource source(js, handle); auto ptr = source.asArrayPtr(); // The second consumer receives exactly the same data. KJ_ASSERT(source.size() == 3); @@ -603,8 +608,9 @@ KJ_TEST("ByteQueue with multiple byob consumers") { MustCall readContinuation([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - KJ_ASSERT(value.getHandle(js)->IsArrayBufferView()); - jsg::BufferSource source(js, value.getHandle(js)); + auto handle = value.getHandle(js); + KJ_ASSERT(handle.isArrayBufferView()); + jsg::BufferSource source(js, handle); auto ptr = source.asArrayPtr(); KJ_ASSERT(source.size() == 3); KJ_ASSERT(ptr[0] == 'b'); @@ -659,8 +665,9 @@ KJ_TEST("ByteQueue with multiple byob consumers") { MustCall readContinuation([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - KJ_ASSERT(value.getHandle(js)->IsArrayBufferView()); - jsg::BufferSource source(js, value.getHandle(js)); + auto handle = value.getHandle(js); + KJ_ASSERT(handle.isArrayBufferView()); + jsg::BufferSource source(js, handle); auto ptr = source.asArrayPtr(); KJ_ASSERT(source.size() == 3); KJ_ASSERT(ptr[0] == 'b'); @@ -715,8 +722,9 @@ KJ_TEST("ByteQueue with multiple byob consumers (multi-reads)") { MustCall readConsumer1([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - KJ_ASSERT(value.getHandle(js)->IsArrayBufferView()); - jsg::BufferSource source(js, value.getHandle(js)); + auto handle = value.getHandle(js); + KJ_ASSERT(handle.isArrayBufferView()); + jsg::BufferSource source(js, handle); auto ptr = 
source.asArrayPtr(); KJ_ASSERT(source.size() == 3); KJ_ASSERT(ptr[0] == 'a'); @@ -729,8 +737,9 @@ KJ_TEST("ByteQueue with multiple byob consumers (multi-reads)") { MustCall readConsumer2([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - KJ_ASSERT(value.getHandle(js)->IsArrayBufferView()); - jsg::BufferSource source(js, value.getHandle(js)); + auto handle = value.getHandle(js); + KJ_ASSERT(handle.isArrayBufferView()); + jsg::BufferSource source(js, handle); auto ptr = source.asArrayPtr(); KJ_ASSERT(source.size() == 3); KJ_ASSERT(ptr[0] == 'a'); @@ -743,8 +752,9 @@ KJ_TEST("ByteQueue with multiple byob consumers (multi-reads)") { MustCall secondReadBothConsumers([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - KJ_ASSERT(value.getHandle(js)->IsArrayBufferView()); - jsg::BufferSource source(js, value.getHandle(js)); + auto handle = value.getHandle(js); + KJ_ASSERT(handle.isArrayBufferView()); + jsg::BufferSource source(js, handle); auto ptr = source.asArrayPtr(); KJ_ASSERT(source.size() == 2); KJ_ASSERT(ptr[0] == 'b'); @@ -796,8 +806,9 @@ KJ_TEST("ByteQueue with multiple byob consumers (multi-reads, 2)") { MustCall readConsumer1([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - KJ_ASSERT(value.getHandle(js)->IsArrayBufferView()); - jsg::BufferSource source(js, value.getHandle(js)); + auto handle = value.getHandle(js); + KJ_ASSERT(handle.isArrayBufferView()); + jsg::BufferSource source(js, handle); auto ptr = source.asArrayPtr(); KJ_ASSERT(source.size() == 3); KJ_ASSERT(ptr[0] == 'a'); @@ -809,8 +820,9 @@ KJ_TEST("ByteQueue with multiple byob consumers (multi-reads, 2)") { MustCall readConsumer2([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - KJ_ASSERT(value.getHandle(js)->IsArrayBufferView()); - 
jsg::BufferSource source(js, value.getHandle(js)); + auto handle = value.getHandle(js); + KJ_ASSERT(handle.isArrayBufferView()); + jsg::BufferSource source(js, handle); auto ptr = source.asArrayPtr(); KJ_ASSERT(source.size() == 3); KJ_ASSERT(ptr[0] == 'a'); @@ -823,8 +835,9 @@ KJ_TEST("ByteQueue with multiple byob consumers (multi-reads, 2)") { MustCall secondReadBothConsumers([&](jsg::Lock& js, auto&& result) -> auto { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); - KJ_ASSERT(value.getHandle(js)->IsArrayBufferView()); - jsg::BufferSource source(js, value.getHandle(js)); + auto handle = value.getHandle(js); + KJ_ASSERT(handle.isArrayBufferView()); + jsg::BufferSource source(js, handle); auto ptr = source.asArrayPtr(); KJ_ASSERT(source.size() == 2); KJ_ASSERT(ptr[0] == 'b'); @@ -895,7 +908,7 @@ KJ_TEST("ByteQueue with default consumer with atLeast") { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); auto view = value.getHandle(js); - KJ_ASSERT(view->IsArrayBufferView()); + KJ_ASSERT(view.isArrayBufferView()); jsg::BufferSource source(js, view); auto ptr = source.asArrayPtr(); KJ_ASSERT(ptr[0] == 1); @@ -912,7 +925,7 @@ KJ_TEST("ByteQueue with default consumer with atLeast") { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); auto view = value.getHandle(js); - KJ_ASSERT(view->IsArrayBufferView()); + KJ_ASSERT(view.isArrayBufferView()); jsg::BufferSource source(js, view); KJ_ASSERT(source.asArrayPtr()[0], 6); KJ_ASSERT(source.size() == 1); @@ -983,7 +996,7 @@ KJ_TEST("ByteQueue with multiple default consumers with atLeast (same rate)") { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); auto view = value.getHandle(js); - KJ_ASSERT(view->IsArrayBufferView()); + KJ_ASSERT(view.isArrayBufferView()); jsg::BufferSource source(js, view); auto ptr = source.asArrayPtr(); KJ_ASSERT(ptr[0] == 1); @@ -1000,7 +1013,7 @@ KJ_TEST("ByteQueue with multiple default consumers with atLeast (same 
rate)") { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); auto view = value.getHandle(js); - KJ_ASSERT(view->IsArrayBufferView()); + KJ_ASSERT(view.isArrayBufferView()); jsg::BufferSource source(js, view); auto ptr = source.asArrayPtr(); KJ_ASSERT(ptr[0] == 1); @@ -1017,7 +1030,7 @@ KJ_TEST("ByteQueue with multiple default consumers with atLeast (same rate)") { KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); auto view = value.getHandle(js); - KJ_ASSERT(view->IsArrayBufferView()); + KJ_ASSERT(view.isArrayBufferView()); jsg::BufferSource source(js, view); KJ_ASSERT(source.asArrayPtr()[0], 6); KJ_ASSERT(source.size() == 1); @@ -1089,7 +1102,7 @@ KJ_TEST("ByteQueue with multiple default consumers with atLeast (different rate) KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); auto view = value.getHandle(js); - KJ_ASSERT(view->IsArrayBufferView()); + KJ_ASSERT(view.isArrayBufferView()); jsg::BufferSource source(js, view); KJ_ASSERT(source.size() == 4); auto ptr = source.asArrayPtr(); @@ -1107,7 +1120,7 @@ KJ_TEST("ByteQueue with multiple default consumers with atLeast (different rate) KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); auto view = value.getHandle(js); - KJ_ASSERT(view->IsArrayBufferView()); + KJ_ASSERT(view.isArrayBufferView()); jsg::BufferSource source(js, view); KJ_ASSERT(source.size() == 2); auto ptr = source.asArrayPtr(); @@ -1120,7 +1133,7 @@ KJ_TEST("ByteQueue with multiple default consumers with atLeast (different rate) KJ_ASSERT(!result.done); auto& value = KJ_ASSERT_NONNULL(result.value); auto view = value.getHandle(js); - KJ_ASSERT(view->IsArrayBufferView()); + KJ_ASSERT(view.isArrayBufferView()); jsg::BufferSource source(js, view); auto ptr = source.asArrayPtr(); KJ_ASSERT(source.size() == 5); @@ -1137,7 +1150,7 @@ KJ_TEST("ByteQueue with multiple default consumers with atLeast (different rate) KJ_ASSERT(!result.done); auto& value = 
KJ_ASSERT_NONNULL(result.value); auto view = value.getHandle(js); - KJ_ASSERT(view->IsArrayBufferView()); + KJ_ASSERT(view.isArrayBufferView()); jsg::BufferSource source(js, view); KJ_ASSERT(source.asArrayPtr()[0] == 6); KJ_ASSERT(source.size() == 1); @@ -1243,7 +1256,7 @@ KJ_TEST("ValueQueue push to errored consumer is safe") { ValueQueue::Consumer consumer2(queue); // Error consumer2 - consumer2.error(js, js.v8Ref(js.v8Error("error reason"_kj))); + consumer2.error(js, js.error("error reason"_kj)); // Now push to the queue queue.push(js, getEntry(js, 4)); @@ -1296,12 +1309,12 @@ KJ_TEST("ValueQueue draining read with buffered data") { store.asArrayPtr()[1] = 'b'; store.asArrayPtr()[2] = 'c'; store.asArrayPtr()[3] = 'd'; - auto ab = jsg::BufferSource(js, kj::mv(store)).getHandle(js); - queue.push(js, kj::rc(js.v8Ref(ab.As()), 4)); + auto ab = jsg::JsValue(jsg::BufferSource(js, kj::mv(store)).getHandle(js)); + queue.push(js, kj::rc(ab.addRef(js), 4)); // Push a string - auto str = jsg::v8Str(js.v8Isolate, "hello"); - queue.push(js, kj::rc(js.v8Ref(str.As()), 5)); + auto str = jsg::JsValue(js.str("hello"_kj)); + queue.push(js, kj::rc(str.addRef(js), 5)); KJ_ASSERT(consumer.size() == 9); @@ -1404,7 +1417,7 @@ KJ_TEST("ValueQueue draining read on errored stream") { ValueQueue queue(10); ValueQueue::Consumer consumer(queue); - queue.error(js, js.v8Ref(js.v8Error("boom"_kj))); + queue.error(js, js.error("boom"_kj)); MustNotCall readContinuation; MustCall errorContinuation([&](jsg::Lock& js, auto&& value) { @@ -1544,7 +1557,7 @@ KJ_TEST("ByteQueue draining read on errored stream") { ByteQueue queue(10); ByteQueue::Consumer consumer(queue); - queue.error(js, js.v8Ref(js.v8Error("boom"_kj))); + queue.error(js, js.error("boom"_kj)); MustNotCall readContinuation; MustCall errorContinuation([&](jsg::Lock& js, auto&& value) { @@ -1568,8 +1581,8 @@ KJ_TEST("ValueQueue draining read with close signal") { store.asArrayPtr()[1] = 'b'; store.asArrayPtr()[2] = 'c'; 
store.asArrayPtr()[3] = 'd'; - auto ab = jsg::BufferSource(js, kj::mv(store)).getHandle(js); - queue.push(js, kj::rc(js.v8Ref(ab.As()), 4)); + auto ab = jsg::JsValue(jsg::BufferSource(js, kj::mv(store)).getHandle(js)); + queue.push(js, kj::rc(ab.addRef(js), 4)); // Close the queue queue.close(js); @@ -1624,8 +1637,8 @@ KJ_TEST("ValueQueue draining read errors on non-byte value") { ValueQueue::Consumer consumer(queue); // Push a plain object - this cannot be converted to bytes - auto obj = v8::Object::New(js.v8Isolate); - queue.push(js, kj::rc(js.v8Ref(obj.As()), 1)); + jsg::JsValue obj = jsg::JsValue(js.obj()); + queue.push(js, kj::rc(obj.addRef(js), 1)); KJ_ASSERT(consumer.size() == 1); @@ -1659,8 +1672,8 @@ KJ_TEST("ValueQueue draining read errors on number value") { ValueQueue::Consumer consumer(queue); // Push a number - this cannot be converted to bytes - auto num = v8::Number::New(js.v8Isolate, 42); - queue.push(js, kj::rc(js.v8Ref(num.As()), 1)); + jsg::JsValue num = jsg::JsValue(js.num(42)); + queue.push(js, kj::rc(num.addRef(js), 1)); MustNotCall readContinuation; MustCall errorContinuation([&](jsg::Lock& js, auto&& value) { @@ -1693,13 +1706,13 @@ KJ_TEST("ValueQueue draining read respects maxRead during buffer drain") { // Buffer 200 bytes of data (two 100-byte chunks) auto store1 = jsg::BackingStore::alloc(js, 100); store1.asArrayPtr().fill(0xAA); - auto ab1 = jsg::BufferSource(js, kj::mv(store1)).getHandle(js); - queue.push(js, kj::rc(js.v8Ref(ab1.As()), 100)); + auto ab1 = jsg::JsValue(jsg::BufferSource(js, kj::mv(store1)).getHandle(js)); + queue.push(js, kj::rc(ab1.addRef(js), 100)); auto store2 = jsg::BackingStore::alloc(js, 100); store2.asArrayPtr().fill(0xBB); - auto ab2 = jsg::BufferSource(js, kj::mv(store2)).getHandle(js); - queue.push(js, kj::rc(js.v8Ref(ab2.As()), 100)); + auto ab2 = jsg::JsValue(jsg::BufferSource(js, kj::mv(store2)).getHandle(js)); + queue.push(js, kj::rc(ab2.addRef(js), 100)); KJ_ASSERT(consumer.size() == 200); @@ -1760,13 
+1773,13 @@ KJ_TEST("ValueQueue draining read with large maxRead drains entire buffer") { // Buffer 200 bytes (two 100-byte chunks) auto store1 = jsg::BackingStore::alloc(js, 100); store1.asArrayPtr().fill(0xAA); - auto ab1 = jsg::BufferSource(js, kj::mv(store1)).getHandle(js); - queue.push(js, kj::rc(js.v8Ref(ab1.As()), 100)); + auto ab1 = jsg::JsValue(jsg::BufferSource(js, kj::mv(store1)).getHandle(js)); + queue.push(js, kj::rc(ab1.addRef(js), 100)); auto store2 = jsg::BackingStore::alloc(js, 100); store2.asArrayPtr().fill(0xBB); - auto ab2 = jsg::BufferSource(js, kj::mv(store2)).getHandle(js); - queue.push(js, kj::rc(js.v8Ref(ab2.As()), 100)); + auto ab2 = jsg::JsValue(jsg::BufferSource(js, kj::mv(store2)).getHandle(js)); + queue.push(js, kj::rc(ab2.addRef(js), 100)); KJ_ASSERT(consumer.size() == 200); @@ -1794,8 +1807,8 @@ KJ_TEST("ValueQueue draining read with default maxRead (unlimited)") { // Buffer some data auto store = jsg::BackingStore::alloc(js, 100); store.asArrayPtr().fill(0xAA); - auto ab = jsg::BufferSource(js, kj::mv(store)).getHandle(js); - queue.push(js, kj::rc(js.v8Ref(ab.As()), 100)); + auto ab = jsg::JsValue(jsg::BufferSource(js, kj::mv(store)).getHandle(js)); + queue.push(js, kj::rc(ab.addRef(js), 100)); // Default maxRead (kj::maxValue) should drain buffer normally MustCall readContinuation( @@ -1822,8 +1835,8 @@ KJ_TEST("ValueQueue draining read maxRead bounds multiple iterations") { for (int i = 0; i < 4; i++) { auto store = jsg::BackingStore::alloc(js, 100); store.asArrayPtr().fill(0x10 * (i + 1)); - auto ab = jsg::BufferSource(js, kj::mv(store)).getHandle(js); - queue.push(js, kj::rc(js.v8Ref(ab.As()), 100)); + auto ab = jsg::JsValue(jsg::BufferSource(js, kj::mv(store)).getHandle(js)); + queue.push(js, kj::rc(ab.addRef(js), 100)); } KJ_ASSERT(consumer.size() == 400); @@ -1965,7 +1978,7 @@ KJ_TEST("ValueQueue error then destroy before consumer doesn't crash") { auto consumer = kj::heap(*queue); // Error the queue first - queue->error(js, 
js.v8Ref(js.v8Error("boom"_kj))); + queue->error(js, js.error("boom"_kj)); // Then destroy it queue = nullptr; diff --git a/src/workerd/api/streams/queue.c++ b/src/workerd/api/streams/queue.c++ index 6423537ed04..f2e7f7f9594 100644 --- a/src/workerd/api/streams/queue.c++ +++ b/src/workerd/api/streams/queue.c++ @@ -23,21 +23,27 @@ void ValueQueue::ReadRequest::resolveAsDone(jsg::Lock& js) { resolver.resolve(js, ReadResult{.done = true}); } -void ValueQueue::ReadRequest::resolve(jsg::Lock& js, jsg::Value value) { - resolver.resolve(js, ReadResult{.value = kj::mv(value), .done = false}); +void ValueQueue::ReadRequest::resolve(jsg::Lock& js, jsg::JsRef value) { + resolver.resolve(js, + ReadResult{ + .value = kj::mv(value), + .done = false, + }); } -void ValueQueue::ReadRequest::reject(jsg::Lock& js, jsg::Value& value) { - resolver.reject(js, value.getHandle(js)); +void ValueQueue::ReadRequest::reject(jsg::Lock& js, jsg::JsValue value) { + resolver.reject(js, value); } #pragma endregion ValueQueue::ReadRequest #pragma region ValueQueue::Entry -ValueQueue::Entry::Entry(jsg::Value value, size_t size): value(kj::mv(value)), size(size) {} +ValueQueue::Entry::Entry(jsg::JsRef value, size_t size) + : value(kj::mv(value)), + size(size) {} -jsg::Value ValueQueue::Entry::getValue(jsg::Lock& js) { +jsg::JsRef ValueQueue::Entry::getValue(jsg::Lock& js) { return value.addRef(js); } @@ -76,7 +82,7 @@ ValueQueue::Consumer::Consumer( ValueQueue::Consumer::Consumer(kj::Maybe stateListener) : impl(stateListener) {} -void ValueQueue::Consumer::cancel(jsg::Lock& js, jsg::Optional> maybeReason) { +void ValueQueue::Consumer::cancel(jsg::Lock& js, jsg::Optional maybeReason) { impl.cancel(js, maybeReason); } @@ -88,8 +94,8 @@ bool ValueQueue::Consumer::empty() { return impl.empty(); } -void ValueQueue::Consumer::error(jsg::Lock& js, jsg::Value reason) { - impl.error(js, kj::mv(reason)); +void ValueQueue::Consumer::error(jsg::Lock& js, jsg::JsValue reason) { + impl.error(js, reason); }; void 
ValueQueue::Consumer::read(jsg::Lock& js, ReadRequest request) { @@ -133,9 +139,8 @@ bool ValueQueue::Consumer::hasPendingDrainingRead() { namespace { // Helper to convert a JS value to bytes. Returns kj::none if the value cannot be converted. -kj::Maybe> valueToBytes(jsg::Lock& js, jsg::Value& value) { - auto jsval = jsg::JsValue(value.getHandle(js)); - +kj::Maybe> valueToBytes(jsg::Lock& js, jsg::JsRef value) { + auto jsval = value.getHandle(js); // Try ArrayBuffer first. KJ_IF_SOME(ab, jsval.tryCast()) { auto src = ab.asArrayPtr(); @@ -202,8 +207,7 @@ jsg::Promise ValueQueue::Consumer::drainingRead(jsg::Lock& j break; } KJ_CASE_ONEOF(entry, QueueEntry) { - auto value = entry.entry->getValue(js); - KJ_IF_SOME(bytes, valueToBytes(js, value)) { + KJ_IF_SOME(bytes, valueToBytes(js, entry.entry->getValue(js))) { totalRead += bytes.size(); chunks.add(kj::mv(bytes)); ready.queueTotalSize -= entry.entry->getSize(); @@ -211,7 +215,7 @@ jsg::Promise ValueQueue::Consumer::drainingRead(jsg::Lock& j } else { auto error = js.typeError( "Draining read encountered a value that cannot be converted to bytes"_kj); - impl.error(js, jsg::Value(js.v8Isolate, error)); + impl.error(js, error); return js.rejectedPromise(error); } } @@ -328,7 +332,7 @@ jsg::Promise ValueQueue::Consumer::drainingRead(jsg::Lock& j // Convert the value to bytes. kj::Vector> chunks; KJ_IF_SOME(val, result.value) { - KJ_IF_SOME(bytes, valueToBytes(js, val)) { + KJ_IF_SOME(bytes, valueToBytes(js, val.addRef(js))) { chunks.add(kj::mv(bytes)); } // If valueToBytes returned kj::none, we just return empty chunks. 
@@ -367,8 +371,8 @@ ssize_t ValueQueue::desiredSize() const { return impl.desiredSize(); } -void ValueQueue::error(jsg::Lock& js, jsg::Value reason) { - impl.error(js, kj::mv(reason)); +void ValueQueue::error(jsg::Lock& js, jsg::JsValue reason) { + impl.error(js, reason); } void ValueQueue::maybeUpdateBackpressure() { @@ -515,25 +519,36 @@ void ByteQueue::ReadRequest::resolveAsDone(jsg::Lock& js) { // There's been at least some data written, we need to respond but not // set done to true since that's what the streams spec requires. pullInto.store.trim(js, pullInto.store.size() - pullInto.filled); - resolver.resolve( - js, ReadResult{.value = js.v8Ref(pullInto.store.getHandle(js)), .done = false}); + resolver.resolve(js, + ReadResult{ + .value = jsg::JsValue(pullInto.store.getHandle(js)).addRef(js), + .done = false, + }); } else { // Otherwise, we set the length to zero pullInto.store.trim(js, pullInto.store.size()); KJ_ASSERT(pullInto.store.size() == 0); - resolver.resolve(js, ReadResult{.value = js.v8Ref(pullInto.store.getHandle(js)), .done = true}); + resolver.resolve(js, + ReadResult{ + .value = jsg::JsValue(pullInto.store.getHandle(js)).addRef(js), + .done = true, + }); } maybeInvalidateByobRequest(byobReadRequest); } void ByteQueue::ReadRequest::resolve(jsg::Lock& js) { pullInto.store.trim(js, pullInto.store.size() - pullInto.filled); - resolver.resolve(js, ReadResult{.value = js.v8Ref(pullInto.store.getHandle(js)), .done = false}); + resolver.resolve(js, + ReadResult{ + .value = jsg::JsValue(pullInto.store.getHandle(js)).addRef(js), + .done = false, + }); maybeInvalidateByobRequest(byobReadRequest); } -void ByteQueue::ReadRequest::reject(jsg::Lock& js, jsg::Value& value) { - resolver.reject(js, value.getHandle(js)); +void ByteQueue::ReadRequest::reject(jsg::Lock& js, jsg::JsValue value) { + resolver.reject(js, value); maybeInvalidateByobRequest(byobReadRequest); } @@ -590,7 +605,7 @@ ByteQueue::Consumer::Consumer( ByteQueue::Consumer::Consumer(kj::Maybe 
stateListener) : impl(stateListener) {} -void ByteQueue::Consumer::cancel(jsg::Lock& js, jsg::Optional> maybeReason) { +void ByteQueue::Consumer::cancel(jsg::Lock& js, jsg::Optional maybeReason) { impl.cancel(js, maybeReason); } @@ -602,8 +617,8 @@ bool ByteQueue::Consumer::empty() const { return impl.empty(); } -void ByteQueue::Consumer::error(jsg::Lock& js, jsg::Value reason) { - impl.error(js, kj::mv(reason)); +void ByteQueue::Consumer::error(jsg::Lock& js, jsg::JsValue reason) { + impl.error(js, reason); } void ByteQueue::Consumer::read(jsg::Lock& js, ReadRequest request) { @@ -884,7 +899,7 @@ bool ByteQueue::ByobRequest::respond(jsg::Lock& js, size_t amount) { auto start = sourcePtr.slice(req.pullInto.filled); // Safely copy the data over into the entry. - entry->toArrayPtr().first(amount).copyFrom(start.first(amount)); + entry->toArrayPtr().write(start.first(amount)); // Push the entry into the other consumers. queue.push(js, kj::mv(entry), consumer); @@ -932,7 +947,7 @@ bool ByteQueue::ByobRequest::respond(jsg::Lock& js, size_t amount) { KJ_IF_SOME(store, jsg::BufferSource::tryAllocUnsafe(js, unaligned)) { auto excess = kj::rc(kj::mv(store)); - excess->toArrayPtr().first(unaligned).copyFrom(start.first(unaligned)); + excess->toArrayPtr().write(start.first(unaligned)); consumer.push(js, kj::mv(excess)); } else { js.throwException(js.error("Failed to allocate memory for the byob read response."_kj)); @@ -1021,8 +1036,8 @@ ssize_t ByteQueue::desiredSize() const { return impl.desiredSize(); } -void ByteQueue::error(jsg::Lock& js, jsg::Value reason) { - impl.error(js, kj::mv(reason)); +void ByteQueue::error(jsg::Lock& js, jsg::JsValue reason) { + impl.error(js, reason); } void ByteQueue::maybeUpdateBackpressure() { @@ -1127,7 +1142,7 @@ void ByteQueue::handlePush(jsg::Lock& js, KJ_REQUIRE(sourceSize > 0 && sourceSize < destAmount); // Safely copy sourceSize bytes from sourcePtr to destPtr - destPtr.first(sourceSize).copyFrom(sourcePtr.slice(entry.offset)); + 
destPtr.write(sourcePtr.slice(entry.offset)); // We have completely consumed the data in this entry and can safely free // our reference to it now. Yay! @@ -1176,7 +1191,7 @@ void ByteQueue::handlePush(jsg::Lock& js, // where we start copying. auto entryPtr = newEntry->toArrayPtr(); auto destPtr = pending.pullInto.store.asArrayPtr().slice(pending.pullInto.filled); - destPtr.first(amountToCopy).copyFrom(entryPtr.slice(entryOffset).first(amountToCopy)); + destPtr.write(entryPtr.slice(entryOffset).first(amountToCopy)); // Yay! this pending read has been fulfilled. There might be more tho. Let's adjust // the amountAvailable and continue trying to consume data. @@ -1273,7 +1288,7 @@ void ByteQueue::handleRead(jsg::Lock& js, auto sourcePtr = entry.entry->toArrayPtr().slice(entry.offset); auto destPtr = request.pullInto.store.asArrayPtr().slice(request.pullInto.filled); - destPtr.first(amountToCopy).copyFrom(sourcePtr.first(amountToCopy)); + destPtr.write(sourcePtr.first(amountToCopy)); request.pullInto.filled += amountToCopy; @@ -1428,7 +1443,7 @@ bool ByteQueue::handleMaybeClose(jsg::Lock& js, KJ_ASSERT(amountToCopy <= sourceStart.size()); // Safely copy amountToCopy bytes from the source into the destination. - destPtr.first(amountToCopy).copyFrom(sourceStart.first(amountToCopy)); + destPtr.write(sourceStart.first(amountToCopy)); pending.pullInto.filled += amountToCopy; // We do not need to adjust down the atLeast here because, no matter what, diff --git a/src/workerd/api/streams/queue.h b/src/workerd/api/streams/queue.h index 0f79efb7e70..2ae4d99af26 100644 --- a/src/workerd/api/streams/queue.h +++ b/src/workerd/api/streams/queue.h @@ -194,14 +194,14 @@ class QueueImpl final { // which will, in turn, reset their internal buffers and reject // all pending consume promises. // If we are already closed or errored, do nothing here. 
- void error(jsg::Lock& js, jsg::Value reason) { + void error(jsg::Lock& js, jsg::JsValue reason) { if (state.isActive()) { #ifdef KJ_DEBUG isClosingOrErroring = true; KJ_DEFER(isClosingOrErroring = false); #endif - allConsumers.forEach([&](ConsumerImpl& consumer) { consumer.error(js, reason.addRef(js)); }); - state.template transitionTo(kj::mv(reason)); + allConsumers.forEach([&](ConsumerImpl& consumer) { consumer.error(js, reason); }); + state.template transitionTo(reason.addRef(js)); } } @@ -274,7 +274,7 @@ class QueueImpl final { }; struct Errored { static constexpr kj::StringPtr NAME KJ_UNUSED = "errored"_kj; - jsg::Value reason; + jsg::JsRef reason; }; struct Ready final: public State { @@ -337,7 +337,7 @@ class ConsumerImpl final { public: struct StateListener { virtual void onConsumerClose(jsg::Lock& js) = 0; - virtual void onConsumerError(jsg::Lock& js, jsg::Value reason) = 0; + virtual void onConsumerError(jsg::Lock& js, jsg::JsValue reason) = 0; // Called when the consumer has a pending read and needs data. // Returns true if the pull algorithm completed synchronously (meaning // more pumping might yield additional synchronous data), false if the @@ -400,7 +400,7 @@ class ConsumerImpl final { queue = kj::none; } - void cancel(jsg::Lock& js, jsg::Optional> maybeReason) { + void cancel(jsg::Lock& js, jsg::Optional) { // Already closed or errored - nothing to do. KJ_IF_SOME(ready, state.tryGetActiveUnsafe()) { for (auto& request: ready.readRequests) { @@ -428,11 +428,11 @@ class ConsumerImpl final { return size() == 0; } - void error(jsg::Lock& js, jsg::Value reason) { + void error(jsg::Lock& js, jsg::JsValue reason) { // If we are already closed or errored, then we do nothing here. // The new error doesn't matter. 
if (state.isActive()) { - maybeDrainAndSetState(js, kj::mv(reason)); + maybeDrainAndSetState(js, reason); } } @@ -458,14 +458,13 @@ class ConsumerImpl final { return request.resolveAsDone(js); } KJ_IF_SOME(errored, state.tryGetErrorUnsafe()) { - return request.reject(js, errored.reason); + return request.reject(js, errored.reason.getHandle(js)); } auto& ready = state.requireActiveUnsafe(); // Mutual exclusion with draining reads. if (ready.hasPendingDrainingRead) { - auto error = jsg::Value( - js.v8Isolate, js.typeError("Cannot call read while there is a pending draining read"_kj)); - return request.reject(js, error); + auto err = js.typeError("Cannot call read while there is a pending draining read"_kj); + return request.reject(js, err); } // handleRead may trigger the pull callback (via onConsumerWantsData), which // may synchronously call reader.cancel(). Cancel can destroy this ConsumerImpl @@ -580,7 +579,7 @@ class ConsumerImpl final { }; struct Errored { static constexpr kj::StringPtr NAME KJ_UNUSED = "errored"_kj; - jsg::Value reason; + jsg::JsRef reason; }; struct Ready { static constexpr kj::StringPtr NAME KJ_UNUSED = "ready"_kj; @@ -643,7 +642,7 @@ class ConsumerImpl final { return result; } - void maybeDrainAndSetState(jsg::Lock& js, kj::Maybe maybeReason = kj::none) { + void maybeDrainAndSetState(jsg::Lock& js, kj::Maybe maybeReason = kj::none) { // If the state is already errored or closed then there is nothing to drain. KJ_IF_SOME(ready, state.tryGetActiveUnsafe()) { UpdateBackpressureScope scope(*this); @@ -674,7 +673,7 @@ class ConsumerImpl final { weak->runIfAlive([&](ConsumerImpl& self) { self.state.template transitionTo(reason.addRef(js)); KJ_IF_SOME(listener, self.stateListener) { - listener.onConsumerError(js, kj::mv(reason)); + listener.onConsumerError(js, reason); // After this point, we should not assume that this consumer can // be safely used at all. It's most likely the stateListener has // released it. 
@@ -750,8 +749,8 @@ class ValueQueue final { jsg::Promise::Resolver resolver; void resolveAsDone(jsg::Lock& js); - void resolve(jsg::Lock& js, jsg::Value value); - void reject(jsg::Lock& js, jsg::Value& value); + void resolve(jsg::Lock& js, jsg::JsRef value); + void reject(jsg::Lock& js, jsg::JsValue value); JSG_MEMORY_INFO(ValueQueue::ReadRequest) { tracker.trackField("resolver", resolver); @@ -762,10 +761,10 @@ class ValueQueue final { // calculated by the size algorithm function provided in the stream constructor. class Entry: public kj::Refcounted { public: - explicit Entry(jsg::Value value, size_t size); + explicit Entry(jsg::JsRef value, size_t size); KJ_DISALLOW_COPY_AND_MOVE(Entry); - jsg::Value getValue(jsg::Lock& js); + jsg::JsRef getValue(jsg::Lock& js); size_t getSize() const; @@ -778,7 +777,7 @@ class ValueQueue final { } private: - jsg::Value value; + jsg::JsRef value; size_t size; }; @@ -802,13 +801,13 @@ class ValueQueue final { Consumer& operator=(Consumer&&) = delete; Consumer& operator=(Consumer&) = delete; - void cancel(jsg::Lock& js, jsg::Optional> maybeReason); + void cancel(jsg::Lock& js, jsg::Optional maybeReason); void close(jsg::Lock& js); bool empty(); - void error(jsg::Lock& js, jsg::Value reason); + void error(jsg::Lock& js, jsg::JsValue reason); void read(jsg::Lock& js, ReadRequest request); @@ -852,7 +851,7 @@ class ValueQueue final { ssize_t desiredSize() const; - void error(jsg::Lock& js, jsg::Value reason); + void error(jsg::Lock& js, jsg::JsValue reason); void maybeUpdateBackpressure(); @@ -928,7 +927,7 @@ class ByteQueue final { ~ReadRequest() noexcept(false); void resolveAsDone(jsg::Lock& js); void resolve(jsg::Lock& js); - void reject(jsg::Lock& js, jsg::Value& value); + void reject(jsg::Lock& js, jsg::JsValue value); kj::Own makeByobReadRequest(ConsumerImpl& consumer, QueueImpl& queue); @@ -1051,13 +1050,13 @@ class ByteQueue final { Consumer& operator=(Consumer&&) = delete; Consumer& operator=(Consumer&) = delete; - void 
cancel(jsg::Lock& js, jsg::Optional> maybeReason); + void cancel(jsg::Lock& js, jsg::Optional maybeReason); void close(jsg::Lock& js); bool empty() const; - void error(jsg::Lock& js, jsg::Value reason); + void error(jsg::Lock& js, jsg::JsValue reason); void read(jsg::Lock& js, ReadRequest request); @@ -1097,7 +1096,7 @@ class ByteQueue final { ssize_t desiredSize() const; - void error(jsg::Lock& js, jsg::Value reason); + void error(jsg::Lock& js, jsg::JsValue reason); void maybeUpdateBackpressure(); diff --git a/src/workerd/api/streams/readable-source-adapter-test.c++ b/src/workerd/api/streams/readable-source-adapter-test.c++ index 0a57c29bf01..816e2009781 100644 --- a/src/workerd/api/streams/readable-source-adapter-test.c++ +++ b/src/workerd/api/streams/readable-source-adapter-test.c++ @@ -114,9 +114,10 @@ KJ_TEST("Adapter shutdown with no reads") { adapter->shutdown(env.js); // second call is no-op // Read after shutdown should be resolved immediate + auto u8 = jsg::JsUint8Array::create(env.js, 10); auto read = adapter->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource(env.js, jsg::BackingStore::alloc(env.js, 10)), + .buffer = jsg::JsArrayBufferView(u8).addRef(env.js), }); KJ_ASSERT(read.getState(env.js) == jsg::Promise::State::FULFILLED, @@ -144,9 +145,10 @@ KJ_TEST("Adapter cancel with no reads") { adapter->cancel(env.js, env.js.error("boom")); + auto u8 = jsg::JsUint8Array::create(env.js, 10); auto read = adapter->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource(env.js, jsg::BackingStore::alloc(env.js, 10)), + .buffer = jsg::JsArrayBufferView(u8).addRef(env.js), }); KJ_ASSERT(read.getState(env.js) == jsg::Promise::State::REJECTED, @@ -200,25 +202,21 @@ KJ_TEST("Adapter with single read (ArrayBuffer)") { KJ_ASSERT( adapter->isCanceled() == kj::none, "Adapter should not be canceled upon construction"); - const size_t bufferSize = 10; - auto backing = jsg::BackingStore::alloc(env.js, 
bufferSize); + auto u8 = jsg::JsUint8Array::create(env.js, 10); return env.context .awaitJs(env.js, adapter ->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource(env.js, kj::mv(backing)), + .buffer = jsg::JsArrayBufferView(u8).addRef(env.js), .minBytes = 5, }) .then(env.js, [](jsg::Lock& js, auto result) { - KJ_ASSERT(!result.done, "Stream should not be done yet"); - KJ_ASSERT(result.buffer.asArrayPtr().size() == 10, "Read buffer should be full size"); - KJ_ASSERT(result.buffer.asArrayPtr() == "aaaaaaaaaa"_kjb); - - // BufferSource should be an ArrayBuffer auto handle = result.buffer.getHandle(js); - KJ_ASSERT(handle->IsArrayBuffer()); + KJ_ASSERT(!result.done, "Stream should not be done yet"); + KJ_ASSERT(handle.asArrayPtr().size() == 10, "Read buffer should be full size"); + KJ_ASSERT(handle.asArrayPtr() == "aaaaaaaaaa"_kjb); })).attach(kj::mv(adapter)); }); } @@ -236,25 +234,22 @@ KJ_TEST("Adapter with single read (Uint8Array)") { KJ_ASSERT( adapter->isCanceled() == kj::none, "Adapter should not be canceled upon construction"); - const size_t bufferSize = 10; - auto backing = jsg::BackingStore::alloc(env.js, bufferSize); + auto u8 = jsg::JsUint8Array::create(env.js, 10); return env.context .awaitJs(env.js, adapter ->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource(env.js, kj::mv(backing)), + .buffer = jsg::JsArrayBufferView(u8).addRef(env.js), .minBytes = 5, }) .then(env.js, [](jsg::Lock& js, auto result) { - KJ_ASSERT(!result.done, "Stream should not be done yet"); - KJ_ASSERT(result.buffer.asArrayPtr().size() == 10, "Read buffer should be full size"); - KJ_ASSERT(result.buffer.asArrayPtr() == "aaaaaaaaaa"_kjb); - - // BufferSource should be an ArrayBuffer auto handle = result.buffer.getHandle(js); - KJ_ASSERT(handle->IsUint8Array()); + KJ_ASSERT(!result.done, "Stream should not be done yet"); + KJ_ASSERT(handle.asArrayPtr().size() == 10, "Read buffer should be full size"); + 
KJ_ASSERT(handle.asArrayPtr() == "aaaaaaaaaa"_kjb); + KJ_ASSERT(handle.isUint8Array()); })).attach(kj::mv(adapter)); }); } @@ -272,25 +267,24 @@ KJ_TEST("Adapter with single read (Int32Array)") { KJ_ASSERT( adapter->isCanceled() == kj::none, "Adapter should not be canceled upon construction"); - const size_t bufferSize = 16; - auto backing = jsg::BackingStore::alloc(env.js, bufferSize); + auto ab = jsg::JsArrayBuffer::create(env.js, 16); + auto i32 = v8::Int32Array::New(ab, 0, 4); + auto i32View = jsg::JsArrayBufferView(i32); return env.context .awaitJs(env.js, adapter ->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource(env.js, kj::mv(backing)), + .buffer = i32View.addRef(env.js), .minBytes = 5, }) .then(env.js, [](jsg::Lock& js, auto result) { - KJ_ASSERT(!result.done, "Stream should not be done yet"); - KJ_ASSERT(result.buffer.asArrayPtr().size() == 16, "Read buffer should be full size"); - KJ_ASSERT(result.buffer.asArrayPtr() == "aaaaaaaaaaaaaaaa"_kjb); - - // BufferSource should be an ArrayBuffer auto handle = result.buffer.getHandle(js); - KJ_ASSERT(handle->IsInt32Array()); + KJ_ASSERT(!result.done, "Stream should not be done yet"); + KJ_ASSERT(handle.asArrayPtr().size() == 16, "Read buffer should be full size"); + KJ_ASSERT(handle.asArrayPtr() == "aaaaaaaaaaaaaaaa"_kjb); + KJ_ASSERT(handle.isInt32Array()); })).attach(kj::mv(adapter)); }); } @@ -308,24 +302,21 @@ KJ_TEST("Adapter with single large read (ArrayBuffer)") { KJ_ASSERT( adapter->isCanceled() == kj::none, "Adapter should not be canceled upon construction"); - const size_t bufferSize = 16 * 1024; - auto backing = jsg::BackingStore::alloc(env.js, bufferSize); + auto u8 = jsg::JsUint8Array::create(env.js, 16 * 1024); return env.context .awaitJs(env.js, adapter ->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource(env.js, kj::mv(backing)), + .buffer = jsg::JsArrayBufferView(u8).addRef(env.js), .minBytes = 5, }) .then(env.js, 
[](jsg::Lock& js, auto result) { - KJ_ASSERT(!result.done, "Stream should not be done yet"); - KJ_ASSERT(result.buffer.asArrayPtr().size() == 16 * 1024, "Read buffer should be full size"); - - // BufferSource should be an ArrayBuffer auto handle = result.buffer.getHandle(js); - KJ_ASSERT(handle->IsArrayBuffer()); + KJ_ASSERT(!result.done, "Stream should not be done yet"); + KJ_ASSERT(handle.asArrayPtr().size() == 16 * 1024, "Read buffer should be full size"); + KJ_ASSERT(handle.isUint8Array()); })).attach(kj::mv(adapter)); }); } @@ -343,24 +334,21 @@ KJ_TEST("Adapter with single small read (ArrayBuffer)") { KJ_ASSERT( adapter->isCanceled() == kj::none, "Adapter should not be canceled upon construction"); - const size_t bufferSize = 1; - auto backing = jsg::BackingStore::alloc(env.js, bufferSize); + auto u8 = jsg::JsUint8Array::create(env.js, 1); return env.context .awaitJs(env.js, adapter ->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource(env.js, kj::mv(backing)), + .buffer = jsg::JsArrayBufferView(u8).addRef(env.js), .minBytes = 5, }) .then(env.js, [](jsg::Lock& js, auto result) { - KJ_ASSERT(!result.done, "Stream should not be done yet"); - KJ_ASSERT(result.buffer.asArrayPtr().size() == 1, "Read buffer should be full size"); - - // BufferSource should be an ArrayBuffer auto handle = result.buffer.getHandle(js); - KJ_ASSERT(handle->IsArrayBuffer()); + KJ_ASSERT(!result.done, "Stream should not be done yet"); + KJ_ASSERT(handle.asArrayPtr().size() == 1, "Read buffer should be full size"); + KJ_ASSERT(handle.isUint8Array()); })).attach(kj::mv(adapter)); }); } @@ -378,23 +366,20 @@ KJ_TEST("Adapter with minimal reads (Uint8Array)") { KJ_ASSERT( adapter->isCanceled() == kj::none, "Adapter should not be canceled upon construction"); - const size_t bufferSize = 10; - auto backing = jsg::BackingStore::alloc(env.js, bufferSize); + auto u8 = jsg::JsUint8Array::create(env.js, 10); auto promise = adapter ->read(env.js, 
ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource(env.js, kj::mv(backing)), + .buffer = jsg::JsArrayBufferView(u8).addRef(env.js), .minBytes = 3, }) .then(env.js, [](jsg::Lock& js, auto result) { - KJ_ASSERT(!result.done, "Stream should not be done yet"); - KJ_ASSERT(result.buffer.asArrayPtr().size() == 3, "Read buffer should be three bytes"); - KJ_ASSERT(result.buffer.asArrayPtr() == "aaa"_kjb); - - // BufferSource should be an ArrayBuffer auto handle = result.buffer.getHandle(js); - KJ_ASSERT(handle->IsUint8Array()); + KJ_ASSERT(!result.done, "Stream should not be done yet"); + KJ_ASSERT(handle.asArrayPtr().size() == 3, "Read buffer should be three bytes"); + KJ_ASSERT(handle.asArrayPtr() == "aaa"_kjb); + KJ_ASSERT(handle.isUint8Array()); }); return env.context.awaitJs(env.js, kj::mv(promise)).attach(kj::mv(adapter)); @@ -414,23 +399,22 @@ KJ_TEST("Adapter with minimal reads (Uint32Array)") { KJ_ASSERT( adapter->isCanceled() == kj::none, "Adapter should not be canceled upon construction"); - const size_t bufferSize = 16; - auto backing = jsg::BackingStore::alloc(env.js, bufferSize); + auto ab = jsg::JsArrayBuffer::create(env.js, 16); + auto u32 = v8::Uint32Array::New(ab, 0, 4); + auto u32View = jsg::JsArrayBufferView(u32); auto promise = adapter ->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource(env.js, kj::mv(backing)), + .buffer = u32View.addRef(env.js), .minBytes = 3, // Impl with round up to 4 }) .then(env.js, [](jsg::Lock& js, auto result) { - KJ_ASSERT(!result.done, "Stream should not be done yet"); - KJ_ASSERT(result.buffer.asArrayPtr().size() == 4, "Read buffer should be four bytes"); - KJ_ASSERT(result.buffer.asArrayPtr() == "aaaa"_kjb); - - // BufferSource should be an ArrayBuffer auto handle = result.buffer.getHandle(js); - KJ_ASSERT(handle->IsUint32Array()); + KJ_ASSERT(!result.done, "Stream should not be done yet"); + KJ_ASSERT(handle.asArrayPtr().size() == 4, "Read buffer should be four 
bytes"); + KJ_ASSERT(handle.asArrayPtr() == "aaaa"_kjb); + KJ_ASSERT(handle.isUint32Array()); }); return env.context.awaitJs(env.js, kj::mv(promise)).attach(kj::mv(adapter)); @@ -450,23 +434,22 @@ KJ_TEST("Adapter with over large min reads (Uint32Array)") { KJ_ASSERT( adapter->isCanceled() == kj::none, "Adapter should not be canceled upon construction"); - const size_t bufferSize = 16; - auto backing = jsg::BackingStore::alloc(env.js, bufferSize); + auto ab = jsg::JsArrayBuffer::create(env.js, 16); + auto u32 = v8::Uint32Array::New(ab, 0, 4); + auto u32View = jsg::JsArrayBufferView(u32); auto promise = adapter ->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource(env.js, kj::mv(backing)), + .buffer = u32View.addRef(env.js), .minBytes = 24, // Impl with round up to 4 }) .then(env.js, [](jsg::Lock& js, auto result) { - KJ_ASSERT(!result.done, "Stream should not be done yet"); - KJ_ASSERT(result.buffer.asArrayPtr().size() == 16, "Read buffer should be four bytes"); - KJ_ASSERT(result.buffer.asArrayPtr() == "aaaaaaaaaaaaaaaa"_kjb); - - // BufferSource should be an ArrayBuffer auto handle = result.buffer.getHandle(js); - KJ_ASSERT(handle->IsUint32Array()); + KJ_ASSERT(!result.done, "Stream should not be done yet"); + KJ_ASSERT(handle.asArrayPtr().size() == 16, "Read buffer should be four bytes"); + KJ_ASSERT(handle.asArrayPtr() == "aaaaaaaaaaaaaaaa"_kjb); + KJ_ASSERT(handle.isUint32Array()); }); return env.context.awaitJs(env.js, kj::mv(promise)).attach(kj::mv(adapter)); @@ -484,19 +467,18 @@ KJ_TEST("Adapter with over large min reads (Uint32Array)") { KJ_ASSERT( adapter->isCanceled() == kj::none, "Adapter should not be canceled upon construction"); - const size_t bufferSize = 1; - auto backing = jsg::BackingStore::alloc(env.js, bufferSize); + auto u8 = jsg::JsUint8Array::create(env.js, 1); auto promise = adapter ->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource(env.js, kj::mv(backing)), + .buffer 
= jsg::JsArrayBufferView(u8).addRef(env.js), }) .then(env.js, [](jsg::Lock& js, auto result) { - KJ_ASSERT(result.done, "Stream should be done"); - KJ_ASSERT(result.buffer.asArrayPtr().size() == 0, "Read buffer should be 0 bytes"); auto handle = result.buffer.getHandle(js); - KJ_ASSERT(handle->IsArrayBuffer()); + KJ_ASSERT(result.done, "Stream should be done"); + KJ_ASSERT(handle.asArrayPtr().size() == 0, "Read buffer should be 0 bytes"); + KJ_ASSERT(handle.isUint8Array()); }); return env.context.awaitJs(env.js, kj::mv(promise)).attach(kj::mv(adapter)); @@ -518,20 +500,21 @@ KJ_TEST("Adapter with multiple reads (Uint8Array)") { const size_t bufferSize = 10; + auto u81 = jsg::JsUint8Array::create(env.js, bufferSize); + auto u82 = jsg::JsUint8Array::create(env.js, bufferSize); + auto u83 = jsg::JsUint8Array::create(env.js, bufferSize); + auto read1 = adapter->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource( - env.js, jsg::BackingStore::alloc(env.js, bufferSize)), + .buffer = jsg::JsArrayBufferView(u81).addRef(env.js), }); auto read2 = adapter->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource( - env.js, jsg::BackingStore::alloc(env.js, bufferSize)), + .buffer = jsg::JsArrayBufferView(u82).addRef(env.js), }); auto read3 = adapter->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource( - env.js, jsg::BackingStore::alloc(env.js, bufferSize)), + .buffer = jsg::JsArrayBufferView(u83).addRef(env.js), }); return env.context @@ -539,20 +522,23 @@ KJ_TEST("Adapter with multiple reads (Uint8Array)") { read1 .then(env.js, [read2 = kj::mv(read2)](jsg::Lock& js, auto result) mutable { + auto handle = result.buffer.getHandle(js); KJ_ASSERT(!result.done, "Stream should not be done yet"); - KJ_ASSERT(result.buffer.asArrayPtr().size() == 10, "Read buffer should be full size"); - KJ_ASSERT(result.buffer.asArrayPtr() == "aaaaaaaaaa"_kjb); + KJ_ASSERT(handle.asArrayPtr().size() 
== 10, "Read buffer should be full size"); + KJ_ASSERT(handle.asArrayPtr() == "aaaaaaaaaa"_kjb); return kj::mv(read2); }) .then(env.js, [read3 = kj::mv(read3)](jsg::Lock& js, auto result) mutable { + auto handle = result.buffer.getHandle(js); KJ_ASSERT(!result.done, "Stream should not be done yet"); - KJ_ASSERT(result.buffer.asArrayPtr().size() == 10, "Read buffer should be full size"); - KJ_ASSERT(result.buffer.asArrayPtr() == "aaaaaaaaaa"_kjb); + KJ_ASSERT(handle.asArrayPtr().size() == 10, "Read buffer should be full size"); + KJ_ASSERT(handle.asArrayPtr() == "aaaaaaaaaa"_kjb); return kj::mv(read3); }).then(env.js, [](jsg::Lock& js, auto result) mutable { + auto handle = result.buffer.getHandle(js); KJ_ASSERT(!result.done, "Stream should not be done yet"); - KJ_ASSERT(result.buffer.asArrayPtr().size() == 10, "Read buffer should be full size"); - KJ_ASSERT(result.buffer.asArrayPtr() == "aaaaaaaaaa"_kjb); + KJ_ASSERT(handle.asArrayPtr().size() == 10, "Read buffer should be full size"); + KJ_ASSERT(handle.asArrayPtr() == "aaaaaaaaaa"_kjb); return js.resolvedPromise(); })).attach(kj::mv(adapter)); }); @@ -573,20 +559,21 @@ KJ_TEST("Adapter with multiple reads shutdown") { const size_t bufferSize = 10; + auto u81 = jsg::JsUint8Array::create(env.js, bufferSize); + auto u82 = jsg::JsUint8Array::create(env.js, bufferSize); + auto u83 = jsg::JsUint8Array::create(env.js, bufferSize); + auto read1 = adapter->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource( - env.js, jsg::BackingStore::alloc(env.js, bufferSize)), + .buffer = jsg::JsArrayBufferView(u81).addRef(env.js), }); auto read2 = adapter->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource( - env.js, jsg::BackingStore::alloc(env.js, bufferSize)), + .buffer = jsg::JsArrayBufferView(u82).addRef(env.js), }); auto read3 = adapter->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource( - env.js, 
jsg::BackingStore::alloc(env.js, bufferSize)), + .buffer = jsg::JsArrayBufferView(u83).addRef(env.js), }); adapter->shutdown(env.js); @@ -634,20 +621,21 @@ KJ_TEST("Adapter with multiple reads cancel") { const size_t bufferSize = 10; + auto u81 = jsg::JsUint8Array::create(env.js, bufferSize); + auto u82 = jsg::JsUint8Array::create(env.js, bufferSize); + auto u83 = jsg::JsUint8Array::create(env.js, bufferSize); + auto read1 = adapter->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource( - env.js, jsg::BackingStore::alloc(env.js, bufferSize)), + .buffer = jsg::JsArrayBufferView(u81).addRef(env.js), }); auto read2 = adapter->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource( - env.js, jsg::BackingStore::alloc(env.js, bufferSize)), + .buffer = jsg::JsArrayBufferView(u82).addRef(env.js), }); auto read3 = adapter->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource( - env.js, jsg::BackingStore::alloc(env.js, bufferSize)), + .buffer = jsg::JsArrayBufferView(u83).addRef(env.js), }); adapter->cancel(env.js, env.js.error("boom")); @@ -699,9 +687,11 @@ KJ_TEST("Adapter close after read") { auto adapter = kj::heap( env.js, env.context, newReadableSource(kj::mv(fake))); + auto u8 = jsg::JsUint8Array::create(env.js, 10); + auto read = adapter->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource(env.js, jsg::BackingStore::alloc(env.js, 10)), + .buffer = jsg::JsArrayBufferView(u8).addRef(env.js), }); auto closePromise = adapter->close(env.js); @@ -731,9 +721,11 @@ KJ_TEST("Adapter close") { auto closePromise = adapter->close(env.js); // reads after close should be resoved immediately. 
+ auto u8 = jsg::JsUint8Array::create(env.js, 10); + auto read = adapter->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource(env.js, jsg::BackingStore::alloc(env.js, 10)), + .buffer = jsg::JsArrayBufferView(u8).addRef(env.js), }); KJ_ASSERT(read.getState(env.js) == jsg::Promise::State::FULFILLED, @@ -784,22 +776,22 @@ KJ_TEST("After read BackingStore maintains identity") { std::unique_ptr backing = v8::ArrayBuffer::NewBackingStore(env.js.v8Isolate, 10); auto* backingPtr = backing.get(); - v8::Local originalArrayBuffer = - v8::ArrayBuffer::New(env.js.v8Isolate, kj::mv(backing)); - jsg::BufferSource source(env.js, originalArrayBuffer); + auto ab = jsg::JsArrayBuffer::create(env.js, kj::mv(backing)); + auto u8 = jsg::JsUint8Array::create(env.js, ab); return env.context .awaitJs(env.js, adapter ->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = jsg::BufferSource(env.js, originalArrayBuffer), + .buffer = jsg::JsArrayBufferView(u8).addRef(env.js), .minBytes = 5, }) .then(env.js, [backingPtr](jsg::Lock& js, auto result) { auto handle = result.buffer.getHandle(js); - KJ_ASSERT(handle->IsArrayBuffer()); - auto backing = handle.template As()->GetBackingStore(); + KJ_ASSERT(handle.isUint8Array()); + v8::Local buf = handle.getBuffer(); + auto backing = buf->GetBackingStore(); KJ_ASSERT(backing.get() == backingPtr); return js.resolvedPromise(); })).attach(kj::mv(adapter)); @@ -838,10 +830,10 @@ KJ_TEST("Read all bytes") { return env.context .awaitJs(env.js, - adapter->readAllBytes(env.js).then( - env.js, [&adapter = *adapter](jsg::Lock& js, jsg::BufferSource result) { + adapter->readAllBytes(env.js).then(env.js, + [&adapter = *adapter](jsg::Lock& js, jsg::JsRef result) { // With exponential growth strategy: 1024 + 2048 + 4096 + 8192 = 15360 - KJ_ASSERT(result.size() == 15360); + KJ_ASSERT(result.getHandle(js).size() == 15360); KJ_ASSERT(adapter.isClosed(), "Adapter should be closed after readAllText()"); 
})).attach(kj::mv(adapter)); }); @@ -926,31 +918,31 @@ KJ_TEST("tee successful") { KJ_ASSERT(!branch2->isClosed(), "Branch2 should not be closed after tee"); KJ_ASSERT(branch2->isCanceled() == kj::none, "Branch2 should not be canceled after tee"); - auto backing1 = jsg::BackingStore::alloc(env.js, 11); - auto buffer1 = jsg::BufferSource(env.js, kj::mv(backing1)); + auto u81 = jsg::JsUint8Array::create(env.js, 11); + auto u82 = jsg::JsUint8Array::create(env.js, 11); auto read1 = branch1->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = kj::mv(buffer1), + .buffer = jsg::JsArrayBufferView(u81).addRef(env.js), }); - auto backing2 = jsg::BackingStore::alloc(env.js, 11); - auto buffer2 = jsg::BufferSource(env.js, kj::mv(backing2)); auto read2 = branch2->read(env.js, ReadableStreamSourceJsAdapter::ReadOptions{ - .buffer = kj::mv(buffer2), + .buffer = jsg::JsArrayBufferView(u82).addRef(env.js), }); return env.context .awaitJs(env.js, kj::mv(read1) .then(env.js, [read2 = kj::mv(read2)](jsg::Lock& js, auto result1) mutable { + auto handle = result1.buffer.getHandle(js); KJ_ASSERT(!result1.done, "Stream should not be done yet"); - KJ_ASSERT(result1.buffer.asArrayPtr().size() == 11); - KJ_ASSERT(result1.buffer.asArrayPtr() == "hello world"_kjb); + KJ_ASSERT(handle.asArrayPtr().size() == 11); + KJ_ASSERT(handle.asArrayPtr() == "hello world"_kjb); return kj::mv(read2); }).then(env.js, [](jsg::Lock& js, auto result2) { + auto handle = result2.buffer.getHandle(js); KJ_ASSERT(!result2.done, "Stream should not be done yet"); - KJ_ASSERT(result2.buffer.asArrayPtr().size() == 11); - KJ_ASSERT(result2.buffer.asArrayPtr() == "hello world"_kjb); + KJ_ASSERT(handle.asArrayPtr().size() == 11); + KJ_ASSERT(handle.asArrayPtr() == "hello world"_kjb); return js.resolvedPromise(); })).attach(kj::mv(branch1), kj::mv(branch2)); }); @@ -974,10 +966,9 @@ jsg::Ref createFiniteBytesReadableStream( KJ_ASSERT_NONNULL(controller.template tryGet>())); auto& counter = *count; if 
(counter++ < 10) { - auto backing = jsg::BackingStore::alloc(js, chunkSize); - jsg::BufferSource buffer(js, kj::mv(backing)); - buffer.asArrayPtr().fill(96 + counter); // fill with 'a'...'j' - c->enqueue(js, buffer.getHandle(js)); + auto ab = jsg::JsArrayBuffer::create(js, chunkSize); + ab.asArrayPtr().fill(96 + counter); // fill with 'a'...'j' + c->enqueue(js, ab); } if (counter == 10) { c->close(js); @@ -1001,9 +992,9 @@ jsg::Ref createFiniteByobReadableStream(jsg::Lock& js, size_t ch KJ_ASSERT_NONNULL(controller.template tryGet>())); static int count = 0; if (count++ < 10) { - auto backing = jsg::BackingStore::alloc(js, chunkSize); - jsg::BufferSource buffer(js, kj::mv(backing)); - c->enqueue(js, kj::mv(buffer)); + // TODO(soon): Switch from jsg::BufferSource + auto ab = jsg::JsArrayBuffer::create(js, chunkSize); + c->enqueue(js, jsg::BufferSource(js, ab)); } if (count == 10) { c->close(js); @@ -1587,10 +1578,9 @@ KJ_TEST("KjAdapter MinReadPolicy IMMEDIATE behavior") { controller.template tryGet>()); if (counter < 8) { // Return 256 bytes per chunk, 8 chunks total (2048 bytes) - auto backing = jsg::BackingStore::alloc(js, 256); - jsg::BufferSource buffer(js, kj::mv(backing)); - buffer.asArrayPtr().fill(97 + counter); // 'a', 'b', 'c', etc. - c->enqueue(js, buffer.getHandle(js)); + auto ab = jsg::JsArrayBuffer::create(js, 256); + ab.asArrayPtr().fill(97 + counter); // 'a', 'b', 'c', etc. + c->enqueue(js, ab); counter++; } else { c->close(js); @@ -1643,10 +1633,9 @@ KJ_TEST("KjAdapter MinReadPolicy OPPORTUNISTIC behavior") { if (counter < 8) { // Return 256 bytes per chunk, 8 chunks total (2048 bytes) - auto backing = jsg::BackingStore::alloc(js, 256); - jsg::BufferSource buffer(js, kj::mv(backing)); - buffer.asArrayPtr().fill(97 + counter); // 'a', 'b', 'c', etc. - c->enqueue(js, buffer.getHandle(js)); + auto ab = jsg::JsArrayBuffer::create(js, 256); + ab.asArrayPtr().fill(97 + counter); // 'a', 'b', 'c', etc. 
+ c->enqueue(js, ab); counter++; } else { c->close(js); diff --git a/src/workerd/api/streams/readable-source-adapter.c++ b/src/workerd/api/streams/readable-source-adapter.c++ index 6e5e81b2032..72a2817225b 100644 --- a/src/workerd/api/streams/readable-source-adapter.c++ +++ b/src/workerd/api/streams/readable-source-adapter.c++ @@ -15,13 +15,10 @@ namespace { // does that. It takes the original allocation and wraps it into a new ArrayBuffer // instance that is wrapped by a zero-length view of the same type as the original // TypedArray we were given. -jsg::BufferSource transferToEmptyBuffer(jsg::Lock& js, jsg::BufferSource buffer) { - KJ_DASSERT(!buffer.isDetached() && buffer.canDetach(js)); - auto backing = buffer.detach(js); - backing.limit(0); - auto buf = jsg::BufferSource(js, kj::mv(backing)); - KJ_DASSERT(buf.size() == 0); - return kj::mv(buf); +jsg::JsArrayBufferView transferToEmptyBuffer(jsg::Lock& js, jsg::JsArrayBufferView buffer) { + KJ_DASSERT(!buffer.isDetached() && buffer.isDetachable()); + auto backing = buffer.detachAndTake(js); + return backing.slice(js, 0, 0); } } // namespace @@ -165,14 +162,16 @@ jsg::Promise ReadableStreamSourceJsAd KJ_IF_SOME(exception, state.tryGetErrorUnsafe()) { // Really should not have been called if errored but just in case, // return a rejected promise. - return js.rejectedPromise(js.exceptionToJs(exception.clone())); + return js.rejectedPromise(js.exceptionToJsValue(exception.clone())); } + auto buffer = options.buffer.getHandle(js); + if (state.is()) { // We are already in a closed state. This is a no-op, just return // an empty buffer. return js.resolvedPromise(ReadResult{ - .buffer = transferToEmptyBuffer(js, kj::mv(options.buffer)), + .buffer = transferToEmptyBuffer(js, buffer).addRef(js), .done = true, }); } @@ -185,7 +184,7 @@ jsg::Promise ReadableStreamSourceJsAd // Treat them as if the stream is closed. 
if (active.closePending) { return js.resolvedPromise(ReadResult{ - .buffer = transferToEmptyBuffer(js, kj::mv(options.buffer)), + .buffer = transferToEmptyBuffer(js, buffer).addRef(js), .done = true, }); } @@ -194,13 +193,10 @@ jsg::Promise ReadableStreamSourceJsAd // Let's enqueue our read request. auto& ioContext = IoContext::current(); - auto buffer = kj::mv(options.buffer); auto elementSize = buffer.getElementSize(); // The buffer size should always be a multiple of the element size and should - // always be at least as large as minBytes. This should be handled for us by - // the jsg::BufferSource, but just to be safe, we will double-check with a - // debug assert here. + // always be at least as large as minBytes. KJ_DASSERT(buffer.size() % elementSize == 0); auto minBytes = kj::min(options.minBytes.orDefault(elementSize), buffer.size()); @@ -231,10 +227,11 @@ jsg::Promise ReadableStreamSourceJsAd })); return ioContext .awaitIo(js, kj::mv(promise), - [buffer = kj::mv(buffer), self = selfRef.addRef()](jsg::Lock& js, + [buffer = buffer.addRef(js), self = selfRef.addRef()](jsg::Lock& js, size_t bytesRead) mutable -> jsg::Promise { // If the bytesRead is 0, that indicates the stream is closed. We will // move the stream to a closed state and return the empty buffer. + auto handle = buffer.getHandle(js); if (bytesRead == 0) { self->runIfAlive([](ReadableStreamSourceJsAdapter& self) { KJ_IF_SOME(open, self.state.tryGetActiveUnsafe()) { @@ -242,27 +239,27 @@ jsg::Promise ReadableStreamSourceJsAd } }); return js.resolvedPromise(ReadResult{ - .buffer = transferToEmptyBuffer(js, kj::mv(buffer)), + .buffer = transferToEmptyBuffer(js, handle).addRef(js), .done = true, }); } - KJ_DASSERT(bytesRead <= buffer.size()); + KJ_DASSERT(bytesRead <= handle.size()); // If bytesRead is not a multiple of the element size, that indicates // that the source either read less than minBytes (and ended), or is // simply unable to satisfy the element size requirement. 
We cannot // provide a partial element to the caller, so reject the read. - if (bytesRead % buffer.getElementSize() != 0) { + if (bytesRead % handle.getElementSize() != 0) { return js.rejectedPromise( js.typeError(kj::str("The underlying stream failed to provide a multiple of the " "target element size ", - buffer.getElementSize()))); + handle.getElementSize()))); } - auto backing = buffer.detach(js); - backing.limit(bytesRead); + auto backing = handle.detachAndTake(js); + return js.resolvedPromise(ReadResult{ - .buffer = jsg::BufferSource(js, kj::mv(backing)), + .buffer = backing.slice(js, 0, bytesRead).addRef(js), .done = false, }); }) @@ -283,7 +280,7 @@ jsg::Promise ReadableStreamSourceJsAdapter::close(jsg::Lock& js) { KJ_IF_SOME(exception, state.tryGetErrorUnsafe()) { // Really should not have been called if errored but just in case, // return a rejected promise. - return js.rejectedPromise(js.exceptionToJs(exception.clone())); + return js.rejectedPromise(js.exceptionToJsValue(exception.clone())); } if (state.is()) { @@ -322,14 +319,14 @@ jsg::Promise> ReadableStreamSourceJsAdapter::readAllTe KJ_IF_SOME(exception, state.tryGetErrorUnsafe()) { // Really should not have been called if errored but just in case, // return a rejected promise. - return js.rejectedPromise>(js.exceptionToJs(exception.clone())); + return js.rejectedPromise>(js.exceptionToJsValue(exception.clone())); } if (state.is()) { // We are already in a closed state. This is a no-op. This really // should not have been called if closed but just in case, return // a resolved promise. 
- return js.resolvedPromise(jsg::JsRef(js, js.str())); + return js.resolvedPromise(js.str().addRef(js)); } auto& open = state.requireActiveUnsafe(); @@ -361,9 +358,9 @@ jsg::Promise> ReadableStreamSourceJsAdapter::readAllTe [&](ReadableStreamSourceJsAdapter& self) { self.state.transitionTo(); }); KJ_IF_SOME(result, holder->result) { KJ_DASSERT(result.size() == amount); - return jsg::JsRef(js, js.str(result)); + return js.str(result).addRef(js); } else { - return jsg::JsRef(js, js.str()); + return js.str().addRef(js); } }) .catch_(js, @@ -377,20 +374,20 @@ jsg::Promise> ReadableStreamSourceJsAdapter::readAllTe }); } -jsg::Promise ReadableStreamSourceJsAdapter::readAllBytes( +jsg::Promise> ReadableStreamSourceJsAdapter::readAllBytes( jsg::Lock& js, uint64_t limit) { KJ_IF_SOME(exception, state.tryGetErrorUnsafe()) { // Really should not have been called if errored but just in case, // return a rejected promise. - return js.rejectedPromise(js.exceptionToJs(exception.clone())); + return js.rejectedPromise>(js.exceptionToJs(exception.clone())); } if (state.is()) { // We are already in a closed state. This is a no-op. This really // should not have been called if closed but just in case, return // a resolved promise. - auto backing = jsg::BackingStore::alloc(js, 0); - return js.resolvedPromise(jsg::BufferSource(js, kj::mv(backing))); + auto ab = jsg::JsArrayBuffer::create(js, 0); + return js.resolvedPromise(ab.addRef(js)); } auto& open = state.requireActiveUnsafe(); @@ -398,7 +395,7 @@ jsg::Promise ReadableStreamSourceJsAdapter::readAllBytes( auto& active = *open.active; if (active.closePending) { - return js.rejectedPromise( + return js.rejectedPromise>( js.typeError("Close already pending, cannot read.")); } active.closePending = true; @@ -424,16 +421,14 @@ jsg::Promise ReadableStreamSourceJsAdapter::readAllBytes( KJ_DASSERT(result.size() == amount); // We have to copy the data into the backing store because of the // v8 sandboxing rules. 
- auto backing = jsg::BackingStore::alloc(js, amount); - backing.asArrayPtr().copyFrom(result); - return jsg::BufferSource(js, kj::mv(backing)); + return jsg::JsArrayBuffer::create(js, result).addRef(js); } else { - auto backing = jsg::BackingStore::alloc(js, 0); - return jsg::BufferSource(js, kj::mv(backing)); + return jsg::JsArrayBuffer::create(js, 0).addRef(js); } }) .catch_(js, - [self = selfRef.addRef()](jsg::Lock& js, jsg::Value&& exception) -> jsg::BufferSource { + [self = selfRef.addRef()]( + jsg::Lock& js, jsg::Value&& exception) -> jsg::JsRef { // Likewise, while nothing should be waiting on the ready promise, we // should still reject it just in case. auto error = jsg::JsValue(exception.getHandle(js)); @@ -452,7 +447,7 @@ kj::Maybe ReadableStreamSourceJsAdapter::tryGetLength(StreamEncoding e kj::Maybe ReadableStreamSourceJsAdapter::tryTee( jsg::Lock& js, uint64_t limit) { KJ_IF_SOME(exception, state.tryGetErrorUnsafe()) { - js.throwException(js.exceptionToJs(exception.clone())); + js.throwException(js.exceptionToJsValue(exception.clone())); } if (state.is()) { @@ -589,11 +584,11 @@ using JsByteSource = kj::OneOf, kj::Maybe tryExtractJsByteSource(jsg::Lock& js, const jsg::JsValue& jsval) { KJ_IF_SOME(abView, jsval.tryCast()) { - return kj::Maybe(jsg::JsRef(js, abView)); + return kj::Maybe(abView.addRef(js)); } else KJ_IF_SOME(ab, jsval.tryCast()) { - return kj::Maybe(jsg::JsRef(js, ab)); + return kj::Maybe(ab.addRef(js)); } else KJ_IF_SOME(str, jsval.tryCast()) { - return kj::Maybe(jsg::JsRef(js, str)); + return kj::Maybe(str.addRef(js)); } return kj::none; } @@ -618,9 +613,8 @@ kj::Maybe> copyFromSource( // again into our buffer. This is because the V8 string UTF-8 // write API does not support partial writes with an offset. 
auto data = view.toUSVString(js); - context.buffer.first(toCopy).copyFrom(data.asBytes().first(toCopy)); + context.buffer.write(data.asBytes().first(toCopy)); context.totalRead += toCopy; - context.buffer = context.buffer.slice(toCopy); KJ_DASSERT(context.buffer.size() == 0); return kj::Maybe(data.asBytes().slice(toCopy).attach(kj::mv(data))); } @@ -641,9 +635,8 @@ kj::Maybe> copyFromSource( return kj::none; } - context.buffer.first(toCopy).copyFrom(src.first(toCopy)); + context.buffer.write(src.first(toCopy)); context.totalRead += toCopy; - context.buffer = context.buffer.slice(toCopy); if (toCopy < src.size()) { KJ_DASSERT(context.buffer.size() == 0); @@ -666,9 +659,8 @@ kj::Maybe> copyFromSource( return kj::none; } - context.buffer.first(toCopy).copyFrom(src.first(toCopy)); + context.buffer.write(src.first(toCopy)); context.totalRead += toCopy; - context.buffer = context.buffer.slice(toCopy); if (toCopy < src.size()) { KJ_DASSERT(context.buffer.size() == 0); @@ -704,9 +696,9 @@ void ReadableSourceKjAdapter::Active::cancel(kj::Exception reason) { // If the previous read indicated that it was the last read, then // the reader will have already been dropped. We do not need to // cancel it here. - ioContext.addTask(ioContext.run([readable = kj::mv(stream), reader = kj::mv(reader), - exception = kj::mv(reason)](jsg::Lock& js) mutable { - auto& ioContext = IoContext::current(); + ioContext.addTask( + ioContext.run([readable = kj::mv(stream), reader = kj::mv(reader), + exception = kj::mv(reason)](jsg::Lock& js, IoContext& ioContext) mutable { auto error = js.exceptionToJsValue(kj::mv(exception)); auto promise = reader->cancel(js, error.getHandle(js)); return ioContext.awaitJs(js, kj::mv(promise)); @@ -753,7 +745,7 @@ jsg::Promise> ReadableSourceKjAdap // Ok, we have some data. Let's make sure it is bytes. // We accept either an ArrayBuffer, ArrayBufferView, or string. 
- auto jsval = jsg::JsValue(value.getHandle(js)); + auto jsval = value.getHandle(js); KJ_IF_SOME(result, tryExtractJsByteSource(js, jsval)) { // Process the resulting data. KJ_IF_SOME(leftOver, copyFromSource(js, *context, result)) { @@ -858,8 +850,7 @@ kj::Promise ReadableSourceKjAdapter::readImpl( // Otherwise, consume what we do have left over. auto size = readable.view.size(); - dest.first(size).copyFrom(readable.view); - dest = dest.slice(size); + dest.write(readable.view); active.state.transitionTo(); @@ -904,10 +895,8 @@ kj::Promise ReadableSourceKjAdapter::readImpl( // reference to the adapter itself and check that we are still alive // and active before trying to update any state. active.ioContext.run([context = kj::mv(context), self = selfRef.addRef(), - minReadPolicy = options.minReadPolicy]( - jsg::Lock& js) mutable -> kj::Promise { - auto& ioContext = IoContext::current(); - + minReadPolicy = options.minReadPolicy](jsg::Lock& js, + IoContext& ioContext) mutable -> kj::Promise { // Perform the actual read. return ioContext.awaitJs(js, readInternal(js, kj::mv(context), minReadPolicy)) .then([self = kj::mv(self)](kj::Own context) mutable -> kj::Promise { @@ -1129,8 +1118,7 @@ kj::Promise ReadableSourceKjAdapter::pumpToImpl( // to minimize the number of times we need to re-enter the lock. DrainingReader* readerPtr = reader.get(); DrainingReadResult result = - co_await active->ioContext.run([readerPtr](jsg::Lock& js) mutable { - auto& ioContext = IoContext::current(); + co_await active->ioContext.run([readerPtr](jsg::Lock& js, IoContext& ioContext) mutable { // Use a 256KB limit to allow periodic yielding to the event loop, // preventing a fast producer from monopolizing the thread. This limit // only affects subsequent pump iterations after the initial buffer drain. @@ -1169,8 +1157,8 @@ kj::Promise ReadableSourceKjAdapter::pumpToImpl( // If there was an error, cancel the reader and propagate the exception. 
KJ_IF_SOME(exception, pendingException) { DrainingReader* readerPtr = reader.get(); - co_await active->ioContext.run([readerPtr, ex = exception.clone()](jsg::Lock& js) mutable { - auto& ioContext = IoContext::current(); + co_await active->ioContext.run( + [readerPtr, ex = exception.clone()](jsg::Lock& js, IoContext& ioContext) mutable { auto error = js.exceptionToJsValue(kj::mv(ex)); return ioContext.awaitJs(js, readerPtr->cancel(js, error.getHandle(js))); }); @@ -1293,7 +1281,7 @@ kj::Promise> ReadableSourceKjAdapter::readAllImpl(size_t limit) { CancelationToken cancelationToken; co_return co_await IoContext::current().run( [limit, active = kj::mv(activeState), cancelationToken = cancelationToken.getWeakRef()]( - jsg::Lock& js) mutable -> kj::Promise> { + jsg::Lock& js, IoContext& ioContext) mutable -> kj::Promise> { kj::Vector accumulated; // If we know the length of the stream ahead of time, and it is within the limit, // we can reserve that much space in the accumulator to avoid multiple allocations. 
@@ -1303,7 +1291,6 @@ kj::Promise> ReadableSourceKjAdapter::readAllImpl(size_t limit) { } } - auto& ioContext = IoContext::current(); return ioContext.awaitJs(js, readAllReadImpl(js, ioContext.addObject(kj::mv(active)), kj::mv(accumulated), limit, kj::mv(cancelationToken))); @@ -1330,8 +1317,7 @@ jsg::Promise> ReadableSourceKjAdapter::readAllReadImpl(jsg::Lock& j auto leftover = readable.view.asBytes(); if (leftover.size() > limit) { auto error = js.rangeError("Memory limit would be exceeded before EOF."); - return active->reader->cancel(js, error).then( - js, [ex = jsg::JsRef(js, error)](jsg::Lock& js) { + return active->reader->cancel(js, error).then(js, [ex = error.addRef(js)](jsg::Lock& js) { return js.rejectedPromise>(ex.getHandle(js)); }); } @@ -1362,7 +1348,7 @@ jsg::Promise> ReadableSourceKjAdapter::readAllReadImpl(jsg::Lock& j } auto& value = KJ_ASSERT_NONNULL(result.value); - auto jsval = jsg::JsValue(value.getHandle(js)); + auto jsval = value.getHandle(js); kj::ArrayPtr bytes; kj::Maybe maybeOwnedString; @@ -1378,16 +1364,14 @@ jsg::Promise> ReadableSourceKjAdapter::readAllReadImpl(jsg::Lock& j } else { auto error = js.typeError("ReadableStream provided a non-bytes value. 
Only ArrayBuffer, " "ArrayBufferView, or string are supported."); - return active->reader->cancel(js, error).then( - js, [err = jsg::JsRef(js, error)](jsg::Lock& js) { + return active->reader->cancel(js, error).then(js, [err = error.addRef(js)](jsg::Lock& js) { return js.rejectedPromise>(err.getHandle(js)); }); } if (accumulated.size() + bytes.size() > limit) { auto error = js.rangeError("Memory limit would be exceeded before EOF."); - return active->reader->cancel(js, error).then( - js, [err = jsg::JsRef(js, error)](jsg::Lock& js) { + return active->reader->cancel(js, error).then(js, [err = error.addRef(js)](jsg::Lock& js) { return js.rejectedPromise>(err.getHandle(js)); }); } diff --git a/src/workerd/api/streams/readable-source-adapter.h b/src/workerd/api/streams/readable-source-adapter.h index e167798bc06..7bca8298cf2 100644 --- a/src/workerd/api/streams/readable-source-adapter.h +++ b/src/workerd/api/streams/readable-source-adapter.h @@ -159,7 +159,7 @@ class ReadableStreamSourceJsAdapter final { // is equal to the length of this buffer. The actual number of // bytes read is indicated by the resolved value of the promise // but will never exceed the length of this buffer. - jsg::BufferSource buffer; + jsg::JsRef buffer; // The optional minimum number of bytes to read. If not provided, // the read will complete as soon as at least the mininum number @@ -179,7 +179,7 @@ class ReadableStreamSourceJsAdapter final { // of the same type as that provided in ReadOptions. // If the read produced no data because the stream is // closed, the type array will be zero length. - jsg::BufferSource buffer; + jsg::JsRef buffer; // True if the stream is now closed and no further reads // are possible. If this is true, the buffer will be zero @@ -210,7 +210,8 @@ class ReadableStreamSourceJsAdapter final { // If there are pending reads when this is called, those reads // will be allowed to complete first, and then the stream will // be read to the end. 
- jsg::Promise readAllBytes(jsg::Lock& js, uint64_t limit = kj::maxValue); + jsg::Promise> readAllBytes( + jsg::Lock& js, uint64_t limit = kj::maxValue); // If the stream is still active, tries to get the total length, // if known. If the length is not known, the encoding does not diff --git a/src/workerd/api/streams/readable-source-test.c++ b/src/workerd/api/streams/readable-source-test.c++ index ff6d8e3fb0e..1b03ab04a98 100644 --- a/src/workerd/api/streams/readable-source-test.c++ +++ b/src/workerd/api/streams/readable-source-test.c++ @@ -100,7 +100,7 @@ class MemoryAsyncInputStream: public kj::AsyncInputStream { kj::Promise tryRead(void* buffer, size_t minBytes, size_t maxBytes) override { auto dest = kj::arrayPtr(static_cast(buffer), maxBytes); size_t amount = kj::min(dest.size(), data_.size()); - dest.first(amount).copyFrom(data_.first(amount)); + dest.write(data_.first(amount)); data_ = data_.slice(amount); return amount; } diff --git a/src/workerd/api/streams/readable-source.c++ b/src/workerd/api/streams/readable-source.c++ index 9c2a4c736af..6c1b84fe04d 100644 --- a/src/workerd/api/streams/readable-source.c++ +++ b/src/workerd/api/streams/readable-source.c++ @@ -122,8 +122,7 @@ class AllReader final { void copyInto(kj::ArrayPtr out, kj::ArrayPtr> in) { for (auto& part: in) { KJ_DASSERT(part.size() <= out.size()); - out.first(part.size()).copyFrom(part); - out = out.slice(part.size()); + out.write(part); } } }; @@ -827,7 +826,7 @@ class MemoryInputStream final: public ReadableStreamSource { kj::Promise> pumpTo(WritableStreamSink& output, bool end) override { // Explicitly NOT using KJ_CO_MAGIC BEGIN_DEFERRED_PROXYING here! - // The backing memory may be tied to V8 heap (e.g., jsg::BackingStore, Blob data), + // The backing memory may be tied to V8 heap (e.g., ArrayBuffer, Blob data), // so we must complete all I/O before the IoContext can be released. 
if (unread.size() > 0) { auto data = unread; diff --git a/src/workerd/api/streams/readable-source.h b/src/workerd/api/streams/readable-source.h index 55074de4f84..2b4804f725f 100644 --- a/src/workerd/api/streams/readable-source.h +++ b/src/workerd/api/streams/readable-source.h @@ -217,7 +217,7 @@ kj::Own wrapTeeBranch(kj::Own branch // A ReadableStreamSource backed by in-memory data. Unlike newSystemStream() wrapping a // newMemoryInputStream(), this implementation does NOT support deferred proxying. This is -// important when the backing memory has V8 heap provenance (e.g., jsg::BackingStore, Blob data, +// important when the backing memory has V8 heap provenance (e.g., ArrayBuffer, Blob data, // kj::Array with a v8::BackingStore attached, etc) // since the memory could be freed by GC after the IoContext completes. // diff --git a/src/workerd/api/streams/readable.c++ b/src/workerd/api/streams/readable.c++ index aa965262fe4..9490f02b9d1 100644 --- a/src/workerd/api/streams/readable.c++ +++ b/src/workerd/api/streams/readable.c++ @@ -39,12 +39,11 @@ void ReaderImpl::detach() { } } -jsg::Promise ReaderImpl::cancel( - jsg::Lock& js, jsg::Optional> maybeReason) { +jsg::Promise ReaderImpl::cancel(jsg::Lock& js, jsg::Optional maybeReason) { assertAttachedOrTerminal(); if (state.is()) { return js.rejectedPromise( - js.v8TypeError("This ReadableStream reader has been released."_kj)); + js.typeError("This ReadableStream reader has been released."_kj)); } if (state.is()) { return js.resolvedPromise(); @@ -74,11 +73,10 @@ jsg::Promise ReaderImpl::read( assertAttachedOrTerminal(); if (state.is()) { return js.rejectedPromise( - js.v8TypeError("This ReadableStream reader has been released."_kj)); + js.typeError("This ReadableStream reader has been released."_kj)); } if (state.is()) { - return js.rejectedPromise( - js.v8TypeError("This ReadableStream has been closed."_kj)); + return js.rejectedPromise(js.typeError("This ReadableStream has been closed."_kj)); } auto& attached = 
state.requireActiveUnsafe(); KJ_IF_SOME(options, byobOptions) { @@ -87,23 +85,23 @@ jsg::Promise ReaderImpl::read( if (options.byteLength == 0) { return js.rejectedPromise( - js.v8TypeError("You must call read() on a \"byob\" reader with a positive-sized " - "TypedArray object."_kj)); + js.typeError("You must call read() on a \"byob\" reader with a positive-sized " + "TypedArray object."_kj)); } if (atLeast == 0) { - return js.rejectedPromise(js.v8TypeError( + return js.rejectedPromise(js.typeError( kj::str("Requested invalid minimum number of bytes to read (", atLeast, ")."))); } // Both read() and readAtLeast() pass atLeast in element count. // Convert to bytes before validation and forwarding to the controller. - jsg::BufferSource source(js, options.bufferView.getHandle(js)); + jsg::JsArrayBufferView source(options.bufferView.getHandle(js)); auto elementSize = source.getElementSize(); atLeast = atLeast * elementSize; if (atLeast > options.byteLength) { - return js.rejectedPromise(js.v8TypeError(kj::str("Minimum bytes to read (", - atLeast, ") exceeds size of buffer (", options.byteLength, ")."))); + return js.rejectedPromise(js.typeError(kj::str("Minimum bytes to read (", atLeast, + ") exceeds size of buffer (", options.byteLength, ")."))); } options.atLeast = atLeast; @@ -154,7 +152,7 @@ void ReadableStreamDefaultReader::attach( } jsg::Promise ReadableStreamDefaultReader::cancel( - jsg::Lock& js, jsg::Optional> maybeReason) { + jsg::Lock& js, jsg::Optional maybeReason) { return impl.cancel(js, kj::mv(maybeReason)); } @@ -207,7 +205,7 @@ void ReadableStreamBYOBReader::attach( } jsg::Promise ReadableStreamBYOBReader::cancel( - jsg::Lock& js, jsg::Optional> maybeReason) { + jsg::Lock& js, jsg::Optional maybeReason) { return impl.cancel(js, kj::mv(maybeReason)); } @@ -316,11 +314,11 @@ jsg::Promise DrainingReader::read(jsg::Lock& js, size_t maxR return kj::mv(result); } return js.rejectedPromise( - js.v8TypeError("Unable to perform draining read on this 
stream."_kj)); + js.typeError("Unable to perform draining read on this stream."_kj)); } KJ_CASE_ONEOF(r, Released) { return js.rejectedPromise( - js.v8TypeError("This ReadableStream reader has been released."_kj)); + js.typeError("This ReadableStream reader has been released."_kj)); } KJ_CASE_ONEOF(c, StreamStates::Closed) { return js.resolvedPromise(DrainingReadResult{ @@ -332,8 +330,7 @@ jsg::Promise DrainingReader::read(jsg::Lock& js, size_t maxR KJ_UNREACHABLE; } -jsg::Promise DrainingReader::cancel( - jsg::Lock& js, jsg::Optional> maybeReason) { +jsg::Promise DrainingReader::cancel(jsg::Lock& js, jsg::Optional maybeReason) { KJ_SWITCH_ONEOF(state) { KJ_CASE_ONEOF(i, Initial) { KJ_FAIL_ASSERT("this reader was never attached"); @@ -344,7 +341,7 @@ jsg::Promise DrainingReader::cancel( } KJ_CASE_ONEOF(r, Released) { return js.rejectedPromise( - js.v8TypeError("This ReadableStream reader has been released."_kj)); + js.typeError("This ReadableStream reader has been released."_kj)); } KJ_CASE_ONEOF(c, StreamStates::Closed) { return js.resolvedPromise(); @@ -431,11 +428,10 @@ ReadableStreamController& ReadableStream::getController() { return *controller; } -jsg::Promise ReadableStream::cancel( - jsg::Lock& js, jsg::Optional> maybeReason) { +jsg::Promise ReadableStream::cancel(jsg::Lock& js, jsg::Optional maybeReason) { if (isLocked()) { return js.rejectedPromise( - js.v8TypeError("This ReadableStream is currently locked to a reader."_kj)); + js.typeError("This ReadableStream is currently locked to a reader."_kj)); } return getController().cancel(js, maybeReason); } @@ -484,10 +480,9 @@ jsg::Ref ReadableStream::pipeThrough( // The lambda intentionally captures self as a visitable reference, ensuring // JSG_THIS stays alive until the pipe promise resolves. 
controller.pipeTo(js, destination, kj::mv(options)) - .then(js, - JSG_VISITABLE_LAMBDA( - (self = JSG_THIS), (self), (jsg::Lock& js) { return js.resolvedPromise(); })) - .markAsHandled(js); + .then(js, [self = JSG_THIS](jsg::Lock& js) { + return js.resolvedPromise(); + }).markAsHandled(js); return kj::mv(transform.readable); } @@ -496,12 +491,12 @@ jsg::Promise ReadableStream::pipeTo(jsg::Lock& js, jsg::Optional maybeOptions) { if (isLocked()) { return js.rejectedPromise( - js.v8TypeError("This ReadableStream is currently locked to a reader."_kj)); + js.typeError("This ReadableStream is currently locked to a reader."_kj)); } if (destination->getController().isLockedToWriter()) { return js.rejectedPromise( - js.v8TypeError("This WritableStream is currently locked to a writer"_kj)); + js.typeError("This WritableStream is currently locked to a writer"_kj)); } auto options = kj::mv(maybeOptions).orDefault({}); @@ -530,30 +525,30 @@ jsg::Optional ReadableStream::inspectLength() { return tryGetLength(StreamEncoding::IDENTITY); } -jsg::Promise> ReadableStream::nextFunction( +jsg::Promise>> ReadableStream::nextFunction( jsg::Lock& js, AsyncIteratorState& state) { return state.reader->read(js).then( js, [reader = state.reader.addRef()](jsg::Lock& js, ReadResult result) mutable { if (result.done) { reader->releaseLock(js); - return js.resolvedPromise(kj::Maybe(kj::none)); + return js.resolvedPromise(kj::Maybe>(kj::none)); } - return js.resolvedPromise>(kj::mv(result.value)); + return js.resolvedPromise>>(kj::mv(result.value)); }); } jsg::Promise ReadableStream::returnFunction( - jsg::Lock& js, AsyncIteratorState& state, jsg::Optional& value) { + jsg::Lock& js, AsyncIteratorState& state, jsg::Optional>& value) { if (state.reader.get() != nullptr) { auto reader = kj::mv(state.reader); if (!state.preventCancel) { - auto promise = reader->cancel(js, value.map([&](jsg::Value& v) { return v.getHandle(js); })); + auto promise = + reader->cancel(js, value.map([&](jsg::V8Ref& v) { 
return v.getHandle(js); })); reader->releaseLock(js); - auto result = promise.then(js, - JSG_VISITABLE_LAMBDA((reader = kj::mv(reader)), (reader), (jsg::Lock& js) { - // Ensure that the reader is not garbage collected until the cancel promise resolves. - return js.resolvedPromise(); - })); + auto result = promise.then(js, [reader = kj::mv(reader)](jsg::Lock& js) mutable { + // Ensure that the reader is not garbage collected until the cancel promise resolves. + return js.resolvedPromise(); + }); // When the stream is already errored, cancel() returns a rejected promise // that propagates through the .then() chain. Mark it as handled so V8 does // not fire unhandledrejection events during iterator teardown. @@ -603,19 +598,20 @@ jsg::Ref ReadableStream::constructor(jsg::Lock& js, } jsg::Optional ByteLengthQueuingStrategy::size( - jsg::Lock& js, jsg::Optional> maybeValue) { + jsg::Lock& js, jsg::Optional maybeValue) { KJ_IF_SOME(value, maybeValue) { - if ((value)->IsArrayBuffer()) { - auto buffer = value.As(); + if (value.isArrayBuffer()) { + v8::Local buffer = KJ_ASSERT_NONNULL(value.tryCast()); return buffer->ByteLength(); - } else if ((value)->IsArrayBufferView()) { - auto view = value.As(); + } else if (value.isArrayBufferView()) { + v8::Local view = + KJ_ASSERT_NONNULL(value.tryCast()); return view->ByteLength(); } else { // Per the WHATWG Streams spec, ByteLengthQueuingStrategy.size should return // GetV(chunk, "byteLength"), which means getting the byteLength property // from any object, not just ArrayBuffer/ArrayBufferView. 
- KJ_IF_SOME(obj, jsg::JsValue(value).tryCast()) { + KJ_IF_SOME(obj, value.tryCast()) { auto byteLength = obj.get(js, "byteLength"_kj); KJ_IF_SOME(num, byteLength.tryCast()) { KJ_IF_SOME(val, num.value(js)) { diff --git a/src/workerd/api/streams/readable.h b/src/workerd/api/streams/readable.h index ad76d7d9304..61fd2a69b5f 100644 --- a/src/workerd/api/streams/readable.h +++ b/src/workerd/api/streams/readable.h @@ -22,7 +22,7 @@ class ReaderImpl final { void attach(ReadableStreamController& controller, jsg::Promise closedPromise); - jsg::Promise cancel(jsg::Lock& js, jsg::Optional> maybeReason); + jsg::Promise cancel(jsg::Lock& js, jsg::Optional maybeReason); void detach(); @@ -105,7 +105,7 @@ class ReadableStreamDefaultReader : public jsg::Object, jsg::Lock& js, jsg::Ref stream); jsg::MemoizedIdentity>& getClosed(); - jsg::Promise cancel(jsg::Lock& js, jsg::Optional> reason); + jsg::Promise cancel(jsg::Lock& js, jsg::Optional reason); jsg::Promise read(jsg::Lock& js); void releaseLock(jsg::Lock& js); @@ -156,7 +156,7 @@ class ReadableStreamBYOBReader: public jsg::Object, jsg::Ref stream); jsg::MemoizedIdentity>& getClosed(); - jsg::Promise cancel(jsg::Lock& js, jsg::Optional> reason); + jsg::Promise cancel(jsg::Lock& js, jsg::Optional reason); struct ReadableStreamBYOBReaderReadOptions { jsg::Optional min; @@ -238,7 +238,7 @@ class DrainingReader: public ReadableStreamController::Reader { jsg::Promise read(jsg::Lock& js, size_t maxRead = kj::maxValue); // Cancels the stream. - jsg::Promise cancel(jsg::Lock& js, jsg::Optional> maybeReason); + jsg::Promise cancel(jsg::Lock& js, jsg::Optional maybeReason); // Releases the lock on the stream. 
void releaseLock(jsg::Lock& js); @@ -272,14 +272,14 @@ class ReadableStream: public jsg::Object { bool preventCancel; }; - static jsg::Promise> nextFunction( + static jsg::Promise>> nextFunction( jsg::Lock& js, AsyncIteratorState& state); static jsg::Promise returnFunction( jsg::Lock& js, AsyncIteratorState& state, - jsg::Optional& value); + jsg::Optional>& value); public: explicit ReadableStream(IoContext& ioContext, @@ -304,7 +304,7 @@ class ReadableStream: public jsg::Object { jsg::Optional underlyingSource, jsg::Optional queuingStrategy); - static jsg::Ref from(jsg::Lock& js, jsg::AsyncGenerator generator); + static jsg::Ref from(jsg::Lock& js, jsg::AsyncGenerator> generator); bool isLocked(); @@ -312,7 +312,7 @@ class ReadableStream: public jsg::Object { // results. `reason` will be passed to the underlying source's cancel algorithm -- if this // readable stream is one side of a transform stream, then its cancel algorithm causes the // transform's writable side to become errored with `reason`. - jsg::Promise cancel(jsg::Lock& js, jsg::Optional> reason); + jsg::Promise cancel(jsg::Lock& js, jsg::Optional reason); using Reader = kj::OneOf, jsg::Ref>; @@ -337,7 +337,7 @@ class ReadableStream: public jsg::Object { JSG_ASYNC_ITERATOR_WITH_OPTIONS(ReadableStreamAsyncIterator, values, - jsg::Value, + jsg::V8Ref, AsyncIteratorState, nextFunction, returnFunction, @@ -491,8 +491,7 @@ struct QueuingStrategyInit { JSG_STRUCT(highWaterMark); }; -using QueuingStrategySizeFunction = - jsg::Optional(jsg::Optional>); +using QueuingStrategySizeFunction = jsg::Optional(jsg::Optional); // Utility class defined by the streams spec that uses byteLength to calculate // backpressure changes. 
@@ -519,7 +518,7 @@ class ByteLengthQueuingStrategy: public jsg::Object { } private: - static jsg::Optional size(jsg::Lock& js, jsg::Optional>); + static jsg::Optional size(jsg::Lock& js, jsg::Optional); QueuingStrategyInit init; }; @@ -549,7 +548,7 @@ class CountQueuingStrategy: public jsg::Object { } private: - static jsg::Optional size(jsg::Lock& js, jsg::Optional>) { + static jsg::Optional size(jsg::Lock& js, jsg::Optional) { return 1; } diff --git a/src/workerd/api/streams/standard-test.c++ b/src/workerd/api/streams/standard-test.c++ index 3dec1d8871b..7360b919e62 100644 --- a/src/workerd/api/streams/standard-test.c++ +++ b/src/workerd/api/streams/standard-test.c++ @@ -15,8 +15,9 @@ void preamble(auto callback) { fixture.runInIoContext([&](const TestFixture::Environment& env) { callback(env.js); }); } -v8::Local toBytes(jsg::Lock& js, kj::String str) { - return jsg::BackingStore::from(js, str.asBytes().attach(kj::mv(str))).createHandle(js); +jsg::JsValue toBytes(jsg::Lock& js, kj::String str) { + return jsg::JsValue( + jsg::BackingStore::from(js, str.asBytes().attach(kj::mv(str))).createHandle(js)); } jsg::BufferSource toBufferSource(jsg::Lock& js, kj::String str) { @@ -230,8 +231,8 @@ KJ_TEST("ReadableStream read all bytes (value readable)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then( - js, [&](jsg::Lock& js, jsg::BufferSource&& text) { - KJ_ASSERT(text.asArrayPtr() == "Hello, world!"_kjb); + js, [&](jsg::Lock& js, jsg::JsRef text) { + KJ_ASSERT(text.getHandle(js).asArrayPtr() == "Hello, world!"_kjb); checked++; }); @@ -287,8 +288,8 @@ KJ_TEST("ReadableStream read all bytes (byte readable)") { // Starts a read loop of javascript promises. 
auto promise = rs->getController().readAllBytes(js, 20).then( - js, [&](jsg::Lock& js, jsg::BufferSource&& text) { - KJ_ASSERT(text.asArrayPtr() == "Hello, world!"_kjb); + js, [&](jsg::Lock& js, jsg::JsRef text) { + KJ_ASSERT(text.getHandle(js).asArrayPtr() == "Hello, world!"_kjb); checked++; }); @@ -349,8 +350,8 @@ KJ_TEST("ReadableStream read all bytes (value readable, more reads)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then( - js, [&](jsg::Lock& js, jsg::BufferSource&& text) { - KJ_ASSERT(text.asArrayPtr() == "Hello, world!"_kjb); + js, [&](jsg::Lock& js, jsg::JsRef text) { + KJ_ASSERT(text.getHandle(js).asArrayPtr() == "Hello, world!"_kjb); checked++; }); @@ -412,8 +413,8 @@ KJ_TEST("ReadableStream read all bytes (byte readable, more reads)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then( - js, [&](jsg::Lock& js, jsg::BufferSource&& text) { - KJ_ASSERT(text.asArrayPtr() == "Hello, world!"_kjb); + js, [&](jsg::Lock& js, jsg::JsRef text) { + KJ_ASSERT(text.getHandle(js).asArrayPtr() == "Hello, world!"_kjb); checked++; }); @@ -479,13 +480,14 @@ KJ_TEST("ReadableStream read all bytes (byte readable, large data)") { // Starts a read loop of javascript promises. 
auto promise = rs->getController() .readAllBytes(js, (BASE * 7) + 1) - .then(js, [&](jsg::Lock& js, jsg::BufferSource&& text) { + .then(js, [&](jsg::Lock& js, jsg::JsRef text) { kj::byte check[BASE * 7]{}; kj::arrayPtr(check).first(BASE).fill('A'); kj::arrayPtr(check).slice(BASE).first(BASE * 2).fill('B'); kj::arrayPtr(check).slice(BASE * 3).fill('C'); - KJ_ASSERT(text.size() == BASE * 7); - KJ_ASSERT(text.asArrayPtr() == check); + auto handle = text.getHandle(js); + KJ_ASSERT(handle.size() == BASE * 7); + KJ_ASSERT(handle.asArrayPtr() == check); checked++; }); @@ -546,7 +548,7 @@ KJ_TEST("ReadableStream read all bytes (value readable, wrong type)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then(js, - [](jsg::Lock& js, jsg::BufferSource&& text) { KJ_UNREACHABLE; }, + [](jsg::Lock& js, jsg::JsRef text) { KJ_UNREACHABLE; }, [&](jsg::Lock& js, jsg::Value&& exception) { KJ_ASSERT(kj::str(exception.getHandle(js)) == "TypeError: This ReadableStream did not return bytes."); @@ -601,7 +603,7 @@ KJ_TEST("ReadableStream read all bytes (value readable, to many bytes)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then(js, - [](jsg::Lock& js, jsg::BufferSource&& text) { KJ_UNREACHABLE; }, + [](jsg::Lock& js, jsg::JsRef text) { KJ_UNREACHABLE; }, [&](jsg::Lock& js, jsg::Value&& exception) { KJ_ASSERT(kj::str(exception.getHandle(js)) == "TypeError: Memory limit exceeded before EOF."); checked++; @@ -656,7 +658,7 @@ KJ_TEST("ReadableStream read all bytes (byte readable, to many bytes)") { // Starts a read loop of javascript promises. 
auto promise = rs->getController().readAllBytes(js, 20).then(js, - [](jsg::Lock& js, jsg::BufferSource&& text) { KJ_UNREACHABLE; }, + [](jsg::Lock& js, jsg::JsRef text) { KJ_UNREACHABLE; }, [&](jsg::Lock& js, jsg::Value&& exception) { KJ_ASSERT(kj::str(exception.getHandle(js)) == "TypeError: Memory limit exceeded before EOF."); checked++; @@ -698,7 +700,7 @@ KJ_TEST("ReadableStream read all bytes (byte readable, failed read)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then(js, - [](jsg::Lock& js, jsg::BufferSource&& text) { KJ_UNREACHABLE; }, + [](jsg::Lock& js, jsg::JsRef text) { KJ_UNREACHABLE; }, [&](jsg::Lock& js, jsg::Value&& exception) { KJ_ASSERT(kj::str(exception.getHandle(js)) == "Error: boom"); checked++; @@ -739,7 +741,7 @@ KJ_TEST("ReadableStream read all bytes (value readable, failed read)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then(js, - [](jsg::Lock& js, jsg::BufferSource&& text) { KJ_UNREACHABLE; }, + [](jsg::Lock& js, jsg::JsRef text) { KJ_UNREACHABLE; }, [&](jsg::Lock& js, jsg::Value&& exception) { KJ_ASSERT(kj::str(exception.getHandle(js)) == "Error: boom"); checked++; @@ -781,7 +783,7 @@ KJ_TEST("ReadableStream read all bytes (byte readable, failed start)") { // Starts a read loop of javascript promises. auto promise = rs->getController().readAllBytes(js, 20).then(js, - [](jsg::Lock& js, jsg::BufferSource&& text) { KJ_UNREACHABLE; }, + [](jsg::Lock& js, jsg::JsRef text) { KJ_UNREACHABLE; }, [&](jsg::Lock& js, jsg::Value&& exception) { KJ_ASSERT(kj::str(exception.getHandle(js)) == "Error: boom"); checked++; @@ -823,7 +825,7 @@ KJ_TEST("ReadableStream read all bytes (byte readable, failed start 2)") { // Starts a read loop of javascript promises. 
auto promise = rs->getController().readAllBytes(js, 20).then(js, - [](jsg::Lock& js, jsg::BufferSource&& text) { KJ_UNREACHABLE; }, + [](jsg::Lock& js, jsg::JsRef text) { KJ_UNREACHABLE; }, [&](jsg::Lock& js, jsg::Value&& exception) { KJ_ASSERT(kj::str(exception.getHandle(js)) == "Error: boom"); checked++; @@ -1994,7 +1996,7 @@ KJ_TEST("WritableStream close during abort algorithm returns rejected promise") // clang-format off ws->getController().setup(js, UnderlyingSink{ - .abort = [&](jsg::Lock& js, v8::Local reason) -> jsg::Promise { + .abort = [&](jsg::Lock& js, jsg::JsValue reason) -> jsg::Promise { abortCalled = true; // Re-entrantly call close() on the writer during the abort algorithm. // At this point, WritableImpl has already transitioned to Errored state @@ -2122,7 +2124,7 @@ KJ_TEST("DrainingReader: pull that synchronously errors does not UAF (value stre .pull = [&](jsg::Lock& js, UnderlyingSource::Controller controller) { KJ_SWITCH_ONEOF(controller) { KJ_CASE_ONEOF(c, jsg::Ref) { - c->error(js, js.v8TypeError("test error"_kj)); + c->error(js, js.typeError("test error"_kj)); return js.resolvedPromise(); } KJ_CASE_ONEOF(c, jsg::Ref) {} @@ -2360,7 +2362,7 @@ KJ_TEST("DrainingReader: pending error in endOperation rejects read (value strea // and calls doError(), which defers the error because beginOperation() is // active. When wrapDrainingRead's endOperation() fires, it applies the // pending error and should throw rather than returning the data. 
- return js.rejectedPromise(js.v8TypeError("pull failed"_kj)); + return js.rejectedPromise(js.typeError("pull failed"_kj)); } KJ_CASE_ONEOF(c, jsg::Ref) {} } @@ -2396,7 +2398,7 @@ KJ_TEST("DrainingReader: pending error in endOperation rejects read (byte stream KJ_CASE_ONEOF(c, jsg::Ref) {} KJ_CASE_ONEOF(c, jsg::Ref) { c->enqueue(js, toBufferSource(js, kj::str("should-be-discarded"))); - return js.rejectedPromise(js.v8TypeError("pull failed"_kj)); + return js.rejectedPromise(js.typeError("pull failed"_kj)); } } KJ_UNREACHABLE; diff --git a/src/workerd/api/streams/standard.c++ b/src/workerd/api/streams/standard.c++ index a3154877059..4c7f4adb218 100644 --- a/src/workerd/api/streams/standard.c++ +++ b/src/workerd/api/streams/standard.c++ @@ -62,7 +62,7 @@ class ReadableLockImpl { bool lock(); void onClose(jsg::Lock& js); - void onError(jsg::Lock& js, v8::Local reason); + void onError(jsg::Lock& js, jsg::JsValue reason); kj::Maybe tryPipeLock(Controller& self); @@ -95,14 +95,14 @@ class ReadableLockImpl { return inner.state.template is(); } - kj::Maybe> tryGetErrored(jsg::Lock& js) override { + kj::Maybe tryGetErrored(jsg::Lock& js) override { KJ_IF_SOME(errored, inner.state.template tryGetUnsafe()) { return errored.getHandle(js); } return kj::none; } - void cancel(jsg::Lock& js, v8::Local reason) override { + void cancel(jsg::Lock& js, jsg::JsValue reason) override { // Cancel here returns a Promise but we do not need to propagate it. // We can safely drop it on the floor here. 
auto promise KJ_UNUSED = inner.cancel(js, reason); @@ -112,11 +112,11 @@ class ReadableLockImpl { inner.doClose(js); } - void error(jsg::Lock& js, v8::Local reason) override { + void error(jsg::Lock& js, jsg::JsValue reason) override { inner.doError(js, reason); } - void release(jsg::Lock& js, kj::Maybe> maybeError = kj::none) override { + void release(jsg::Lock& js, kj::Maybe maybeError = kj::none) override { KJ_IF_SOME(error, maybeError) { cancel(js, error); } @@ -334,7 +334,7 @@ void ReadableLockImpl::onClose(jsg::Lock& js) { } template -void ReadableLockImpl::onError(jsg::Lock& js, v8::Local reason) { +void ReadableLockImpl::onError(jsg::Lock& js, jsg::JsValue reason) { KJ_IF_SOME(locked, state.template tryGetUnsafe()) { try { maybeRejectPromise(js, locked.getClosedFulfiller(), reason); @@ -429,7 +429,7 @@ void WritableLockImpl::releaseWriter( // Per spec (WritableStreamDefaultWriterRelease), both the ready and closed // promises must be rejected when the writer is released. - auto releaseReason = js.v8TypeError("This WritableStream writer has been released."_kjc); + auto releaseReason = js.typeError("This WritableStream writer has been released."_kjc); if (FeatureFlags::get(js).getWritableStreamSpecCompliantWriter()) { if (locked.getReadyFulfiller() != kj::none) { maybeRejectPromise(js, locked.getReadyFulfiller(), releaseReason); @@ -515,14 +515,15 @@ kj::Maybe> WritableLockImpl::PipeLocked::checkSig if (signal->getAborted(js)) { auto reason = signal->getReason(js); if (!flags.preventCancel) { - source.release(js, v8::Local(reason)); + source.release(js, reason); } else { source.release(js); } if (!flags.preventAbort) { - return self.abort(js, reason).then(js, JSG_VISITABLE_LAMBDA((this, reason = reason.addRef(js), ref = self.addRef()), (reason, ref), (jsg::Lock& js) { + return self.abort(js, reason) + .then(js, [this, reason = reason.addRef(js), ref = self.addRef()](jsg::Lock& js) { return rejectedMaybeHandledPromise(js, reason.getHandle(js), 
flags.pipeThrough); - })); + }); } return rejectedMaybeHandledPromise(js, reason, flags.pipeThrough); } @@ -559,27 +560,19 @@ jsg::Promise maybeRunAlgorithm( // onFailure case since such errors are generally indicative of a fatal // condition in the isolate (e.g. out of memory, other fatal exception, etc). JSG_TRY(js) { + auto promise = ([&]() -> jsg::Promise { + JSG_TRY(js) { + return algorithm(js, kj::fwd(args)...); + } + JSG_CATCH(exception) { + return js.rejectedPromise(kj::mv(exception)); + } + })(); KJ_IF_SOME(ioContext, IoContext::tryCurrent()) { - auto getInnerPromise = [&]() -> jsg::Promise { - JSG_TRY(js) { - return algorithm(js, kj::fwd(args)...); - } - JSG_CATCH(exception) { - return js.rejectedPromise(kj::mv(exception)); - } - }; - return getInnerPromise().then( + return promise.then( js, ioContext.addFunctor(kj::mv(onSuccess)), ioContext.addFunctor(kj::mv(onFailure))); } else { - auto getInnerPromise = [&]() -> jsg::Promise { - JSG_TRY(js) { - return algorithm(js, kj::fwd(args)...); - } - JSG_CATCH(exception) { - return js.rejectedPromise(kj::mv(exception)); - } - }; - return getInnerPromise().then(js, kj::mv(onSuccess), kj::mv(onFailure)); + return promise.then(js, kj::mv(onSuccess), kj::mv(onFailure)); } } JSG_CATCH(exception) { @@ -611,21 +604,25 @@ jsg::Promise maybeRunAlgorithmAsync( // rare cases. For those we return a rejected promise but do not call the // onFailure case since such errors are generally indicative of a fatal // condition in the isolate (e.g. out of memory, other fatal exception, etc). 
- return js.tryCatch([&] { + JSG_TRY(js) { + auto promise = ([&] { + JSG_TRY(js) { + return algorithm(js, kj::fwd(args)...); + } + JSG_CATCH(exception) { + return js.rejectedPromise(kj::mv(exception)); + }; + })(); KJ_IF_SOME(ioContext, IoContext::tryCurrent()) { - return js - .tryCatch([&] { return algorithm(js, kj::fwd(args)...); }, - [&](jsg::Value&& exception) { return js.rejectedPromise(kj::mv(exception)); }) - .then(js, ioContext.addFunctor(kj::mv(onSuccess)), - ioContext.addFunctor(kj::mv(onFailure))); + return promise.then( + js, ioContext.addFunctor(kj::mv(onSuccess)), ioContext.addFunctor(kj::mv(onFailure))); } else { - return js - .tryCatch([&] { return algorithm(js, kj::fwd(args)...); }, - [&](jsg::Value&& exception) { - return js.rejectedPromise(kj::mv(exception)); - }).then(js, kj::mv(onSuccess), kj::mv(onFailure)); + return promise.then(js, kj::mv(onSuccess), kj::mv(onFailure)); } - }, [&](jsg::Value&& exception) { return js.rejectedPromise(kj::mv(exception)); }); + } + JSG_CATCH(exception) { + return js.rejectedPromise(kj::mv(exception)); + }; } // If the algorithm does not exist, we handle it as a success but ensure @@ -659,7 +656,7 @@ jsg::Promise deferControllerStateChange(jsg::Lock& js, // methods, as well as the methods can trigger JavaScript errors to be thrown // synchronously in some cases. We want to make sure non-fatal errors cause the // stream to error and only fatal cases bubble up. 
- return js.tryCatch([&] { + JSG_TRY(js) { controller.state.beginOperation(); auto result = readCallback(); endOperation = false; @@ -682,15 +679,17 @@ jsg::Promise deferControllerStateChange(jsg::Lock& js, } return kj::mv(result); - }, [&](jsg::Value exception) -> jsg::Promise { + } + JSG_CATCH(exception) { if (endOperation) { // Clear any pending state since we're erroring controller.state.clearPendingState(); (void)controller.state.endOperation(); } - controller.doError(js, exception.getHandle(js)); - return js.rejectedPromise(kj::mv(exception)); - }); + auto err = jsg::JsValue(exception.getHandle(js)); + controller.doError(js, err); + return js.rejectedPromise(err); + }; } // The ReadableStreamJsController provides the implementation of custom @@ -746,11 +745,11 @@ class ReadableStreamJsController final: public ReadableStreamController { // is still pending, the ReadableStream will be no longer usable and any // data still in the queue will be dropped. Pending read requests will be // rejected if a reason is given, or resolved with no data otherwise. 
- jsg::Promise cancel(jsg::Lock& js, jsg::Optional> reason) override; + jsg::Promise cancel(jsg::Lock& js, jsg::Optional reason) override; void doClose(jsg::Lock& js); - void doError(jsg::Lock& js, v8::Local reason); + void doError(jsg::Lock& js, jsg::JsValue reason); bool canCloseOrEnqueue(); bool hasBackpressure(); @@ -767,7 +766,7 @@ class ReadableStreamJsController final: public ReadableStreamController { bool lockReader(jsg::Lock& js, Reader& reader) override; - kj::Maybe> isErrored(jsg::Lock& js); + kj::Maybe isErrored(jsg::Lock& js); kj::Maybe getDesiredSize(); @@ -796,7 +795,7 @@ class ReadableStreamJsController final: public ReadableStreamController { kj::Maybe> getController(); - jsg::Promise readAllBytes(jsg::Lock& js, uint64_t limit) override; + jsg::Promise> readAllBytes(jsg::Lock& js, uint64_t limit) override; jsg::Promise readAllText(jsg::Lock& js, uint64_t limit) override; kj::Maybe tryGetLength(StreamEncoding encoding) override; @@ -886,7 +885,7 @@ class WritableStreamJsController final: public WritableStreamController { KJ_DISALLOW_COPY_AND_MOVE(WritableStreamJsController); - jsg::Promise abort(jsg::Lock& js, jsg::Optional> reason) override; + jsg::Promise abort(jsg::Lock& js, jsg::Optional reason) override; jsg::Ref addRef() override; @@ -898,16 +897,16 @@ class WritableStreamJsController final: public WritableStreamController { void doClose(jsg::Lock& js); - void doError(jsg::Lock& js, v8::Local reason); + void doError(jsg::Lock& js, jsg::JsValue reason); // Error through the underlying controller if available, going through the proper // error transition (Erroring -> Errored). 
- void errorIfNeeded(jsg::Lock& js, v8::Local reason); + void errorIfNeeded(jsg::Lock& js, jsg::JsValue reason); kj::Maybe getDesiredSize() override; - kj::Maybe> isErroring(jsg::Lock& js) override; - kj::Maybe> isErroredOrErroring(jsg::Lock& js); + kj::Maybe isErroring(jsg::Lock& js) override; + kj::Maybe isErroredOrErroring(jsg::Lock& js); bool isLocked() const; @@ -923,7 +922,7 @@ class WritableStreamJsController final: public WritableStreamController { bool lockWriter(jsg::Lock& js, Writer& writer) override; - void maybeRejectReadyPromise(jsg::Lock& js, v8::Local reason); + void maybeRejectReadyPromise(jsg::Lock& js, jsg::JsValue reason); void maybeResolveReadyPromise(jsg::Lock& js); @@ -944,7 +943,7 @@ class WritableStreamJsController final: public WritableStreamController { void updateBackpressure(jsg::Lock& js, bool backpressure); - jsg::Promise write(jsg::Lock& js, jsg::Optional> value) override; + jsg::Promise write(jsg::Lock& js, jsg::Optional value) override; void visitForGc(jsg::GcVisitor& visitor) override; @@ -1015,21 +1014,21 @@ void ReadableImpl::start(jsg::Lock& js, jsg::Ref self) { // Per the streams spec, the size function should be called with `undefined` as `this`, // not as a method on the strategy object. 
KJ_IF_SOME(sizeFunc, algorithms.size) { - sizeFunc.setReceiver(jsg::Value(js.v8Isolate, js.v8Undefined())); + sizeFunc.setReceiver(js.v8Ref(v8::Local(js.undefined()))); } - auto onSuccess = JSG_VISITABLE_LAMBDA((this, self = self.addRef()), (self), (jsg::Lock& js) { + auto onSuccess = [this, self = self.addRef()](jsg::Lock& js) mutable { flags.started = true; flags.starting = false; pullIfNeeded(js, kj::mv(self)); - }); + }; - auto onFailure = JSG_VISITABLE_LAMBDA( - (this, self = self.addRef()), (self), (jsg::Lock& js, jsg::Value reason) { - flags.started = true; - flags.starting = false; - doError(js, kj::mv(reason)); - }); + auto onFailure = [this, self = self.addRef()](jsg::Lock& js, jsg::Value reason) mutable { + flags.started = true; + flags.starting = false; + auto err = jsg::JsValue(reason.getHandle(js)); + doError(js, err); + }; maybeRunAlgorithm(js, algorithms.start, kj::mv(onSuccess), kj::mv(onFailure), kj::mv(self)); algorithms.start = kj::none; @@ -1042,7 +1041,7 @@ size_t ReadableImpl::consumerCount() { template jsg::Promise ReadableImpl::cancel( - jsg::Lock& js, jsg::Ref self, v8::Local reason) { + jsg::Lock& js, jsg::Ref self, jsg::JsValue reason) { if (state.template is()) { // We are already closed. There's nothing to cancel. // This shouldn't happen but we handle the case anyway, just to be safe. @@ -1095,29 +1094,25 @@ bool ReadableImpl::canCloseOrEnqueue() { // that they called cancel. What we do want to do here, tho, is close the implementation // and trigger the cancel algorithm. 
template -void ReadableImpl::doCancel(jsg::Lock& js, jsg::Ref self, v8::Local reason) { +void ReadableImpl::doCancel(jsg::Lock& js, jsg::Ref self, jsg::JsValue reason) { state.template transitionTo(); - auto onSuccess = JSG_VISITABLE_LAMBDA((this, self = self.addRef()), (self), (jsg::Lock& js) { + auto onSuccess = [this, self = self.addRef()](jsg::Lock& js) mutable { doClose(js); KJ_IF_SOME(pendingCancel, maybePendingCancel) { - maybeResolvePromise(js, pendingCancel.fulfiller); - } else { - // Else block to avert dangling else compiler warning. + maybeResolvePromise(js, pendingCancel.fulfiller); } - }); - auto onFailure = JSG_VISITABLE_LAMBDA( - (this, self = self.addRef()), (self), (jsg::Lock& js, jsg::Value reason) { - // We do not call doError() here because there's really no point. Everything - // that cares about the state of this controller impl has signaled that it - // no longer cares and has gone away. - doClose(js); - KJ_IF_SOME(pendingCancel, maybePendingCancel) { - maybeRejectPromise(js, pendingCancel.fulfiller, reason.getHandle(js)); - } else { - // Else block to avert dangling else compiler warning. - } - }); + }; + auto onFailure = [this, self = self.addRef()](jsg::Lock& js, jsg::Value reason) mutable { + // We do not call doError() here because there's really no point. Everything + // that cares about the state of this controller impl has signaled that it + // no longer cares and has gone away. 
+ doClose(js); + KJ_IF_SOME(pendingCancel, maybePendingCancel) { + auto err = jsg::JsValue(reason.getHandle(js)); + maybeRejectPromise(js, pendingCancel.fulfiller, err); + } + }; maybeRunAlgorithm(js, algorithms.cancel, kj::mv(onSuccess), kj::mv(onFailure), reason); } @@ -1136,10 +1131,9 @@ void ReadableImpl::close(jsg::Lock& js) { auto& queue = state.template getUnsafe(); if (queue.hasPartiallyFulfilledRead()) { - auto error = - js.v8Ref(js.v8TypeError("This ReadableStream was closed with a partial read pending.")); - doError(js, error.addRef(js)); - js.throwException(kj::mv(error)); + auto err = js.typeError("This ReadableStream was closed with a partial read pending."); + doError(js, err); + js.throwException(err); return; } @@ -1157,15 +1151,15 @@ void ReadableImpl::doClose(jsg::Lock& js) { } template -void ReadableImpl::doError(jsg::Lock& js, jsg::Value reason) { +void ReadableImpl::doError(jsg::Lock& js, jsg::JsValue reason) { // If already closed or errored, do nothing if (state.isInactive()) { return; } auto& queue = state.template getUnsafe(); - queue.error(js, reason.addRef(js)); - state.template transitionTo(kj::mv(reason)); + queue.error(js, reason); + state.template transitionTo(reason.addRef(js)); algorithms.clear(); } @@ -1203,19 +1197,19 @@ void ReadableImpl::pullIfNeeded(jsg::Lock& js, jsg::Ref self) { KJ_ASSERT(!flags.pullAgain); flags.pulling = true; - auto onSuccess = JSG_VISITABLE_LAMBDA((this, self = self.addRef()), (self), (jsg::Lock& js) { + auto onSuccess = [this, self = self.addRef()](jsg::Lock& js) mutable { flags.pulling = false; if (flags.pullAgain) { - flags.pullAgain = false; - pullIfNeeded(js, kj::mv(self)); + flags.pullAgain = false; + pullIfNeeded(js, kj::mv(self)); } - }); + }; - auto onFailure = JSG_VISITABLE_LAMBDA( - (this, self = self.addRef()), (self), (jsg::Lock& js, jsg::Value reason) { - flags.pulling = false; - doError(js, kj::mv(reason)); - }); + auto onFailure = [this, self = self.addRef()](jsg::Lock& js, jsg::Value 
reason) mutable { + flags.pulling = false; + auto err = jsg::JsValue(reason.getHandle(js)); + doError(js, err); + }; maybeRunAlgorithm(js, algorithms.pull, kj::mv(onSuccess), kj::mv(onFailure), self.addRef()); } @@ -1235,20 +1229,20 @@ void ReadableImpl::forcePullIfNeeded(jsg::Lock& js, jsg::Ref self) { KJ_ASSERT(!flags.pullAgain); flags.pulling = true; - auto onSuccess = JSG_VISITABLE_LAMBDA((this, self = self.addRef()), (self), (jsg::Lock& js) { + auto onSuccess = [this, self = self.addRef()](jsg::Lock& js) mutable { flags.pulling = false; if (flags.pullAgain) { - flags.pullAgain = false; - // After a force pull, we go back to normal pullIfNeeded behavior. - pullIfNeeded(js, kj::mv(self)); + flags.pullAgain = false; + // After a force pull, we go back to normal pullIfNeeded behavior. + pullIfNeeded(js, kj::mv(self)); } - }); + }; - auto onFailure = JSG_VISITABLE_LAMBDA( - (this, self = self.addRef()), (self), (jsg::Lock& js, jsg::Value reason) { - flags.pulling = false; - doError(js, kj::mv(reason)); - }); + auto onFailure = [this, self = self.addRef()](jsg::Lock& js, jsg::Value reason) mutable { + flags.pulling = false; + auto err = jsg::JsValue(reason.getHandle(js)); + doError(js, err); + }; maybeRunAlgorithm(js, algorithms.pull, kj::mv(onSuccess), kj::mv(onFailure), self.addRef()); } @@ -1281,16 +1275,16 @@ WritableImpl::WritableImpl( template jsg::Promise WritableImpl::abort( - jsg::Lock& js, jsg::Ref self, v8::Local reason) { + jsg::Lock& js, jsg::Ref self, jsg::JsValue reason) { // Per the spec, the signal.reason should be a DOMException with name 'AbortError' // when no reason is provided, but the stored error should remain as the original reason. 
auto signalReason = [&]() -> jsg::JsValue { - if (reason->IsUndefined() && FeatureFlags::get(js).getPedanticWpt()) { + if (reason.isUndefined() && FeatureFlags::get(js).getPedanticWpt()) { auto ex = js.domException( kj::str("AbortError"), kj::str("This writable stream has been aborted."), kj::none); return jsg::JsValue(KJ_ASSERT_NONNULL(ex.tryGetHandle(js))); } - return jsg::JsValue(reason); + return reason; }(); signal->triggerAbort(js, signalReason); @@ -1302,19 +1296,19 @@ jsg::Promise WritableImpl::abort( KJ_IF_SOME(pendingAbort, maybePendingAbort) { // Notice here that, per the spec, the reason given in this call of abort is // intentionally ignored if there is already an abort pending. - return pendingAbort->whenResolved(js); + return pendingAbort.whenResolved(js); } bool wasAlreadyErroring = false; if (state.template is()) { wasAlreadyErroring = true; - reason = js.v8Undefined(); + reason = js.undefined(); } KJ_DEFER(if (!wasAlreadyErroring) { startErroring(js, kj::mv(self), reason); }); - maybePendingAbort = kj::heap(js, reason, wasAlreadyErroring); - return KJ_ASSERT_NONNULL(maybePendingAbort)->whenResolved(js); + auto& pending = maybePendingAbort.emplace(js, reason, wasAlreadyErroring); + return pending.whenResolved(js); } template @@ -1349,13 +1343,13 @@ void WritableImpl::advanceQueueIfNeeded(jsg::Lock& js, jsg::Ref self KJ_ASSERT_NONNULL(closeRequest); inFlightClose = kj::mv(closeRequest); - auto onSuccess = JSG_VISITABLE_LAMBDA((this, self = self.addRef()), (self), - (jsg::Lock& js) { finishInFlightClose(js, kj::mv(self)); }); + auto onSuccess = [this, self = self.addRef()]( + jsg::Lock& js) mutable { finishInFlightClose(js, kj::mv(self)); }; - auto onFailure = JSG_VISITABLE_LAMBDA( - (this, self = self.addRef()), (self), (jsg::Lock& js, jsg::Value reason) { - finishInFlightClose(js, kj::mv(self), reason.getHandle(js)); - }); + auto onFailure = [this, self = self.addRef()](jsg::Lock& js, jsg::Value reason) mutable { + auto err = 
jsg::JsValue(reason.getHandle(js)); + finishInFlightClose(js, kj::mv(self), err); + }; // Per the spec, the close algorithm should always run asynchronously, even if // there's no user-provided close handler. This ensures that releaseLock() can @@ -1378,36 +1372,34 @@ void WritableImpl::advanceQueueIfNeeded(jsg::Lock& js, jsg::Ref self auto size = req.size; inFlightWrite = kj::mv(req); - auto onSuccess = - JSG_VISITABLE_LAMBDA((this, self = self.addRef(), size), (self), (jsg::Lock& js) { - amountBuffered -= size; - finishInFlightWrite(js, self.addRef()); - KJ_ASSERT(isWritable() || state.template is()); - if (!isCloseQueuedOrInFlight() && isWritable()) { - updateBackpressure(js); - } - if (state.template is() || writeRequests.empty()) { - // In this case, we know advanceQueueIfNeeded won't recurse further, so we can - // avoid the extra microtask hop. + auto onSuccess = [this, self = self.addRef(), size](jsg::Lock& js) mutable { + amountBuffered -= size; + finishInFlightWrite(js, self.addRef()); + KJ_ASSERT(isWritable() || state.template is()); + if (!isCloseQueuedOrInFlight() && isWritable()) { + updateBackpressure(js); + } + if (state.template is() || writeRequests.empty()) { + // In this case, we know advanceQueueIfNeeded won't recurse further, so we can + // avoid the extra microtask hop. + advanceQueueIfNeeded(js, kj::mv(self)); + return js.resolvedPromise(); + } + // Here, however, let's avoid potentially deep recursion by hopping to a new + // microtask to continue processing the queue. + return js.resolvedPromise().then(js, [this, self = kj::mv(self)](jsg::Lock& js) mutable { + if (isWritable() || state.template is()) { advanceQueueIfNeeded(js, kj::mv(self)); - return js.resolvedPromise(); - } - // Here, however, let's avoid potentially deep recursion by hopping to a new - // microtask to continue processing the queue. 
- return js.resolvedPromise().then( - js, JSG_VISITABLE_LAMBDA((this, self = kj::mv(self)), (self), (jsg::Lock & js) mutable { - if (isWritable() || state.template is()) { - advanceQueueIfNeeded(js, kj::mv(self)); - } - })); - }); + } + }); + }; - auto onFailure = JSG_VISITABLE_LAMBDA( - (this, self = self.addRef(), size), (self), (jsg::Lock& js, jsg::Value reason) { - amountBuffered -= size; - finishInFlightWrite(js, kj::mv(self), reason.getHandle(js)); - return js.resolvedPromise(); - }); + auto onFailure = [this, self = self.addRef(), size](jsg::Lock& js, jsg::Value reason) mutable { + amountBuffered -= size; + auto err = jsg::JsValue(reason.getHandle(js)); + finishInFlightWrite(js, kj::mv(self), err); + return js.resolvedPromise(); + }; // Per the spec, the write algorithm should always run asynchronously, even if // there's no user-provided write handler. This ensures that backpressure changes @@ -1425,10 +1417,10 @@ void WritableImpl::advanceQueueIfNeeded(jsg::Lock& js, jsg::Ref self template jsg::Promise WritableImpl::close(jsg::Lock& js, jsg::Ref self) { if (state.template is()) { - return js.rejectedPromise(js.v8TypeError("This WritableStream has been closed."_kj)); + return js.rejectedPromise(js.typeError("This WritableStream has been closed."_kj)); } KJ_IF_SOME(errored, state.template tryGetUnsafe()) { - return js.rejectedPromise(errored.addRef(js)); + return js.rejectedPromise(errored.getHandle(js)); } KJ_ASSERT(isWritable() || state.template is()); JSG_REQUIRE( @@ -1449,7 +1441,7 @@ jsg::Promise WritableImpl::close(jsg::Lock& js, jsg::Ref self) template void WritableImpl::dealWithRejection( - jsg::Lock& js, jsg::Ref self, v8::Local reason) { + jsg::Lock& js, jsg::Ref self, jsg::JsValue reason) { if (isWritable()) { return startErroring(js, kj::mv(self), reason); } @@ -1481,7 +1473,7 @@ void WritableImpl::doClose(jsg::Lock& js) { } template -void WritableImpl::doError(jsg::Lock& js, v8::Local reason) { +void WritableImpl::doError(jsg::Lock& js, 
jsg::JsValue reason) { KJ_ASSERT(closeRequest == kj::none); KJ_ASSERT(inFlightClose == kj::none); KJ_ASSERT(inFlightWrite == kj::none); @@ -1497,7 +1489,7 @@ void WritableImpl::doError(jsg::Lock& js, v8::Local reason) { } template -void WritableImpl::error(jsg::Lock& js, jsg::Ref self, v8::Local reason) { +void WritableImpl::error(jsg::Lock& js, jsg::Ref self, jsg::JsValue reason) { if (isWritable()) { algorithms.clear(); startErroring(js, kj::mv(self), reason); @@ -1518,24 +1510,23 @@ void WritableImpl::finishErroring(jsg::Lock& js, jsg::Ref self) { KJ_ASSERT(writeRequests.empty()); KJ_IF_SOME(pendingAbort, maybePendingAbort) { - if (pendingAbort->reject) { - pendingAbort->fail(js, reason); + if (pendingAbort.reject) { + pendingAbort.fail(js, reason); return rejectCloseAndClosedPromiseIfNeeded(js); } - auto onSuccess = JSG_VISITABLE_LAMBDA((this, self = self.addRef()), (self), (jsg::Lock& js) { + auto onSuccess = [this, self = self.addRef()](jsg::Lock& js) mutable { auto& pendingAbort = KJ_ASSERT_NONNULL(maybePendingAbort); - pendingAbort->reject = false; - pendingAbort->complete(js); + pendingAbort.reject = false; + pendingAbort.complete(js); rejectCloseAndClosedPromiseIfNeeded(js); - }); + }; - auto onFailure = JSG_VISITABLE_LAMBDA( - (this, self = self.addRef()), (self), (jsg::Lock& js, jsg::Value reason) { - auto& pendingAbort = KJ_ASSERT_NONNULL(maybePendingAbort); - pendingAbort->fail(js, reason.getHandle(js)); - rejectCloseAndClosedPromiseIfNeeded(js); - }); + auto onFailure = [this, self = self.addRef()](jsg::Lock& js, jsg::Value reason) mutable { + auto& pendingAbort = KJ_ASSERT_NONNULL(maybePendingAbort); + pendingAbort.fail(js, jsg::JsValue(reason.getHandle(js))); + rejectCloseAndClosedPromiseIfNeeded(js); + }; maybeRunAlgorithm(js, algorithms.abort, kj::mv(onSuccess), kj::mv(onFailure), reason); return; @@ -1545,7 +1536,7 @@ void WritableImpl::finishErroring(jsg::Lock& js, jsg::Ref self) { template void WritableImpl::finishInFlightClose( - jsg::Lock& 
js, jsg::Ref self, kj::Maybe> maybeReason) { + jsg::Lock& js, jsg::Ref self, kj::Maybe maybeReason) { algorithms.clear(); KJ_ASSERT_NONNULL(inFlightClose); KJ_ASSERT(isWritable() || state.template is()); @@ -1553,8 +1544,8 @@ void WritableImpl::finishInFlightClose( KJ_IF_SOME(reason, maybeReason) { maybeRejectPromise(js, inFlightClose, reason); - KJ_IF_SOME(pendingAbort, PendingAbort::dequeue(maybePendingAbort)) { - pendingAbort->fail(js, reason); + KJ_IF_SOME(pendingAbort, kj::mv(maybePendingAbort)) { + pendingAbort.fail(js, reason); } return dealWithRejection(js, kj::mv(self), reason); @@ -1563,12 +1554,12 @@ void WritableImpl::finishInFlightClose( maybeResolvePromise(js, inFlightClose); if (state.template is()) { - KJ_IF_SOME(pendingAbort, PendingAbort::dequeue(maybePendingAbort)) { - pendingAbort->reject = false; - pendingAbort->complete(js); + KJ_IF_SOME(pendingAbort, kj::mv(maybePendingAbort)) { + pendingAbort.reject = false; + pendingAbort.complete(js); } } - KJ_ASSERT(maybePendingAbort == kj::none); + KJ_DASSERT(maybePendingAbort == kj::none); state.template transitionTo(); doClose(js); @@ -1576,7 +1567,7 @@ void WritableImpl::finishInFlightClose( template void WritableImpl::finishInFlightWrite( - jsg::Lock& js, jsg::Ref self, kj::Maybe> maybeReason) { + jsg::Lock& js, jsg::Ref self, kj::Maybe maybeReason) { auto& write = KJ_ASSERT_NONNULL(inFlightWrite); KJ_IF_SOME(reason, maybeReason) { @@ -1601,7 +1592,7 @@ void WritableImpl::rejectCloseAndClosedPromiseIfNeeded(jsg::Lock& js) { auto reason = KJ_ASSERT_NONNULL(state.template tryGetUnsafe()).getHandle(js); maybeRejectPromise(js, closeRequest, reason); - PendingAbort::dequeue(maybePendingAbort); + maybePendingAbort = kj::none; doError(js, reason); } @@ -1622,40 +1613,35 @@ void WritableImpl::setup(jsg::Lock& js, // Per the streams spec, the size function should be called with `undefined` as `this`, // not as a method on the strategy object. 
KJ_IF_SOME(sizeFunc, algorithms.size) { - sizeFunc.setReceiver(jsg::Value(js.v8Isolate, js.v8Undefined())); + sizeFunc.setReceiver(js.v8Ref(v8::Local(js.undefined()))); } - auto onSuccess = JSG_VISITABLE_LAMBDA((this, self = self.addRef()), (self), (jsg::Lock& js) { + auto onSuccess = [this, self = self.addRef()](jsg::Lock& js) mutable { KJ_ASSERT(isWritable() || state.template is()); if (isWritable()) { - // Only resolve the ready promise if an abort is not pending. - // It will have been rejected already. - KJ_IF_SOME(owner, tryGetOwner()) { - owner.maybeResolveReadyPromise(js); - } else { - // Else block to avert dangling else compiler warning. - } + // Only resolve the ready promise if an abort is not pending. + // It will have been rejected already. + KJ_IF_SOME(owner, tryGetOwner()) { + owner.maybeResolveReadyPromise(js); + } } flags.started = true; flags.starting = false; advanceQueueIfNeeded(js, kj::mv(self)); - }); + }; - auto onFailure = JSG_VISITABLE_LAMBDA( - (this, self = self.addRef()), (self), (jsg::Lock& js, jsg::Value reason) { - auto handle = reason.getHandle(js); - KJ_ASSERT(isWritable() || state.template is()); - KJ_IF_SOME(owner, tryGetOwner()) { - owner.maybeRejectReadyPromise(js, handle); - } else { - // Else block to avert dangling else compiler warning. 
- } - flags.started = true; - flags.starting = false; - dealWithRejection(js, kj::mv(self), handle); - }); + auto onFailure = [this, self = self.addRef()](jsg::Lock& js, jsg::Value reason) mutable { + auto handle = jsg::JsValue(reason.getHandle(js)); + KJ_ASSERT(isWritable() || state.template is()); + KJ_IF_SOME(owner, tryGetOwner()) { + owner.maybeRejectReadyPromise(js, handle); + } + flags.started = true; + flags.starting = false; + dealWithRejection(js, kj::mv(self), handle); + }; flags.backpressure = getDesiredSize() <= 0; @@ -1663,13 +1649,12 @@ void WritableImpl::setup(jsg::Lock& js, } template -void WritableImpl::startErroring( - jsg::Lock& js, jsg::Ref self, v8::Local reason) { +void WritableImpl::startErroring(jsg::Lock& js, jsg::Ref self, jsg::JsValue reason) { KJ_ASSERT(isWritable()); KJ_IF_SOME(owner, tryGetOwner()) { owner.maybeRejectReadyPromise(js, reason); } - state.template transitionTo(js.v8Ref(reason)); + state.template transitionTo(reason.addRef(js)); if (inFlightWrite == kj::none && inFlightClose == kj::none && flags.started) { finishErroring(js, kj::mv(self)); } @@ -1691,20 +1676,21 @@ void WritableImpl::updateBackpressure(jsg::Lock& js) { template jsg::Promise WritableImpl::write( - jsg::Lock& js, jsg::Ref self, v8::Local value) { + jsg::Lock& js, jsg::Ref self, jsg::JsValue value) { size_t size = 1; KJ_IF_SOME(sizeFunc, algorithms.size) { - kj::Maybe failure; + kj::Maybe> failure; JSG_TRY(js) { size = sizeFunc(js, value); } JSG_CATCH(exception) { - startErroring(js, self.addRef(), exception.getHandle(js)); - failure = kj::mv(exception); + auto error = jsg::JsValue(exception.getHandle(js)); + startErroring(js, self.addRef(), error); + failure = error.addRef(js); } KJ_IF_SOME(exception, failure) { - return js.rejectedPromise(kj::mv(exception)); + return js.rejectedPromise(exception.getHandle(js)); } } @@ -1717,21 +1703,21 @@ jsg::Promise WritableImpl::write( KJ_IF_SOME(owner, tryGetOwner()) { if (!owner.isLockedToWriter()) { return 
js.rejectedPromise( - js.v8TypeError("This WritableStream writer has been released."_kjc)); + js.typeError("This WritableStream writer has been released."_kjc)); } } } KJ_IF_SOME(error, state.template tryGetUnsafe()) { - return js.rejectedPromise(error.addRef(js)); + return js.rejectedPromise(error.getHandle(js)); } if (isCloseQueuedOrInFlight() || state.template is()) { - return js.rejectedPromise(js.v8TypeError("This ReadableStream is closed."_kj)); + return js.rejectedPromise(js.typeError("This ReadableStream is closed."_kj)); } KJ_IF_SOME(erroring, state.template tryGetUnsafe()) { - return js.rejectedPromise(erroring.reason.addRef(js)); + return js.rejectedPromise(erroring.reason.getHandle(js)); } KJ_ASSERT(isWritable()); @@ -1739,7 +1725,7 @@ jsg::Promise WritableImpl::write( auto prp = js.newPromiseAndResolver(); writeRequests.push_back(WriteRequest{ .resolver = kj::mv(prp.resolver), - .value = js.v8Ref(value), + .value = value.addRef(js), .size = size, }); amountBuffered += size; @@ -1753,9 +1739,7 @@ template void WritableImpl::visitForGc(jsg::GcVisitor& visitor) { state.visitForGc(visitor); visitor.visit(inFlightWrite, inFlightClose, closeRequest, algorithms, signal); - KJ_IF_SOME(pendingAbort, maybePendingAbort) { - visitor.visit(*pendingAbort); - } + visitor.visit(maybePendingAbort); visitor.visitAll(writeRequests); } @@ -1884,7 +1868,7 @@ struct ValueReadable final: private api::ValueQueue::ConsumerImpl::StateListener }); } - jsg::Promise cancel(jsg::Lock& js, jsg::Optional> maybeReason) { + jsg::Promise cancel(jsg::Lock& js, jsg::Optional maybeReason) { // When a ReadableStream is canceled, the expected behavior is that the underlying // controller is notified and the cancel algorithm on the underlying source is // called. 
When there are multiple ReadableStreams sharing consumption of a @@ -1898,7 +1882,7 @@ struct ValueReadable final: private api::ValueQueue::ConsumerImpl::StateListener // will resolve the pending read and we need to know if we should defer destruction. bool hasPendingDrainingRead = s.consumer->hasPendingDrainingRead(); s.consumer->cancel(js, maybeReason); - auto promise = s.controller->cancel(js, kj::mv(maybeReason)); + auto promise = s.controller->cancel(js, maybeReason); // If we're currently in a read (sync or draining), we need to wait for that to // finish before dropping our state. For draining reads, the promise callbacks // capture 'this' (the Consumer) to clear hasPendingDrainingRead. If we destroy @@ -1924,13 +1908,13 @@ struct ValueReadable final: private api::ValueQueue::ConsumerImpl::StateListener } } - void onConsumerError(jsg::Lock& js, jsg::Value reason) override { + void onConsumerError(jsg::Lock& js, jsg::JsValue reason) override { // Called by the consumer when a state change to errored happens. // We need to notify the owner. Note that the owner may drop this // readable in doClose so it is not safe to access anything on this // after calling doError. KJ_IF_SOME(s, state) { - s.owner.doError(js, reason.getHandle(js)); + s.owner.doError(js, reason); } } @@ -2081,7 +2065,7 @@ struct ByteReadable final: private api::ByteQueue::ConsumerImpl::StateListener { .type = ByteQueue::ReadRequest::Type::BYOB, })); } else { - prp.resolver.reject(js, js.v8Error("Failed to allocate buffer for read.")); + prp.resolver.reject(js, js.error("Failed to allocate buffer for read.")); } } else { // autoAllocateChunkSize is not set. 
Per spec, we do a DEFAULT read which means @@ -2096,7 +2080,7 @@ struct ByteReadable final: private api::ByteQueue::ConsumerImpl::StateListener { .type = ByteQueue::ReadRequest::Type::DEFAULT, })); } else { - prp.resolver.reject(js, js.v8Error("Failed to allocate buffer for read.")); + prp.resolver.reject(js, js.error("Failed to allocate buffer for read.")); } } // reading is reset by KJ_DEFER above. @@ -2117,7 +2101,7 @@ struct ByteReadable final: private api::ByteQueue::ConsumerImpl::StateListener { auto store = source.detach(js); store.consume(store.size()); return js.resolvedPromise(ReadResult{ - .value = js.v8Ref(store.createHandle(js)), + .value = jsg::JsValue(store.createHandle(js)).addRef(js), .done = true, }); } else { @@ -2148,14 +2132,14 @@ struct ByteReadable final: private api::ByteQueue::ConsumerImpl::StateListener { // the underlying controller only when the last reader is canceled. // Here, we rely on the controller implementing the correct behavior since it owns // the queue that knows about all of the attached consumers. - jsg::Promise cancel(jsg::Lock& js, jsg::Optional> maybeReason) { + jsg::Promise cancel(jsg::Lock& js, jsg::Optional maybeReason) { if (pendingCancel) return js.resolvedPromise(); KJ_IF_SOME(s, state) { // Check if there's a pending draining read before calling cancel, since cancel // will resolve the pending read and we need to know if we should defer destruction. bool hasPendingDrainingRead = s.consumer->hasPendingDrainingRead(); s.consumer->cancel(js, maybeReason); - auto promise = s.controller->cancel(js, kj::mv(maybeReason)); + auto promise = s.controller->cancel(js, maybeReason); // If we're currently in a read (sync or draining), we need to wait for that to // finish before dropping our state. For sync reads, consumer->read() is still on // the call stack and will access the consumer after we return. 
For draining reads, @@ -2180,11 +2164,11 @@ struct ByteReadable final: private api::ByteQueue::ConsumerImpl::StateListener { } } - void onConsumerError(jsg::Lock& js, jsg::Value reason) override { + void onConsumerError(jsg::Lock& js, jsg::JsValue reason) override { // Note that the owner may drop this readable in doClose so it // is not safe to access anything on this after calling doError. KJ_IF_SOME(s, state) { - s.owner.doError(js, reason.getHandle(js)); + s.owner.doError(js, reason); }; } @@ -2285,16 +2269,15 @@ void ReadableStreamDefaultController::visitForGc(jsg::GcVisitor& visitor) { } jsg::Promise ReadableStreamDefaultController::cancel( - jsg::Lock& js, jsg::Optional> maybeReason) { - return impl.cancel(js, JSG_THIS, maybeReason.orDefault([&] { return js.v8Undefined(); })); + jsg::Lock& js, jsg::Optional maybeReason) { + return impl.cancel(js, JSG_THIS, maybeReason.orDefault([&] { return js.undefined(); })); } void ReadableStreamDefaultController::close(jsg::Lock& js) { impl.close(js); } -void ReadableStreamDefaultController::enqueue( - jsg::Lock& js, jsg::Optional> chunk) { +void ReadableStreamDefaultController::enqueue(jsg::Lock& js, jsg::Optional chunk) { // Hold a strong reference to prevent this controller from being freed if the // user-provided size algorithm (below) re-enters JS and errors the controller // through a side-channel (e.g. TransformStreamDefaultController::error() @@ -2307,22 +2290,25 @@ void ReadableStreamDefaultController::enqueue( size_t size = 1; bool errored = false; KJ_IF_SOME(sizeFunc, impl.algorithms.size) { - js.tryCatch([&] { size = sizeFunc(js, value); }, [&](jsg::Value exception) { - impl.doError(js, kj::mv(exception)); + JSG_TRY(js) { + size = sizeFunc(js, value); + } + JSG_CATCH(exception) { + impl.doError(js, jsg::JsValue(exception.getHandle(js))); errored = true; - }); + } } // Re-check canCloseOrEnqueue: the size callback may have errored us without // throwing (e.g. 
by calling transformController.error()), in which case // `errored` is still false but the impl state has transitioned to Errored. if (!errored && impl.canCloseOrEnqueue()) { - impl.enqueue(js, kj::rc(js.v8Ref(value), size), kj::mv(self)); + impl.enqueue(js, kj::rc(value.addRef(js), size), kj::mv(self)); } } -void ReadableStreamDefaultController::error(jsg::Lock& js, v8::Local reason) { - impl.doError(js, js.v8Ref(reason)); +void ReadableStreamDefaultController::error(jsg::Lock& js, jsg::JsValue reason) { + impl.doError(js, reason); } // When a consumer receives a read request, but does not have the data available to @@ -2545,7 +2531,7 @@ void ReadableByteStreamController::visitForGc(jsg::GcVisitor& visitor) { } jsg::Promise ReadableByteStreamController::cancel( - jsg::Lock& js, jsg::Optional> maybeReason) { + jsg::Lock& js, jsg::Optional maybeReason) { KJ_IF_SOME(byobRequest, maybeByobRequest) { if (impl.consumerCount() == 1) { byobRequest->invalidate(js); @@ -2594,8 +2580,8 @@ void ReadableByteStreamController::enqueue(jsg::Lock& js, jsg::BufferSource chun impl.enqueue(js, kj::rc(jsg::BufferSource(js, chunk.detach(js))), kj::mv(self)); } -void ReadableByteStreamController::error(jsg::Lock& js, v8::Local reason) { - impl.doError(js, js.v8Ref(reason)); +void ReadableByteStreamController::error(jsg::Lock& js, jsg::JsValue reason) { + impl.doError(js, reason); } kj::Maybe> ReadableByteStreamController::getByobRequest( @@ -2660,13 +2646,13 @@ jsg::Ref ReadableStreamJsController::addRef() { } jsg::Promise ReadableStreamJsController::cancel( - jsg::Lock& js, jsg::Optional> maybeReason) { + jsg::Lock& js, jsg::Optional maybeReason) { disturbed = true; const auto doCancel = [&](auto& consumer) { - auto reason = js.v8Ref(maybeReason.orDefault([&] { return js.v8Undefined(); })); + auto reason = maybeReason.orDefault([&] { return js.undefined(); }); KJ_DEFER(doClose(js)); - return consumer->cancel(js, reason.getHandle(js)); + return consumer->cancel(js, reason); }; // Check 
for pending state first (deferred close/error during a read operation) @@ -2674,7 +2660,7 @@ jsg::Promise ReadableStreamJsController::cancel( return js.resolvedPromise(); } KJ_IF_SOME(pendingError, state.tryGetPendingStateUnsafe()) { - return js.rejectedPromise(pendingError.addRef(js)); + return js.rejectedPromise(pendingError.getHandle(js)); } KJ_SWITCH_ONEOF(state) { @@ -2686,7 +2672,7 @@ jsg::Promise ReadableStreamJsController::cancel( return js.resolvedPromise(); } KJ_CASE_ONEOF(errored, StreamStates::Errored) { - return js.rejectedPromise(errored.addRef(js)); + return js.rejectedPromise(errored.getHandle(js)); } KJ_CASE_ONEOF(consumer, kj::Own) { if (canceling) return js.resolvedPromise(); @@ -2728,13 +2714,13 @@ void ReadableStreamJsController::doClose(jsg::Lock& js) { // erroring. We detach ourselves from the underlying controller by releasing the ValueReadable // or ByteReadable in the state and changing that to errored. // We also clean up other state here. -void ReadableStreamJsController::doError(jsg::Lock& js, v8::Local reason) { +void ReadableStreamJsController::doError(jsg::Lock& js, jsg::JsValue reason) { // If already in a terminal state, nothing to do. if (state.isTerminal()) return; // deferTransitionTo will defer if an operation is in progress, otherwise transition immediately. // Returns true if transition happened immediately. 
- if (state.deferTransitionTo(js.v8Ref(reason))) { + if (state.deferTransitionTo(reason.addRef(js))) { lock.onError(js, reason); } // If deferred, lock.onError will be called when the pending state is applied @@ -2779,7 +2765,7 @@ jsg::Promise ReadableStreamJsController::pipeTo( } return js.rejectedPromise( - js.v8TypeError("This ReadableStream cannot be piped to this WritableStream"_kj)); + js.typeError("This ReadableStream cannot be piped to this WritableStream"_kj)); } kj::Maybe> ReadableStreamJsController::read( @@ -2791,17 +2777,17 @@ kj::Maybe> ReadableStreamJsController::read( auto view = byobOptions.bufferView.getHandle(js); if (!view->Buffer()->IsDetachable()) { return js.rejectedPromise( - js.v8TypeError("Unabled to use non-detachable ArrayBuffer."_kj)); + js.typeError("Unabled to use non-detachable ArrayBuffer."_kj)); } if (view->ByteLength() == 0 || view->Buffer()->ByteLength() == 0) { return js.rejectedPromise( - js.v8TypeError("Unable to use a zero-length ArrayBuffer."_kj)); + js.typeError("Unable to use a zero-length ArrayBuffer."_kj)); } // Check for pending error first (deferred error during a prior read operation) KJ_IF_SOME(pendingError, state.tryGetPendingStateUnsafe()) { - return js.rejectedPromise(pendingError.addRef(js)); + return js.rejectedPromise(pendingError.getHandle(js)); } if (state.is() || state.pendingStateIs()) { @@ -2812,7 +2798,7 @@ kj::Maybe> ReadableStreamJsController::read( auto store = source.detach(js); store.consume(store.size()); return js.resolvedPromise(ReadResult{ - .value = js.v8Ref(store.createHandle(js)), + .value = jsg::JsValue(store.createHandle(js)).addRef(js), .done = true, }); } @@ -2825,7 +2811,7 @@ kj::Maybe> ReadableStreamJsController::read( return js.resolvedPromise(ReadResult{.done = true}); } KJ_IF_SOME(pendingError, state.tryGetPendingStateUnsafe()) { - return js.rejectedPromise(pendingError.addRef(js)); + return js.rejectedPromise(pendingError.getHandle(js)); } KJ_SWITCH_ONEOF(state) { @@ -2840,7 +2826,7 
@@ kj::Maybe> ReadableStreamJsController::read( return js.resolvedPromise(ReadResult{.done = true}); } KJ_CASE_ONEOF(errored, StreamStates::Errored) { - return js.rejectedPromise(errored.addRef(js)); + return js.rejectedPromise(errored.getHandle(js)); } KJ_CASE_ONEOF(consumer, kj::Own) { // The ReadableStreamDefaultController does not support ByobOptions. @@ -2868,7 +2854,7 @@ kj::Maybe> ReadableStreamJsController::draining }); } KJ_IF_SOME(pendingError, state.tryGetPendingStateUnsafe()) { - return js.rejectedPromise(pendingError.addRef(js)); + return js.rejectedPromise(pendingError.getHandle(js)); } // Like deferControllerStateChange for regular reads, we need to prevent the controller @@ -2886,9 +2872,9 @@ kj::Maybe> ReadableStreamJsController::draining // state change only fires after the promise resolves/rejects and the Consumer's // this-capturing callbacks have already run. auto wrapDrainingRead = - [this](jsg::Lock& js, - jsg::Promise promise) -> jsg::Promise { - return promise.then(js, [this](jsg::Lock& js, DrainingReadResult result) { + [this](jsg::Lock& js, jsg::Promise promise, + jsg::Ref ref) mutable -> jsg::Promise { + return promise.then(js, [this, ref = ref.addRef()](jsg::Lock& js, DrainingReadResult result) { if (state.endOperation()) { // A pending state was applied. Call the appropriate callback. if (state.template is()) { @@ -2899,12 +2885,12 @@ kj::Maybe> ReadableStreamJsController::draining // The error was applied during this operation — the data we collected // may be invalid. Discard it and propagate the error rather than // silently returning possibly-corrupt data. 
- js.throwException(err.addRef(js)); + js.throwException(err.getHandle(js)); } } } return kj::mv(result); - }, [this](jsg::Lock& js, jsg::Value exception) -> DrainingReadResult { + }, [this, ref = ref.addRef()](jsg::Lock& js, jsg::Value exception) -> DrainingReadResult { state.clearPendingState(); (void)state.endOperation(); js.throwException(kj::mv(exception)); @@ -2926,32 +2912,34 @@ kj::Maybe> ReadableStreamJsController::draining }); } KJ_CASE_ONEOF(errored, StreamStates::Errored) { - return js.rejectedPromise(errored.addRef(js)); + return js.rejectedPromise(errored.getHandle(js)); } KJ_CASE_ONEOF(consumer, kj::Own) { // beginOperation MUST be before consumer->drainingRead() — see comment above. state.beginOperation(); JSG_TRY(js) { - return wrapDrainingRead(js, consumer->drainingRead(js, maxRead)); + return wrapDrainingRead(js, consumer->drainingRead(js, maxRead), addRef()); } JSG_CATCH(exception) { state.clearPendingState(); (void)state.endOperation(); - doError(js, exception.getHandle(js)); - return js.rejectedPromise(kj::mv(exception)); + auto error = jsg::JsValue(exception.getHandle(js)); + doError(js, error); + return js.rejectedPromise(error); }; } KJ_CASE_ONEOF(consumer, kj::Own) { // beginOperation MUST be before consumer->drainingRead() — see comment above. 
state.beginOperation(); JSG_TRY(js) { - return wrapDrainingRead(js, consumer->drainingRead(js, maxRead)); + return wrapDrainingRead(js, consumer->drainingRead(js, maxRead), addRef()); } JSG_CATCH(exception) { state.clearPendingState(); (void)state.endOperation(); - doError(js, exception.getHandle(js)); - return js.rejectedPromise(kj::mv(exception)); + auto error = jsg::JsValue(exception.getHandle(js)); + doError(js, error); + return js.rejectedPromise(error); }; } } @@ -3158,14 +3146,14 @@ kj::Maybe ReadableStreamJsController::getDesiredSize() { KJ_UNREACHABLE; } -kj::Maybe> ReadableStreamJsController::isErrored(jsg::Lock& js) { +kj::Maybe ReadableStreamJsController::isErrored(jsg::Lock& js) { // Check for pending error first KJ_IF_SOME(pendingError, state.tryGetPendingStateUnsafe()) { return pendingError.getHandle(js); } // Pending Closed means not errored, so we can just check current state return state.tryGetUnsafe().map( - [&](jsg::Value& reason) { return reason.getHandle(js); }); + [&](jsg::JsRef& reason) { return reason.getHandle(js); }); } bool ReadableStreamJsController::canCloseOrEnqueue() { @@ -3239,11 +3227,12 @@ class AllReader { limit(limit) {} KJ_DISALLOW_COPY_AND_MOVE(AllReader); - jsg::Promise allBytes(jsg::Lock& js) { - return loop(js).then(js, [this](auto& js, PartList&& partPtrs) -> jsg::BufferSource { - auto out = jsg::BackingStore::alloc(js, runningTotal); - copyInto(out.asArrayPtr(), partPtrs.asPtr()); - return jsg::BufferSource(js, kj::mv(out)); + jsg::Promise> allBytes(jsg::Lock& js) { + return loop(js).then( + js, [this](auto& js, PartList&& partPtrs) -> jsg::JsRef { + auto ab = jsg::JsArrayBuffer::create(js, runningTotal); + copyInto(ab.asArrayPtr(), partPtrs.asPtr()); + return ab.addRef(js); }); } @@ -3302,45 +3291,46 @@ class AllReader { // and are passed into to promise returned by this method. It is the responsibility // of the caller to ensure that the AllReader instance is kept alive until the // promise is settled. 
- auto onSuccess = JSG_VISITABLE_LAMBDA((this, readable = readable.addRef()), (readable), - (jsg::Lock & js, ReadResult result) mutable->jsg::Promise { - if (result.done) { - state.template transitionTo(); - return loop(js); - } - - // If we're not done, the result value must be interpretable as - // bytes for the read to make any sense. - auto handle = KJ_ASSERT_NONNULL(result.value).getHandle(js); - if (!handle->IsArrayBufferView() && !handle->IsArrayBuffer()) { - auto error = js.v8TypeError("This ReadableStream did not return bytes."); - state.template transitionTo(js.v8Ref(error)); - return readable->getController().cancel(js, error).then( - js, [&](jsg::Lock& js) { return loop(js); }); - } - - jsg::BufferSource bufferSource(js, handle); - - if (bufferSource.size() == 0) { - // Weird but allowed, we'll skip it. - return loop(js); - } - - if ((runningTotal + bufferSource.size()) > limit) { - auto error = js.v8TypeError("Memory limit exceeded before EOF."); - state.template transitionTo(js.v8Ref(error)); - return readable->getController().cancel(js, error).then( - js, [&](jsg::Lock& js) { return loop(js); }); - } - - runningTotal += bufferSource.size(); - parts.add(bufferSource.copy(js)); - return loop(js); - }); + auto onSuccess = [this, readable = readable.addRef()]( + jsg::Lock& js, ReadResult result) mutable -> jsg::Promise { + if (result.done) { + state.template transitionTo(); + return loop(js); + } + + // If we're not done, the result value must be interpretable as + // bytes for the read to make any sense. 
+ auto handle = KJ_ASSERT_NONNULL(result.value).getHandle(js); + if (!handle.isArrayBufferView() && !handle.isArrayBuffer()) { + auto error = js.typeError("This ReadableStream did not return bytes."); + state.template transitionTo(error.addRef(js)); + return readable->getController().cancel(js, error).then( + js, [&](jsg::Lock& js) { return loop(js); }); + } + + jsg::BufferSource bufferSource(js, handle); + + if (bufferSource.size() == 0) { + // Weird but allowed, we'll skip it. + return loop(js); + } + + if ((runningTotal + bufferSource.size()) > limit) { + auto error = js.typeError("Memory limit exceeded before EOF."); + state.template transitionTo(error.addRef(js)); + return readable->getController().cancel(js, error).then( + js, [&](jsg::Lock& js) { return loop(js); }); + } + + runningTotal += bufferSource.size(); + parts.add(bufferSource.copy(js)); + return loop(js); + }; auto onFailure = [this](auto& js, jsg::Value exception) -> jsg::Promise { // In this case the stream should already be errored. - state.template transitionTo(js.v8Ref(exception.getHandle(js))); + auto error = jsg::JsValue(exception.getHandle(js)); + state.template transitionTo(error.addRef(js)); return loop(js); }; @@ -3354,203 +3344,12 @@ class AllReader { void copyInto(kj::ArrayPtr out, kj::ArrayPtr> in) { for (auto& part: in) { KJ_ASSERT(part.size() <= out.size()); - out.first(part.size()).copyFrom(part); - out = out.slice(part.size()); - } - } -}; - -// PumpToReader implements the original JS promise-loop approach to pumping data from -// a ReadableStream to a WritableStreamSink. It reads one chunk at a time using the -// standard read() API, writes each chunk to the sink, and loops until done or errored. -// This is the fallback path used when the ENABLE_DRAINING_READ_ON_STANDARD_STREAMS -// autogate is not enabled. 
-class PumpToReader { - public: - PumpToReader(jsg::Ref stream, kj::Own sink, bool end) - : ioContext(IoContext::current()), - state(State::create>(kj::mv(stream))), - sink(kj::mv(sink)), - self(kj::refcounted>(kj::Badge{}, *this)), - end(end) {} - KJ_DISALLOW_COPY_AND_MOVE(PumpToReader); - - ~PumpToReader() noexcept(false) { - self->invalidate(); - // Ensure that if a write promise is pending it is proactively canceled. - canceler.cancel("PumpToReader was destroyed"); - } - - kj::Promise pumpTo(jsg::Lock& js) { - ioContext.requireCurrentOrThrowJs(); - KJ_SWITCH_ONEOF(state) { - KJ_CASE_ONEOF(stream, jsg::Ref) { - auto readable = stream.addRef(); - state.template transitionTo(); - return ioContext.awaitJs( - js, pumpLoop(js, ioContext, kj::mv(readable), ioContext.addObject(self->addRef()))); - } - KJ_CASE_ONEOF(pumping, Pumping) { - return KJ_EXCEPTION(FAILED, "pumping is already in progress"); - } - KJ_CASE_ONEOF(closed, StreamStates::Closed) { - return KJ_EXCEPTION(FAILED, "stream has already been consumed"); - } - KJ_CASE_ONEOF(errored, kj::Exception) { - return errored.clone(); - } - } - KJ_UNREACHABLE; - } - - private: - struct Pumping { - static constexpr kj::StringPtr NAME KJ_UNUSED = "pumping"_kj; - }; - IoContext& ioContext; - - using State = StateMachine, - ErrorState, - Pumping, - StreamStates::Closed, - kj::Exception, - jsg::Ref>; - State state; - kj::Own sink; - kj::Own> self; - kj::Canceler canceler; - bool end; - - bool isErroredOrClosed() { - return state.isTerminal(); - } - - jsg::Promise pumpLoop(jsg::Lock& js, - IoContext& ioContext, - jsg::Ref readable, - IoOwn> pumpToReader) { - ioContext.requireCurrentOrThrowJs(); - - KJ_SWITCH_ONEOF(state) { - KJ_CASE_ONEOF(ready, jsg::Ref) { - KJ_UNREACHABLE; - } - KJ_CASE_ONEOF(closed, StreamStates::Closed) { - return end ? 
ioContext.awaitIoLegacy(js, sink->end().attach(kj::mv(sink))) - : js.resolvedPromise(); - } - KJ_CASE_ONEOF(errored, kj::Exception) { - if (end) { - sink->abort(errored.clone()); - } - return js.rejectedPromise(errored.clone()); - } - KJ_CASE_ONEOF(pumping, Pumping) { - using Result = kj::OneOf, StreamStates::Closed, jsg::Value>; - - return KJ_ASSERT_NONNULL(readable->getController().read(js, kj::none)) - .then(js, - ioContext.addFunctor([byteStream = readable->getController().isByteOriented()]( - auto& js, ReadResult result) mutable -> Result { - if (result.done) { - return StreamStates::Closed(); - } - - auto handle = KJ_ASSERT_NONNULL(result.value).getHandle(js); - if (!handle->IsArrayBufferView() && !handle->IsArrayBuffer()) { - return js.v8Ref(js.v8TypeError("This ReadableStream did not return bytes.")); - } - - jsg::BufferSource bufferSource(js, handle); - if (bufferSource.size() == 0) { - return Pumping{}; - } - - if (byteStream) { - jsg::BackingStore backing = bufferSource.detach(js); - return backing.asArrayPtr().attach(kj::mv(backing)); - } - return bufferSource.asArrayPtr().attach(kj::mv(bufferSource)); - }), - [](auto& js, jsg::Value exception) mutable -> Result { return kj::mv(exception); }) - .then(js, ioContext.addFunctor( JSG_VISITABLE_LAMBDA((readable = kj::mv(readable), pumpToReader = kj::mv(pumpToReader)), (readable), (jsg::Lock & js, Result result) mutable { - KJ_IF_SOME(reader, pumpToReader->tryGet()) { - reader.ioContext.requireCurrentOrThrowJs(); - auto& ioContext = IoContext::current(); - KJ_SWITCH_ONEOF(result) { - KJ_CASE_ONEOF(bytes, kj::Array) { - auto promise = reader.sink->write(bytes).attach(kj::mv(bytes)); - return ioContext.awaitIo(js, reader.canceler.wrap(kj::mv(promise))) - .then(js, - [](jsg::Lock& js) -> kj::Maybe { - return kj::Maybe(kj::none); - }, - [](jsg::Lock& js, jsg::Value exception) mutable -> kj::Maybe { - return kj::mv(exception); - }) - .then(js, - ioContext.addFunctor(JSG_VISITABLE_LAMBDA( - (readable = 
readable.addRef(), pumpToReader = kj::mv(pumpToReader)), - (readable), - (jsg::Lock & js, kj::Maybe maybeException) mutable { - KJ_IF_SOME(reader, pumpToReader->tryGet()) { - auto& ioContext = reader.ioContext; - ioContext.requireCurrentOrThrowJs(); - KJ_IF_SOME(exception, maybeException) { - if (!reader.isErroredOrClosed()) { - reader.state.transitionTo( - js.exceptionToKj(kj::mv(exception))); - } - } else { - // Else block to avert dangling else compiler warning. - } - return reader.pumpLoop( - js, ioContext, readable.addRef(), kj::mv(pumpToReader)); - } else { - return readable->getController().cancel(js, - maybeException.map( - [&](jsg::Value& ex) { return ex.getHandle(js); })); - } - }))); - } - KJ_CASE_ONEOF(pumping, Pumping) {} - KJ_CASE_ONEOF(closed, StreamStates::Closed) { - if (!reader.isErroredOrClosed()) { - reader.state.transitionTo(); - } - } - KJ_CASE_ONEOF(exception, jsg::Value) { - if (!reader.isErroredOrClosed()) { - reader.state.transitionTo(js.exceptionToKj(kj::mv(exception))); - } - } - } - return reader.pumpLoop(js, ioContext, readable.addRef(), kj::mv(pumpToReader)); - } else { - KJ_SWITCH_ONEOF(result) { - KJ_CASE_ONEOF(bytes, kj::Array) { - return readable->getController().cancel(js, kj::none); - } - KJ_CASE_ONEOF(pumping, Pumping) { - return readable->getController().cancel(js, kj::none); - } - KJ_CASE_ONEOF(closed, StreamStates::Closed) { - return js.resolvedPromise(); - } - KJ_CASE_ONEOF(exception, jsg::Value) { - return readable->getController().cancel(js, exception.getHandle(js)); - } - } - } - KJ_UNREACHABLE; - }))); - } + out.write(part); } - KJ_UNREACHABLE; } }; -// pumpToCoroutine uses a DrainingReader to efficiently pull all synchronously available +// pumpToImpl uses a DrainingReader to efficiently pull all synchronously available // data from the stream in each iteration, then writes it to the sink using vectored // I/O. 
This minimizes isolate lock acquisitions by batching: each time the lock is // held, the stream's internal queue is fully drained and the JS pull callback is @@ -3618,8 +3417,8 @@ kj::Promise pumpToImpl(IoContext& ioContext, template jsg::Promise ReadableStreamJsController::readAll(jsg::Lock& js, uint64_t limit) { if (isLockedToReader()) { - return js.rejectedPromise(KJ_EXCEPTION( - FAILED, "jsg.TypeError: This ReadableStream is currently locked to a reader.")); + return js.rejectedPromise( + js.typeError("This ReadableStream is currently locked to a reader.")); } disturbed = true; @@ -3637,7 +3436,7 @@ jsg::Promise ReadableStreamJsController::readAll(jsg::Lock& js, uint64_t limi auto reader = kj::heap(addRef(), limit); auto promise = ([&js, &reader, stripBom]() -> jsg::Promise { - if constexpr (kj::isSameType()) { + if constexpr (kj::isSameType>()) { (void)stripBom; // Unused in this branch. return reader->allBytes(js); } else { @@ -3654,11 +3453,9 @@ jsg::Promise ReadableStreamJsController::readAll(jsg::Lock& js, uint64_t limi // or an error. Accordingly, we wrap it in a visitable lambda attached as a // continuation on the promise to ensure that it is GC visited and kept alive until // the promise settles. - JSG_VISITABLE_LAMBDA((reader = kj::mv(reader)), (reader), - (jsg::Lock & js, T result)->jsg::Promise { - return js.resolvedPromise(kj::mv(result)); - }), - [](jsg::Lock& js, jsg::Value exception) -> jsg::Promise { + [reader = kj::mv(reader)](jsg::Lock& js, T result) -> jsg::Promise { + return js.resolvedPromise(kj::mv(result)); + }, [](jsg::Lock& js, jsg::Value exception) -> jsg::Promise { return js.rejectedPromise(kj::mv(exception)); }); }; @@ -3666,23 +3463,23 @@ jsg::Promise ReadableStreamJsController::readAll(jsg::Lock& js, uint64_t limi KJ_SWITCH_ONEOF(state) { KJ_CASE_ONEOF(initial, Initial) { // Stream not yet set up, treat as closed. 
- if constexpr (kj::isSameType()) { - auto backing = jsg::BackingStore::alloc(js, 0); - return js.resolvedPromise(jsg::BufferSource(js, kj::mv(backing))); + if constexpr (kj::isSameType>()) { + auto ab = jsg::JsArrayBuffer::create(js, 0); + return js.resolvedPromise(ab.addRef(js)); } else { return js.resolvedPromise(T()); } } KJ_CASE_ONEOF(closed, StreamStates::Closed) { - if constexpr (kj::isSameType()) { - auto backing = jsg::BackingStore::alloc(js, 0); - return js.resolvedPromise(jsg::BufferSource(js, kj::mv(backing))); + if constexpr (kj::isSameType>()) { + auto ab = jsg::JsArrayBuffer::create(js, 0); + return js.resolvedPromise(ab.addRef(js)); } else { return js.resolvedPromise(T()); } } KJ_CASE_ONEOF(errored, StreamStates::Errored) { - return js.rejectedPromise(errored.addRef(js)); + return js.rejectedPromise(errored.getHandle(js)); } KJ_CASE_ONEOF(valueReadable, kj::Own) { return readAll(js); @@ -3694,9 +3491,9 @@ jsg::Promise ReadableStreamJsController::readAll(jsg::Lock& js, uint64_t limi KJ_UNREACHABLE; } -jsg::Promise ReadableStreamJsController::readAllBytes( +jsg::Promise> ReadableStreamJsController::readAllBytes( jsg::Lock& js, uint64_t limit) { - return readAll(js, limit); + return readAll>(js, limit); } jsg::Promise ReadableStreamJsController::readAllText(jsg::Lock& js, uint64_t limit) { @@ -3757,22 +3554,11 @@ kj::Promise> ReadableStreamJsController::pumpTo( // This operation will leave the ReadableStream locked and disturbed. It will consume // the stream until it either closed or errors. // - // When the ENABLE_DRAINING_READ_ON_STANDARD_STREAMS autogate is enabled, uses the new - // pumpToImpl coroutine with DrainingReader for batched reads and vectored writes. - // Otherwise, falls back to the original PumpToReader JS promise loop that reads one - // chunk at a time. 
- const auto handlePump = [&] { - if (util::Autogate::isEnabled(util::AutogateKey::ENABLE_DRAINING_READ_ON_STANDARD_STREAMS)) { - auto reader = KJ_ASSERT_NONNULL(DrainingReader::create(js, *this->addRef()), - "Failed to create DrainingReader — stream should not be locked"); - auto& ioContext = IoContext::current(); - return addNoopDeferredProxy(pumpToImpl(ioContext, kj::mv(reader), kj::mv(sink), end)); - } else { - KJ_ASSERT(lock.lock()); - auto reader = kj::heap(addRef(), kj::mv(sink), end); - return addNoopDeferredProxy(reader->pumpTo(js).attach(kj::mv(reader))); - } + auto reader = KJ_ASSERT_NONNULL(DrainingReader::create(js, *this->addRef()), + "Failed to create DrainingReader — stream should not be locked"); + auto& ioContext = IoContext::current(); + return addNoopDeferredProxy(pumpToImpl(ioContext, kj::mv(reader), kj::mv(sink), end)); }; KJ_SWITCH_ONEOF(state) { @@ -3804,8 +3590,7 @@ WritableStreamDefaultController::WritableStreamDefaultController( : ioContext(tryGetIoContext()), impl(js, owner, kj::mv(abortSignal)) {} -jsg::Promise WritableStreamDefaultController::abort( - jsg::Lock& js, v8::Local reason) { +jsg::Promise WritableStreamDefaultController::abort(jsg::Lock& js, jsg::JsValue reason) { return impl.abort(js, JSG_THIS, reason); } @@ -3817,8 +3602,7 @@ jsg::Promise WritableStreamDefaultController::close(jsg::Lock& js) { return impl.close(js, JSG_THIS); } -void WritableStreamDefaultController::error( - jsg::Lock& js, jsg::Optional> reason) { +void WritableStreamDefaultController::error(jsg::Lock& js, jsg::Optional reason) { impl.error(js, JSG_THIS, reason.orDefault(js.undefined())); } @@ -3834,7 +3618,7 @@ jsg::Ref WritableStreamDefaultController::getSignal() { return impl.signal.addRef(); } -kj::Maybe> WritableStreamDefaultController::isErroring(jsg::Lock& js) { +kj::Maybe WritableStreamDefaultController::isErroring(jsg::Lock& js) { KJ_IF_SOME(erroring, impl.state.tryGetUnsafe()) { return erroring.reason.getHandle(js); } @@ -3846,8 +3630,7 @@ void 
WritableStreamDefaultController::setup( impl.setup(js, JSG_THIS, kj::mv(underlyingSink), kj::mv(queuingStrategy)); } -jsg::Promise WritableStreamDefaultController::write( - jsg::Lock& js, v8::Local value) { +jsg::Promise WritableStreamDefaultController::write(jsg::Lock& js, jsg::JsValue value) { return impl.write(js, JSG_THIS, value); } @@ -3892,7 +3675,7 @@ WritableStreamJsController::WritableStreamJsController(StreamStates::Errored err } jsg::Promise WritableStreamJsController::abort( - jsg::Lock& js, jsg::Optional> reason) { + jsg::Lock& js, jsg::Optional reason) { // The spec requires that if abort is called multiple times, it is supposed to return the same // promise each time. That's a bit cumbersome here with jsg::Promise so we intentionally just // return a continuation branch off the same promise. @@ -3938,16 +3721,16 @@ jsg::Promise WritableStreamJsController::close(jsg::Lock& js, bool markAsH KJ_SWITCH_ONEOF(state) { KJ_CASE_ONEOF(initial, Initial) { return rejectedMaybeHandledPromise( - js, js.v8TypeError("This WritableStream has been closed."_kj), markAsHandled); + js, js.typeError("This WritableStream has been closed."_kj), markAsHandled); } KJ_CASE_ONEOF(closed, StreamStates::Closed) { return rejectedMaybeHandledPromise( - js, js.v8TypeError("This WritableStream has been closed."_kj), markAsHandled); + js, js.typeError("This WritableStream has been closed."_kj), markAsHandled); } KJ_CASE_ONEOF(errored, StreamStates::Errored) { if (FeatureFlags::get(js).getPedanticWpt()) { return rejectedMaybeHandledPromise( - js, js.v8TypeError("This WritableStream has been errored."_kj), markAsHandled); + js, js.typeError("This WritableStream has been errored."_kj), markAsHandled); } return rejectedMaybeHandledPromise(js, errored.getHandle(js), markAsHandled); } @@ -3976,7 +3759,7 @@ void WritableStreamJsController::doClose(jsg::Lock& js) { } } -void WritableStreamJsController::doError(jsg::Lock& js, v8::Local reason) { +void 
WritableStreamJsController::doError(jsg::Lock& js, jsg::JsValue reason) { // If already in a terminal state, nothing to do. if (state.isTerminal()) return; @@ -3985,7 +3768,7 @@ void WritableStreamJsController::doError(jsg::Lock& js, v8::Local rea controller->clearAlgorithms(); } - state.transitionTo(js.v8Ref(reason)); + state.transitionTo(reason.addRef(js)); KJ_IF_SOME(locked, lock.state.tryGetUnsafe()) { maybeRejectPromise(js, locked.getClosedFulfiller(), reason); maybeResolvePromise(js, locked.getReadyFulfiller()); @@ -4002,7 +3785,7 @@ void WritableStreamJsController::doError(jsg::Lock& js, v8::Local rea } } -void WritableStreamJsController::errorIfNeeded(jsg::Lock& js, v8::Local reason) { +void WritableStreamJsController::errorIfNeeded(jsg::Lock& js, jsg::JsValue reason) { // Error through the underlying controller if available, which goes through the proper // error transition (Erroring -> Errored). This allows close() to be called while the // stream is "erroring" and reject with the stored error. 
@@ -4030,7 +3813,7 @@ kj::Maybe WritableStreamJsController::getDesiredSize() { KJ_UNREACHABLE; } -kj::Maybe> WritableStreamJsController::isErroring(jsg::Lock& js) { +kj::Maybe WritableStreamJsController::isErroring(jsg::Lock& js) { KJ_IF_SOME(controller, state.tryGetUnsafe()) { return controller->isErroring(js); } @@ -4041,7 +3824,7 @@ bool WritableStreamDefaultController::isErroring() const { return impl.state.is(); } -kj::Maybe> WritableStreamJsController::isErroredOrErroring(jsg::Lock& js) { +kj::Maybe WritableStreamJsController::isErroredOrErroring(jsg::Lock& js) { KJ_IF_SOME(err, state.tryGetErrorUnsafe()) { return err.getHandle(js); } @@ -4085,8 +3868,7 @@ bool WritableStreamJsController::lockWriter(jsg::Lock& js, Writer& writer) { return lock.lockWriter(js, *this, writer); } -void WritableStreamJsController::maybeRejectReadyPromise( - jsg::Lock& js, v8::Local reason) { +void WritableStreamJsController::maybeRejectReadyPromise(jsg::Lock& js, jsg::JsValue reason) { KJ_IF_SOME(writerLock, lock.state.tryGetUnsafe()) { if (writerLock.getReadyFulfiller() != kj::none) { maybeRejectPromise(js, writerLock.getReadyFulfiller(), reason); @@ -4159,7 +3941,7 @@ kj::Maybe> WritableStreamJsController::tryPipeFrom( // Let's also acquire the destination pipe lock. 
lock.pipeLock(KJ_ASSERT_NONNULL(owner), kj::mv(source), options); - return pipeLoop(js).then(js, JSG_VISITABLE_LAMBDA((ref = addRef()), (ref), (auto& js){})); + return pipeLoop(js).then(js, [ref = addRef()](auto& js) {}); } jsg::Promise WritableStreamJsController::pipeLoop(jsg::Lock& js) { @@ -4183,10 +3965,9 @@ jsg::Promise WritableStreamJsController::pipeLoop(jsg::Lock& js) { source.release(js); lock.releasePipeLock(); if (!preventAbort) { - auto onSuccess = JSG_VISITABLE_LAMBDA( - (pipeThrough, reason = js.v8Ref(errored)), (reason), (jsg::Lock& js) { - return rejectedMaybeHandledPromise(js, reason.getHandle(js), pipeThrough); - }); + auto onSuccess = [pipeThrough, reason = errored.addRef(js)](jsg::Lock& js) { + return rejectedMaybeHandledPromise(js, reason.getHandle(js), pipeThrough); + }; auto promise = abort(js, errored); KJ_IF_SOME(ioContext, IoContext::tryCurrent()) { return promise.then(js, ioContext.addFunctor(kj::mv(onSuccess))); @@ -4233,7 +4014,7 @@ jsg::Promise WritableStreamJsController::pipeLoop(jsg::Lock& js) { if (state.is()) { lock.releasePipeLock(); - auto reason = js.v8TypeError("This destination writable stream is closed."_kj); + auto reason = js.typeError("This destination writable stream is closed."_kj); if (!preventCancel) { source.release(js, reason); } else { @@ -4253,54 +4034,48 @@ jsg::Promise WritableStreamJsController::pipeLoop(jsg::Lock& js) { // source (again, depending on options). If the write operation is successful, // we call pipeLoop again to move on to the next iteration. 
- auto onSuccess = JSG_VISITABLE_LAMBDA((this, ref = addRef(), preventCancel, pipeThrough), (ref), - (jsg::Lock & js, ReadResult result)->jsg::Promise { - auto maybePipeLock = lock.tryGetPipe(); - if (maybePipeLock == kj::none) return js.resolvedPromise(); - auto& pipeLock = KJ_REQUIRE_NONNULL(maybePipeLock); + auto onSuccess = [this, ref = addRef(), preventCancel, pipeThrough]( + jsg::Lock& js, ReadResult result) mutable -> jsg::Promise { + auto maybePipeLock = lock.tryGetPipe(); + if (maybePipeLock == kj::none) return js.resolvedPromise(); + auto& pipeLock = KJ_REQUIRE_NONNULL(maybePipeLock); - KJ_IF_SOME(promise, pipeLock.checkSignal(js, *this)) { - lock.releasePipeLock(); - return kj::mv(promise); - } else { - } // Trailing else() is squash compiler warning - - if (result.done) { - // We'll handle the close at the start of the next iteration. - return pipeLoop(js); - } + KJ_IF_SOME(promise, pipeLock.checkSignal(js, *this)) { + lock.releasePipeLock(); + return kj::mv(promise); + } // Trailing else() is squash compiler warning - auto onSuccess = JSG_VISITABLE_LAMBDA( - (this, ref=addRef()), (ref) , (jsg::Lock& js) { + if (result.done) { + // We'll handle the close at the start of the next iteration. return pipeLoop(js); - } ); + } + + auto onSuccess = [this, ref = addRef()](jsg::Lock& js) { return pipeLoop(js); }; - auto onFailure = JSG_VISITABLE_LAMBDA( - (this, ref=addRef(), preventCancel, pipeThrough), - (ref) , (jsg::Lock& js, jsg::Value value) { + auto onFailure = [this, ref = addRef(), preventCancel, pipeThrough]( + jsg::Lock& js, jsg::V8Ref exception) mutable { // The write failed. We need to release the source if the pipe lock still exists. 
- auto reason = value.getHandle(js); + auto reason = jsg::JsValue(exception.getHandle(js)); KJ_IF_SOME(pipeLock, lock.tryGetPipe()) { if (!preventCancel) { pipeLock.source.release(js, reason); } else { pipeLock.source.release(js); } - } else {} // Trailing else() to squash compiler warning + } // Trailing else() to squash compiler warning return rejectedMaybeHandledPromise(js, reason, pipeThrough); - } ); + }; - auto promise = - write(js, result.value.map([&](jsg::Value& value) { return value.getHandle(js); })); + auto promise = write( + js, result.value.map([&](jsg::JsRef& value) { return value.getHandle(js); })); - return maybeAddFunctor(js, kj::mv(promise), kj::mv(onSuccess), kj::mv(onFailure)); - }); + return maybeAddFunctor(js, kj::mv(promise), kj::mv(onSuccess), kj::mv(onFailure)); + }; - auto onFailure = - JSG_VISITABLE_LAMBDA((this, ref = addRef()), (ref), (jsg::Lock& js, jsg::Value value) { - // The read failed. We will handle the error at the start of the next iteration. - return pipeLoop(js); - }); + auto onFailure = [this, ref = addRef()](jsg::Lock& js, jsg::Value value) mutable { + // The read failed. We will handle the error at the start of the next iteration. 
+ return pipeLoop(js); + }; return maybeAddFunctor(js, pipeLock.source.read(js), kj::mv(onSuccess), kj::mv(onFailure)); } @@ -4322,19 +4097,19 @@ void WritableStreamJsController::updateBackpressure(jsg::Lock& js, bool backpres } jsg::Promise WritableStreamJsController::write( - jsg::Lock& js, jsg::Optional> value) { + jsg::Lock& js, jsg::Optional value) { KJ_SWITCH_ONEOF(state) { KJ_CASE_ONEOF(initial, Initial) { - return js.rejectedPromise(js.v8TypeError("This WritableStream has been closed."_kj)); + return js.rejectedPromise(js.typeError("This WritableStream has been closed."_kj)); } KJ_CASE_ONEOF(closed, StreamStates::Closed) { - return js.rejectedPromise(js.v8TypeError("This WritableStream has been closed."_kj)); + return js.rejectedPromise(js.typeError("This WritableStream has been closed."_kj)); } KJ_CASE_ONEOF(errored, StreamStates::Errored) { - return js.rejectedPromise(errored.addRef(js)); + return js.rejectedPromise(errored.getHandle(js)); } KJ_CASE_ONEOF(controller, Controller) { - return controller->write(js, value.orDefault([&] { return js.undefined(); })); + return controller->write(js, value.orDefault(js.undefined())); } } KJ_UNREACHABLE; @@ -4358,7 +4133,7 @@ kj::Maybe TransformStreamDefaultController::getDesiredSize() { return kj::none; } -void TransformStreamDefaultController::enqueue(jsg::Lock& js, v8::Local chunk) { +void TransformStreamDefaultController::enqueue(jsg::Lock& js, jsg::JsValue chunk) { auto& readableController = JSG_REQUIRE_NONNULL(tryGetReadableController(), TypeError, "The readable side of this TransformStream is no longer readable."); // Hold a strong reference to the readable controller for the duration of this @@ -4372,10 +4147,14 @@ void TransformStreamDefaultController::enqueue(jsg::Lock& js, v8::Local reason) { +void TransformStreamDefaultController::error(jsg::Lock& js, jsg::JsValue reason) { KJ_IF_SOME(readableController, tryGetReadableController()) { readableController.error(js, reason); readable = kj::none; @@ -4409,11 
+4188,10 @@ void TransformStreamDefaultController::terminate(jsg::Lock& js) { readableController.close(js); readable = kj::none; } - errorWritableAndUnblockWrite(js, js.v8TypeError("The transform stream has been terminated"_kj)); + errorWritableAndUnblockWrite(js, js.typeError("The transform stream has been terminated"_kj)); } -jsg::Promise TransformStreamDefaultController::write( - jsg::Lock& js, v8::Local chunk) { +jsg::Promise TransformStreamDefaultController::write(jsg::Lock& js, jsg::JsValue chunk) { KJ_IF_SOME(writableController, tryGetWritableController()) { KJ_IF_SOME(error, writableController.isErroredOrErroring(js)) { return js.rejectedPromise(error); @@ -4422,10 +4200,12 @@ jsg::Promise TransformStreamDefaultController::write( KJ_ASSERT(writableController.isWritable()); if (backpressure) { - auto chunkRef = js.v8Ref(chunk); - return KJ_ASSERT_NONNULL(maybeBackpressureChange).promise.whenResolved(js).then(js, - JSG_VISITABLE_LAMBDA((chunkRef = kj::mv(chunkRef), ref=JSG_THIS), - (chunkRef, ref), (jsg::Lock& js) mutable -> jsg::Promise { + auto chunkRef = chunk.addRef(js); + return KJ_ASSERT_NONNULL(maybeBackpressureChange) + .promise.whenResolved(js) + .then(js, + [chunkRef = kj::mv(chunkRef), ref = JSG_THIS]( + jsg::Lock& js) mutable -> jsg::Promise { KJ_IF_SOME(writableController, ref->tryGetWritableController()) { KJ_IF_SOME(error, writableController.isErroring(js)) { return js.rejectedPromise(error); @@ -4436,17 +4216,15 @@ jsg::Promise TransformStreamDefaultController::write( // Else block to avert dangling else compiler warning. 
} return ref->performTransform(js, chunkRef.getHandle(js)); - })); + }); } return performTransform(js, chunk); } else { - return js.rejectedPromise( - KJ_EXCEPTION(FAILED, "jsg.TypeError: Writing to the TransformStream failed.")); + return js.rejectedPromise(js.typeError("Writing to the TransformStream failed.")); } } -jsg::Promise TransformStreamDefaultController::abort( - jsg::Lock& js, v8::Local reason) { +jsg::Promise TransformStreamDefaultController::abort(jsg::Lock& js, jsg::JsValue reason) { if (FeatureFlags::get(js).getPedanticWpt()) { // If a finish operation is already in progress, return the existing promise // or handle the case where we're being called synchronously from within another @@ -4460,7 +4238,7 @@ jsg::Promise TransformStreamDefaultController::abort( // We need to error the stream with the abort reason so that both the current // operation and this abort reject with the abort reason. error(js, reason); - return js.rejectedPromise(js.v8Ref(reason)); + return js.rejectedPromise(reason); } // Mark that we're starting a finish operation before running the algorithm. @@ -4473,27 +4251,21 @@ jsg::Promise TransformStreamDefaultController::abort( return algorithms.maybeFinish .emplace(maybeRunAlgorithm(js, algorithms.cancel, - JSG_VISITABLE_LAMBDA( - (this, ref = JSG_THIS, reason = jsg::JsRef(js, jsg::JsValue(reason))), (ref, reason), - (jsg::Lock & js)->jsg::Promise { - // If the readable side is errored, return a rejected promise with the stored error - { - KJ_IF_SOME(err, getReadableErrorState(js)) { - return js.rejectedPromise(kj::mv(err)); - } else { - // Else block to avert dangling else compiler warning. - } - } - // Otherwise... 
error with the given reason and resolve the abort promise - error(js, reason.getHandle(js)); - return js.resolvedPromise(); - }), - JSG_VISITABLE_LAMBDA((this, ref = JSG_THIS), (ref), - (jsg::Lock & js, jsg::Value reason)->jsg::Promise { - error(js, reason.getHandle(js)); - return js.rejectedPromise(kj::mv(reason)); - }), - jsg::JsValue(reason))) + [this, ref = JSG_THIS, reason = reason.addRef(js)]( + jsg::Lock& js) mutable -> jsg::Promise { + // If the readable side is errored, return a rejected promise with the stored error + KJ_IF_SOME(err, getReadableErrorState(js)) { + return js.rejectedPromise(err.getHandle(js)); + } + // Otherwise... error with the given reason and resolve the abort promise + error(js, reason.getHandle(js)); + return js.resolvedPromise(); + }, + [this, ref = JSG_THIS](jsg::Lock& js, jsg::Value reason) mutable -> jsg::Promise { + auto err = jsg::JsValue(reason.getHandle(js)); + error(js, err); + return js.rejectedPromise(err); + }, reason)) .whenResolved(js); } @@ -4516,7 +4288,7 @@ jsg::Promise TransformStreamDefaultController::close(jsg::Lock& js) { } } KJ_IF_SOME(err, getReadableErrorState(js)) { - return js.rejectedPromise(kj::mv(err)); + return js.rejectedPromise(err.getHandle(js)); } return js.resolvedPromise(); } @@ -4526,38 +4298,32 @@ jsg::Promise TransformStreamDefaultController::close(jsg::Lock& js) { algorithms.finishStarted = true; } - auto onSuccess = - JSG_VISITABLE_LAMBDA((ref = JSG_THIS), (ref), (jsg::Lock & js)->jsg::Promise { - // If the stream was errored during the flush algorithm (e.g., by controller.error() - // or by a parallel cancel() calling abort()), we should reject with that error. - if (FeatureFlags::get(js).getPedanticWpt()) { - KJ_IF_SOME(err, ref->getReadableErrorState(js)) { - return js.rejectedPromise(kj::mv(err)); - } else { - // Else block to avert dangling else compiler warning. - } - } - // Allows for a graceful close of the readable side. 
Close will - // complete once all of the queued data is read or the stream - // errors. Only close if the stream can still be closed (e.g., - // it wasn't closed by a cancel operation from within flush). - { - KJ_IF_SOME(readableController, ref->tryGetReadableController()) { - if (readableController.canCloseOrEnqueue()) { + auto onSuccess = [ref = JSG_THIS](jsg::Lock& js) mutable -> jsg::Promise { + // If the stream was errored during the flush algorithm (e.g., by controller.error() + // or by a parallel cancel() calling abort()), we should reject with that error. + if (FeatureFlags::get(js).getPedanticWpt()) { + KJ_IF_SOME(err, ref->getReadableErrorState(js)) { + return js.rejectedPromise(err.getHandle(js)); + } + } + // Allows for a graceful close of the readable side. Close will + // complete once all of the queued data is read or the stream + // errors. Only close if the stream can still be closed (e.g., + // it wasn't closed by a cancel operation from within flush). + KJ_IF_SOME(readableController, ref->tryGetReadableController()) { + if (readableController.canCloseOrEnqueue()) { readableController.close(js); - } - } else { - // Else block to avert dangling else compiler warning. 
- } - } - return js.resolvedPromise(); - }); + } + } + return js.resolvedPromise(); + }; - auto onFailure = JSG_VISITABLE_LAMBDA( - (ref = JSG_THIS), (ref), (jsg::Lock & js, jsg::Value reason)->jsg::Promise { - ref->error(js, reason.getHandle(js)); - return js.rejectedPromise(kj::mv(reason)); - }); + auto onFailure = [ref = JSG_THIS]( + jsg::Lock& js, jsg::Value reason) mutable -> jsg::Promise { + auto err = jsg::JsValue(reason.getHandle(js)); + ref->error(js, err); + return js.rejectedPromise(err); + }; if (flags.getPedanticWpt()) { return algorithms.maybeFinish @@ -4575,8 +4341,7 @@ jsg::Promise TransformStreamDefaultController::pull(jsg::Lock& js) { return KJ_ASSERT_NONNULL(maybeBackpressureChange).promise.whenResolved(js); } -jsg::Promise TransformStreamDefaultController::cancel( - jsg::Lock& js, v8::Local reason) { +jsg::Promise TransformStreamDefaultController::cancel(jsg::Lock& js, jsg::JsValue reason) { if (FeatureFlags::get(js).getPedanticWpt()) { // If a finish operation is already in progress, return the existing promise // or check for errors if we're being called synchronously from within another @@ -4588,7 +4353,7 @@ jsg::Promise TransformStreamDefaultController::cancel( // finishStarted is true but maybeFinish is not set yet - check if the stream // was errored during that operation. KJ_IF_SOME(err, getReadableErrorState(js)) { - return js.rejectedPromise(kj::mv(err)); + return js.rejectedPromise(err.getHandle(js)); } return js.resolvedPromise(); } @@ -4599,52 +4364,50 @@ jsg::Promise TransformStreamDefaultController::cancel( return algorithms.maybeFinish .emplace(maybeRunAlgorithm(js, algorithms.cancel, - JSG_VISITABLE_LAMBDA( - (this, ref = JSG_THIS, reason = jsg::JsRef(js, jsg::JsValue(reason))), (ref, reason), - (jsg::Lock & js)->jsg::Promise { - // If the stream was errored during the cancel algorithm (e.g., by controller.error() - // or by a parallel abort()), we should reject with that error. 
- if (FeatureFlags::get(js).getPedanticWpt()) { - KJ_IF_SOME(err, getReadableErrorState(js)) { - readable = kj::none; - errorWritableAndUnblockWrite(js, reason.getHandle(js)); - return js.rejectedPromise(kj::mv(err)); - } else { - // Else block to avert dangling else compiler warning. - } - } - readable = kj::none; - errorWritableAndUnblockWrite(js, reason.getHandle(js)); - return js.resolvedPromise(); - }), - JSG_VISITABLE_LAMBDA((this, ref = JSG_THIS), (ref), - (jsg::Lock & js, jsg::Value reason)->jsg::Promise { - readable = kj::none; - errorWritableAndUnblockWrite(js, reason.getHandle(js)); - return js.rejectedPromise(kj::mv(reason)); - }), - jsg::JsValue(reason))) + [this, ref = JSG_THIS, reason = reason.addRef(js)]( + jsg::Lock& js) mutable -> jsg::Promise { + // If the stream was errored during the cancel algorithm (e.g., by controller.error() + // or by a parallel abort()), we should reject with that error. + if (FeatureFlags::get(js).getPedanticWpt()) { + KJ_IF_SOME(err, getReadableErrorState(js)) { + readable = kj::none; + errorWritableAndUnblockWrite(js, reason.getHandle(js)); + return js.rejectedPromise(err.getHandle(js)); + } + } + readable = kj::none; + errorWritableAndUnblockWrite(js, reason.getHandle(js)); + return js.resolvedPromise(); + }, + [this, ref = JSG_THIS](jsg::Lock& js, jsg::Value reason) mutable -> jsg::Promise { + readable = kj::none; + auto error = jsg::JsValue(reason.getHandle(js)); + errorWritableAndUnblockWrite(js, error); + return js.rejectedPromise(error); + }, reason)) .whenResolved(js); } jsg::Promise TransformStreamDefaultController::performTransform( - jsg::Lock& js, v8::Local chunk) { + jsg::Lock& js, jsg::JsValue chunk) { if (algorithms.transform != kj::none) { - return maybeRunAlgorithm(js, algorithms.transform, - [](jsg::Lock& js) -> jsg::Promise { return js.resolvedPromise(); }, - JSG_VISITABLE_LAMBDA((ref = JSG_THIS), (ref), - (jsg::Lock & js, jsg::Value reason)->jsg::Promise { - ref->error(js, reason.getHandle(js)); - 
return js.rejectedPromise(kj::mv(reason)); - }), - chunk, JSG_THIS); + return maybeRunAlgorithm(js, algorithms.transform, [](jsg::Lock& js) -> jsg::Promise { + return js.resolvedPromise(); + }, [ref = JSG_THIS](jsg::Lock& js, jsg::Value reason) mutable -> jsg::Promise { + auto error = jsg::JsValue(reason.getHandle(js)); + ref->error(js, error); + return js.rejectedPromise(error); + }, chunk, JSG_THIS); } // If we got here, there is no transform algorithm. Per the spec, the default // behavior then is to just pass along the value untransformed. - return js.tryCatch([&] { + JSG_TRY(js) { enqueue(js, chunk); return js.resolvedPromise(); - }, [&](jsg::Value exception) { return js.rejectedPromise(kj::mv(exception)); }); + } + JSG_CATCH(exception) { + return js.rejectedPromise(kj::mv(exception)); + } } void TransformStreamDefaultController::setBackpressure(jsg::Lock& js, bool newBackpressure) { @@ -4658,7 +4421,7 @@ void TransformStreamDefaultController::setBackpressure(jsg::Lock& js, bool newBa } void TransformStreamDefaultController::errorWritableAndUnblockWrite( - jsg::Lock& js, v8::Local reason) { + jsg::Lock& js, jsg::JsValue reason) { algorithms.clear(); KJ_IF_SOME(writableController, tryGetWritableController()) { if (FeatureFlags::get(js).getPedanticWpt()) { @@ -4724,14 +4487,11 @@ void TransformStreamDefaultController::init(jsg::Lock& js, setBackpressure(js, true); - maybeRunAlgorithm(js, transformer.start, - JSG_VISITABLE_LAMBDA( - (ref = JSG_THIS), (ref), (jsg::Lock& js) { ref->startPromise.resolver.resolve(js); }), - JSG_VISITABLE_LAMBDA((ref = JSG_THIS), (ref), - (jsg::Lock& js, jsg::Value reason) { - ref->startPromise.resolver.reject(js, reason.getHandle(js)); - }), - JSG_THIS); + maybeRunAlgorithm(js, transformer.start, [ref = JSG_THIS](jsg::Lock& js) mutable { + ref->startPromise.resolver.resolve(js); + }, [ref = JSG_THIS](jsg::Lock& js, jsg::Value reason) mutable { + ref->startPromise.resolver.reject(js, reason.getHandle(js)); + }, JSG_THIS); } kj::Maybe 
TransformStreamDefaultController:: @@ -4750,7 +4510,8 @@ kj::Maybe TransformStreamDefaultController:: return kj::none; } -kj::Maybe TransformStreamDefaultController::getReadableErrorState(jsg::Lock& js) { +kj::Maybe> TransformStreamDefaultController::getReadableErrorState( + jsg::Lock& js) { KJ_IF_SOME(controller, tryGetReadableController()) { return controller.getMaybeErrorState(js); } @@ -4903,21 +4664,20 @@ void TransformStreamDefaultController::visitForMemoryInfo(jsg::MemoryTracker& tr // ====================================================================================== jsg::Ref ReadableStream::from( - jsg::Lock& js, jsg::AsyncGenerator generator) { + jsg::Lock& js, jsg::AsyncGenerator> generator) { // AsyncGenerator is not a refcounted type, so we need to wrap it in a refcounted // struct so that we can keep it alive through the various promise branches below. auto rcGenerator = - kj::rc>>(kj::mv(generator)); + kj::rc>>>(kj::mv(generator)); // clang-format off return constructor(js, UnderlyingSource{ .pull = [generator = rcGenerator.addRef()](jsg::Lock& js, auto controller) mutable { auto& c = controller.template get(); return generator->getWrapped().next(js).then(js, - JSG_VISITABLE_LAMBDA((controller = c.addRef(), generator = generator.addRef()), - (controller), - (jsg::Lock& js, kj::Maybe value) { + [controller = c.addRef(), generator = generator.addRef()] + (jsg::Lock& js, kj::Maybe value) mutable { KJ_IF_SOME(v, value) { auto handle = v.getHandle(js); // Per the ReadableStream.from spec, if the value is a promise, @@ -4927,28 +4687,27 @@ jsg::Ref ReadableStream::from( // are promises will be slow, but that's the spec. 
if (handle->IsPromise()) { return js.toPromise(handle.As()).then(js, - JSG_VISITABLE_LAMBDA( - (controller=controller.addRef()), - (controller), - (jsg::Lock& js, jsg::Value val) mutable { - controller->enqueue(js, val.getHandle(js)); + [controller=controller.addRef()] + (jsg::Lock& js, jsg::V8Ref val) mutable { + controller->enqueue(js, jsg::JsValue(val.getHandle(js))); return js.resolvedPromise(); - })); + }); } - controller->enqueue(js, v.getHandle(js)); + controller->enqueue(js, jsg::JsValue(v.getHandle(js))); } else { controller->close(js); } return js.resolvedPromise(); - }), - JSG_VISITABLE_LAMBDA((controller = c.addRef(), generator = generator.addRef()), - (controller), (jsg::Lock& js, jsg::Value reason) { - controller->error(js, reason.getHandle(js)); - return js.rejectedPromise(kj::mv(reason)); - })); + }, + [controller = c.addRef(), generator = generator.addRef()] + (jsg::Lock& js, jsg::Value reason) mutable { + auto handle = jsg::JsValue(reason.getHandle(js)); + controller->error(js, handle); + return js.rejectedPromise(handle); + }); }, - .cancel = [generator = rcGenerator.addRef()](jsg::Lock& js, auto reason) mutable { - return generator->getWrapped().return_(js, js.v8Ref(reason)) + .cancel = [generator = rcGenerator.addRef()](jsg::Lock& js, jsg::JsValue reason) mutable { + return generator->getWrapped().return_(js, js.v8Ref(reason)) .then(js, [generator = kj::mv(generator)](auto& lock, auto) { // The generator might produce a value on return and might even want to continue, // but the stream has been canceled at this point, so we stop here. diff --git a/src/workerd/api/streams/standard.h b/src/workerd/api/streams/standard.h index e7e2499971d..3808bef70c8 100644 --- a/src/workerd/api/streams/standard.h +++ b/src/workerd/api/streams/standard.h @@ -143,14 +143,14 @@ class ReadableImpl { void start(jsg::Lock& js, jsg::Ref self); // If the readable is not already closed or errored, initiates a cancellation. 
- jsg::Promise cancel(jsg::Lock& js, jsg::Ref self, v8::Local maybeReason); + jsg::Promise cancel(jsg::Lock& js, jsg::Ref self, jsg::JsValue maybeReason); // True if the readable is not closed, not errored, and close has not already been requested. bool canCloseOrEnqueue(); // Invokes the cancel algorithm to let the underlying source know that the // readable has been canceled. - void doCancel(jsg::Lock& js, jsg::Ref self, v8::Local reason); + void doCancel(jsg::Lock& js, jsg::Ref self, jsg::JsValue reason); // Close the queue if we are in a state where we can be closed. void close(jsg::Lock& js); @@ -162,7 +162,7 @@ class ReadableImpl { // If it isn't already errored or closed, errors the queue, causing all consumers to be errored // and detached. - void doError(jsg::Lock& js, jsg::Value reason); + void doError(jsg::Lock& js, jsg::JsValue reason); // When a negative number is returned, indicates that we are above the highwatermark // and backpressure should be signaled. @@ -277,7 +277,7 @@ class WritableImpl { struct WriteRequest { jsg::Promise::Resolver resolver; - jsg::Value value; + jsg::JsRef value; size_t size; void visitForGc(jsg::GcVisitor& visitor) { @@ -292,29 +292,29 @@ class WritableImpl { WritableImpl(jsg::Lock& js, WritableStream& owner, jsg::Ref abortSignal); - jsg::Promise abort(jsg::Lock& js, jsg::Ref self, v8::Local reason); + jsg::Promise abort(jsg::Lock& js, jsg::Ref self, jsg::JsValue reason); void advanceQueueIfNeeded(jsg::Lock& js, jsg::Ref self); jsg::Promise close(jsg::Lock& js, jsg::Ref self); - void dealWithRejection(jsg::Lock& js, jsg::Ref self, v8::Local reason); + void dealWithRejection(jsg::Lock& js, jsg::Ref self, jsg::JsValue reason); WriteRequest dequeueWriteRequest(); void doClose(jsg::Lock& js); - void doError(jsg::Lock& js, v8::Local reason); + void doError(jsg::Lock& js, jsg::JsValue reason); - void error(jsg::Lock& js, jsg::Ref self, v8::Local reason); + void error(jsg::Lock& js, jsg::Ref self, jsg::JsValue reason); void 
finishErroring(jsg::Lock& js, jsg::Ref self); void finishInFlightClose( - jsg::Lock& js, jsg::Ref self, kj::Maybe> reason = kj::none); + jsg::Lock& js, jsg::Ref self, kj::Maybe reason = kj::none); void finishInFlightWrite( - jsg::Lock& js, jsg::Ref self, kj::Maybe> reason = kj::none); + jsg::Lock& js, jsg::Ref self, kj::Maybe reason = kj::none); ssize_t getDesiredSize(); @@ -331,7 +331,7 @@ class WritableImpl { // Puts the writable into an erroring state. This allows any in flight write or // close to complete before actually transitioning the writable. - void startErroring(jsg::Lock& js, jsg::Ref self, v8::Local reason); + void startErroring(jsg::Lock& js, jsg::Ref self, jsg::JsValue reason); // Notifies the Writer of the current backpressure state. If the amount of data queued // is equal to or above the highwatermark, then backpressure is applied. @@ -339,7 +339,7 @@ class WritableImpl { // Writes a chunk to the Writable, possibly queuing the chunk in the internal buffer // if there are already other writes pending. 
- jsg::Promise write(jsg::Lock& js, jsg::Ref self, v8::Local value); + jsg::Promise write(jsg::Lock& js, jsg::Ref self, jsg::JsValue value); // True if the writable is in a state where new chunks can be written bool isWritable() const; @@ -418,7 +418,7 @@ class WritableImpl { kj::Maybe inFlightWrite; kj::Maybe::Resolver> inFlightClose; kj::Maybe::Resolver> closeRequest; - kj::Maybe> maybePendingAbort; + kj::Maybe maybePendingAbort; struct Flags { uint8_t started : 1 = 0; @@ -446,7 +446,7 @@ class ReadableStreamDefaultController: public jsg::Object { void start(jsg::Lock& js); - jsg::Promise cancel(jsg::Lock& js, jsg::Optional> maybeReason); + jsg::Promise cancel(jsg::Lock& js, jsg::Optional maybeReason); void close(jsg::Lock& js); @@ -454,9 +454,9 @@ class ReadableStreamDefaultController: public jsg::Object { bool hasBackpressure(); kj::Maybe getDesiredSize(); - void enqueue(jsg::Lock& js, jsg::Optional> chunk); + void enqueue(jsg::Lock& js, jsg::Optional chunk); - void error(jsg::Lock& js, v8::Local reason); + void error(jsg::Lock& js, jsg::JsValue reason); void pull(jsg::Lock& js); @@ -584,13 +584,13 @@ class ReadableByteStreamController: public jsg::Object { void start(jsg::Lock& js); - jsg::Promise cancel(jsg::Lock& js, jsg::Optional> maybeReason); + jsg::Promise cancel(jsg::Lock& js, jsg::Optional maybeReason); void close(jsg::Lock& js); void enqueue(jsg::Lock& js, jsg::BufferSource chunk); - void error(jsg::Lock& js, v8::Local reason); + void error(jsg::Lock& js, jsg::JsValue reason); bool canCloseOrEnqueue(); bool hasBackpressure(); @@ -652,17 +652,17 @@ class WritableStreamDefaultController: public jsg::Object { ~WritableStreamDefaultController() noexcept(false); - jsg::Promise abort(jsg::Lock& js, v8::Local reason); + jsg::Promise abort(jsg::Lock& js, jsg::JsValue reason); jsg::Promise close(jsg::Lock& js); - void error(jsg::Lock& js, jsg::Optional> reason); + void error(jsg::Lock& js, jsg::Optional reason); kj::Maybe getDesiredSize(); jsg::Ref 
getSignal(); - kj::Maybe> isErroring(jsg::Lock& js); + kj::Maybe isErroring(jsg::Lock& js); // Returns true if the stream is in the erroring state. Unlike the overload // that takes a lock, this method does not require a lock since it doesn't @@ -679,7 +679,7 @@ class WritableStreamDefaultController: public jsg::Object { void setup(jsg::Lock& js, UnderlyingSink underlyingSink, StreamQueuingStrategy queuingStrategy); - jsg::Promise write(jsg::Lock& js, v8::Local value); + jsg::Promise write(jsg::Lock& js, jsg::JsValue value); JSG_RESOURCE_TYPE(WritableStreamDefaultController) { JSG_READONLY_PROTOTYPE_PROPERTY(signal, getSignal); @@ -728,9 +728,9 @@ class TransformStreamDefaultController: public jsg::Object { kj::Maybe getDesiredSize(); - void enqueue(jsg::Lock& js, v8::Local chunk); + void enqueue(jsg::Lock& js, jsg::JsValue chunk); - void error(jsg::Lock& js, v8::Local reason); + void error(jsg::Lock& js, jsg::JsValue reason); void terminate(jsg::Lock& js); @@ -745,11 +745,11 @@ class TransformStreamDefaultController: public jsg::Object { }); } - jsg::Promise write(jsg::Lock& js, v8::Local chunk); - jsg::Promise abort(jsg::Lock& js, v8::Local reason); + jsg::Promise write(jsg::Lock& js, jsg::JsValue chunk); + jsg::Promise abort(jsg::Lock& js, jsg::JsValue reason); jsg::Promise close(jsg::Lock& js); jsg::Promise pull(jsg::Lock& js); - jsg::Promise cancel(jsg::Lock& js, v8::Local reason); + jsg::Promise cancel(jsg::Lock& js, jsg::JsValue reason); void visitForMemoryInfo(jsg::MemoryTracker& tracker) const; @@ -781,8 +781,8 @@ class TransformStreamDefaultController: public jsg::Object { } }; - void errorWritableAndUnblockWrite(jsg::Lock& js, v8::Local reason); - jsg::Promise performTransform(jsg::Lock& js, v8::Local chunk); + void errorWritableAndUnblockWrite(jsg::Lock& js, jsg::JsValue reason); + jsg::Promise performTransform(jsg::Lock& js, jsg::JsValue chunk); void setBackpressure(jsg::Lock& js, bool newBackpressure); kj::Maybe ioContext; @@ -791,7 +791,7 @@ class 
TransformStreamDefaultController: public jsg::Object { kj::Maybe tryGetReadableController(); kj::Maybe tryGetWritableController(); - kj::Maybe getReadableErrorState(jsg::Lock& js); + kj::Maybe> getReadableErrorState(jsg::Lock& js); // Currently, JS-backed transform streams only support value-oriented streams. // In the future, that may change and this will need to become a kj::OneOf diff --git a/src/workerd/api/streams/writable-sink-adapter-test.c++ b/src/workerd/api/streams/writable-sink-adapter-test.c++ index eeaa836bacb..46a61c5cdab 100644 --- a/src/workerd/api/streams/writable-sink-adapter-test.c++ +++ b/src/workerd/api/streams/writable-sink-adapter-test.c++ @@ -612,17 +612,12 @@ KJ_TEST("zero-length writes are a non-op (ArrayBuffer)") { auto adapter = kj::heap( env.js, env.context, newWritableSink(kj::mv(recordingSink))); - auto backing = jsg::BackingStore::alloc(env.js, 0); - jsg::BufferSource source(env.js, kj::mv(backing)); - jsg::JsValue handle(source.getHandle(env.js)); - + auto handle = jsg::JsArrayBuffer::create(env.js, 0); auto writePromise = adapter->write(env.js, handle); KJ_ASSERT(state.writeCalled == 0, "Underlying sink's write() should not have been called"); - return env.context - .awaitJs(env.js, writePromise.then(env.js, [&state](jsg::Lock& js) { - KJ_ASSERT(state.writeCalled == 0, "Underlying sink's write() should not have been called"); - })).attach(kj::mv(adapter)); + return env.context.awaitJs( + env.js, writePromise.then(env.js, [adapter = kj::mv(adapter)](jsg::Lock& js) {})); }); } @@ -638,21 +633,18 @@ KJ_TEST("writing small ArrayBuffer") { .highWaterMark = 10, }); - auto backing = jsg::BackingStore::alloc(env.js, 10); - jsg::BufferSource source(env.js, kj::mv(backing)); - jsg::JsValue handle(source.getHandle(env.js)); + auto handle = jsg::JsArrayBuffer::create(env.js, 10); auto writePromise = adapter->write(env.js, handle); - KJ_ASSERT(state.writeCalled == 1, "Underlying sink's write() should not have been called"); + 
KJ_ASSERT(state.writeCalled == 1, "Underlying sink's write() should have been called"); KJ_ASSERT(KJ_ASSERT_NONNULL(adapter->getDesiredSize()) == 0, "Adapter's desired size should be 0 after writing highWaterMark bytes"); - return env.context - .awaitJs(env.js, writePromise.then(env.js, [&state, &adapter = *adapter](jsg::Lock& js) { - KJ_ASSERT(state.writeCalled == 1, "Underlying sink's write() should not have been called"); - KJ_ASSERT(KJ_ASSERT_NONNULL(adapter.getDesiredSize()) == 10, + return env.context.awaitJs( + env.js, writePromise.then(env.js, [adapter = kj::mv(adapter)](jsg::Lock& js) mutable { + KJ_ASSERT(KJ_ASSERT_NONNULL(adapter->getDesiredSize()) == 10, "Back to initial desired size after write completes"); - })).attach(kj::mv(adapter)); + })); }); } @@ -668,21 +660,18 @@ KJ_TEST("writing medium ArrayBuffer") { .highWaterMark = 5 * 1024, }); - auto backing = jsg::BackingStore::alloc(env.js, 4 * 1024); - jsg::BufferSource source(env.js, kj::mv(backing)); - jsg::JsValue handle(source.getHandle(env.js)); + auto handle = jsg::JsArrayBuffer::create(env.js, 4 * 1024); auto writePromise = adapter->write(env.js, handle); - KJ_ASSERT(state.writeCalled == 1, "Underlying sink's write() should not have been called"); + KJ_ASSERT(state.writeCalled == 1, "Underlying sink's write() should have been called"); KJ_ASSERT(KJ_ASSERT_NONNULL(adapter->getDesiredSize()) == 1024, "Adapter's desired size should be 1024 after writing 4 * 1024 bytes"); - return env.context - .awaitJs(env.js, writePromise.then(env.js, [&state, &adapter = *adapter](jsg::Lock& js) { - KJ_ASSERT(state.writeCalled == 1, "Underlying sink's write() should not have been called"); - KJ_ASSERT(KJ_ASSERT_NONNULL(adapter.getDesiredSize()) == 5 * 1024, + return env.context.awaitJs( + env.js, writePromise.then(env.js, [adapter = kj::mv(adapter)](jsg::Lock& js) mutable { + KJ_ASSERT(KJ_ASSERT_NONNULL(adapter->getDesiredSize()) == 5 * 1024, "Back to initial desired size after write completes"); - 
})).attach(kj::mv(adapter)); + })); }); } @@ -698,21 +687,17 @@ KJ_TEST("writing large ArrayBuffer") { .highWaterMark = 8 * 1024, }); - auto backing = jsg::BackingStore::alloc(env.js, 16 * 1024); - jsg::BufferSource source(env.js, kj::mv(backing)); - jsg::JsValue handle(source.getHandle(env.js)); - + auto handle = jsg::JsArrayBuffer::create(env.js, 16 * 1024); auto writePromise = adapter->write(env.js, handle); - KJ_ASSERT(state.writeCalled == 1, "Underlying sink's write() should not have been called"); + KJ_ASSERT(state.writeCalled == 1, "Underlying sink's write() should have been called"); KJ_ASSERT(KJ_ASSERT_NONNULL(adapter->getDesiredSize()) == -(8 * 1024), "Adapter's desired size should be negative after writing 16 * 1024 bytes"); - return env.context - .awaitJs(env.js, writePromise.then(env.js, [&state, &adapter = *adapter](jsg::Lock& js) { - KJ_ASSERT(state.writeCalled == 1, "Underlying sink's write() should not have been called"); - KJ_ASSERT(KJ_ASSERT_NONNULL(adapter.getDesiredSize()) == 8 * 1024, + return env.context.awaitJs( + env.js, writePromise.then(env.js, [adapter = kj::mv(adapter)](jsg::Lock& js) mutable { + KJ_ASSERT(KJ_ASSERT_NONNULL(adapter->getDesiredSize()) == 8 * 1024, "Back to initial desired size after write completes"); - })).attach(kj::mv(adapter)); + })); }); } @@ -756,19 +741,16 @@ KJ_TEST("large number of large writes") { kj::heap(env.js, env.context, newWritableSink(kj::mv(fake))); for (int i = 0; i < 1000; i++) { - auto backing = jsg::BackingStore::alloc(env.js, 16 * 1024); - jsg::BufferSource source(env.js, kj::mv(backing)); - jsg::JsValue handle(source.getHandle(env.js)); - + auto handle = jsg::JsArrayBuffer::create(env.js, 16 * 1024); adapter->write(env.js, handle); } auto endPromise = adapter->end(env.js); - return env.context - .awaitJs(env.js, - endPromise.then(env.js, [&state = sink.getState(), &adapter = *adapter](jsg::Lock& js) { + return env.context.awaitJs(env.js, + endPromise.then( + env.js, [&state = sink.getState(), 
adapter = kj::mv(adapter)](jsg::Lock& js) { KJ_ASSERT(state.writeCalled == 1000, "Underlying sink's write() should have been called"); - })).attach(kj::mv(adapter)); + })); }); } @@ -813,15 +795,14 @@ KJ_TEST("detachOnWrite option detaches ArrayBuffer before write") { .detachOnWrite = true, }); - auto backing = jsg::BackingStore::alloc(env.js, 10); - jsg::BufferSource source(env.js, kj::mv(backing)); - KJ_ASSERT(!source.isDetached()); - jsg::JsValue handle(source.getHandle(env.js)); + auto handle = jsg::JsArrayBuffer::create(env.js, 10); + KJ_ASSERT(!handle.isDetached()); + KJ_ASSERT(handle.size() == 10); auto writePromise = adapter->write(env.js, handle); - jsg::BufferSource source2(env.js, handle); - KJ_ASSERT(source2.size() == 0); + KJ_ASSERT(handle.isDetached()); + KJ_ASSERT(handle.size() == 0); return env.context.awaitJs(env.js, kj::mv(writePromise)).attach(kj::mv(adapter)); }); @@ -838,15 +819,14 @@ KJ_TEST("detachOnWrite option detaches Uint8Array before write") { .detachOnWrite = true, }); - auto backing = jsg::BackingStore::alloc(env.js, 10); - jsg::BufferSource source(env.js, kj::mv(backing)); - KJ_ASSERT(!source.isDetached()); - jsg::JsValue handle(source.getHandle(env.js)); + auto handle = jsg::JsUint8Array::create(env.js, 10); + KJ_ASSERT(!handle.isDetached()); + KJ_ASSERT(handle.size() == 10); auto writePromise = adapter->write(env.js, handle); - jsg::BufferSource source2(env.js, handle); - KJ_ASSERT(source2.size() == 0); + KJ_ASSERT(handle.isDetached()); + KJ_ASSERT(handle.size() == 0); return env.context.awaitJs(env.js, kj::mv(writePromise)).attach(kj::mv(adapter)); }); @@ -911,14 +891,12 @@ jsg::Ref createSimpleWritableStream(jsg::Lock& js, WritableStrea UnderlyingSink{ .write = [&context](jsg::Lock& js, auto chunk, auto) { - jsg::BufferSource source(js, chunk); - auto data = kj::heapArray(source.asArrayPtr()); - context.chunks.add(kj::mv(data)); + context.chunks.add(jsg::JsBufferSource(chunk).copy()); return js.resolvedPromise(); }, .abort = 
[&context](jsg::Lock& js, auto reason) { - context.maybeAbort = jsg::JsRef(js, jsg::JsValue(reason)); + context.maybeAbort = reason.addRef(js); return js.resolvedPromise(); }, .close = diff --git a/src/workerd/api/streams/writable-sink-adapter.c++ b/src/workerd/api/streams/writable-sink-adapter.c++ index 4b15143776b..2a333970998 100644 --- a/src/workerd/api/streams/writable-sink-adapter.c++ +++ b/src/workerd/api/streams/writable-sink-adapter.c++ @@ -176,7 +176,7 @@ jsg::Promise WritableStreamSinkJsAdapter::write(jsg::Lock& js, const jsg:: KJ_IF_SOME(exc, state.tryGetErrorUnsafe()) { // Really should not have been called if errored but just in case, // return a rejected promise. - return js.rejectedPromise(js.exceptionToJs(exc.clone())); + return js.rejectedPromise(js.exceptionToJsValue(exc.clone())); } if (state.is()) { @@ -204,12 +204,12 @@ jsg::Promise WritableStreamSinkJsAdapter::write(jsg::Lock& js, const jsg:: // types: ArrayBuffer, ArrayBufferView, and String. If it is a string, // we convert it to UTF-8 bytes. Anything else is an error. if (value.isArrayBufferView() || value.isArrayBuffer() || value.isSharedArrayBuffer()) { - // We can just wrap the value with a jsg::BufferSource and write it. - jsg::BufferSource source(js, value); - if (active.options.detachOnWrite && source.canDetach(js)) { + // We can just wrap the value with a buffer source and write it. + jsg::JsBufferSource source(value); + if (active.options.detachOnWrite && source.isDetachable()) { // Detach from the original ArrayBuffer... - // ... and re-wrap it with a new BufferSource that we own. - source = jsg::BufferSource(js, source.detach(js)); + // ... and re-wrap it with a new buffer source that we own. + source = source.detachAndTake(js); } // Zero-length writes are a no-op. @@ -240,12 +240,14 @@ jsg::Promise WritableStreamSinkJsAdapter::write(jsg::Lock& js, const jsg:: // held by the write queue, which is itself held by Active. 
If active // is destroyed, the write queue is destroyed along with the lambda. auto promise = - active.enqueue(kj::coCapture([&active, source = kj::mv(source)]() -> kj::Promise { - co_await active.sink->write(source.asArrayPtr()); - active.bytesInFlight -= source.size(); + active.enqueue(kj::coCapture([&active, ptr = source.asArrayPtr()]() -> kj::Promise { + co_await active.sink->write(ptr); + active.bytesInFlight -= ptr.size(); })); + return ioContext - .awaitIo(js, kj::mv(promise), [self = selfRef.addRef()](jsg::Lock& js) { + .awaitIo(js, kj::mv(promise), + [self = selfRef.addRef(), source = source.addRef(js)](jsg::Lock& js) { // Why do we need a weak ref here? Well, because this is a JavaScript // promise continuation. It is possible that the kj::Own holding our // adapter can be dropped while we are waiting for the continuation @@ -306,7 +308,7 @@ jsg::Promise WritableStreamSinkJsAdapter::flush(jsg::Lock& js) { KJ_IF_SOME(exc, state.tryGetErrorUnsafe()) { // Really should not have been called if errored but just in case, // return a rejected promise. - return js.rejectedPromise(js.exceptionToJs(exc.clone())); + return js.rejectedPromise(js.exceptionToJsValue(exc.clone())); } if (state.is()) { @@ -343,7 +345,7 @@ jsg::Promise WritableStreamSinkJsAdapter::end(jsg::Lock& js) { KJ_IF_SOME(exc, state.tryGetErrorUnsafe()) { // Really should not have been called if errored but just in case, // return a rejected promise. - return js.rejectedPromise(js.exceptionToJs(exc.clone())); + return js.rejectedPromise(js.exceptionToJsValue(exc.clone())); } if (state.is()) { @@ -608,18 +610,14 @@ kj::Promise WritableStreamSinkKjAdapter::write( // WritableStream API has no concept of a vector write, so each write // would incur the overhead of a separate promise and microtask checkpoint. // By collapsing into a single write we reduce that overhead. 
- auto backing = jsg::BackingStore::alloc(js, totalAmount); - auto ptr = backing.asArrayPtr(); - for (auto piece: pieces) { - ptr.first(piece.size()).copyFrom(piece); - ptr = ptr.slice(piece.size()); - } - jsg::BufferSource source(js, kj::mv(backing)); + auto source = jsg::JsArrayBuffer::create(js, totalAmount); + source.asArrayPtr().write(pieces); - auto ready = KJ_ASSERT_NONNULL(writer->isReady(js)); auto promise = - ready.then(js, [writer = writer.addRef(), source = kj::mv(source)](jsg::Lock& js) mutable { - return writer->write(js, source.getHandle(js)); + KJ_ASSERT_NONNULL(writer->isReady(js)) + .then( + js, [writer = writer.addRef(), source = source.addRef(js)](jsg::Lock& js) mutable { + return writer->write(js, jsg::JsValue(source.getHandle(js))); }); return IoContext::current().awaitJs(js, kj::mv(promise)); })).then([self = selfRef.addRef()]() { diff --git a/src/workerd/api/streams/writable.c++ b/src/workerd/api/streams/writable.c++ index 22e9849501d..94991479558 100644 --- a/src/workerd/api/streams/writable.c++ +++ b/src/workerd/api/streams/writable.c++ @@ -30,11 +30,11 @@ jsg::Ref WritableStreamDefaultWriter::constructor( } jsg::Promise WritableStreamDefaultWriter::abort( - jsg::Lock& js, jsg::Optional> reason) { + jsg::Lock& js, jsg::Optional reason) { assertAttachedOrTerminal(); if (state.is()) { return js.rejectedPromise( - js.v8TypeError("This WritableStream writer has been released."_kj)); + js.typeError("This WritableStream writer has been released."_kj)); } if (state.is()) { return js.resolvedPromise(); @@ -62,10 +62,10 @@ jsg::Promise WritableStreamDefaultWriter::close(jsg::Lock& js) { assertAttachedOrTerminal(); if (state.is()) { return js.rejectedPromise( - js.v8TypeError("This WritableStream writer has been released."_kj)); + js.typeError("This WritableStream writer has been released."_kj)); } if (state.is()) { - return js.rejectedPromise(js.v8TypeError("This WritableStream has been closed."_kj)); + return js.rejectedPromise(js.typeError("This 
WritableStream has been closed."_kj)); } auto& attached = state.requireActiveUnsafe(); // In some edge cases, this writer is the last thing holding a strong @@ -135,14 +135,14 @@ void WritableStreamDefaultWriter::replaceReadyPromise( } jsg::Promise WritableStreamDefaultWriter::write( - jsg::Lock& js, jsg::Optional> chunk) { + jsg::Lock& js, jsg::Optional chunk) { assertAttachedOrTerminal(); if (state.is()) { return js.rejectedPromise( - js.v8TypeError("This WritableStream writer has been released."_kj)); + js.typeError("This WritableStream writer has been released."_kj)); } if (state.is()) { - return js.rejectedPromise(js.v8TypeError("This WritableStream has been closed."_kj)); + return js.rejectedPromise(js.typeError("This WritableStream has been closed."_kj)); } auto& attached = state.requireActiveUnsafe(); return attached.stream->getController().write(js, chunk); @@ -215,11 +215,10 @@ void WritableStream::detach(jsg::Lock& js) { getController().detach(js); } -jsg::Promise WritableStream::abort( - jsg::Lock& js, jsg::Optional> reason) { +jsg::Promise WritableStream::abort(jsg::Lock& js, jsg::Optional reason) { if (isLocked()) { return js.rejectedPromise( - js.v8TypeError("This WritableStream is currently locked to a writer."_kj)); + js.typeError("This WritableStream is currently locked to a writer."_kj)); } return getController().abort(js, reason); } @@ -227,7 +226,7 @@ jsg::Promise WritableStream::abort( jsg::Promise WritableStream::close(jsg::Lock& js) { if (isLocked()) { return js.rejectedPromise( - js.v8TypeError("This WritableStream is currently locked to a writer."_kj)); + js.typeError("This WritableStream is currently locked to a writer."_kj)); } return getController().close(js); } @@ -235,7 +234,7 @@ jsg::Promise WritableStream::close(jsg::Lock& js) { jsg::Promise WritableStream::flush(jsg::Lock& js) { if (isLocked()) { return js.rejectedPromise( - js.v8TypeError("This WritableStream is currently locked to a writer."_kj)); + js.typeError("This 
WritableStream is currently locked to a writer."_kj)); } return getController().flush(js); } @@ -369,7 +368,7 @@ class WritableStreamJsRpcAdapter final: public capnp::ExplicitEndOutputStream { context.addTask(context.run([writer = kj::mv(writer), exception = cancellationException()]( Worker::Lock& lock) mutable { jsg::Lock& js = lock; - auto ex = js.exceptionToJs(kj::mv(exception)); + auto ex = js.exceptionToJsValue(kj::mv(exception)); return IoContext::current().awaitJs(lock, writer->abort(lock, ex.getHandle(js))); })); } @@ -394,7 +393,7 @@ class WritableStreamJsRpcAdapter final: public capnp::ExplicitEndOutputStream { obj.context.run([writer = kj::mv(writer), exception = cancellationException()]( Worker::Lock& lock) mutable { jsg::Lock& js = lock; - auto ex = js.exceptionToJs(kj::mv(exception)); + auto ex = js.exceptionToJsValue(kj::mv(exception)); return IoContext::current().awaitJs(lock, writer->abort(lock, ex.getHandle(js))); })); } @@ -409,9 +408,8 @@ class WritableStreamJsRpcAdapter final: public capnp::ExplicitEndOutputStream { if (buffer == nullptr) return kj::READY_NOW; return canceler.wrap(context.run([this, buffer](Worker::Lock& lock) mutable { auto& writer = getInner(); - auto source = KJ_ASSERT_NONNULL(jsg::BufferSource::tryAlloc(lock, buffer.size())); - source.asArrayPtr().copyFrom(buffer); - return context.awaitJs(lock, writer.write(lock, source.getHandle(lock))); + auto ab = jsg::JsArrayBuffer::create(lock, buffer); + return context.awaitJs(lock, writer.write(lock, jsg::JsValue(ab))); })); } @@ -430,17 +428,16 @@ class WritableStreamJsRpcAdapter final: public capnp::ExplicitEndOutputStream { // guaranteed to live until the returned promise is resolved, but the application code // may hold onto the ArrayBuffer for longer. We need to make sure that the backing store // for the ArrayBuffer remains valid. 
- auto source = KJ_ASSERT_NONNULL(jsg::BufferSource::tryAlloc(lock, amount)); - auto ptr = source.asArrayPtr(); + auto ab = jsg::JsArrayBuffer::create(lock, amount); + auto ptr = ab.asArrayPtr(); for (auto& piece: pieces) { KJ_DASSERT(ptr.size() > 0); KJ_DASSERT(piece.size() <= ptr.size()); if (piece.size() == 0) continue; - ptr.first(piece.size()).copyFrom(piece); - ptr = ptr.slice(piece.size()); + ptr.write(piece); } - return context.awaitJs(lock, writer.write(lock, source.getHandle(lock))); + return context.awaitJs(lock, writer.write(lock, jsg::JsValue(ab))); })); } diff --git a/src/workerd/api/streams/writable.h b/src/workerd/api/streams/writable.h index 6db12ef0257..d3fba654fd4 100644 --- a/src/workerd/api/streams/writable.h +++ b/src/workerd/api/streams/writable.h @@ -26,7 +26,7 @@ class WritableStreamDefaultWriter: public jsg::Object, public WritableStreamCont jsg::MemoizedIdentity>& getReady(); kj::Maybe getDesiredSize(); - jsg::Promise abort(jsg::Lock& js, jsg::Optional> reason); + jsg::Promise abort(jsg::Lock& js, jsg::Optional reason); // Closes the stream. All present write requests will complete, but future write requests will // be rejected with a TypeError to the effect of "This writable stream has been closed." @@ -40,7 +40,7 @@ class WritableStreamDefaultWriter: public jsg::Object, public WritableStreamCont // complete on this side if we don't care that they're actually read? jsg::Promise close(jsg::Lock& js); - jsg::Promise write(jsg::Lock& js, jsg::Optional> chunk); + jsg::Promise write(jsg::Lock& js, jsg::Optional chunk); void releaseLock(jsg::Lock& js); JSG_RESOURCE_TYPE(WritableStreamDefaultWriter, CompatibilityFlags::Reader flags) { @@ -172,7 +172,7 @@ class WritableStream: public jsg::Object { // effect of "This writable stream has been requested to abort." 
`reason` will be passed to the // underlying sink's abort algorithm -- if this writable stream is one side of a transform stream, // then its abort algorithm causes the transform's readable side to become errored with `reason`. - jsg::Promise abort(jsg::Lock& js, jsg::Optional> reason); + jsg::Promise abort(jsg::Lock& js, jsg::Optional reason); jsg::Promise close(jsg::Lock& js); jsg::Promise flush(jsg::Lock& js); diff --git a/src/workerd/api/sync-kv.c++ b/src/workerd/api/sync-kv.c++ index 26b194b7775..816d2f4618c 100644 --- a/src/workerd/api/sync-kv.c++ +++ b/src/workerd/api/sync-kv.c++ @@ -4,8 +4,7 @@ #include "sync-kv.h" -#include "actor-state.h" - +#include #include namespace workerd::api { @@ -110,12 +109,17 @@ void SyncKvStorage::put(jsg::Lock& js, kj::String key, jsg::JsValue value) { traceContext.setTag("cloudflare.durable_object.kv.query.keys"_kjc, key.asPtr()); traceContext.setTag("cloudflare.durable_object.kv.query.keys.count"_kjc, static_cast(1)); - sqliteKv.put(key, serializeV8Value(js, value)); + sqliteKv.put(key, serializeV8Value(js, key, value)); } kj::OneOf SyncKvStorage::delete_(jsg::Lock& js, kj::String key) { - TraceContext traceContext = - IoContext::current().makeUserTraceSpan("durable_object_storage_kv_delete"_kjc); + auto& ioctx = IoContext::current(); + + KJ_IF_SOME(handler, KJ_ASSERT_NONNULL(ioctx.getActor()).getStoredExternalHandler()) { + handler.cancelPutExternals(key); + } + + TraceContext traceContext = ioctx.makeUserTraceSpan("durable_object_storage_kv_delete"_kjc); SqliteKv& sqliteKv = getSqliteKv(js); traceContext.setTag("db.system.name"_kjc, "cloudflare-durable-object-sql"_kjc); diff --git a/src/workerd/api/tests/BUILD.bazel b/src/workerd/api/tests/BUILD.bazel index 351d515df09..e6eb69a6aa2 100644 --- a/src/workerd/api/tests/BUILD.bazel +++ b/src/workerd/api/tests/BUILD.bazel @@ -1,7 +1,43 @@ load("@aspect_rules_js//js:defs.bzl", "js_binary") +load("@aspect_rules_ts//ts:defs.bzl", "ts_config", "ts_project") 
load("@rules_shell//shell:sh_test.bzl", "sh_test") load("//:build/wd_test.bzl", "wd_test") +# Compile all ts test sources in one project. This is needed to work around https://github.com/aspect-build/rules_ts/issues/664 – +# rules_ts transpiles all .ts files for each ts_project where the files are present based on the +# tsconfig "include": ["**/*.ts"] directive, which is an issue on Windows where Bazel does not have +# a proper sandbox, so it ends up trying to write to the same file path repeatedly. +# This approach does not scale well if we end up adding many more TS-based wd_tests since we need to +# transpile all TS sources before running TS-based wd-tests. At that point, we hopefully either have +# a proper Windows sandbox and can properly define a separate ts_project for each test, or we may +# have to put the files for a given test in a separate directory so that they are kept separate from +# the other ts files without sandboxing. +ts_config( + name = "ts_config", + src = "tsconfig.json", + deps = ["@workerd//tools:base-tsconfig"], +) + +ts_project( + name = "ts_project", + srcs = glob(["*.ts"]), + composite = True, + tsconfig = ":ts_config", + deps = ["//src/node:node@tsproject"], +) + +wd_test( + src = "stdio-writesync-reentry-uaf-test.wd-test", + args = ["--experimental"], + data = ["stdio-writesync-reentry-uaf-test.js"], +) + +wd_test( + src = "urlpattern-regex-search-oob-test.wd-test", + args = ["--experimental"], + data = ["urlpattern-regex-search-oob-test.js"], +) + wd_test( src = "messageport-postmessage-uaf-test.wd-test", args = ["--experimental"], @@ -20,6 +56,12 @@ wd_test( data = ["streams-byte-handlePush-uaf-test.js"], ) +wd_test( + src = "error-deser-prototype-setter-test.wd-test", + args = ["--experimental"], + data = ["error-deser-prototype-setter-test.js"], +) + wd_test( src = "structuredclone-error-serialize-test.wd-test", args = ["--experimental"], @@ -32,6 +74,12 @@ wd_test( data = ["deserialize-hardening-test.js"], ) +wd_test( + src = 
"struct-prototype-pollution-test.wd-test", + args = ["--experimental"], + data = ["struct-prototype-pollution-test.js"], +) + wd_test( src = "settimeout-test.wd-test", args = ["--experimental"], @@ -72,6 +120,13 @@ wd_test( data = ["connect-neuter-test.js"], ) +# Regression test for AUTOVULN-EW-EDGEWORKER-17: Socket::close() bare-this UAF. +wd_test( + src = "socket-close-gc-test.wd-test", + args = ["--experimental"], + data = ["socket-close-gc-test.js"], +) + wd_test( src = "actor-alarms-test.wd-test", args = ["--experimental"], @@ -163,6 +218,11 @@ wd_test( args = ["--experimental"], ) +wd_test( + src = "ctx-access-test.wd-test", + data = ["ctx-access-test.js"], +) + wd_test( src = "cache-test.wd-test", args = ["--experimental"], @@ -193,6 +253,18 @@ wd_test( ], ) +wd_test( + src = "queue-resizable-arraybuffer-test.wd-test", + args = ["--experimental"], + data = ["queue-resizable-arraybuffer-test.js"], +) + +wd_test( + src = "queue-do-uaf-test.wd-test", + args = ["--experimental"], + data = ["queue-do-uaf-test.js"], +) + wd_test( src = "queue-metrics-test.wd-test", args = ["--experimental"], @@ -246,13 +318,22 @@ wd_test( ], ) -# Tests for SQL_RESTRICT_RESERVED_NAMES autogate - only runs in @all-autogates variant +wd_test( + src = "sql-resizable-arraybuffer-test.wd-test", + args = ["--experimental"], + data = ["sql-resizable-arraybuffer-test.js"], +) + +wd_test( + src = "kv-resizable-arraybuffer-test.wd-test", + args = ["--experimental"], + data = ["kv-resizable-arraybuffer-test.js"], +) + wd_test( src = "sql-restrict-names-test.wd-test", args = ["--experimental"], data = ["sql-restrict-names-test.js"], - generate_all_compat_flags_variant = False, - generate_default_variant = False, ) wd_test( @@ -331,6 +412,18 @@ wd_test( data = ["crypto-extras-test.js"], ) +wd_test( + src = "resizable-arraybuffer-toctou-test.wd-test", + args = ["--experimental"], + data = ["resizable-arraybuffer-toctou-test.js"], +) + +wd_test( + src = 
"resizable-arraybuffer-aliasing-test.wd-test", + args = ["--experimental"], + data = ["resizable-arraybuffer-aliasing-test.js"], +) + wd_test( src = "crypto-impl-asymmetric-test.wd-test", args = ["--experimental"], @@ -385,13 +478,11 @@ wd_test( data = ["form-data-test.js"], ) -# TODO(soon): Re-enable once it is determined why this test is failing -# consistently in CI on Windows in all variant -# wd_test( -# src = "form-data-test-ts.wd-test", -# args = ["--experimental"], -# data = ["form-data-test-ts.ts"], -# ) +wd_test( + src = "form-data-test-ts.wd-test", + args = ["--experimental"], + data = ["form-data-test-ts.ts"], +) wd_test( src = "warnings-test.wd-test", @@ -656,6 +747,18 @@ wd_test( data = ["url-test.js"], ) +wd_test( + src = "url-searchparams-iterator-uaf-test.wd-test", + args = ["--experimental"], + data = ["url-searchparams-iterator-uaf-test.js"], +) + +wd_test( + src = "url_standard-searchparams-iterator-uaf-test.wd-test", + args = ["--experimental"], + data = ["url-searchparams-iterator-uaf-test.js"], +) + wd_test( src = "websocket-allow-half-open-test.wd-test", args = ["--experimental"], @@ -857,6 +960,13 @@ wd_test( tags = ["requires-network"], ) +wd_test( + size = "large", + src = "worker-loader-limits-test.wd-test", + args = ["--experimental"], + data = ["worker-loader-limits-test.js"], +) + wd_test( src = "worker-loader-unnamed-gc-test.wd-test", args = ["--experimental"], @@ -869,6 +979,12 @@ wd_test( data = ["worker-loader-rab-test.js"], ) +wd_test( + src = "worker-loader-gc-test.wd-test", + args = ["--experimental"], + data = ["worker-loader-gc-test.js"], +) + wd_test( src = "leak-fetch-test.wd-test", args = ["--experimental"], @@ -993,7 +1109,7 @@ wd_test( sh_test( name = "abortIsolate", - size = "small", + size = "medium", srcs = ["abortIsolate.sh"], args = [ "$(location //src/workerd/server:workerd_cross)", diff --git a/src/workerd/api/tests/abortIsolate.sh b/src/workerd/api/tests/abortIsolate.sh index fc7728bd813..05021ab7bc6 100755 --- 
a/src/workerd/api/tests/abortIsolate.sh +++ b/src/workerd/api/tests/abortIsolate.sh @@ -19,7 +19,7 @@ run_test() { > "$TEST_TMPDIR/abortIsolate.wd-test.tmp" \ && mv "$TEST_TMPDIR/abortIsolate.wd-test.tmp" "$TEST_TMPDIR/abortIsolate.wd-test" - output=$("$WORKERD" test "$TEST_TMPDIR/abortIsolate.wd-test" --experimental --compat-date=2000-01-01 -dTEST_TMPDIR="$TEST_TMPDIR" 2>&1) + output=$("$WORKERD" test "$TEST_TMPDIR/abortIsolate.wd-test" --compat-date=2000-01-01 -dTEST_TMPDIR="$TEST_TMPDIR" 2>&1) exit_code=$? echo "--- captured output ---" >&2 diff --git a/src/workerd/api/tests/abortIsolate.wd-test b/src/workerd/api/tests/abortIsolate.wd-test index e0972c8b433..212aef5cd8e 100644 --- a/src/workerd/api/tests/abortIsolate.wd-test +++ b/src/workerd/api/tests/abortIsolate.wd-test @@ -7,7 +7,6 @@ const unitTests :Workerd.Config = ( modules = [ (name = "worker", esModule = embed "abortIsolate.js"), ], - compatibilityFlags = ["experimental"], bindings = [ ( name = "topLevelAbort", diff --git a/src/workerd/api/tests/cross-context-promise-test.js b/src/workerd/api/tests/cross-context-promise-test.js index 75c334b93d6..ddc6f35257c 100644 --- a/src/workerd/api/tests/cross-context-promise-test.js +++ b/src/workerd/api/tests/cross-context-promise-test.js @@ -1,7 +1,7 @@ // Copyright (c) 2024 Cloudflare, Inc. // Licensed under the Apache 2.0 license found in the LICENSE file or at: // https://opensource.org/licenses/Apache-2.0 -import { match, rejects, strictEqual, throws } from 'assert'; +import { match, rejects, strictEqual } from 'assert'; import { AsyncLocalStorage } from 'async_hooks'; import { inspect } from 'util'; import { mock } from 'node:test'; @@ -139,6 +139,19 @@ export const cyclicAwaitsWorks = { }, }; +export const crossContextResolveViaSubrequest = { + async test(_, env) { + // A request creates a promise, delegates resolution to a subrequest, then + // awaits the promise after the subrequest completes. 
At the point of await + // the request has no other pending I/O. + const res = await env.subrequest.fetch( + 'http://example.org/resolve-via-subrequest' + ); + strictEqual(res.status, 200); + strictEqual(await res.text(), 'ok'); + }, +}; + export default { async fetch(req, env, ctx) { if (req.url.endsWith('/resolve')) { @@ -157,6 +170,10 @@ export default { return asyncIterator(req, env, ctx); } else if (req.url.endsWith('/cyclic')) { return cyclicPromise(req, env, ctx); + } else if (req.url.endsWith('/resolve-via-subrequest')) { + return resolveViaSubrequest(req, env, ctx); + } else if (req.url.endsWith('/resolve-via-subrequest-helper')) { + return resolveViaSubrequestHelper(req, env, ctx); } throw new Error('Invalid URL'); }, @@ -195,23 +212,23 @@ async function resolveTest(req, env, ctx) { // This is our second request. Here, all we do is resolve the promise. - // While we are deferring the continuations from the promise, the promise state - // change should happen immediately. Before calling resolve, the state should - // be pending. After calling resolve, the state should be resolved showing - // an undefined value. Updating the state of the promise immediately and - // synchronously is required by the language specification. - // See: https://tc39.es/ecma262/#sec-promise-resolve-functions + // Before resolving, the promise should be pending. strictEqual(inspect(globalThis.request1.promise), 'Promise { }'); als.run('abc', () => globalThis.request1.resolve()); - strictEqual(inspect(globalThis.request1.promise), 'Promise { undefined }'); + // With cross-context promise settlement, the entire settlement (status + // update + reaction triggering) is deferred to the owning IoContext. + // The promise may still appear pending here. const p = globalThis.request1.promise; globalThis.request1 = undefined; - // We ought to be able to do a cross-request wait on the promise still. + // Verify the promise eventually settles by awaiting it from this context. 
await p; + // After awaiting, confirm the promise is now resolved. + strictEqual(inspect(p), 'Promise { undefined }'); + return new Response('ok'); } @@ -336,13 +353,11 @@ async function unhandledRejection(req, env, ctx) { globalThis.addEventListener( 'unhandledrejection', (event) => { - // Here we have a gotcha! The unhandledrejection event is dispatched - // synchronously when the promise is rejected. It does not get deferred. - // so the IoContext here will be the second request's IoContext! This - // means that our ab.aborted check will fail! - throws(() => ab.aborted, { - message: /I\/O type: RefcountedCanceler/, - }); + // With deferred cross-context settlement, the rejection (and therefore + // the unhandledrejection event) is dispatched in the owning IoContext, + // not the rejecting request's context. This means ab.aborted should + // work correctly here — we are in the right IoContext. + strictEqual(ab.aborted, true); strictEqual(event.reason, reason); rejectPromise.resolve(); }, @@ -456,3 +471,44 @@ async function cyclicPromise(req, env, ctx) { return new Response('ok'); } + +async function resolveViaSubrequest(req, env, ctx) { + // This handler creates a promise then delegates its resolution to a nested + // subrequest. After the subrequest completes, it awaits the promise. At that + // point the subrequest has already called resolve() from its own IoContext, + // so the cross-context settlement action is guaranteed to be queued in this + // request's delete queue before the await. + // + // This is expected to pass because the hang detector (whenThreadIdle) waits + // for pending event port signals — including the cross-thread fulfiller + // notification from the delete queue — before declaring the thread idle. + // The drain loop processes the settlement action before the hang fires. + // + // Note that we deliberately do NOT call setupWaiter(ctx) here. If the + // resolution had NOT already happened-before the await (e.g. 
if it were + // deferred via waitUntil + scheduler.wait), the runtime would have no way + // to distinguish "waiting for a cross-context resolution that will arrive + // later" from "waiting on a promise that will never resolve". In that case + // the hang detector should fire, and the caller must use setupWaiter(ctx) + // or ctx.waitUntil() to keep the request alive explicitly. + const { promise, resolve } = Promise.withResolvers(); + globalThis.resolveViaSubrequest = { resolve }; + const ab = AbortSignal.abort(); + strictEqual(ab.aborted, true); + + const res = await env.subrequest.fetch( + 'http://example.org/resolve-via-subrequest-helper' + ); + strictEqual(res.status, 200); + + const result = await promise; + strictEqual(ab.aborted, true); + strictEqual(result, 'resolved-by-subrequest'); + return new Response('ok'); +} + +async function resolveViaSubrequestHelper(req, env, ctx) { + globalThis.resolveViaSubrequest.resolve('resolved-by-subrequest'); + globalThis.resolveViaSubrequest = undefined; + return new Response('ok'); +} diff --git a/src/workerd/api/tests/ctx-access-test.js b/src/workerd/api/tests/ctx-access-test.js new file mode 100644 index 00000000000..a1e8c8ed7bb --- /dev/null +++ b/src/workerd/api/tests/ctx-access-test.js @@ -0,0 +1,16 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +import { strictEqual } from 'node:assert'; + +export const ctxAccessPropertyExists = { + test(controller, env, ctx) { + // The access property is always present on ctx as a lazy instance property. + // In standalone workerd no AccessInfo is supplied to newWorkerEntrypoint(), so the + // current IncomingRequest has no AccessInfo and getAccess() returns kj::none, which + // surfaces as `undefined` to JS. 
+ strictEqual('access' in ctx, true); + strictEqual(ctx.access, undefined); + }, +}; diff --git a/src/workerd/api/tests/ctx-access-test.wd-test b/src/workerd/api/tests/ctx-access-test.wd-test new file mode 100644 index 00000000000..1c85d9902fe --- /dev/null +++ b/src/workerd/api/tests/ctx-access-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "ctx-access-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "ctx-access-test.js") + ], + compatibilityFlags = ["nodejs_compat"], + ) + ), + ], +); diff --git a/src/workerd/api/tests/error-deser-prototype-setter-test.js b/src/workerd/api/tests/error-deser-prototype-setter-test.js new file mode 100644 index 00000000000..0428a72a65b --- /dev/null +++ b/src/workerd/api/tests/error-deser-prototype-setter-test.js @@ -0,0 +1,100 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-337: +// Deserializer::ReadHostObject used obj.set() (ordinary [[Set]]) to restore +// serialized Error own-properties. A tenant-installed setter on +// Error.prototype would be invoked inside V8's DisallowJavascriptExecution +// scope, triggering V8_Fatal -> abort(). The fix uses CreateDataProperty +// which bypasses the prototype chain entirely. + +import { strictEqual, ok } from 'node:assert'; + +export const errorDeserPrototypeSetterRegression = { + test() { + // Install a setter on Error.prototype for a key we will also define as + // an own data property on the Error instance. + let setterInvoked = false; + Object.defineProperty(Error.prototype, 'evilprop', { + set(_v) { + setterInvoked = true; + }, + get() { + return undefined; + }, + configurable: true, + }); + + try { + const err = new Error('hello'); + // Define an own data property with the same key on the instance. 
+ Object.defineProperty(err, 'evilprop', { + value: 42, + enumerable: true, + writable: true, + configurable: true, + }); + + // Pre-patch this would abort the process with: + // V8 fatal error: Invoke in DisallowJavascriptExecutionScope + const clone = structuredClone(err); + + // After fix: the own data property is recreated via + // CreateDataProperty, the prototype setter is never invoked, + // and the value round-trips. + strictEqual( + Object.getOwnPropertyDescriptor(clone, 'evilprop')?.value, + 42, + 'own data property evilprop should round-trip with value 42' + ); + ok( + !setterInvoked, + 'Error.prototype setter must not be invoked during deserialization' + ); + strictEqual(clone.message, 'hello', 'error message should round-trip'); + ok(clone instanceof Error, 'clone should be an Error instance'); + } finally { + // Clean up the prototype pollution. + delete Error.prototype.evilprop; + } + }, +}; + +// Also verify the serialization side: when the serializer copies own +// properties into a temporary plain object, it can safely use Set() +export const errorSerPrototypeSetterRegression = { + test() { + let setterInvoked = false; + Object.defineProperty(Object.prototype, 'serprop', { + set(_v) { + setterInvoked = true; + }, + get() { + return undefined; + }, + configurable: true, + }); + + try { + const err = new Error('ser-test'); + Object.defineProperty(err, 'serprop', { + value: 99, + enumerable: true, + writable: true, + configurable: true, + }); + + // This exercises the serialization path (ser.c++:286) where own + // properties are copied to a temporary plain object. 
+ const _clone = structuredClone(err); + + ok( + setterInvoked, + 'Object.prototype setter must be invoked during serialization' + ); + } finally { + delete Object.prototype.serprop; + } + }, +}; diff --git a/src/workerd/api/tests/error-deser-prototype-setter-test.wd-test b/src/workerd/api/tests/error-deser-prototype-setter-test.wd-test new file mode 100644 index 00000000000..5192ddb9028 --- /dev/null +++ b/src/workerd/api/tests/error-deser-prototype-setter-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "error-deser-prototype-setter-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "error-deser-prototype-setter-test.js") + ], + compatibilityFlags = ["nodejs_compat", "enhanced_error_serialization"], + ) + ), + ], +); diff --git a/src/workerd/api/tests/form-data-test-ts.ts b/src/workerd/api/tests/form-data-test-ts.ts index 47722ba8bcd..429a05ffb80 100644 --- a/src/workerd/api/tests/form-data-test-ts.ts +++ b/src/workerd/api/tests/form-data-test-ts.ts @@ -6,7 +6,7 @@ import { strictEqual } from 'node:assert'; // https://github.com/cloudflare/workerd/issues/5934 export const formDataUnionTypeOverloads = { - test() { + test(): void { const formData = new FormData(); formData.append('stringKey', 'stringValue' as string | Blob); diff --git a/src/workerd/api/tests/form-data-test.js b/src/workerd/api/tests/form-data-test.js index d686ae8cf8e..9d894bc88d2 100644 --- a/src/workerd/api/tests/form-data-test.js +++ b/src/workerd/api/tests/form-data-test.js @@ -946,3 +946,26 @@ export const w3cTestFormData = { //do_test("formdata with named string", create_formdata(['key', new Blob(['value'], {type: 'text/plain'}), 'kv.txt']), '\nkey=kv.txt:text/plain:5,'); }, }; + +export const urlencodedReasonableEntryCount = { + async test() { + const entryCount = 100; + const body = Array.from( + { length: entryCount }, + (_, i) => `key${i}=val${i}` + ).join('&'); + + const 
req = new Request('http://example.org', { + method: 'POST', + body, + headers: { + 'content-type': 'application/x-www-form-urlencoded', + }, + }); + + const fd = await req.formData(); + strictEqual([...fd].length, entryCount); + strictEqual(fd.get('key0'), 'val0'); + strictEqual(fd.get('key99'), 'val99'); + }, +}; diff --git a/src/workerd/api/tests/http-socket-test.js b/src/workerd/api/tests/http-socket-test.js index cf4b63bef9d..222fac8233c 100644 --- a/src/workerd/api/tests/http-socket-test.js +++ b/src/workerd/api/tests/http-socket-test.js @@ -4,6 +4,7 @@ import { connect, internalNewHttpClient } from 'cloudflare:sockets'; import { strict as assert } from 'node:assert'; +import unsafe from 'workerd:unsafe'; // Basic connectivity and GET test export const oneRequest = { @@ -503,6 +504,52 @@ export const startTlsEarlySend = { }, }; +// When the STARTTLS_REJECT_EXPECTED_SERVER_HOSTNAME autogate is enabled +// (e.g. @all-autogates variant), startTls must throw a TypeError if +// expectedServerHostname is provided. When the autogate is off, startTls +// logs and proceeds — both outcomes are acceptable. 
+export const startTlsRejectExpectedServerHostname = { + async test(ctrl, env, ctx) { + const socket = connect(`localhost:${env.STARTTLS_SOCKET}`, { + secureTransport: 'starttls', + }); + + const writer = socket.writable.getWriter(); + const reader = socket.readable.getReader(); + const encoder = new TextEncoder(); + const decoder = new TextDecoder(); + + const { value: greeting } = await reader.read(); + const greetingText = decoder.decode(greeting).trim(); + if (greetingText !== 'HELLO') throw new Error('Wrong Handshake'); + await writer.write(encoder.encode('HELLO_BACK\n')); + + const { value: signal } = await reader.read(); + const signalText = decoder.decode(signal).trim(); + if (signalText !== 'START_TLS') throw new Error('Cannot Start TLS'); + + reader.releaseLock(); + writer.releaseLock(); + + // We use isTestAutogateEnabled() (which checks the TEST_WORKERD gate) as a proxy + // for whether STARTTLS_REJECT_EXPECTED_SERVER_HOSTNAME is enabled, because the + // @all-autogates test variant enables every gate at once. + if (unsafe.isTestAutogateEnabled()) { + // Autogate is on — startTls must throw. + assert.throws( + () => socket.startTls({ expectedServerHostname: 'other.com' }), + { + name: 'TypeError', + message: /expectedServerHostname/, + } + ); + } else { + // Autogate is off — startTls logs but does not throw. 
+ socket.startTls({ expectedServerHostname: 'other.com' }); + } + }, +}; + export const manualProtocolThenFetcher = { async test(ctrl, env, ctx) { const socket = connect(`localhost:${env.HTTP_SOCKET_SERVER_PORT}`); diff --git a/src/workerd/api/tests/http-socket-test.wd-test b/src/workerd/api/tests/http-socket-test.wd-test index 084335ce5b2..c2042a8b8d1 100644 --- a/src/workerd/api/tests/http-socket-test.wd-test +++ b/src/workerd/api/tests/http-socket-test.wd-test @@ -12,7 +12,7 @@ const config :Workerd.Config = ( modules = [ (name = "worker.js", esModule = embed "http-socket-test.js") ], - compatibilityFlags = ["nodejs_compat", "experimental"], + compatibilityFlags = ["nodejs_compat", "experimental", "unsafe_module"], bindings = [ (name = "HTTP_SOCKET_SERVER_PORT", fromEnvironment = "HTTP_SOCKET_SERVER_PORT"), (name = "SOCKET_PARTIALLY_WRITTEN", fromEnvironment = "SOCKET_PARTIALLY_WRITTEN"), @@ -21,15 +21,15 @@ const config :Workerd.Config = ( ], ) ), - ( name = "internet", - network = ( + ( name = "internet", + network = ( allow = ["private"], tlsOptions = ( trustedCertificates = [ embed "starttls-server.pem", ], ), - ) + ) ), ], ); diff --git a/src/workerd/api/tests/js-rpc-params-ownership-test.js b/src/workerd/api/tests/js-rpc-params-ownership-test.js index f2ec00058a0..17c96039de0 100644 --- a/src/workerd/api/tests/js-rpc-params-ownership-test.js +++ b/src/workerd/api/tests/js-rpc-params-ownership-test.js @@ -16,6 +16,7 @@ class Counter extends RpcTarget { [Symbol.dispose]() { ++this.disposeCount; + this.onDispose?.(); } } @@ -177,6 +178,8 @@ export let rpcParamsDupFunction = { export let rpcReturnsTransferOwnership = { async test(controller, env, ctx) { let counter = new Counter(); + const { promise: disposed, resolve } = Promise.withResolvers(); + counter.onDispose = resolve; { using stub = new RpcStub(counter); @@ -186,7 +189,11 @@ export let rpcReturnsTransferOwnership = { assert.strictEqual(counter.disposeCount, 0); } - await scheduler.wait(0); + // 
Disposing a stub asynchronously disposes the RpcTarget. Await the + // disposal callback rather than relying on a fixed number of event + // loop ticks. + await disposed; assert.strictEqual(counter.disposeCount, 1); }, }; + diff --git a/src/workerd/api/tests/js-rpc-test.js b/src/workerd/api/tests/js-rpc-test.js index 4558cd517fa..69ffe0a4722 100644 --- a/src/workerd/api/tests/js-rpc-test.js +++ b/src/workerd/api/tests/js-rpc-test.js @@ -177,6 +177,14 @@ export class MyService extends WorkerEntrypoint { return await counter.increment(i); } + async getMyActor(name) { + return this.env.MyActor.get(this.env.MyActor.idFromName(name)); + } + + async getMyActorInObject(name) { + return { actor: this.env.MyActor.get(this.env.MyActor.idFromName(name)) }; + } + async getAnObject(i) { return { foo: 123 + i, counter: new MyCounter(i) }; } @@ -509,6 +517,14 @@ export class MyServiceProxy extends WorkerEntrypoint { return this.env.MyService.makeCounter(i); } + getMyActor(name) { + return this.env.MyService.getMyActor(name); + } + + getMyActorInObject(name) { + return this.env.MyService.getMyActorInObject(name); + } + getAnObject(i) { return this.env.MyService.getAnObject(i); } @@ -996,6 +1012,12 @@ export let sendStubOverRpc = { }); assert.strictEqual(await stubDup.increment(7), 16); + + let actor = env.MyActor.get( + env.MyActor.idFromName('send-durable-object-stub') + ); + assert.strictEqual(await env.MyService.incrementCounter(actor, 5), 5); + assert.strictEqual(await actor.increment(7), 12); }, }; @@ -1013,18 +1035,34 @@ export let receiveStubOverRpc = { await Promise.all([promise1, promise2, promise3]), [15, 19, 22] ); + + let actor = await env.MyService.getMyActor('receive-durable-object-stub'); + assert.strictEqual(await actor.increment(2), 2); + assert.strictEqual(await actor.increment(6), 8); }, }; export let promisePipelining = { async test(controller, env, ctx) { assert.strictEqual(await env.MyService.makeCounter(12).increment(3), 15); + assert.strictEqual( + await 
env.MyService.getMyActor( + 'promise-pipeline-durable-object-stub' + ).increment(3), + 3 + ); assert.strictEqual(await env.MyService.getAnObject(5).foo, 128); assert.strictEqual( await env.MyService.getAnObject(5).counter.increment(7), 12 ); + assert.strictEqual( + await env.MyService.getMyActorInObject( + 'promise-pipeline-durable-object-stub-wrapped' + ).actor.increment(4), + 4 + ); assert.rejects(() => env.MyService.oneArgMethod(5).foo(), { name: 'TypeError', @@ -1059,6 +1097,16 @@ export let promisePipeliningProxy = { assert.strictEqual(await promise2, 20); } + { + let actor = env.MyServiceProxy.getMyActor( + 'promise-pipeline-proxy-durable-object-stub' + ); + let promise1 = actor.increment(3); + let promise2 = actor.increment(5); + assert.strictEqual(await promise1, 3); + assert.strictEqual(await promise2, 8); + } + // Pipeline on a proxied call that returns an object containing an object that contains a // stub. (This ensures that pipelining can traverse JsRpcProperty values.) { @@ -1068,6 +1116,16 @@ export let promisePipeliningProxy = { assert.strictEqual(await promise1, 15); assert.strictEqual(await promise2, 20); } + + { + let actor = env.MyServiceProxy.getMyActorInObject( + 'promise-pipeline-proxy-durable-object-stub-wrapped' + ).actor; + let promise1 = actor.increment(2); + let promise2 = actor.increment(6); + assert.strictEqual(await promise1, 2); + assert.strictEqual(await promise2, 8); + } }, }; @@ -2117,3 +2175,57 @@ export let eOrderTest = { assert.deepEqual(results, [1, 2, 3, 4, 5, 6]); }, }; + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-297: +// Unbounded JsRpcProperty parent chain causes native stack overflow +// (SIGSEGV) on destruction. Building a deep chain of pipelined +// property accesses must be rejected once the depth exceeds +// MAX_PROPERTY_DEPTH (5120). +export let stubDepthLimitTest = { + async test() { + // Create a local RPC stub wrapping a plain object. 
+ let stub = new RpcStub({}); + + // Build a chain of pipelined property accesses. Before the fix, + // this would create an unbounded linked list of native + // JsRpcProperty objects whose recursive destruction overflows + // the native stack. After the fix, getProperty() throws a + // TypeError once depth >= 5120. + let p = stub; + let threw = false; + let depthReached = 0; + try { + for (let i = 0; i < 10000; i++) { + p = p.x; + depthReached = i + 1; + } + } catch (e) { + threw = true; + assert.ok( + e instanceof TypeError, + `Expected TypeError, got ${e.constructor.name}: ${e.message}` + ); + assert.ok( + e.message.includes('too deep'), + `Expected error message about "too deep", got: ${e.message}` + ); + } + + assert.ok( + threw, + 'Expected TypeError to be thrown at depth limit, ' + + `but reached depth ${depthReached} without error` + ); + // The depth limit is 5120, so we should have reached at least 5120 + // before the throw. + assert.ok( + depthReached >= 5120, + `Expected to reach at least depth 5120, only reached ${depthReached}` + ); + // And we should NOT have reached 10000 (the full loop). + assert.ok( + depthReached < 10000, + 'Should not have reached depth 10000 without error' + ); + }, +}; diff --git a/src/workerd/api/tests/kv-resizable-arraybuffer-test.js b/src/workerd/api/tests/kv-resizable-arraybuffer-test.js new file mode 100644 index 00000000000..18142f37869 --- /dev/null +++ b/src/workerd/api/tests/kv-resizable-arraybuffer-test.js @@ -0,0 +1,103 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for resizable ArrayBuffer passed to KV.put(). +// KV.put converts its value to kj::Array via jsg::asBytes(), which must +// deep-copy resizable buffers to prevent SIGSEGV from page decommit after +// resize(0). Ref: AUTOVULN-CLOUDFLARE-WORKERD-73 +// +// This is also the mock KV backend. 
It stores PUT bodies in memory and returns +// them on GET, so the test can verify the data that was actually transmitted. + +import assert from 'node:assert'; +import { WorkerEntrypoint } from 'cloudflare:workers'; + +// In-memory store shared across requests within the same isolate. +const store = new Map(); + +export default class KVMock extends WorkerEntrypoint { + async fetch(request) { + const { pathname } = new URL(request.url); + const key = decodeURIComponent(pathname.slice(1)); // strip leading / + if (request.method === 'PUT') { + store.set(key, await request.arrayBuffer()); + return new Response(null, { status: 200 }); + } else if (request.method === 'GET') { + const data = store.get(key); + if (data === undefined) { + return new Response(null, { status: 404 }); + } + return new Response(data, { status: 200 }); + } + return new Response(null, { status: 405 }); + } +} + +// Sophie's example: put a resizable ArrayBuffer into KV, mutate it after put +// but before await, then verify what KV received. +export const kvPutResizableArrayBuffer = { + async test(ctrl, env, ctx) { + const body = new ArrayBuffer(7, { maxByteLength: 16 }); + new TextEncoder().encodeInto('initial', new Uint8Array(body)); + const promise = env.KV.put('blah', body); + new TextEncoder().encodeInto('changed', new Uint8Array(body)); + await promise; + + // Verify KV received "initial" (the data at put-time), not "changed". + const stored = await env.KV.get('blah'); + assert.strictEqual(stored, 'initial'); + }, +}; + +// Same test but resize the buffer to 0 after put — the original SIGSEGV vector. 
+export const kvPutResizableArrayBufferThenShrink = { + async test(ctrl, env, ctx) { + const body = new ArrayBuffer(64, { maxByteLength: 128 }); + const view = new Uint8Array(body); + for (let i = 0; i < 64; i++) view[i] = i; + + const promise = env.KV.put('shrink-test', body); + body.resize(0); // decommits pages — would SIGSEGV without deep copy + await promise; + + // Verify KV received all 64 bytes. + const stored = await env.KV.get('shrink-test', { type: 'arrayBuffer' }); + assert.strictEqual(stored.byteLength, 64); + const result = new Uint8Array(stored); + for (let i = 0; i < 64; i++) { + assert.strictEqual(result[i], i, `byte ${i}`); + } + }, +}; + +// Non-resizable buffer: does KV see the mutation that happens after put() but +// before await? This settles whether KJ's .then() on an immediately-ready +// promise fires synchronously or defers to the event loop. +export const kvPutNonResizableMutateAfterPut = { + async test(ctrl, env, ctx) { + const body = new ArrayBuffer(7); + new TextEncoder().encodeInto('initial', new Uint8Array(body)); + const promise = env.KV.put('non-rab-mutate', body); + new TextEncoder().encodeInto('changed', new Uint8Array(body)); + await promise; + + const stored = await env.KV.get('non-rab-mutate'); + // If KV sees 'changed', the .then() callback that writes the HTTP body + // ran AFTER our encodeInto — i.e. .then() deferred even on READY_NOW. + // If KV sees 'initial', .then() fired inline during put(). + if (stored === 'changed') { + console.log('KV.put .then() is DEFERRED: saw mutation after put()'); + } else if (stored === 'initial') { + console.log( + 'KV.put .then() is SYNCHRONOUS: did not see mutation after put()' + ); + } + // Either way, this test should not crash. Log the result so we can see + // which behaviour we get. Accept both for now. 
+ assert.ok( + stored === 'initial' || stored === 'changed', + `expected 'initial' or 'changed', got '${stored}'` + ); + }, +}; diff --git a/src/workerd/api/tests/kv-resizable-arraybuffer-test.wd-test b/src/workerd/api/tests/kv-resizable-arraybuffer-test.wd-test new file mode 100644 index 00000000000..1d645627791 --- /dev/null +++ b/src/workerd/api/tests/kv-resizable-arraybuffer-test.wd-test @@ -0,0 +1,15 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "kv-resizable-arraybuffer-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "kv-resizable-arraybuffer-test.js") + ], + bindings = [ ( name = "KV", kvNamespace = "kv-resizable-arraybuffer-test" ), ], + compatibilityFlags = ["nodejs_compat", "service_binding_extra_handlers"], + ) + ), + ], +); diff --git a/src/workerd/api/tests/pipe-streams-test.js b/src/workerd/api/tests/pipe-streams-test.js index 28a60d586ec..ee5cf4cfdc3 100644 --- a/src/workerd/api/tests/pipe-streams-test.js +++ b/src/workerd/api/tests/pipe-streams-test.js @@ -10,7 +10,7 @@ export const pipeThroughJsToInternal = { async test() { const enc = new TextEncoder(); const dec = new TextDecoder(); - const chunks = [enc.encode('hello'), enc.encode('there'), 'hello']; + const chunks = [enc.encode('hello'), enc.encode('there'), 'hello', 123]; const rs = new ReadableStream({ pull(c) { c.enqueue(chunks.shift()); @@ -31,7 +31,7 @@ export const pipeThroughJsToInternal = { message: 'This WritableStream only supports writing byte types.', }); - deepStrictEqual(output, ['hello', 'there']); + deepStrictEqual(output, ['hello', 'there', 'hello']); }, }; diff --git a/src/workerd/api/tests/queue-do-uaf-test.js b/src/workerd/api/tests/queue-do-uaf-test.js new file mode 100644 index 00000000000..79489522d65 --- /dev/null +++ b/src/workerd/api/tests/queue-do-uaf-test.js @@ -0,0 +1,89 @@ +// Copyright (c) 2025 Cloudflare, Inc. 
+// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-300: +// Heap use-after-free in QueueEvent/QueueMessage via dangling +// IoPtr on a Durable Object's persistent +// IoContext. + +import assert from 'node:assert'; + +export class TestDO { + constructor(state) { + this.stashedBatch = null; + } + + async queue(batch) { + // Stash the QueueController so it survives past the queue + // dispatch. Pre-fix, this retains a dangling + // IoPtr after QueueCustomEvent is freed. + this.stashedBatch = batch; + } + + async fetch(req) { + const { pathname } = new URL(req.url); + + if (pathname === '/warmup') { + // This request triggers the previous request's + // drainFulfiller to be fulfilled, which (pre-fix) would + // cause QueueCustomEvent to be freed. + return new Response('ok'); + } + + if (pathname === '/trigger') { + // Pre-fix: dereferences a dangling IoPtr, causing a heap + // use-after-free (ASAN: READ of size 1 in retryAll). + // Post-fix: QueueEventResult is kept alive by IoOwn. + assert.notStrictEqual( + this.stashedBatch, + null, + 'batch should have been stashed' + ); + + // Exercise retryAll — the primary UAF sink. + this.stashedBatch.retryAll({ delaySeconds: 5 }); + + // Exercise per-message retry — the stronger UAF primitive + // (kj::HashMap::upsert on freed memory). + assert( + this.stashedBatch.messages.length > 0, + 'should have at least one message' + ); + this.stashedBatch.messages[0].retry({ delaySeconds: 10 }); + + return new Response('ok'); + } + + return new Response('not found', { status: 404 }); + } +} + +export default { + async test(ctrl, env) { + const stub = env.ns.get(env.ns.idFromName('uaf-regression')); + + // 1. Dispatch a queue event to the DO. The DO stashes the + // QueueController. 
+ const _queueResult = await stub.queue('test-queue', [ + { + id: 'msg-1', + timestamp: new Date(), + body: 'hello', + attempts: 1, + }, + ]); + + // 2. Send a follow-up fetch to the same DO. This triggers + // the previous request's drain, which (pre-fix) would + // free QueueCustomEvent. + const warmupResp = await stub.fetch('http://x/warmup'); + assert.strictEqual(await warmupResp.text(), 'ok'); + + // 3. Now trigger the stashed batch operations. Pre-fix, + // this would be a UAF. Post-fix, QueueEventResult is + // still alive via IoOwn in QueueEvent. + const triggerResp = await stub.fetch('http://x/trigger'); + assert.strictEqual(await triggerResp.text(), 'ok'); + }, +}; diff --git a/src/workerd/api/tests/queue-do-uaf-test.wd-test b/src/workerd/api/tests/queue-do-uaf-test.wd-test new file mode 100644 index 00000000000..7b334c888a3 --- /dev/null +++ b/src/workerd/api/tests/queue-do-uaf-test.wd-test @@ -0,0 +1,21 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "queue-do-uaf-test", + worker = ( + modules = [ + ( name = "worker", esModule = embed "queue-do-uaf-test.js" ) + ], + durableObjectNamespaces = [ + ( className = "TestDO", uniqueKey = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" ), + ], + durableObjectStorage = (inMemory = void), + bindings = [ + ( name = "ns", durableObjectNamespace = "TestDO" ), + ], + compatibilityFlags = ["service_binding_extra_handlers", "queue_consumer_no_wait_for_wait_until", "nodejs_compat"], + ) + ), + ], +); diff --git a/src/workerd/api/tests/queue-resizable-arraybuffer-test.js b/src/workerd/api/tests/queue-resizable-arraybuffer-test.js new file mode 100644 index 00000000000..1ccd0df40ed --- /dev/null +++ b/src/workerd/api/tests/queue-resizable-arraybuffer-test.js @@ -0,0 +1,76 @@ +// Copyright (c) 2026 Cloudflare, Inc. 
+// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test: sendBatch with a resizable ArrayBuffer "bytes" message +// followed by a "json" message whose toJSON() resizes the buffer to 0. +// Before the fix, the shallow reference captured for the first message would +// read from decommitted pages when base64-encoding the batch body. + +import assert from 'node:assert'; +import { Buffer } from 'node:buffer'; + +export default { + async fetch(request) { + const { pathname } = new URL(request.url); + if (pathname === '/batch') { + const body = await request.json(); + assert(Array.isArray(body?.messages)); + assert.strictEqual(body.messages.length, 2); + + // The bytes message should contain the original data, not zeros. + assert.strictEqual(body.messages[0].contentType, 'bytes'); + const bytes = Buffer.from(body.messages[0].body, 'base64'); + assert.strictEqual(bytes.length, 64); + for (let i = 0; i < 64; i++) { + assert.strictEqual(bytes[i], 0xaa, `byte ${i} should be 0xAA`); + } + + // The json message should contain the hostile toJSON() result. + assert.strictEqual(body.messages[1].contentType, 'json'); + assert.deepStrictEqual( + JSON.parse(Buffer.from(body.messages[1].body, 'base64')), + { poisoned: true } + ); + } + return Response.json({ + metadata: { + metrics: { + backlogCount: 0, + backlogBytes: 0, + oldestMessageTimestamp: 0, + }, + }, + }); + }, + + async test(ctrl, env, ctx) { + // Create a resizable ArrayBuffer and fill with a known pattern. + const rab = new ArrayBuffer(64, { maxByteLength: 128 }); + new Uint8Array(rab).fill(0xaa); + const view = new Uint8Array(rab); + + // Craft a hostile object whose toJSON() shrinks the earlier message's buffer. 
+ const hostile = { + toJSON() { + rab.resize(0); + return { poisoned: true }; + }, + }; + + // sendBatch: first message holds a shallow reference to the resizable buffer, + // second message's serialization runs toJSON() which resizes it to 0. + // Pre-fix: OOB read / SIGSEGV in kj::encodeBase64. + await env.QUEUE.sendBatch([ + { body: view, contentType: 'bytes' }, + { body: hostile, contentType: 'json' }, + ]); + + // sendBatch must not detach the buffer — users may reuse it across calls. + assert.strictEqual( + rab.detached, + false, + 'sendBatch should not detach the ArrayBuffer' + ); + }, +}; diff --git a/src/workerd/api/tests/queue-resizable-arraybuffer-test.wd-test b/src/workerd/api/tests/queue-resizable-arraybuffer-test.wd-test new file mode 100644 index 00000000000..5c2ad273ba7 --- /dev/null +++ b/src/workerd/api/tests/queue-resizable-arraybuffer-test.wd-test @@ -0,0 +1,17 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "queue-resizable-arraybuffer-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "queue-resizable-arraybuffer-test.js") + ], + bindings = [ + ( name = "QUEUE", queue = "queue-resizable-arraybuffer-test" ), + ], + compatibilityFlags = ["nodejs_compat", "queues_json_messages"], + ) + ), + ], +); diff --git a/src/workerd/api/tests/resizable-arraybuffer-aliasing-test.js b/src/workerd/api/tests/resizable-arraybuffer-aliasing-test.js new file mode 100644 index 00000000000..0633a895d44 --- /dev/null +++ b/src/workerd/api/tests/resizable-arraybuffer-aliasing-test.js @@ -0,0 +1,125 @@ +// Copyright (c) 2024 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Tests for snapshot semantics of resizable ArrayBuffers passed to APIs that +// go through jsg::asBytes(). 
+// +// For resizable buffers, asBytes() returns a deep copy to prevent SIGSEGV from +// page decommit after resize(0). This test verifies that: +// +// 1. Data is captured at call time (not affected by post-call mutations) +// 2. The behavior is the same for resizable and non-resizable buffers +// +// In practice, WebSocket.send() (and similar APIs) consume the data +// synchronously during the call -- the WebSocket pump copies data to the pipe +// before returning to JS. So mutations after send() are never visible, +// regardless of whether the buffer is resizable or not. + +import { strictEqual } from 'node:assert'; + +// Helper: send a buffer via WebSocket, mutate it, check what was received. +async function sendMutateReceive(buffer, initialText, mutatedText) { + const pair = new WebSocketPair(); + const [client, server] = pair; + server.accept(); + server.binaryType = 'arraybuffer'; + + const received = new Promise((resolve) => { + server.addEventListener('message', (e) => resolve(e.data)); + }); + + client.accept(); + + // Write initial data + const view = new Uint8Array(buffer); + new TextEncoder().encodeInto(initialText, view); + + // Send the buffer — this calls asBytes() internally + client.send(buffer); + + // Mutate AFTER send — this should NOT affect the sent data because the + // WebSocket pump copies data to the pipe synchronously during send(). + new TextEncoder().encodeInto(mutatedText, view); + + // Wait for the message to be delivered through the WebSocket pipe + const msg = await received; + + client.close(); + server.close(); + + return new TextDecoder().decode(msg); +} + +// Non-resizable buffer: data is captured at send() time. +// Even though asBytes() returns a live view into the BackingStore, the +// WebSocket pump copies data to the pipe synchronously, so post-send +// mutations are not visible. 
+export const nonResizableBufferSnapshot = { + async test() { + const ab = new ArrayBuffer(7); // non-resizable + const text = await sendMutateReceive(ab, 'initial', 'CHANGED'); + strictEqual( + text, + 'initial', + 'non-resizable: data should be captured at send() time' + ); + }, +}; + +// Resizable buffer: data is captured at send() time via deep copy. +// asBytes() copies the data defensively (to prevent SIGSEGV from resize(0) +// decommitting pages). The result is the same as non-resizable: the sent +// data reflects the buffer content at the time of the send() call. +export const resizableBufferSnapshot = { + async test() { + const ab = new ArrayBuffer(7, { maxByteLength: 16 }); // resizable + const text = await sendMutateReceive(ab, 'initial', 'CHANGED'); + strictEqual( + text, + 'initial', + 'resizable: data should be captured at send() time (deep copy)' + ); + }, +}; + +// Resizable buffer that was already resized down: asBytes() should handle +// the current (smaller) size correctly, not the max reservation size. 
+export const resizableBufferAfterShrink = { + async test() { + const ab = new ArrayBuffer(16, { maxByteLength: 32 }); + const view = new Uint8Array(ab); + new TextEncoder().encodeInto('hello world12345', view); + + // Shrink to 5 bytes + ab.resize(5); + + const pair = new WebSocketPair(); + const [client, server] = pair; + server.accept(); + server.binaryType = 'arraybuffer'; + + const received = new Promise((resolve) => { + server.addEventListener('message', (e) => resolve(e.data)); + }); + + client.accept(); + client.send(ab); + + const msg = await received; + const text = new TextDecoder().decode(msg); + strictEqual( + text, + 'hello', + 'resizable after shrink: should send only the current (5-byte) content' + ); + strictEqual( + msg.byteLength, + 5, + 'resizable after shrink: sent length should be current size, not max' + ); + + client.close(); + server.close(); + }, +}; diff --git a/src/workerd/api/tests/resizable-arraybuffer-aliasing-test.wd-test b/src/workerd/api/tests/resizable-arraybuffer-aliasing-test.wd-test new file mode 100644 index 00000000000..235d4fc0fc1 --- /dev/null +++ b/src/workerd/api/tests/resizable-arraybuffer-aliasing-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "resizable-arraybuffer-aliasing-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "resizable-arraybuffer-aliasing-test.js") + ], + compatibilityFlags = ["nodejs_compat", "websocket_standard_binary_type"], + ) + ), + ], +); diff --git a/src/workerd/api/tests/resizable-arraybuffer-toctou-test.js b/src/workerd/api/tests/resizable-arraybuffer-toctou-test.js new file mode 100644 index 00000000000..49586fb8333 --- /dev/null +++ b/src/workerd/api/tests/resizable-arraybuffer-toctou-test.js @@ -0,0 +1,474 @@ +// Copyright (c) 2026 Cloudflare, Inc. 
+// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +import { ok, rejects } from 'node:assert'; + +// Adversarial regression tests for AUTOVULN-CLOUDFLARE-WORKERD-289. +// +// Each test exercises a SubtleCrypto method with a re-entrant path: a property +// getter on a JSG_STRUCT algorithm parameter fires during argument unwrapping +// and either resizes or detaches an ArrayBuffer that has already been (or is +// about to be) captured by the runtime. Before the fix, this could leave a +// stale {pointer, length} pair pointing into decommitted pages → SIGSEGV. +// +// The tests verify: +// 1. The getter actually fired (re-entrancy occurred). +// 2. The call did not crash (reached the assertion after the call). +// 3. The call resolved, or (in the rejects() tests) threw a clean JS error, not an internal error / segfault. + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makeResizable(size) { + const buf = new ArrayBuffer(size, { maxByteLength: size }); + new Uint8Array(buf).fill(0xaa); + return buf; +} + +async function importAesGcmKey(...usages) { + return crypto.subtle.importKey( + 'raw', + new Uint8Array(16), + { name: 'AES-GCM' }, + false, + usages + ); +} + +async function importHmacKey() { + return crypto.subtle.importKey( + 'raw', + new Uint8Array(32), + { name: 'HMAC', hash: 'SHA-256' }, + false, + ['sign', 'verify'] + ); +} + +// --------------------------------------------------------------------------- +// encrypt — struct-before-buffer path +// --------------------------------------------------------------------------- + +export const encryptResize = { + async test() { + const key = await importAesGcmKey('encrypt'); + const buf = makeResizable(256 * 1024); + + let getterFired = false; + const alg = { + name: 'AES-GCM', + iv: new Uint8Array(12), + get tagLength() { + buf.resize(1);
getterFired = true; + return 128; + }, + }; + + await crypto.subtle.encrypt(alg, key, buf); + + ok(getterFired, 'getter did not fire'); + }, +}; + +export const encryptResizeToZero = { + async test() { + const key = await importAesGcmKey('encrypt'); + const buf = makeResizable(256 * 1024); + + let getterFired = false; + const alg = { + name: 'AES-GCM', + iv: new Uint8Array(12), + get tagLength() { + buf.resize(0); + getterFired = true; + return 128; + }, + }; + + await crypto.subtle.encrypt(alg, key, buf); + + ok(getterFired, 'getter did not fire'); + }, +}; + +export const encryptDetach = { + async test() { + const key = await importAesGcmKey('encrypt'); + const buf = makeResizable(256 * 1024); + + let getterFired = false; + const alg = { + name: 'AES-GCM', + iv: new Uint8Array(12), + get tagLength() { + // Transfer detaches the original buffer. + structuredClone(buf, { transfer: [buf] }); + getterFired = true; + return 128; + }, + }; + + await crypto.subtle.encrypt(alg, key, buf); + + ok(getterFired, 'getter did not fire'); + }, +}; + +// --------------------------------------------------------------------------- +// decrypt — struct-before-buffer path +// --------------------------------------------------------------------------- + +export const decryptResize = { + async test() { + const key = await importAesGcmKey('decrypt'); + // Ciphertext must be at least tagLength/8 = 16 bytes for AES-GCM. 
+ const buf = makeResizable(256 * 1024); + + let getterFired = false; + const alg = { + name: 'AES-GCM', + iv: new Uint8Array(12), + get tagLength() { + buf.resize(1); + getterFired = true; + return 128; + }, + }; + + await rejects(crypto.subtle.decrypt(alg, key, buf), { + message: /Ciphertext length of 8 bits must be/, + }); + + ok(getterFired, 'getter did not fire'); + }, +}; + +// --------------------------------------------------------------------------- +// sign — struct-before-buffer path +// --------------------------------------------------------------------------- + +export const signResize = { + async test() { + const key = await importHmacKey(); + const buf = makeResizable(256 * 1024); + + let getterFired = false; + const alg = { + name: 'HMAC', + get hash() { + buf.resize(1); + getterFired = true; + return undefined; // hash was set at import time + }, + }; + + await crypto.subtle.sign(alg, key, buf); + + ok(getterFired, 'getter did not fire'); + }, +}; + +// --------------------------------------------------------------------------- +// verify — struct-before-buffer path, two buffer args +// --------------------------------------------------------------------------- + +export const verifyResize = { + async test() { + const key = await importHmacKey(); + + // First, produce a valid signature so verify gets past initial checks. + const realData = new Uint8Array(32); + const sig = await crypto.subtle.sign('HMAC', key, realData); + + // Now replay with a resizable data buffer that gets shrunk. 
+ const dataBuf = makeResizable(256 * 1024); + + let getterFired = false; + const alg = { + name: 'HMAC', + get hash() { + dataBuf.resize(1); + getterFired = true; + return undefined; + }, + }; + + await crypto.subtle.verify(alg, key, sig, dataBuf); + + ok(getterFired, 'getter did not fire'); + }, +}; + +// --------------------------------------------------------------------------- +// digest — simplest path, fewest params +// --------------------------------------------------------------------------- + +export const digestResize = { + async test() { + const buf = makeResizable(256 * 1024); + + let getterFired = false; + const alg = { + get name() { + buf.resize(1); + getterFired = true; + return 'SHA-256'; + }, + }; + + await crypto.subtle.digest(alg, buf); + + ok(getterFired, 'getter did not fire'); + }, +}; + +export const digestDetach = { + async test() { + const buf = makeResizable(256 * 1024); + + let getterFired = false; + const alg = { + get name() { + structuredClone(buf, { transfer: [buf] }); + getterFired = true; + return 'SHA-256'; + }, + }; + + await crypto.subtle.digest(alg, buf); + + ok(getterFired, 'getter did not fire'); + }, +}; + +// --------------------------------------------------------------------------- +// importKey — TOCTOU: buffer-before-struct +// --------------------------------------------------------------------------- + +export const importKeyDetach = { + async test() { + const buf = makeResizable(16); + + let getterFired = false; + const alg = { + name: 'AES-GCM', + get length() { + structuredClone(buf, { transfer: [buf] }); + getterFired = true; + return 128; + }, + }; + + await rejects( + crypto.subtle.importKey('raw', buf, alg, false, ['encrypt']), + { + message: /Imported AES key length must be/, + } + ); + + ok(getterFired, 'getter did not fire'); + }, +}; + +// --------------------------------------------------------------------------- +// unwrapKey — original autovuln-289 path: buffer-before-struct +// 
--------------------------------------------------------------------------- + +export const unwrapKeyDetach = { + async test() { + const key = await importAesGcmKey('unwrapKey'); + const buf = makeResizable(256 * 1024); + const iv = new Uint8Array(12); + + let getterFired = false; + const unwrapAlg = { + name: 'AES-GCM', + iv, + get tagLength() { + structuredClone(buf, { transfer: [buf] }); + getterFired = true; + return 128; + }, + }; + + await rejects( + crypto.subtle.unwrapKey( + 'raw', + buf, + key, + unwrapAlg, + { name: 'AES-GCM' }, + false, + ['encrypt'] + ), + { + message: /Ciphertext length of 0 bits must be/, + } + ); + + ok(getterFired, 'getter did not fire'); + }, +}; + +// =========================================================================== +// Original TOCTOU test cases (unwrapKey/importKey with resize) +// =========================================================================== + +export const unwrapKeyResizableBuffer = { + async test() { + const key = await crypto.subtle.importKey( + 'raw', + new Uint8Array(16), + { name: 'AES-GCM' }, + false, + ['unwrapKey'] + ); + + const buf = new ArrayBuffer(256 * 1024, { + maxByteLength: 256 * 1024, + }); + new Uint8Array(buf).fill(0xaa); + const iv = new Uint8Array(12); + + let getterFired = false; + const unwrapAlg = { + name: 'AES-GCM', + iv, + get tagLength() { + buf.resize(1); + getterFired = true; + return 128; + }, + }; + + await rejects( + crypto.subtle.unwrapKey( + 'raw', + buf, + key, + unwrapAlg, + { name: 'AES-GCM' }, + false, + ['encrypt'] + ), + { + message: /Ciphertext length of 8 bits must be/, + } + ); + + ok(getterFired, 'getter did not fire'); + }, +}; + +export const importKeyResizableBuffer = { + async test() { + const buf = new ArrayBuffer(256 * 1024, { + maxByteLength: 256 * 1024, + }); + new Uint8Array(buf).fill(0xbb); + + let getterFired = false; + const alg = { + name: 'AES-GCM', + get length() { + buf.resize(1); + getterFired = true; + return 128; + }, + }; + + await 
rejects( + crypto.subtle.importKey('raw', buf, alg, false, ['encrypt']), + { + message: /Imported AES key length must be/, + } + ); + + ok(getterFired, 'getter did not fire'); + }, +}; + +// ArrayBufferView variants: exercises the asBytes(v8::ArrayBufferView) overload. +// The view is a Uint8Array over a resizable ArrayBuffer; the getter shrinks the +// underlying buffer while the view's {byteOffset, byteLength} still refer to +// the original extent. + +export const unwrapKeyResizableBufferView = { + async test() { + const key = await crypto.subtle.importKey( + 'raw', + new Uint8Array(16), + { name: 'AES-GCM' }, + false, + ['unwrapKey'] + ); + + const buf = new ArrayBuffer(256 * 1024, { + maxByteLength: 256 * 1024, + }); + const view = new Uint8Array(buf); + view.fill(0xcc); + const iv = new Uint8Array(12); + + let getterFired = false; + const unwrapAlg = { + name: 'AES-GCM', + iv, + get tagLength() { + buf.resize(1); + getterFired = true; + return 128; + }, + }; + + await rejects( + crypto.subtle.unwrapKey( + 'raw', + view, + key, + unwrapAlg, + { name: 'AES-GCM' }, + false, + ['encrypt'] + ), + { + message: /Ciphertext length of 8 bits must be/, + } + ); + + ok(getterFired, 'getter did not fire'); + }, +}; + +export const importKeyResizableBufferView = { + async test() { + const buf = new ArrayBuffer(256 * 1024, { + maxByteLength: 256 * 1024, + }); + const view = new Uint8Array(buf); + view.fill(0xdd); + + let getterFired = false; + const alg = { + name: 'AES-GCM', + get length() { + buf.resize(1); + getterFired = true; + return 128; + }, + }; + + await rejects( + crypto.subtle.importKey('raw', view, alg, false, ['encrypt']), + { + message: /Imported AES key length must be/, + } + ); + + ok(getterFired, 'getter did not fire'); + }, +}; diff --git a/src/workerd/api/tests/resizable-arraybuffer-toctou-test.wd-test b/src/workerd/api/tests/resizable-arraybuffer-toctou-test.wd-test new file mode 100644 index 00000000000..dada420522f --- /dev/null +++ 
b/src/workerd/api/tests/resizable-arraybuffer-toctou-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "resizable-arraybuffer-toctou-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "resizable-arraybuffer-toctou-test.js") + ], + compatibilityFlags = ["nodejs_compat"], + ) + ), + ], +); diff --git a/src/workerd/api/tests/socket-close-gc-test.js b/src/workerd/api/tests/socket-close-gc-test.js new file mode 100644 index 00000000000..01e043b055e --- /dev/null +++ b/src/workerd/api/tests/socket-close-gc-test.js @@ -0,0 +1,54 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 +// +// Regression test for AUTOVULN-EW-EDGEWORKER-17: +// Socket::close() bare-this UAF in jsg::Promise .then()/.catch_() continuations. +// +// Strategy: write a large buffer to the socket WITHOUT awaiting, then immediately +// call close(). The WritableStream's internal queue still holds the data, so the +// flush() inside close() must wait for it to drain to the underlying stream. While +// flush pends across event loop turns, we drop the Socket reference and force GC. +// When flush eventually resolves, the .then() continuations run — pre-fix, they +// dereference freed memory. +// Note that this test is not deterministic and may pass even without the fix. + +import { ok } from 'assert'; + +let connectHandlerCalled = false; + +export default { + async connect(socket) { + connectHandlerCalled = true; + }, +}; + +export const socketCloseGcRegression = { + async test(ctrl, env) { + let socket = env.SELF.connect('localhost:1'); + await socket.opened; + ok(connectHandlerCalled, 'connect handler must have been called'); + + // Queue a large write WITHOUT awaiting — data sits in the WritableStream's + // JS-side queue. This ensures close()'s internal flush() actually pends. 
+ const writer = socket.writable.getWriter(); + writer.write(new Uint8Array(1 << 20)); // 1 MiB, fire-and-forget + writer.releaseLock(); + + // close() starts the four-continuation .then() chain. flush() cannot resolve + // instantly because the write queue is still draining. + const closePromise = socket.close(); + socket = null; + + // Force GC while flush is pending. Without JSG_THIS in the lambda captures, + // the Socket wrapper is invisible to V8's GC tracer and gets collected. + gc(); + await scheduler.wait(1); + gc(); + await scheduler.wait(1); + gc(); + + // When flush completes, the .then() continuations fire. Pre-fix: UAF. + await closePromise; + }, +}; diff --git a/src/workerd/api/tests/socket-close-gc-test.wd-test b/src/workerd/api/tests/socket-close-gc-test.wd-test new file mode 100644 index 00000000000..8914d5c08d0 --- /dev/null +++ b/src/workerd/api/tests/socket-close-gc-test.wd-test @@ -0,0 +1,18 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + v8Flags = ["--expose-gc"], + services = [ + ( name = "socket-close-gc-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "socket-close-gc-test.js"), + ], + compatibilityFlags = ["nodejs_compat_v2", "experimental"], + bindings = [ + (name = "SELF", service = "socket-close-gc-test"), + ], + ) + ), + ], +); diff --git a/src/workerd/api/tests/sql-resizable-arraybuffer-test.js b/src/workerd/api/tests/sql-resizable-arraybuffer-test.js new file mode 100644 index 00000000000..c88bda21e34 --- /dev/null +++ b/src/workerd/api/tests/sql-resizable-arraybuffer-test.js @@ -0,0 +1,129 @@ +// Copyright (c) 2025 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-330: +// SIGSEGV in SqlStorage::Cursor when a resizable ArrayBuffer bound as a +// SQLITE_STATIC blob is shrunk before the cursor is read. 
+ +import * as assert from 'node:assert'; +import { DurableObject } from 'cloudflare:workers'; + +export class DurableObjectExample extends DurableObject { + constructor(state, env) { + super(state, env); + this.state = state; + } + + async testResizableArrayBufferBlobBinding() { + const sql = this.state.storage.sql; + + // Allocate a resizable ArrayBuffer and fill it with a known pattern. + const rab = new ArrayBuffer(64, { maxByteLength: 256 }); + const view = new Uint8Array(rab); + for (let i = 0; i < view.length; i++) { + view[i] = i & 0xff; + } + + // Bind the resizable ArrayBuffer as a blob parameter. + const cursor = sql.exec('SELECT ? AS x', rab); + + // Shrink the buffer to zero -- V8 decommits the previously committed pages. + rab.resize(0); + + // Reading the cursor must not crash (SIGSEGV). After the fix, asBytes() + // copies the data eagerly when the source ArrayBuffer is resizable, so the + // kj::Array stored in Cursor::State::bindings owns stable heap + // memory regardless of later resize() calls. + const rows = cursor.toArray(); + assert.strictEqual(rows.length, 1); + + // The blob should contain the original 64 bytes (copied before resize). + const blob = new Uint8Array(rows[0].x); + assert.strictEqual(blob.length, 64); + for (let i = 0; i < blob.length; i++) { + assert.strictEqual( + blob[i], + i & 0xff, + `byte at index ${i} should be ${i & 0xff} but got ${blob[i]}` + ); + } + } + + async testResizableArrayBufferViewBlobBinding() { + const sql = this.state.storage.sql; + + // Use a Uint8Array view over a resizable ArrayBuffer. + const rab = new ArrayBuffer(128, { maxByteLength: 512 }); + const fullView = new Uint8Array(rab); + for (let i = 0; i < fullView.length; i++) { + fullView[i] = (i * 3) & 0xff; + } + + // Create a view over a sub-range. + const subView = new Uint8Array(rab, 16, 32); + + const cursor = sql.exec('SELECT ? AS x', subView); + + // Shrink the underlying buffer. + rab.resize(0); + + // Must not crash. 
+ const rows = cursor.toArray(); + assert.strictEqual(rows.length, 1); + + const blob = new Uint8Array(rows[0].x); + assert.strictEqual(blob.length, 32); + for (let i = 0; i < blob.length; i++) { + const expected = ((i + 16) * 3) & 0xff; + assert.strictEqual( + blob[i], + expected, + `byte at index ${i} should be ${expected} but got ${blob[i]}` + ); + } + } + + async testNonResizableArrayBufferStillWorks() { + const sql = this.state.storage.sql; + + // Sanity check: non-resizable ArrayBuffer blob binding still works. + const ab = new ArrayBuffer(8); + const view = new Uint8Array(ab); + for (let i = 0; i < view.length; i++) { + view[i] = 0xaa; + } + + const rows = [...sql.exec('SELECT ? AS x', ab)]; + assert.strictEqual(rows.length, 1); + const blob = new Uint8Array(rows[0].x); + assert.strictEqual(blob.length, 8); + for (let i = 0; i < blob.length; i++) { + assert.strictEqual(blob[i], 0xaa); + } + } +} + +export let testResizableArrayBufferBlobBinding = { + async test(ctrl, env, ctx) { + let id = env.ns.idFromName('rab-blob-test'); + let stub = env.ns.get(id); + await stub.testResizableArrayBufferBlobBinding(); + }, +}; + +export let testResizableArrayBufferViewBlobBinding = { + async test(ctrl, env, ctx) { + let id = env.ns.idFromName('rab-view-blob-test'); + let stub = env.ns.get(id); + await stub.testResizableArrayBufferViewBlobBinding(); + }, +}; + +export let testNonResizableArrayBufferStillWorks = { + async test(ctrl, env, ctx) { + let id = env.ns.idFromName('non-rab-blob-test'); + let stub = env.ns.get(id); + await stub.testNonResizableArrayBufferStillWorks(); + }, +}; diff --git a/src/workerd/api/tests/sql-resizable-arraybuffer-test.wd-test b/src/workerd/api/tests/sql-resizable-arraybuffer-test.wd-test new file mode 100644 index 00000000000..ab4bf94004e --- /dev/null +++ b/src/workerd/api/tests/sql-resizable-arraybuffer-test.wd-test @@ -0,0 +1,28 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const config :Workerd.Config = ( + services = [ + (name 
= "main", worker = .mainWorker), + (name = "TEST_TMPDIR", disk = (writable = true)), + ], +); + +const mainWorker :Workerd.Worker = ( + compatibilityFlags = ["nodejs_compat", "experimental"], + + modules = [ + (name = "worker", esModule = embed "sql-resizable-arraybuffer-test.js"), + ], + + durableObjectNamespaces = [ + ( className = "DurableObjectExample", + uniqueKey = "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4", + enableSql = true ), + ], + + durableObjectStorage = (localDisk = "TEST_TMPDIR"), + + bindings = [ + (name = "ns", durableObjectNamespace = "DurableObjectExample"), + ], +); diff --git a/src/workerd/api/tests/sql-restrict-names-test.js b/src/workerd/api/tests/sql-restrict-names-test.js index 667775a011b..e69da586065 100644 --- a/src/workerd/api/tests/sql-restrict-names-test.js +++ b/src/workerd/api/tests/sql-restrict-names-test.js @@ -2,8 +2,7 @@ // Licensed under the Apache 2.0 license found in the LICENSE file or at: // https://opensource.org/licenses/Apache-2.0 -// Tests for the SQL_RESTRICT_RESERVED_NAMES autogate. -// This test only runs in the @all-autogates variant so it can assert the gated behavior. +// Tests for SQL reserved name restrictions. 
import * as assert from 'node:assert'; import { DurableObject } from 'cloudflare:workers'; diff --git a/src/workerd/api/tests/starttls-nodejs-test.js b/src/workerd/api/tests/starttls-nodejs-test.js index 0597cb4a8e3..a39bfc3049b 100644 --- a/src/workerd/api/tests/starttls-nodejs-test.js +++ b/src/workerd/api/tests/starttls-nodejs-test.js @@ -4,8 +4,9 @@ import { connect } from 'cloudflare:sockets'; import { ok, strict as assert } from 'node:assert'; -import { connect as tlsConnect } from 'node:tls'; +import { connect as tlsConnect, TLSSocket } from 'node:tls'; import { connect as netConnect } from 'node:net'; +import unsafe from 'workerd:unsafe'; export const checkPortsSetCorrectly = { test(ctrl, env, ctx) { @@ -17,6 +18,172 @@ export const checkPortsSetCorrectly = { }, }; +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-35: tls.connect() must propagate +// options.servername to TLSSocket.servername and pass it as expectedServerHostname +// to the native Socket.startTls() call. Before the fix, servername was silently +// dropped, causing TLS certificate identity checks to use the transport host +// instead of the caller-specified server identity. 
+export const regressionServernamePassthrough = { + async test(ctrl, env, ctx) { + const opts = { + servername: 'localhost', + port: env.STARTTLS_CA_PORT, + rejectUnauthorized: true, + }; + + const socket = netConnect(opts.port); + + await new Promise((resolve, reject) => { + socket.once('data', (data) => { + const greeting = data.toString().trim(); + if (greeting !== 'HELLO') { + reject(new Error('Expected HELLO greeting')); + return; + } + + socket.write('HELLO_BACK\n'); + + socket.once('data', (data) => { + const signal = data.toString().trim(); + if (signal !== 'START_TLS') { + reject(new Error('Expected START_TLS signal')); + return; + } + + // Upgrade to TLS with explicit servername + const tlsSocket = tlsConnect( + { + ...opts, + socket: socket, + }, + function () { + // After the fix, TLSSocket.servername must reflect the + // caller-supplied value. Before the fix it was always null. + assert.strictEqual( + this.servername, + 'localhost', + 'TLSSocket.servername must be set to the caller-supplied servername' + ); + + this.write('ping\n'); + this.once('data', (data) => { + assert.strictEqual(data.toString().trim(), 'pong'); + this.end(); + resolve(); + }); + } + ); + + // We use isTestAutogateEnabled() (which checks TEST_WORKERD) as a proxy + // for whether STARTTLS_REJECT_EXPECTED_SERVER_HOSTNAME is enabled, + // because the @all-autogates test variant enables every gate at once. + if (unsafe.isTestAutogateEnabled()) { + tlsSocket.on('error', (err) => { + try { + assert.match(err.message, /expectedServerHostname/); + resolve(); + } catch (e) { + reject(e); + } + }); + } else { + tlsSocket.on('error', reject); + } + }); + }); + + socket.on('error', reject); + }); + }, +}; + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-35: setServername() must +// actually store the value so it is used during the TLS upgrade. We construct +// a TLSSocket *without* a servername, call setServername('localhost') before +// the handshake, then trigger _start(). 
The server cert is issued for +// "localhost", so the handshake succeeds only if setServername() actually +// propagated the value to startTls({ expectedServerHostname }). +export const regressionSetServernameStoresValue = { + async test(ctrl, env, ctx) { + const socket = netConnect(env.STARTTLS_CA_PORT); + + await new Promise((resolve, reject) => { + socket.once('data', (data) => { + const greeting = data.toString().trim(); + if (greeting !== 'HELLO') { + reject(new Error('Expected HELLO greeting')); + return; + } + + socket.write('HELLO_BACK\n'); + + socket.once('data', (data) => { + const signal = data.toString().trim(); + if (signal !== 'START_TLS') { + reject(new Error('Expected START_TLS signal')); + return; + } + + // Create a TLSSocket with no servername — deliberately omitted so + // that the only way the correct SNI reaches startTls() is through + // our setServername() call below. + const tlsSocket = new TLSSocket(socket, { + rejectUnauthorized: true, + }); + + // This is the line under test: if setServername were a no-op the + // handshake would either send no SNI or the wrong one, and the + // server's certificate check for "localhost" would fail. + tlsSocket.setServername('localhost'); + + tlsSocket.on('secure', function () { + try { + assert.strictEqual( + tlsSocket.servername, + 'localhost', + 'servername must reflect the value set via setServername()' + ); + + tlsSocket.write('ping\n'); + tlsSocket.once('data', (data) => { + try { + assert.strictEqual(data.toString().trim(), 'pong'); + tlsSocket.end(); + resolve(); + } catch (e) { + reject(e); + } + }); + } catch (e) { + reject(e); + } + }); + + // We use isTestAutogateEnabled() (which checks TEST_WORKERD) as a proxy + // for whether STARTTLS_REJECT_EXPECTED_SERVER_HOSTNAME is enabled, + // because the @all-autogates test variant enables every gate at once. 
+ if (unsafe.isTestAutogateEnabled()) { + tlsSocket.on('error', (err) => { + try { + assert.match(err.message, /expectedServerHostname/); + resolve(); + } catch (e) { + reject(e); + } + }); + } else { + tlsSocket.on('error', reject); + } + + tlsSocket._start(); + }); + }); + + socket.on('error', reject); + }); + }, +}; + export const startTlsCATest = { async test(ctrl, env, ctx) { const opts = { diff --git a/src/workerd/api/tests/starttls-nodejs-test.wd-test b/src/workerd/api/tests/starttls-nodejs-test.wd-test index e742fe658d6..f22a46aa3c0 100644 --- a/src/workerd/api/tests/starttls-nodejs-test.wd-test +++ b/src/workerd/api/tests/starttls-nodejs-test.wd-test @@ -12,21 +12,21 @@ const config :Workerd.Config = ( modules = [ (name = "worker.js", esModule = embed "starttls-nodejs-test.js") ], - compatibilityFlags = ["nodejs_compat", "experimental"], + compatibilityFlags = ["nodejs_compat", "experimental", "unsafe_module"], bindings = [ (name = "STARTTLS_CA_PORT", fromEnvironment = "STARTTLS_CA_PORT"), ], ) ), - ( name = "internet", - network = ( + ( name = "internet", + network = ( allow = ["private"], tlsOptions = ( trustedCertificates = [ embed "starttls-server.pem", ], ), - ) + ) ), ], ); diff --git a/src/workerd/api/tests/stdio-writesync-reentry-uaf-test.js b/src/workerd/api/tests/stdio-writesync-reentry-uaf-test.js new file mode 100644 index 00000000000..f50690b8593 --- /dev/null +++ b/src/workerd/api/tests/stdio-writesync-reentry-uaf-test.js @@ -0,0 +1,74 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-395: heap use-after-free in +// writeStdio() via re-entrant console.log getter. +// +// writeStdio() used to capture a kj::ArrayPtr aliasing StdioFile::lineBuffer's +// heap backing store, then perform JS property lookups (console / console.log) +// that can run user-defined getters. 
A getter that calls fs.writeSync(1, ...) +// re-enters StdioFile::write, which can grow lineBuffer and free the old +// backing store, leaving writeStdio reading freed memory. +// +// Without the fix, this crashes under ASAN with: +// heap-use-after-free READ at src/workerd/io/worker-fs.c++ +// +// With the fix, writeStdio copies the bytes into an owned kj::String before +// any JS property access, and StdioFile::write moves lineBuffer into a local +// before calling writeStdio, so re-entrant writes cannot invalidate the +// buffer. + +import * as fs from 'node:fs'; +import assert from 'node:assert'; + +export const stdioWriteSyncReentryUafTest = { + test() { + const origLog = console.log; + let armed = false; + let getterFired = false; + + // Install a getter on console.log that re-enters StdioFile::write + // when armed. This forces lineBuffer to grow (and potentially + // reallocate), which would free the buffer that writeStdio() captured. + Object.defineProperty(console, 'log', { + configurable: true, + get() { + if (armed) { + armed = false; + getterFired = true; + // Write a large buffer with no newline to force + // lineBuffer.addAll() to grow, freeing the old backing store. + const big = Buffer.alloc(4000, 0x42); + fs.writeSync(1, big); + } + return origLog; + }, + }); + + try { + // Step 1: Prime lineBuffer with non-newline data (buffered, no + // flush). + fs.writeSync(1, Buffer.from('AAAAA')); + + // Step 2: Arm the getter and trigger the newline path. + // writeStdio(lineBuffer.asPtr()) -> console.get("log") runs getter + // -> re-entrant write grows lineBuffer -> old backing store freed + // -> writeStdio reads from owned copy (safe). + armed = true; + fs.writeSync(1, Buffer.from('X\n')); + + // If we get here without crashing, the fix is working. + // Verify the getter actually fired (otherwise the test is not + // exercising the vulnerable path). 
+ assert.ok( + getterFired, + 'console.log getter should have fired during writeStdio' + ); + } finally { + // Restore console.log + delete console.log; + console.log = origLog; + } + }, +}; diff --git a/src/workerd/api/tests/stdio-writesync-reentry-uaf-test.wd-test b/src/workerd/api/tests/stdio-writesync-reentry-uaf-test.wd-test new file mode 100644 index 00000000000..2a27a2b9603 --- /dev/null +++ b/src/workerd/api/tests/stdio-writesync-reentry-uaf-test.wd-test @@ -0,0 +1,13 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [( + name = "stdio-writesync-reentry-uaf-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "stdio-writesync-reentry-uaf-test.js"), + ], + compatibilityFlags = ["nodejs_compat_v2", "enable_nodejs_fs_module"], + ), + )], +); diff --git a/src/workerd/api/tests/streams-byte-handlePush-uaf-test.js b/src/workerd/api/tests/streams-byte-handlePush-uaf-test.js index 879a4970da8..1d3bcef7859 100644 --- a/src/workerd/api/tests/streams-byte-handlePush-uaf-test.js +++ b/src/workerd/api/tests/streams-byte-handlePush-uaf-test.js @@ -72,7 +72,9 @@ export const handlePushReentrantError = { strictEqual(result.done, false); strictEqual(result.value.byteLength, 4); strictEqual(result.value[0], 1); - strictEqual(thenCalled, true); + + // The offending Object.prototype.then should not have been called. + strictEqual(thenCalled, false); // Allocate objects to pressure the allocator into reclaiming freed memory, // making the UAF more likely to manifest under ASAN. diff --git a/src/workerd/api/tests/struct-prototype-pollution-test.js b/src/workerd/api/tests/struct-prototype-pollution-test.js new file mode 100644 index 00000000000..2f872d64de1 --- /dev/null +++ b/src/workerd/api/tests/struct-prototype-pollution-test.js @@ -0,0 +1,58 @@ +// Copyright (c) 2026 Cloudflare, Inc. 
+// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-369: process abort via +// prototype-polluted getter during Request host-object deserialization. +// Pre-fix: V8_Fatal("Invoke in DisallowJavascriptExecutionScope") -> abort. +// Post-fix: structuredClone succeeds normally. + +import assert from 'node:assert'; + +function withProtoGetter(prop, fn) { + const saved = Object.getOwnPropertyDescriptor(Object.prototype, prop); + Object.defineProperty(Object.prototype, prop, { + configurable: true, + get: () => undefined, + }); + try { + fn(); + } finally { + if (saved) { + Object.defineProperty(Object.prototype, prop, saved); + } else { + // eslint-disable-next-line + delete Object.prototype[prop]; + } + } +} + +export const requestPrototypePollutionRedirect = { + test() { + withProtoGetter('redirect', () => { + const cloned = structuredClone(new Request('https://example.com/')); + assert.strictEqual(cloned.url, 'https://example.com/'); + assert.ok(cloned instanceof Request); + }); + }, +}; + +export const requestPrototypePollutionMethod = { + test() { + withProtoGetter('method', () => { + const cloned = structuredClone(new Request('https://example.com/')); + assert.strictEqual(cloned.url, 'https://example.com/'); + assert.strictEqual(cloned.method, 'GET'); + }); + }, +}; + +export const requestPrototypePollutionSignal = { + test() { + withProtoGetter('signal', () => { + const cloned = structuredClone(new Request('https://example.com/')); + assert.strictEqual(cloned.url, 'https://example.com/'); + assert.ok(cloned instanceof Request); + }); + }, +}; diff --git a/src/workerd/api/tests/struct-prototype-pollution-test.wd-test b/src/workerd/api/tests/struct-prototype-pollution-test.wd-test new file mode 100644 index 00000000000..474d06df302 --- /dev/null +++ b/src/workerd/api/tests/struct-prototype-pollution-test.wd-test @@ -0,0 +1,14 @@ +using Workerd 
= import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "struct-prototype-pollution-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "struct-prototype-pollution-test.js") + ], + compatibilityFlags = ["nodejs_compat"], + ) + ), + ], +); diff --git a/src/workerd/api/tests/tail-worker-test.js b/src/workerd/api/tests/tail-worker-test.js index 7a380cf5357..59222623ffd 100644 --- a/src/workerd/api/tests/tail-worker-test.js +++ b/src/workerd/api/tests/tail-worker-test.js @@ -2,7 +2,6 @@ // Licensed under the Apache 2.0 license found in the LICENSE file or at: // https://opensource.org/licenses/Apache-2.0 import * as assert from 'node:assert'; -import unsafe from 'workerd:unsafe'; // Flat array of all invocations observed by the tail handler. // Each entry captures trace metadata and concatenated event JSON. @@ -239,48 +238,10 @@ const E = { '{"type":"onset","executionModel":"stateless","spanId":"0000000000000000","scriptTags":[],"info":{"type":"trace","traces":[""]}}{"type":"outcome","outcome":"ok","cpuTime":0,"wallTime":0}', }; -// Expected tree without propagation — every invocation is a root with no children. -// This is the same set of events as the old flat expected array, just wrapped in tree nodes. 
-const expectedFlat = [ - n(E.alarm), - n(E.wsUpgrade), - n(E.wsHibernation), - n(E.doFetch), - n(E.wsClose), - n(E.wsMessage), - n(E.myActorJsrpc), - n(E.cacheMode), - n(E.connectHandler), - n(E.connectHandlerProxy), - n(E.localAddressViaServiceBinding), - n(E.jsrpcGetCounter), - n(E.jsrpcNonFunction), - n(E.connectTarget), - n(E.jsrpcDoSubrequest), - n(E.httpTest), - n(E.queueTest), - n(E.fetchEmptyUrl), - n(E.fetchNotFound), - n(E.fetchRayId), - n(E.fetchWebSocket), - n(E.fetchBodyLength), - n(E.fetchBodyLength), - n(E.fetchBatch), - n(E.fetchMsgText), - n(E.fetchMsgBytes), - n(E.fetchMsgJson), - n(E.fetchMsgV8), - n(E.queueConsumer), - n(E.scheduledEmpty), - n(E.scheduledCron), - n(E.trace), - n(E.trace), -].sort((a, b) => a.events.localeCompare(b.events)); - -// Expected tree with propagation — subrequest callees are children of their callers. +// Subrequest callees are children of their callers. // DOs that are called via subrequests inherit the caller's traceId and become children. // DOs triggered by system events (alarms, hibernation wakeups) remain standalone roots. -const expectedWithPropagation = [ +const expected = [ // actor-alarms-test: DO fetch and alarm are independent roots (own traceId) n(E.alarm), n(E.doFetch), @@ -345,8 +306,7 @@ function sortTreeChildren(nodes) { sortTreeChildren(node.children); } } -sortTreeChildren(expectedFlat); -sortTreeChildren(expectedWithPropagation); +sortTreeChildren(expected); export const test = { async test() { @@ -354,11 +314,6 @@ export const test = { // propagating the outcome of the invocation may take longer. Wait briefly so this can go ahead. await scheduler.wait(50); - // @all-autogates enables USER_SPAN_CONTEXT_PROPAGATION. - const expected = unsafe.isTestAutogateEnabled() - ? 
expectedWithPropagation - : expectedFlat; - verifyTraceIds(allInvocations); assert.deepStrictEqual(buildTree(allInvocations), expected); }, diff --git a/src/workerd/api/tests/tail-worker-test.wd-test b/src/workerd/api/tests/tail-worker-test.wd-test index 3cdd7880dd1..f1ed026f9d6 100644 --- a/src/workerd/api/tests/tail-worker-test.wd-test +++ b/src/workerd/api/tests/tail-worker-test.wd-test @@ -167,7 +167,7 @@ const logWorker :Workerd.Worker = ( modules = [ (name = "worker", esModule = embed "tail-worker-test.js") ], - compatibilityFlags = ["experimental", "nodejs_compat", "unsafe_module"], + compatibilityFlags = ["experimental", "nodejs_compat"], streamingTails = ["receiver"], ); diff --git a/src/workerd/api/tests/url-searchparams-iterator-uaf-test.js b/src/workerd/api/tests/url-searchparams-iterator-uaf-test.js new file mode 100644 index 00000000000..eba20e49118 --- /dev/null +++ b/src/workerd/api/tests/url-searchparams-iterator-uaf-test.js @@ -0,0 +1,140 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-386: +// URLSearchParams key/value iterators must return owning copies of strings, +// not borrowed pointers into the query vector. A re-entrant mutation via +// Object.prototype setter during JSG_STRUCT slow-path wrapping must not +// cause a use-after-free. + +import { strictEqual } from 'node:assert'; + +// Test that a key iterator returns the correct value even when an +// Object.prototype.done setter mutates the URLSearchParams mid-iteration. +// Pre-patch, this would read freed memory (UAF). 
+export const keyIteratorReentrantDelete = { + test() { + const key0 = 'A'.repeat(64); + const key1 = 'B'.repeat(64); + const usp = new URLSearchParams(); + usp.append(key0, 'v0'); + usp.append(key1, 'v1'); + + const it = usp.keys(); + + let armed = true; + Object.defineProperty(Object.prototype, 'done', { + configurable: true, + set(v) { + if (armed) { + armed = false; + // This delete frees the kj::String buffer backing key0. + // Pre-patch, the pending kj::StringPtr in the Next struct + // would become dangling. + usp.delete(key0); + } + }, + get() { + return undefined; + }, + }); + + try { + const r = it.next(); + // The value must be the original key0, not garbage from freed memory. + strictEqual( + r.value, + key0, + 'key iterator must return an owning copy of the key, ' + + 'not a dangling pointer' + ); + } finally { + delete Object.prototype.done; + } + }, +}; + +// Same test for the value iterator. +export const valueIteratorReentrantDelete = { + test() { + const key0 = 'X'.repeat(64); + const val0 = 'Y'.repeat(64); + const usp = new URLSearchParams(); + usp.append(key0, val0); + usp.append('Z'.repeat(64), 'w1'); + + const it = usp.values(); + + let armed = true; + Object.defineProperty(Object.prototype, 'done', { + configurable: true, + set(v) { + if (armed) { + armed = false; + usp.delete(key0); + } + }, + get() { + return undefined; + }, + }); + + try { + const r = it.next(); + strictEqual( + r.value, + val0, + 'value iterator must return an owning copy of the value, ' + + 'not a dangling pointer' + ); + } finally { + delete Object.prototype.done; + } + }, +}; + +// Same test for the entry iterator. 
+export const entryIteratorReentrantDelete = { + test() { + const key0 = 'X'.repeat(64); + const val0 = 'Y'.repeat(64); + const usp = new URLSearchParams(); + usp.append(key0, val0); + usp.append('Z'.repeat(64), 'w1'); + + const it = usp.entries(); + + let armed = true; + Object.defineProperty(Object.prototype, 'done', { + configurable: true, + set(v) { + if (armed) { + armed = false; + usp.delete(key0); + } + }, + get() { + return undefined; + }, + }); + + try { + const r = it.next(); + strictEqual( + r.value[0], + key0, + 'entry iterator must return an owning copy of the key, ' + + 'not a dangling pointer' + ); + strictEqual( + r.value[1], + val0, + 'entry iterator must return an owning copy of the value, ' + + 'not a dangling pointer' + ); + } finally { + delete Object.prototype.done; + } + }, +}; diff --git a/src/workerd/api/tests/url-searchparams-iterator-uaf-test.wd-test b/src/workerd/api/tests/url-searchparams-iterator-uaf-test.wd-test new file mode 100644 index 00000000000..556a5fc608d --- /dev/null +++ b/src/workerd/api/tests/url-searchparams-iterator-uaf-test.wd-test @@ -0,0 +1,16 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "url-searchparams-iterator-uaf-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "url-searchparams-iterator-uaf-test.js") + ], + compatibilityFlags = [ + "nodejs_compat", + ] + ) + ), + ], +); diff --git a/src/workerd/api/tests/url_standard-searchparams-iterator-uaf-test.wd-test b/src/workerd/api/tests/url_standard-searchparams-iterator-uaf-test.wd-test new file mode 100644 index 00000000000..f5f81864013 --- /dev/null +++ b/src/workerd/api/tests/url_standard-searchparams-iterator-uaf-test.wd-test @@ -0,0 +1,17 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "url_standard-searchparams-iterator-uaf-test", + worker = ( + modules = [ + (name = "worker", esModule = embed 
"url-searchparams-iterator-uaf-test.js") + ], + compatibilityFlags = [ + "nodejs_compat", + "url_standard" + ] + ) + ), + ], +); diff --git a/src/workerd/api/tests/urlpattern-regex-search-oob-test.js b/src/workerd/api/tests/urlpattern-regex-search-oob-test.js new file mode 100644 index 00000000000..1889ba60776 --- /dev/null +++ b/src/workerd/api/tests/urlpattern-regex-search-oob-test.js @@ -0,0 +1,113 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-335: +// Heap buffer overflow (OOB write) in URLPattern regex_search via +// monkey-patched RegExp.prototype.exec. +// +// URLPatternRegexEngine::regex_search allocated a std::vector sized by the +// initial matches.size(), then re-read matches.size() in the loop condition +// while calling matches.get() which can fire user-defined getters. A +// monkey-patched RegExp.prototype.exec could return an array that grows +// mid-iteration, causing OOB writes past the vector's backing store. +// +// The fix snapshots the array length once before iterating and uses +// reserve+emplace_back instead of operator[]. JsRegExp::operator() also +// now rejects non-Array return values from exec. + +import { strictEqual, ok } from 'node:assert'; + +export const urlpatternRegexSearchOobRegression = { + async test() { + const realExec = RegExp.prototype.exec; + + // Test 1: Monkey-patched exec that tries to grow the result array + // mid-iteration should not cause a crash or OOB write. After the fix, + // the snapshotted length prevents the loop from reading past the + // allocated vector. + let armed = false; + RegExp.prototype.exec = function (s) { + if (!armed) return realExec.call(this, s); + + // Return an array with initial length 3 (match + 2 groups). + // The getter on index 2 tries to grow the array to 64 elements. 
+ const arr = ['match', 'AAAAAAAA']; + Object.defineProperty(arr, 2, { + enumerable: true, + configurable: true, + get() { + // Attempt to grow the array past the pre-allocated vector size. + for (let j = 3; j < 64; j++) arr[j] = 'B'.repeat(40); + return 'CCCCCCCC'; + }, + }); + arr.length = 3; + return arr; + }; + + try { + const p = new URLPattern({ pathname: '/(x)' }); + armed = true; + + // After the fix, this should either succeed safely (returning results + // based on the snapshotted length) or throw a TypeError — but must NOT + // crash the process with a heap-buffer-overflow. + try { + const result = p.exec({ pathname: '/x' }); + // If it succeeds, the result should have the pathname group. + if (result !== null) { + strictEqual(typeof result.pathname, 'object'); + } + } catch (_e) { + // A TypeError from the hardened JsRegExp::operator() rejecting + // non-standard exec results is acceptable. + } + // The key assertion: we reached this point without a process crash. + ok(true, 'Process did not crash from array-growing getter attack'); + } finally { + armed = false; + RegExp.prototype.exec = realExec; + } + + // Test 2: Monkey-patched exec returning a non-array should not crash. + RegExp.prototype.exec = function (s) { + if (!armed) return realExec.call(this, s); + // Return a plain object instead of an array. + return { 0: 'match', 1: 'group', length: 2 }; + }; + + try { + const p2 = new URLPattern({ pathname: '/(y)' }); + armed = true; + // Should not crash — the hardened code rejects non-Array results. + const result2 = p2.exec({ pathname: '/y' }); + // After the fix, non-array results are treated as no-match (null). + strictEqual(result2, null); + } finally { + armed = false; + RegExp.prototype.exec = realExec; + } + + // Test 3: Monkey-patched exec returning empty array (length 0) should + // not cause integer underflow (size_t wrapping to ~4G). 
+ RegExp.prototype.exec = function (s) { + if (!armed) return realExec.call(this, s); + return []; + }; + + try { + const p3 = new URLPattern({ pathname: '/(z)' }); + armed = true; + // Should not crash with OOM from 4G-element vector allocation. + // The empty array is treated as a match with zero groups by ada-url, + // so exec() returns a result object (not null). The key assertion is + // that we don't crash from the integer underflow. + const _result3 = p3.exec({ pathname: '/z' }); + ok(true, 'Process did not crash from empty-array underflow attack'); + } finally { + armed = false; + RegExp.prototype.exec = realExec; + } + }, +}; diff --git a/src/workerd/api/tests/urlpattern-regex-search-oob-test.wd-test b/src/workerd/api/tests/urlpattern-regex-search-oob-test.wd-test new file mode 100644 index 00000000000..05467b57f33 --- /dev/null +++ b/src/workerd/api/tests/urlpattern-regex-search-oob-test.wd-test @@ -0,0 +1,13 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [( + name = "urlpattern-regex-search-oob-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "urlpattern-regex-search-oob-test.js"), + ], + compatibilityFlags = ["nodejs_compat", "urlpattern_standard"], + ), + )], +); diff --git a/src/workerd/api/tests/worker-loader-gc-test.js b/src/workerd/api/tests/worker-loader-gc-test.js new file mode 100644 index 00000000000..2598af2b8f3 --- /dev/null +++ b/src/workerd/api/tests/worker-loader-gc-test.js @@ -0,0 +1,43 @@ +// Copyright (c) 2025 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-102: +// GC of an anonymous WorkerStub must not abort the process when the child +// v8::Isolate is torn down inside the parent's cppgc finalizer. 
+// +// Before the fix, when DeleteQueue::scheduleDeletion() synchronously destroyed +// the WorkerStubImpl (and its child WorkerService / v8::Isolate) while the +// thread-local inCppgcShimDestructor flag was set by the parent's +// CppgcShim::~CppgcShim(), causing HeapTracer::clearWrappers() in the child +// isolate to hit KJ_ASSERT(!inCppgcShimDestructor) and std::terminate(). +// After the fix, we defer destruction of the `WorkerService` owned by `WorkerStubImpl` +// to the next turn of the event loop, skipping the nested teardown +import assert from 'node:assert'; + +export let gcAnonymousWorkerStub = { + async test(ctrl, env, ctx) { + // Load an anonymous child worker (no name → sole owner is the JSG WorkerStub). + let stub = env.loader.load({ + compatibilityDate: '2025-01-01', + mainModule: 'main.js', + modules: { + 'main.js': `export default { fetch() { return new Response('ok'); } }`, + }, + }); + + // Force the child WorkerService to be fully constructed by making a request. + let resp = await stub.getEntrypoint().fetch('http://x/'); + assert.strictEqual(await resp.text(), 'ok'); + + // Drop the only JS reference so the WorkerStub becomes unreachable. + stub = null; + + // Trigger a major GC. Pre-fix this would abort the process + gc(); + + // If we reach here the process did not abort — the fix is working. + // Give the event loop a turn so any deferred destruction can complete. 
+ await new Promise((resolve) => setTimeout(resolve, 0)); + }, +}; diff --git a/src/workerd/api/tests/worker-loader-gc-test.wd-test b/src/workerd/api/tests/worker-loader-gc-test.wd-test new file mode 100644 index 00000000000..845478058ff --- /dev/null +++ b/src/workerd/api/tests/worker-loader-gc-test.wd-test @@ -0,0 +1,18 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + v8Flags = ["--expose-gc"], + services = [ + ( name = "worker-loader-gc-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "worker-loader-gc-test.js") + ], + compatibilityFlags = ["nodejs_compat","experimental"], + bindings = [ + (name = "loader", workerLoader = ()), + ], + ) + ), + ], +); diff --git a/src/workerd/api/tests/worker-loader-limits-test.js b/src/workerd/api/tests/worker-loader-limits-test.js new file mode 100644 index 00000000000..c75026a6582 --- /dev/null +++ b/src/workerd/api/tests/worker-loader-limits-test.js @@ -0,0 +1,98 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 +import assert from 'node:assert'; + +// These must match the limits hard-coded in worker-loader.c++. +const MAX_CODE_SIZE = 64 * 1024 * 1024; // 64 MiB +const MAX_ENV_SIZE = 1 * 1024 * 1024; // 1 MiB + +const MAIN_MODULE = ` + import {WorkerEntrypoint} from "cloudflare:workers"; + export default class extends WorkerEntrypoint { + ping() { return "pong"; } + envBigLength() { return this.env.big ? this.env.big.length : 0; } + } +`; + +function makeCode(overrides) { + return { + compatibilityDate: '2025-01-01', + mainModule: 'main.js', + modules: { 'main.js': MAIN_MODULE }, + globalOutbound: null, + ...overrides, + }; +} + +// A worker whose total module size is comfortably under the limit loads and runs fine. 
+export let codeSizeWithinLimit = { + async test(ctrl, env, ctx) { + let worker = env.loader.get('codeSizeWithinLimit', () => + makeCode({ + modules: { + 'main.js': MAIN_MODULE, + // ~1 MiB of additional (uncompiled) module content, well under the limit. + 'pad.js': '// ' + 'x'.repeat(1 * 1024 * 1024), + }, + }) + ); + + assert.strictEqual(await worker.getEntrypoint().ping(), 'pong'); + }, +}; + +// A worker whose total module size exceeds the limit fails to load with a clear error. +export let codeSizeExceedsLimit = { + async test(ctrl, env, ctx) { + let worker = env.loader.get('codeSizeExceedsLimit', () => + makeCode({ + modules: { + 'main.js': MAIN_MODULE, + // Push the total just over the limit. This module is never compiled because the size + // check throws first. + 'big.js': '// ' + 'x'.repeat(MAX_CODE_SIZE), + }, + }) + ); + + await assert.rejects(worker.getEntrypoint().ping(), (e) => { + assert.strictEqual(e.name, 'Error'); + assert.match( + e.message, + /^Dynamic Worker code size \(\d+ bytes\) exceeds the maximum allowed size of 67108864 bytes\.$/ + ); + return true; + }); + }, +}; + +// An env value under the limit is passed through to the dynamic worker. +export let envSizeWithinLimit = { + async test(ctrl, env, ctx) { + const big = 'x'.repeat(512 * 1024); // 512 KiB, under the 1 MiB limit. + let worker = env.loader.get('envSizeWithinLimit', () => + makeCode({ env: { big } }) + ); + + assert.strictEqual(await worker.getEntrypoint().envBigLength(), big.length); + }, +}; + +// An env value over the limit fails to load with a clear error. 
+export let envSizeExceedsLimit = { + async test(ctrl, env, ctx) { + let worker = env.loader.get('envSizeExceedsLimit', () => + makeCode({ env: { big: 'x'.repeat(2 * MAX_ENV_SIZE) } }) + ); + + await assert.rejects(worker.getEntrypoint().ping(), (e) => { + assert.strictEqual(e.name, 'Error'); + assert.match( + e.message, + /^Dynamic Worker env size \(\d+ bytes\) exceeds the maximum allowed size of 1048576 bytes\.$/ + ); + return true; + }); + }, +}; diff --git a/src/workerd/api/tests/worker-loader-limits-test.wd-test b/src/workerd/api/tests/worker-loader-limits-test.wd-test new file mode 100644 index 00000000000..f76b78093e9 --- /dev/null +++ b/src/workerd/api/tests/worker-loader-limits-test.wd-test @@ -0,0 +1,17 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "worker-loader-limits-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "worker-loader-limits-test.js") + ], + compatibilityFlags = ["nodejs_compat", "experimental"], + bindings = [ + (name = "loader", workerLoader = ()), + ], + ) + ), + ], +); diff --git a/src/workerd/api/tests/worker-loader-test.js b/src/workerd/api/tests/worker-loader-test.js index 5243928491a..bbb5100cc34 100644 --- a/src/workerd/api/tests/worker-loader-test.js +++ b/src/workerd/api/tests/worker-loader-test.js @@ -355,6 +355,32 @@ export let tails = { }, }; +// A simple Durable Object that maintains a counter, used to test passing DO stubs +// into dynamic workers. +export class StubCounter extends DurableObject { + async increment(amount) { + let value = (await this.ctx.storage.get('count')) || 0; + value += amount; + await this.ctx.storage.put('count', value); + return value; + } +} + +// A Durable Object that receives tail events via its tail() handler. +// The test retrieves the received event via wait(). 
+export class TailReceiver extends DurableObject { + #promiseAndResolvers = Promise.withResolvers(); + + tail(event) { + // HACK: Currently, tail events are not serializable over RPC. :( + this.#promiseAndResolvers.resolve(JSON.parse(JSON.stringify(event))); + } + + wait() { + return this.#promiseAndResolvers.promise; + } +} + export class GreeterFacet extends DurableObject { async greet(name) { return `${this.ctx.props.greeting}, ${name}?`; @@ -1090,6 +1116,67 @@ export let regressionDeadIoContextGetCode = { }, }; +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-147: heap use-after-free of dynamically-loaded +// WorkerService when a facet actor is torn down via monitorOnBroken(). The bug was that +// `Server::WorkerService::startRequest` created a `WorkerEntrypoint` with the `LimitEnforcer` +// being a `NullDisposer`-backed (i.e. not refcounted) `Own` instance to `WorkerService`. +// When monitorOnBroken() dropped the `Actor` after the `ActorContainer` which owned the +// `WorkerService`, it caused a UAF when dropping `limitEnforcer` in `~IoContext` +export class FacetUafTestActor extends DurableObject { + async doTest() { + // Load an *anonymous* dynamic worker (name = null) so the WorkerStubImpl is NOT cached + // in WorkerLoaderNamespace::isolates. This means the only owners of the child WorkerService + // are the JS WorkerStub and any ActorClassImpl derived from it. + let worker = this.env.loader.get(null, () => { + return { + compatibilityDate: '2025-01-01', + mainModule: 'child.js', + modules: { + 'child.js': ` + import {DurableObject} from "cloudflare:workers"; + export class ChildActor extends DurableObject { + ping() { + // Schedule a self-abort after a short delay. This will trigger + // monitorOnBroken() in the parent's facet container. 
+ setTimeout(() => this.ctx.abort('self-destruct'), 50); + return "pong"; + } + } + `, + }, + }; + }); + + let cls = worker.getDurableObjectClass('ChildActor'); + let facet = this.ctx.facets.get('uaf-test', () => ({ class: cls })); + + // Trigger the child actor to start and schedule its self-abort. + let result = await facet.ping(); + assert.strictEqual(result, 'pong'); + + // Drop JS references to the worker stub and class so that the facet's + // ActorContainer::classAndId.actorClass becomes the last owner of the + // ActorClassImpl -> WorkerStubImpl -> child WorkerService chain. + worker = null; + cls = null; + facet = null; + + // Wait for the child's setTimeout to fire and trigger ctx.abort(). + // monitorOnBroken() will move the actor out, erase the facet, and then + // destroy the actor. Without the fix, this is where the UAF occurs. + gc(); + await new Promise((resolve) => setTimeout(resolve, 100)); + } +} + +export let facetUafRegression = { + async test(ctrl, env, ctx) { + let id = ctx.exports.FacetUafTestActor.idFromName('uaf-test'); + let stub = ctx.exports.FacetUafTestActor.get(id); + await stub.doTest(); + }, +}; + // Test that abortIsolate() works correctly for anonymous dynamic workers. // Anonymous workers don't have a name and therefore aren't stored in the loader's map. export let abortIsolateDynamicAnonymous = { @@ -1133,3 +1220,145 @@ export let abortIsolateDynamicAnonymous = { ); }, }; + +// ============================================================================= +// Tests for passing Durable Object stubs into dynamic workers. + +// Test passing a DO stub as an RPC parameter into a dynamic worker. 
+export let doStubViaRpcParam = { + async test(ctrl, env, ctx) { + let target = ctx.exports.StubCounter.get( + ctx.exports.StubCounter.idFromName('rpcParam') + ); + + let worker = env.loader.get('doStubViaRpcParam', () => { + return { + compatibilityDate: '2025-01-01', + compatibilityFlags: ['experimental'], + allowExperimental: true, + mainModule: 'foo.js', + modules: { + 'foo.js': ` + import { WorkerEntrypoint } from "cloudflare:workers"; + export default class extends WorkerEntrypoint { + callStub(stub, amount) { + return stub.increment(amount); + } + } + `, + }, + }; + }); + + let result = await worker.getEntrypoint().callStub(target, 5); + assert.strictEqual(result, 5); + + // Verify the original stub still sees the updated state. + let direct = await target.increment(3); + assert.strictEqual(direct, 8); + }, +}; + +// Test passing a DO stub via props into a dynamic worker. +export let doStubViaProps = { + async test(ctrl, env, ctx) { + let target = ctx.exports.StubCounter.get( + ctx.exports.StubCounter.idFromName('props') + ); + + let worker = env.loader.get('doStubViaProps', () => { + return { + compatibilityDate: '2025-01-01', + compatibilityFlags: ['experimental'], + allowExperimental: true, + mainModule: 'foo.js', + modules: { + 'foo.js': ` + import { WorkerEntrypoint } from "cloudflare:workers"; + export default class extends WorkerEntrypoint { + async run() { + return await this.ctx.props.counter.increment(10); + } + } + `, + }, + }; + }); + + let result = await worker + .getEntrypoint(undefined, { + props: { counter: target }, + }) + .run(); + assert.strictEqual(result, 10); + }, +}; + +// Test passing a DO stub via env into a dynamic worker. 
+export let doStubViaEnv = { + async test(ctrl, env, ctx) { + let target = ctx.exports.StubCounter.get( + ctx.exports.StubCounter.idFromName('env') + ); + + let worker = env.loader.get('doStubViaEnv', () => { + return { + compatibilityDate: '2025-01-01', + compatibilityFlags: ['experimental'], + allowExperimental: true, + mainModule: 'foo.js', + modules: { + 'foo.js': ` + import { WorkerEntrypoint } from "cloudflare:workers"; + export default class extends WorkerEntrypoint { + async run() { + return await this.env.counter.increment(7); + } + } + `, + }, + env: { + counter: target, + }, + }; + }); + + let result = await worker.getEntrypoint().run(); + assert.strictEqual(result, 7); + }, +}; + +// Test using a DO stub as a tail worker of a dynamic worker. +export let doStubAsTail = { + async test(ctrl, env, ctx) { + let tailReceiver = ctx.exports.TailReceiver.get( + ctx.exports.TailReceiver.idFromName('tail') + ); + + let worker = env.loader.get('doStubAsTail', () => { + return { + compatibilityDate: '2025-01-01', + compatibilityFlags: ['experimental'], + allowExperimental: true, + mainModule: 'foo.js', + modules: { + 'foo.js': ` + export default { + fetch(req, env, ctx) { + console.log("hello from tailed worker"); + return new Response("OK"); + }, + } + `, + }, + tails: [tailReceiver], + }; + }); + + let resp = await worker.getEntrypoint().fetch('https://example.com'); + assert.strictEqual(await resp.text(), 'OK'); + + let event = await tailReceiver.wait(); + assert.strictEqual(event[0].logs[0].message[0], 'hello from tailed worker'); + }, +}; diff --git a/src/workerd/api/tests/worker-loader-test.wd-test b/src/workerd/api/tests/worker-loader-test.wd-test index 961f12b18f8..4d0742183b4 100644 --- a/src/workerd/api/tests/worker-loader-test.wd-test +++ b/src/workerd/api/tests/worker-loader-test.wd-test @@ -16,7 +16,10 @@ const unitTests :Workerd.Config = ( ], durableObjectNamespaces = [ (className = "FacetTestActor", uniqueKey = "FacetTestActor"), + (className = 
"FacetUafTestActor", uniqueKey = "FacetUafTestActor"), (className = "RendezvousActor", uniqueKey = "RendezvousActor"), + (className = "StubCounter", uniqueKey = "StubCounter"), + (className = "TailReceiver", uniqueKey = "TailReceiver"), ], durableObjectStorage = (inMemory = void), globalOutbound = (name = "worker-loader-test", entrypoint = "defaultOutbound"), diff --git a/src/workerd/api/trace.c++ b/src/workerd/api/trace.c++ index 70c9e378fd6..14fc0fc0e34 100644 --- a/src/workerd/api/trace.c++ +++ b/src/workerd/api/trace.c++ @@ -647,12 +647,13 @@ jsg::Ref UnsafeTraceMetrics::fromTrace(jsg::Lock& js, jsg::Ref sendTracesToExportedHandler(kj::Own incomingRequest, +void sendTracesToExportedHandler(kj::Own incomingRequest, kj::Maybe entrypointNamePtr, kj::Maybe versionInfo, Frankenvalue props, kj::ArrayPtr> traces, - bool isDynamicDispatch) { + bool isDynamicDispatch, + kj::TaskSet& waitUntilTasks) { // Mark the request as delivered because we're about to run some JS. incomingRequest->delivered(); @@ -670,19 +671,18 @@ kj::Promise sendTracesToExportedHandler(kj::Own sendTracesToExportedHandler(kj::Owndrain(); + incomingRequest->drain(waitUntilTasks, kj::mv(incomingRequest)); } } // namespace @@ -712,8 +712,8 @@ auto TraceCustomEvent::run(kj::Own incomingRequest, kj::TaskSet& waitUntilTasks, bool isDynamicDispatch) -> kj::Promise { // Don't bother to wait around for the handler to run, just hand it off to the waitUntil tasks. - waitUntilTasks.add(sendTracesToExportedHandler(kj::mv(incomingRequest), entrypointNamePtr, - kj::mv(versionInfo), kj::mv(props), traces, isDynamicDispatch)); + sendTracesToExportedHandler(kj::mv(incomingRequest), entrypointNamePtr, kj::mv(versionInfo), + kj::mv(props), traces, isDynamicDispatch, waitUntilTasks); // Reporting a proper outcome and return event here would be nice, but for that we'd need to await // running the tail handler... 
diff --git a/src/workerd/api/trace.h b/src/workerd/api/trace.h index 27f3e7c53f6..97f4594f300 100644 --- a/src/workerd/api/trace.h +++ b/src/workerd/api/trace.h @@ -178,16 +178,36 @@ class TraceItem final: public jsg::Object { void visitForGc(jsg::GcVisitor& visitor) { KJ_IF_SOME(info, eventInfo) { KJ_SWITCH_ONEOF(info) { - KJ_CASE_ONEOF(fetch, jsg::Ref) { visitor.visit(fetch); } - KJ_CASE_ONEOF(rpc, jsg::Ref) { visitor.visit(rpc); } - KJ_CASE_ONEOF(conn, jsg::Ref) { visitor.visit(conn); } - KJ_CASE_ONEOF(sched, jsg::Ref) { visitor.visit(sched); } - KJ_CASE_ONEOF(alarm, jsg::Ref) { visitor.visit(alarm); } - KJ_CASE_ONEOF(queue, jsg::Ref) { visitor.visit(queue); } - KJ_CASE_ONEOF(email, jsg::Ref) { visitor.visit(email); } - KJ_CASE_ONEOF(tail, jsg::Ref) { visitor.visit(tail); } - KJ_CASE_ONEOF(custom, jsg::Ref) { visitor.visit(custom); } - KJ_CASE_ONEOF(ws, jsg::Ref) { visitor.visit(ws); } + KJ_CASE_ONEOF(fetch, jsg::Ref) { + visitor.visit(fetch); + } + KJ_CASE_ONEOF(rpc, jsg::Ref) { + visitor.visit(rpc); + } + KJ_CASE_ONEOF(conn, jsg::Ref) { + visitor.visit(conn); + } + KJ_CASE_ONEOF(sched, jsg::Ref) { + visitor.visit(sched); + } + KJ_CASE_ONEOF(alarm, jsg::Ref) { + visitor.visit(alarm); + } + KJ_CASE_ONEOF(queue, jsg::Ref) { + visitor.visit(queue); + } + KJ_CASE_ONEOF(email, jsg::Ref) { + visitor.visit(email); + } + KJ_CASE_ONEOF(tail, jsg::Ref) { + visitor.visit(tail); + } + KJ_CASE_ONEOF(custom, jsg::Ref) { + visitor.visit(custom); + } + KJ_CASE_ONEOF(ws, jsg::Ref) { + visitor.visit(ws); + } } } visitor.visitAll(logs); @@ -493,9 +513,15 @@ class TraceItem::HibernatableWebSocketEventInfo final: public jsg::Object { void visitForGc(jsg::GcVisitor& visitor) { KJ_SWITCH_ONEOF(eventType) { - KJ_CASE_ONEOF(msg, jsg::Ref) { visitor.visit(msg); } - KJ_CASE_ONEOF(close, jsg::Ref) { visitor.visit(close); } - KJ_CASE_ONEOF(err, jsg::Ref) { visitor.visit(err); } + KJ_CASE_ONEOF(msg, jsg::Ref) { + visitor.visit(msg); + } + KJ_CASE_ONEOF(close, jsg::Ref) { + 
visitor.visit(close); + } + KJ_CASE_ONEOF(err, jsg::Ref) { + visitor.visit(err); + } } } }; diff --git a/src/workerd/api/tracing.c++ b/src/workerd/api/tracing.c++ index 45a2ca7ae14..0f40e79b2f9 100644 --- a/src/workerd/api/tracing.c++ +++ b/src/workerd/api/tracing.c++ @@ -156,12 +156,17 @@ void Span::end() { namespace workerd::api { -v8::Local Tracing::enterSpan(jsg::Lock& js, +namespace { + +enum class SpanEndMode { AUTO_END, MANUAL_END }; + +v8::Local runSpan(jsg::Lock& js, kj::String operationName, v8::Local callback, jsg::Arguments args, const jsg::TypeHandler>& spanHandler, - const jsg::TypeHandler>& valuePromiseHandler) { + const jsg::TypeHandler>* valuePromiseHandler, + SpanEndMode endMode) { // We use qualified `user_tracing::Span` / `user_tracing::SpanImpl` throughout because an // unqualified `Span` in this namespace resolves to workerd::Span (the runtime span struct), // which is a different type. @@ -174,8 +179,9 @@ v8::Local Tracing::enterSpan(jsg::Lock& js, kj::Own impl; kj::Maybe childSpanForAsyncContext; + bool hasIoContext = IoContext::hasCurrent(); - if (IoContext::hasCurrent()) { + if (hasIoContext) { auto& context = IoContext::current(); SpanParent parent = context.getCurrentUserTraceSpan(); @@ -203,10 +209,10 @@ v8::Local Tracing::enterSpan(jsg::Lock& js, // Wrap impl in IoOwn (when inside an IoContext) so destruction funnels through the // IoContext's delete queue and cannot cross threads. Outside an IoContext, fall back to - // kj::Own; enterSpan without an IoContext is a no-op tracing-wise but still runs the + // kj::Own; tracing without an IoContext is a no-op tracing-wise but still runs the // callback. 
jsg::Ref jsSpan = [&]() -> jsg::Ref { - if (IoContext::hasCurrent()) { + if (hasIoContext) { auto ownedImpl = IoContext::current().addObject(kj::mv(impl)); return js.alloc(kj::mv(ownedImpl)); } @@ -225,9 +231,15 @@ v8::Local Tracing::enterSpan(jsg::Lock& js, return js.tryCatch([&]() -> v8::Local { auto result = jsg::check(callback->Call(v8Context, v8Context->Global(), argv.size(), argv.data())); + + if (endMode == SpanEndMode::MANUAL_END) { + return result; + } + // If the callback returned a promise, defer end() until settlement. if (result->IsPromise()) { - auto promise = KJ_ASSERT_NONNULL(valuePromiseHandler.tryUnwrap(js, result)) + KJ_ASSERT(valuePromiseHandler != nullptr); + auto promise = KJ_ASSERT_NONNULL(valuePromiseHandler->tryUnwrap(js, result)) .then(js, [jsSpan = jsSpan.addRef()]( jsg::Lock& js, jsg::Value value) mutable -> jsg::Value { @@ -242,15 +254,17 @@ v8::Local Tracing::enterSpan(jsg::Lock& js, // If the promise never settles, the span will still be submitted when the IoOwn is // destroyed (via ~SpanImpl calling end()), though this is a corner case and should // generally be avoided by users. - return valuePromiseHandler.wrap(js, kj::mv(promise)); + return valuePromiseHandler->wrap(js, kj::mv(promise)); } else { // Synchronous success: end immediately. jsSpan->end(); return result; } }, [&](jsg::Value exception) -> v8::Local { - // Synchronous exception: end then rethrow. - jsSpan->end(); + if (endMode == SpanEndMode::AUTO_END) { + // Synchronous exception: end then rethrow. 
+ jsSpan->end(); + } js.throwException(kj::mv(exception)); }); }; @@ -270,4 +284,25 @@ v8::Local Tracing::enterSpan(jsg::Lock& js, } } +} // namespace + +v8::Local Tracing::enterSpan(jsg::Lock& js, + kj::String operationName, + v8::Local callback, + jsg::Arguments args, + const jsg::TypeHandler>& spanHandler, + const jsg::TypeHandler>& valuePromiseHandler) { + return runSpan(js, kj::mv(operationName), callback, kj::mv(args), spanHandler, + &valuePromiseHandler, SpanEndMode::AUTO_END); +} + +v8::Local Tracing::startActiveSpan(jsg::Lock& js, + kj::String operationName, + v8::Local callback, + jsg::Arguments args, + const jsg::TypeHandler>& spanHandler) { + return runSpan(js, kj::mv(operationName), callback, kj::mv(args), spanHandler, nullptr, + SpanEndMode::MANUAL_END); +} + } // namespace workerd::api diff --git a/src/workerd/api/tracing.h b/src/workerd/api/tracing.h index 7485a3026f1..7bee04ab69d 100644 --- a/src/workerd/api/tracing.h +++ b/src/workerd/api/tracing.h @@ -52,7 +52,7 @@ class SpanImpl final: public kj::Refcounted { bool getIsTraced(); // Returns a SpanParent wrapping this span's observer, or a null SpanParent if the span has - // ended or has no observer. Used by Tracing::enterSpan() to push onto the AsyncContextFrame. + // ended or has no observer. Used by Tracing methods to push onto the AsyncContextFrame. workerd::SpanParent makeSpanParent(); // Sets a single attribute on the span. If value is kj::none, the attribute is not set. @@ -88,15 +88,14 @@ class Span: public jsg::Object { // optional fields. void setAttribute(jsg::Lock& js, kj::String key, jsg::Optional value); - // Ends the span and submits its content to the tracing system. Not exposed to JS - only - // called by Tracing::enterSpan when the user callback returns / throws / its promise - // settles. Callers outside this file should not need it. + // Ends the span and submits its content to the tracing system. Idempotent. 
void end(); JSG_RESOURCE_TYPE(Span) { JSG_READONLY_PROTOTYPE_PROPERTY(isTraced, getIsTraced); JSG_METHOD(setAttribute); + JSG_METHOD(end); } private: @@ -140,8 +139,19 @@ class Tracing: public jsg::Object { const jsg::TypeHandler>& spanHandler, const jsg::TypeHandler>& valuePromiseHandler); + // Creates a new child span, pushes it onto the AsyncContextFrame while invoking + // callback(span, ...args), and returns the callback result without ending the span. + // The caller must call span.end() explicitly; forgotten spans are still ended by + // SpanImpl's destructor when the request-owned span object is destroyed. + v8::Local startActiveSpan(jsg::Lock& js, + kj::String operationName, + v8::Local callback, + jsg::Arguments args, + const jsg::TypeHandler>& spanHandler); + JSG_RESOURCE_TYPE(Tracing) { JSG_METHOD(enterSpan); + JSG_METHOD(startActiveSpan); // Use the _NAMED variant so the property ends up as `tracing.Span` rather than // `tracing["user_tracing::Span"]`. @@ -157,6 +167,11 @@ class Tracing: public jsg::Object { callback: (span: Span, ...args: A) => T, ...args: A ): T; + startActiveSpan( + name: string, + callback: (span: Span, ...args: A) => T, + ...args: A + ): T; }); } }; diff --git a/src/workerd/api/url-standard.c++ b/src/workerd/api/url-standard.c++ index efa65d2c892..614307a6f75 100644 --- a/src/workerd/api/url-standard.c++ +++ b/src/workerd/api/url-standard.c++ @@ -307,21 +307,21 @@ jsg::Ref URLSearchParams::values(jsg::Lock& js) IteratorState(JSG_THIS, inner.getValues())); } -kj::Maybe>> URLSearchParams::entryIteratorNext( +kj::Maybe> URLSearchParams::entryIteratorNext( jsg::Lock& js, URLSearchParams::IteratorState& state) { return state.inner.next().map([](const jsg::UrlSearchParams::EntryIterator::Entry& entry) { - return kj::arr(entry.key, entry.value); + return kj::arr(kj::str(entry.key), kj::str(entry.value)); }); } -kj::Maybe> URLSearchParams::keyIteratorNext( +kj::Maybe URLSearchParams::keyIteratorNext( jsg::Lock& js, 
URLSearchParams::IteratorState& state) { - return state.inner.next(); + return state.inner.next().map([](kj::ArrayPtr ptr) { return kj::str(ptr); }); } -kj::Maybe> URLSearchParams::valueIteratorNext( +kj::Maybe URLSearchParams::valueIteratorNext( jsg::Lock& js, URLSearchParams::IteratorState& state) { - return state.inner.next(); + return state.inner.next().map([](kj::ArrayPtr ptr) { return kj::str(ptr); }); } void URLSearchParams::forEach(jsg::Lock& js, diff --git a/src/workerd/api/url-standard.h b/src/workerd/api/url-standard.h index 5a976e40219..fd5a73d0937 100644 --- a/src/workerd/api/url-standard.h +++ b/src/workerd/api/url-standard.h @@ -58,15 +58,15 @@ class URLSearchParams: public jsg::Object { void sort(); JSG_ITERATOR(EntryIterator, entries, - kj::Array>, + kj::Array, IteratorState, entryIteratorNext) JSG_ITERATOR(KeyIterator, keys, - kj::ArrayPtr, + kj::String, IteratorState, keyIteratorNext) JSG_ITERATOR(ValueIterator, values, - kj::ArrayPtr, + kj::String, IteratorState, valueIteratorNext) @@ -141,13 +141,13 @@ class URLSearchParams: public jsg::Object { // URLs search component. 
void reset(); - static kj::Maybe>> entryIteratorNext( + static kj::Maybe> entryIteratorNext( jsg::Lock& js, IteratorState& state); - static kj::Maybe> keyIteratorNext( + static kj::Maybe keyIteratorNext( jsg::Lock& js, IteratorState& state); - static kj::Maybe> valueIteratorNext( + static kj::Maybe valueIteratorNext( jsg::Lock& js, IteratorState& state); diff --git a/src/workerd/api/url.c++ b/src/workerd/api/url.c++ index f9816e9a6b9..32463b97b27 100644 --- a/src/workerd/api/url.c++ +++ b/src/workerd/api/url.c++ @@ -516,7 +516,9 @@ jsg::Ref URLSearchParams::constructor( searchParams->url->kj::Url::operator=(usp->url->clone()); } KJ_CASE_ONEOF(queryString, kj::String) { - parseQueryString(searchParams->url->query, kj::mv(queryString), true); + auto& adjustment = + searchParams->externalMemoryAdjustment.emplace(js.getExternalMemoryAdjustment()); + parseQueryString(searchParams->url->query, kj::mv(queryString), adjustment, true); } KJ_CASE_ONEOF(dict, jsg::Dict) { searchParams->url->query = KJ_MAP(entry, dict.fields) { diff --git a/src/workerd/api/url.h b/src/workerd/api/url.h index 06a47653dcb..9f1710f030e 100644 --- a/src/workerd/api/url.h +++ b/src/workerd/api/url.h @@ -180,11 +180,11 @@ class URLSearchParams: public jsg::Object { IteratorState, entryIteratorNext) JSG_ITERATOR(KeyIterator, keys, - kj::StringPtr, + kj::String, IteratorState, keyIteratorNext) JSG_ITERATOR(ValueIterator, values, - kj::StringPtr, + kj::String, IteratorState, valueIteratorNext) @@ -243,6 +243,7 @@ class URLSearchParams: public jsg::Object { private: kj::Own url; + kj::Maybe externalMemoryAdjustment; static kj::Maybe> entryIteratorNext(jsg::Lock& js, IteratorState& state) { if (state.index >= state.parent->url->query.size()) { @@ -252,20 +253,20 @@ class URLSearchParams: public jsg::Object { return kj::arr(kj::str(key), kj::str(value)); } - static kj::Maybe keyIteratorNext(jsg::Lock& js, IteratorState& state) { + static kj::Maybe keyIteratorNext(jsg::Lock& js, IteratorState& state) { if 
(state.index >= state.parent->url->query.size()) { return kj::none; } auto& [key, value] = state.parent->url->query[state.index++]; - return key.asPtr(); + return kj::str(key); } - static kj::Maybe valueIteratorNext(jsg::Lock& js, IteratorState& state) { + static kj::Maybe valueIteratorNext(jsg::Lock& js, IteratorState& state) { if (state.index >= state.parent->url->query.size()) { return kj::none; } auto& [key, value] = state.parent->url->query[state.index++]; - return value.asPtr(); + return kj::str(value); } }; diff --git a/src/workerd/api/urlpattern-standard.c++ b/src/workerd/api/urlpattern-standard.c++ index 011b67c13f9..7b9723bca05 100644 --- a/src/workerd/api/urlpattern-standard.c++ +++ b/src/workerd/api/urlpattern-standard.c++ @@ -43,17 +43,27 @@ std::optional>> URLPattern::URLPatternReg // We need to create a null-terminated copy. auto str = kj::str(kj::arrayPtr(input.data(), input.size())); KJ_IF_SOME(matches, pattern.getHandle(js)(js, str)) { - std::vector> results(matches.size() - 1); + // Snapshot the array length exactly once. matches.size() calls + // v8::Array::Length() which reads live JS state — a monkey-patched + // RegExp.prototype.exec can return an array whose length grows + // mid-iteration (via accessor getters on indexed properties). + // Re-reading the length in the loop condition while using operator[] + // with the pre-allocated size would write past the vector's backing + // store (heap buffer overflow). Fix: snapshot once, use reserve + + // emplace_back so we never index past what we allocated. + const uint32_t len = matches.size(); + if (len == 0) return std::vector>{}; + std::vector> results; + results.reserve(len - 1); // The first value is always the input of the exec() command. Therefore // we should avoid it while constructing the returning vector. 
- for (size_t i = 1; i < matches.size(); i++) { - auto value = matches.get(js, i); + for (uint32_t i = 1; i < len; i++) { + auto value = matches.get(js, i); // may run user JS via getters if (value.isUndefined()) { - results[i - 1] = std::nullopt; + results.emplace_back(std::nullopt); } else { - KJ_DASSERT(value.isString()); auto str = value.toString(js); - results[i - 1] = std::string(str.cStr(), str.size()); + results.emplace_back(std::string(str.cStr(), str.size())); } } return kj::mv(results); diff --git a/src/workerd/api/util.c++ b/src/workerd/api/util.c++ index 568c2be262b..5b7899cb257 100644 --- a/src/workerd/api/util.c++ +++ b/src/workerd/api/util.c++ @@ -33,16 +33,29 @@ kj::ArrayPtr split(kj::ArrayPtr& text, char c) { void parseQueryString(kj::Vector& query, kj::ArrayPtr text, + jsg::ExternalMemoryAdjustment& externalMemoryAdjustment, bool skipLeadingQuestionMark) { if (skipLeadingQuestionMark && text.size() > 0 && text[0] == '?') { text = text.slice(1, text.size()); } + size_t pendingBytes = 0; while (text.size() > 0) { auto value = split(text, '&'); if (value.size() == 0) continue; auto name = split(value, '='); - query.add(kj::Url::QueryParam{kj::decodeWwwForm(name), kj::decodeWwwForm(value)}); + auto decodedName = kj::decodeWwwForm(name); + auto decodedValue = kj::decodeWwwForm(value); + pendingBytes += decodedName.size() + decodedValue.size() + sizeof(kj::Url::QueryParam); + query.add(kj::Url::QueryParam{kj::mv(decodedName), kj::mv(decodedValue)}); + if (pendingBytes >= 1024 * 1024) { + // Adjust memory every 1MB to avoid excessive memory usage + externalMemoryAdjustment.adjust(pendingBytes); + pendingBytes = 0; + } + } + if (pendingBytes > 0) { + externalMemoryAdjustment.adjust(pendingBytes); } } diff --git a/src/workerd/api/util.h b/src/workerd/api/util.h index 72cf81e67a5..f0c7fa3b8d9 100644 --- a/src/workerd/api/util.h +++ b/src/workerd/api/util.h @@ -30,8 +30,12 @@ struct CiLess { // Parse `rawText` as application/x-www-form-urlencoded name/value 
pairs and store in `query`. If // `skipLeadingQuestionMark` is true, any initial '?' will be ignored. Otherwise, it will be // interpreted as part of the first URL-encoded field. +// +// Native heap growth from parsed entries is reported to V8 via `externalMemoryAdjustment` so the +// per-isolate memory limit can bound it. void parseQueryString(kj::Vector& query, kj::ArrayPtr rawText, + jsg::ExternalMemoryAdjustment& externalMemoryAdjustment, bool skipLeadingQuestionMark = false); // TODO(cleanup): Would be really nice to move this to kj-url. diff --git a/src/workerd/api/web-socket.c++ b/src/workerd/api/web-socket.c++ index ea58697e5b0..711c7e7d48e 100644 --- a/src/workerd/api/web-socket.c++ +++ b/src/workerd/api/web-socket.c++ @@ -50,8 +50,7 @@ IoOwn WebSocket::initNative(IoContext& ioContext, WebSocket::WebSocket( jsg::Lock& js, IoContext& ioContext, kj::WebSocket& ws, HibernationPackage package) - : weakRef(kj::refcounted>(kj::Badge{}, *this)), - url(kj::mv(package.url)), + : url(kj::mv(package.url)), protocol(kj::mv(package.protocol)), extensions(kj::mv(package.extensions)), binaryType_(FeatureFlags::get(js).getWebsocketBinaryTypeDefault() ? BinaryType::BLOB @@ -73,8 +72,7 @@ jsg::Ref WebSocket::hibernatableFromNative( } WebSocket::WebSocket(jsg::Lock& js, kj::Own native) - : weakRef(kj::refcounted>(kj::Badge{}, *this)), - url(kj::none), + : url(kj::none), binaryType_(FeatureFlags::get(js).getWebsocketBinaryTypeDefault() ? BinaryType::BLOB : BinaryType::ARRAYBUFFER), allowHalfOpen(!FeatureFlags::get(js).getWebSocketAutoReplyToClose()), @@ -86,8 +84,7 @@ WebSocket::WebSocket(jsg::Lock& js, kj::Own native) } WebSocket::WebSocket(jsg::Lock& js, kj::String url) - : weakRef(kj::refcounted>(kj::Badge{}, *this)), - url(kj::mv(url)), + : url(kj::mv(url)), binaryType_(FeatureFlags::get(js).getWebsocketBinaryTypeDefault() ? 
BinaryType::BLOB : BinaryType::ARRAYBUFFER), allowHalfOpen(!FeatureFlags::get(js).getWebSocketAutoReplyToClose()), @@ -317,7 +314,7 @@ jsg::Ref WebSocket::constructor(jsg::Lock& js, } kj::Promise> WebSocket::couple( - kj::Own other, RequestObserver& request) { + jsg::Lock& js, kj::Own other, RequestObserver& request) { auto& native = *farNative; JSG_REQUIRE(!native.state.is(), TypeError, "Can't return WebSocket in a Response if it was created with `new WebSocket()`"); @@ -332,73 +329,83 @@ kj::Promise> WebSocket::couple( } } - // Tear down the IoOwn since we now need to extend the WebSocket to a `DeferredProxy` promise. - // This works because the `DeferredProxy` ends on the same event loop, but after the request - // context goes away. - kj::Own self = - kj::mv(KJ_ASSERT_NONNULL(native.state.tryGet()).ws); - native.state.init(); + // Grab the peer reference if it exists and is still alive. We have to do + // this here while we have the isolate lock. + kj::Maybe> maybePeerRef; + KJ_IF_SOME(p, peer) { + maybePeerRef = p.tryAddRef(js); + } - auto& context = IoContext::current(); + static const auto coupleImpl = + [](kj::Own self, kj::Own other, + kj::Maybe> maybePeerRef, + RequestObserver& request) -> kj::Promise> { + // Tear down the IoOwn since we now need to extend the WebSocket to a `DeferredProxy` promise. + // This works because the `DeferredProxy` ends on the same event loop, but after the request + // context goes away. 
- auto upstream = other->pumpTo(*self); - auto downstream = self->pumpTo(*other); + auto& context = IoContext::current(); - auto tryGetPeer = [&]() -> kj::Maybe { - KJ_IF_SOME(p, peer) { - return p->tryGet(); - } - return kj::none; - }; - auto isHibernatable = [&](workerd::api::WebSocket& ws) { - KJ_IF_SOME(state, ws.farNative->state.tryGet()) { - return state.isHibernatable(); - } - return false; - }; - KJ_IF_SOME(p, tryGetPeer()) { - // We're terminating the WebSocket in this worker, so the upstream promise (which pumps - // messages from the client to this worker) counts as something the request is waiting for. - upstream = upstream.attach(context.registerPendingEvent()); - - // We can observe websocket traffic in both directions by attaching an observer to the peer - // websocket which terminates in the worker. - KJ_IF_SOME(observer, request.tryCreateWebSocketObserver()) { - p.observer = kj::mv(observer); + auto upstream = other->pumpTo(*self); + auto downstream = self->pumpTo(*other); + + auto isHibernatable = [&](workerd::api::WebSocket& ws) { + KJ_IF_SOME(state, ws.farNative->state.tryGet()) { + return state.isHibernatable(); + } + return false; + }; + + KJ_IF_SOME(peerRef, maybePeerRef) { + // We're terminating the WebSocket in this worker, so the upstream promise (which pumps + // messages from the client to this worker) counts as something the request is waiting for. + upstream = upstream.attach(context.registerPendingEvent()); + + // We can observe websocket traffic in both directions by attaching an observer to the peer + // websocket which terminates in the worker. + KJ_IF_SOME(observer, request.tryCreateWebSocketObserver()) { + peerRef->observer = kj::mv(observer); + } } - } - // We need to use `eagerlyEvaluate()` on both inputs to `joinPromises` to work around the awkward - // behavior of `joinPromises` lazily-evaluating tail continuations. 
- auto promise = kj::joinPromises( - kj::arr(upstream.eagerlyEvaluate(nullptr), downstream.eagerlyEvaluate(nullptr))) - .attach(kj::mv(self), kj::mv(other)); - - KJ_IF_SOME(peer, tryGetPeer()) { - // Since the WebSocket is terminated locally, we generally want the request and associated - // IoContext to stay alive until the WebSocket connection has terminated. - // - // However, there is one exception to this: when the WebSocket is hibernatable, we don't want - // the existence of this connection to prevent the actor from being evicted, so we fall through - // to deferred proxying in this case. - if (!isHibernatable(peer)) { - co_await promise; - co_return; + // We need to use `eagerlyEvaluate()` on both inputs to `joinPromises` to work around the awkward + // behavior of `joinPromises` lazily-evaluating tail continuations. + auto promise = kj::joinPromises( + kj::arr(upstream.eagerlyEvaluate(nullptr), downstream.eagerlyEvaluate(nullptr))) + .attach(kj::mv(self), kj::mv(other)); + + KJ_IF_SOME(peerRef, maybePeerRef) { + // Since the WebSocket is terminated locally, we generally want the request and associated + // IoContext to stay alive until the WebSocket connection has terminated. + // + // However, there is one exception to this: when the WebSocket is hibernatable, we don't want + // the existence of this connection to prevent the actor from being evicted, so we fall through + // to deferred proxying in this case. + if (!isHibernatable(*peerRef)) { + co_await promise; + co_return; + } + // Drop the maybePeerRef before we hit the BEGIN_DEFERRED_PROXYING below. + maybePeerRef = kj::none; } - } - // Either: - // 1. This websocket is just proxying through, in which case we can allow the IoContext to go - // away while still being able to successfully pump the websocket connection. - // 2. This is a hibernatable websocket and we are falling through to deferred proxying to - // potentially allow for hibernation to occur. + // Either: + // 1. 
This websocket is just proxying through, in which case we can allow the IoContext to go + // away while still being able to successfully pump the websocket connection. + // 2. This is a hibernatable websocket and we are falling through to deferred proxying to + // potentially allow for hibernation to occur. + + // To begin deferred proxying, we can use this magic `KJ_CO_MAGIC` expression, which fulfills + // our outer promise for a DeferredProxy, which wraps a promise for the rest of this + // coroutine. + KJ_CO_MAGIC BEGIN_DEFERRED_PROXYING; - // To begin deferred proxying, we can use this magic `KJ_CO_MAGIC` expression, which fulfills - // our outer promise for a DeferredProxy, which wraps a promise for the rest of this - // coroutine. - KJ_CO_MAGIC BEGIN_DEFERRED_PROXYING; + co_return co_await promise; + }; - co_return co_await promise; + auto self = kj::mv(KJ_ASSERT_NONNULL(native.state.tryGet()).ws); + native.state.init(); + return coupleImpl(kj::mv(self), kj::mv(other), kj::mv(maybePeerRef), request); } void WebSocket::accept(jsg::Lock& js, jsg::Optional options) { @@ -523,9 +530,7 @@ void WebSocket::startReadLoop(jsg::Lock& js, kj::MaybeisValid()) { - return true; - } + return p.isAlive(); } return false; }; @@ -1076,8 +1081,8 @@ kj::Promise> WebSocket::readLoop( auto blob = js.alloc(js, jsg::JsBufferSource(ab), kj::str()); dispatchEventImpl(js, js.alloc(js, kj::str("message"), kj::mv(blob))); } else { - auto ab = js.arrayBuffer(kj::mv(data)).getHandle(js); - dispatchEventImpl(js, js.alloc(js, jsg::JsValue(ab))); + jsg::JsValue ab = jsg::JsArrayBuffer::create(js, data); + dispatchEventImpl(js, js.alloc(js, ab)); } } KJ_CASE_ONEOF(close, kj::WebSocket::Close) { @@ -1124,8 +1129,8 @@ jsg::Ref WebSocketPair::constructor(jsg::Lock& js) { auto first = pair->getFirst(); auto second = pair->getSecond(); - first->setPeer(second->addWeakRef()); - second->setPeer(first->addWeakRef()); + first->setPeer(second.getWeakRef(js)); + second->setPeer(first.getWeakRef(js)); 
return kj::mv(pair); } @@ -1176,7 +1181,7 @@ void WebSocket::assertNoError(jsg::Lock& js) { } } -void WebSocket::setPeer(kj::Own> other) { +void WebSocket::setPeer(jsg::WeakRef other) { peer = kj::mv(other); } @@ -1224,14 +1229,13 @@ bool WebSocket::awaitingHibernatableRelease() { return false; } -bool WebSocket::peerIsAwaitingCoupling() { - bool answer = false; +bool WebSocket::peerIsAwaitingCoupling(jsg::Lock& js) { KJ_IF_SOME(p, peer) { - p->runIfAlive([&answer](WebSocket& ws) { - answer = ws.farNative->state.is(); - }); + KJ_IF_SOME(ref, p.tryAddRef(js)) { + return ref->farNative->state.is(); + } } - return answer; + return false; } WebSocket::HibernationPackage WebSocket::buildPackageForHibernation() { diff --git a/src/workerd/api/web-socket.h b/src/workerd/api/web-socket.h index f3fc3ad2d42..a0dc6f5e9bc 100644 --- a/src/workerd/api/web-socket.h +++ b/src/workerd/api/web-socket.h @@ -12,7 +12,6 @@ #include #include #include -#include #include @@ -215,9 +214,7 @@ class WebSocket: public EventTarget { AllowHalfOpen allowHalfOpen = AllowHalfOpen::YES; }; - ~WebSocket() noexcept(false) { - weakRef->invalidate(); - } + ~WebSocket() noexcept(false) = default; // This WebSocket constructor is only used when WebSockets wake up from hibernation. // It will immediately set the `state` to `Accepted`, but it limits the behavior by specifying it @@ -253,7 +250,8 @@ class WebSocket: public EventTarget { // As an exception to the usual KJ convention, it is not necessary for the JavaScript `WebSocket` // object to be kept live while waiting for the promise returned by couple() to complete. Instead, // the promise takes direct ownership of the underlying KJ-native WebSocket (as well as `other`). - kj::Promise> couple(kj::Own other, RequestObserver& request); + kj::Promise> couple( + jsg::Lock& js, kj::Own other, RequestObserver& request); // Extract the kj::WebSocket from this api::WebSocket (if applicable). 
The kj::WebSocket will be // owned elsewhere, but the api::WebSocket will retain a reference. @@ -286,7 +284,7 @@ class WebSocket: public EventTarget { // Should only be called on one end of a WebSocketPair. // Relevant for WebSocket Hibernation: the end we return in the Response must be in the // AwaitingAcceptanceOrCoupling state. - bool peerIsAwaitingCoupling(); + bool peerIsAwaitingCoupling(jsg::Lock& js); HibernationPackage buildPackageForHibernation(); @@ -407,12 +405,7 @@ class WebSocket: public EventTarget { void visitForMemoryInfo(jsg::MemoryTracker& tracker) const; - kj::Own> addWeakRef() { - return weakRef->addRef(); - } - private: - kj::Own> weakRef; kj::Maybe url; kj::Maybe protocol = kj::String(); kj::Maybe extensions = kj::String(); @@ -627,13 +620,13 @@ class WebSocket: public EventTarget { // between the two WebSocket instances that would cause them to leak. This // can mean, however, that it's possible for one of the peers to be garbage // collected while the other still exists. This should be fairly unusual tho. - kj::Maybe>> peer; + kj::Maybe> peer; void visitForGc(jsg::GcVisitor& visitor) { visitor.visit(error); } - void setPeer(kj::Own> peer); + void setPeer(jsg::WeakRef peer); friend jsg::Ref WebSocketPair::constructor(jsg::Lock&); diff --git a/src/workerd/api/worker-loader.c++ b/src/workerd/api/worker-loader.c++ index 89396d5768e..09208f2b49f 100644 --- a/src/workerd/api/worker-loader.c++ +++ b/src/workerd/api/worker-loader.c++ @@ -10,6 +10,18 @@ namespace workerd::api { +namespace { + +// Maximum total (uncompressed) size of all module bodies in a dynamically-loaded Worker. This +// mirrors the documented paid Worker uncompressed size limit (64 MB) +constexpr size_t MAX_DYNAMIC_WORKER_CODE_SIZE = 64 * 1024 * 1024; + +// Maximum serialized size of the `env` object passed to a dynamically-loaded Worker. 
This is +// roughly the paid Worker analog of 128 environment variables at 5 KB each +constexpr size_t MAX_DYNAMIC_WORKER_ENV_SIZE = 1 * 1024 * 1024; + +} // namespace + jsg::Ref WorkerStub::getEntrypoint(jsg::Lock& js, jsg::Optional> name, jsg::Optional options) { @@ -64,7 +76,10 @@ jsg::Ref WorkerLoader::get( jsg::Lock& js, kj::Maybe name, jsg::Function()> getCode) { auto& ioctx = IoContext::current(); - auto reenterAndGetCode = ioctx.makeReentryCallback( + // It's important that we use a *weak* reentry callback because this callback will held by the + // WorkerStub and any entrypoint stubs in vends until they are GC'd. We don't want to create + // a cycle where a request context holds itself open (which would block DO hibernation). + auto reenterAndGetCode = ioctx.makeReentryCallbackWeak( [weakIoctx = ioctx.getWeakRef(), getCode = kj::mv(getCode), compatDateValidation = compatDateValidation](jsg::Lock& js) mutable { return getCode(js).then(js, @@ -114,9 +129,38 @@ DynamicWorkerSource WorkerLoader::toDynamicWorkerSource(jsg::Lock& js, auto ownCompatFlags = extractCompatFlags(js, code, compatDateValidation); CompatibilityFlags::Reader compatFlags = *ownCompatFlags; + // Set up compat flags for Python Workers so that the caller doesn't have to specify them manually. + if (code.mainModule.endsWith(".py"_kj)) { + capnp::MallocMessageBuilder flagsMessage; + flagsMessage.setRoot(compatFlags); + auto flagsBuilder = flagsMessage.getRoot(); + flagsBuilder.setPythonWorkers(true); + bool userExplicitlyEnabledExternalSdk = false; + + KJ_IF_SOME(f, code.compatibilityFlags) { + for (auto& flag: f) { + if (flag == "enable_python_external_sdk") { + userExplicitlyEnabledExternalSdk = true; + break; + } + } + } + if (!userExplicitlyEnabledExternalSdk) { + // TODO: We currently need to disable this because we have no way to include the SDK + // in dynamic workers. Once RM-28738 is implemented we may be able to get rid of this. 
+ flagsBuilder.setPythonExternalSDK(false); + } + ownCompatFlags = capnp::clone(flagsBuilder.asReader()); + compatFlags = *ownCompatFlags; + } + Frankenvalue env; KJ_IF_SOME(codeEnv, code.env) { env = Frankenvalue::fromJs(js, codeEnv.getHandle(js)); + auto estimate = env.estimateSize(); + JSG_REQUIRE(estimate <= MAX_DYNAMIC_WORKER_ENV_SIZE, Error, "Dynamic Worker env size (", + estimate, " bytes) exceeds the maximum allowed size of ", MAX_DYNAMIC_WORKER_ENV_SIZE, + " bytes."); } kj::Maybe> globalOutbound; @@ -253,7 +297,9 @@ Worker::Script::Source WorkerLoader::extractSource(jsg::Lock& js, WorkerCode& co }; bool isPython = code.mainModule.endsWith(".py"_kj); - // Disallow Python modules when the main module is a JS module, and vice versa. + // Disallow Python modules when the main module is a JS module, and vice versa. Also tally up the + // total size of all module bodies so we can enforce the worker code size limit. + size_t totalCodeSize = 0; for (auto& module: modules) { auto isJsModule = module.content.is() || module.content.is(); @@ -266,8 +312,38 @@ Worker::Script::Source WorkerLoader::extractSource(jsg::Lock& js, WorkerCode& co JSG_FAIL_REQUIRE(TypeError, "Module \"", module.name, "\" is a Python module, but the main module isn't a Python module."); } + + KJ_SWITCH_ONEOF(module.content) { + KJ_CASE_ONEOF(m, Worker::Script::EsModule) { + totalCodeSize += m.body.size(); + } + KJ_CASE_ONEOF(m, Worker::Script::CommonJsModule) { + totalCodeSize += m.body.size(); + } + KJ_CASE_ONEOF(m, Worker::Script::TextModule) { + totalCodeSize += m.body.size(); + } + KJ_CASE_ONEOF(m, Worker::Script::DataModule) { + totalCodeSize += m.body.size(); + } + KJ_CASE_ONEOF(m, Worker::Script::WasmModule) { + totalCodeSize += m.body.size(); + } + KJ_CASE_ONEOF(m, Worker::Script::JsonModule) { + totalCodeSize += m.body.size(); + } + KJ_CASE_ONEOF(m, Worker::Script::PythonModule) { + totalCodeSize += m.body.size(); + } + KJ_CASE_ONEOF(m, Worker::Script::ObsoletePythonRequirement) {} + 
KJ_CASE_ONEOF(m, Worker::Script::CapnpModule) {} + } } + JSG_REQUIRE(totalCodeSize <= MAX_DYNAMIC_WORKER_CODE_SIZE, Error, "Dynamic Worker code size (", + totalCodeSize, " bytes) exceeds the maximum allowed size of ", MAX_DYNAMIC_WORKER_CODE_SIZE, + " bytes."); + return Worker::Script::ModulesSource{ .mainModule = code.mainModule, .modules = kj::mv(modules), @@ -295,8 +371,10 @@ kj::Own WorkerLoader::extractCompatFlags( SimpleWorkerErrorReporter errorReporter; + // allowedExperimentalFlags is nullptr on purpose, a worker loader being trusted with specific + // experimental flags should not imply that it can delegate that trust to its dynamic workers. compileCompatibilityFlags(code.compatibilityDate, compatFlags, compatFlagsBuilder, errorReporter, - allowExperimental, compatDateValidation); + allowExperimental, compatDateValidation, nullptr); if (!errorReporter.errors.empty()) { JSG_FAIL_REQUIRE(Error, errorReporter.errors.front()); diff --git a/src/workerd/api/worker-rpc.c++ b/src/workerd/api/worker-rpc.c++ index 7830011b088..b35cdfaf99a 100644 --- a/src/workerd/api/worker-rpc.c++ +++ b/src/workerd/api/worker-rpc.c++ @@ -540,7 +540,12 @@ jsg::JsValue JsRpcPromise::finally(jsg::Lock& js, v8::Local onFina } kj::Maybe> JsRpcProperty::getProperty(jsg::Lock& js, kj::String name) { - return js.alloc(JSG_THIS, kj::mv(name)); + if (depth >= MAX_PROPERTY_WARNING_DEPTH) { + LOG_PERIODICALLY(WARNING, "NOSENTRY VULN-136589 exceeded RPC property warning depth", depth); + } + JSG_REQUIRE(depth < MAX_PROPERTY_DEPTH, TypeError, + "RPC pipelined property path is too deep (max ", MAX_PROPERTY_DEPTH, ")."); + return js.alloc(JSG_THIS, kj::mv(name), depth + 1); } kj::Maybe> JsRpcPromise::getProperty(jsg::Lock& js, kj::String name) { @@ -1914,18 +1919,30 @@ kj::Promise JsRpcSessionCustomEvent::run( KJ_DEFER({ // waitUntil() should allow extending execution on the server side even when the client // disconnects. 
- waitUntilTasks.add(incomingRequest->drain().attach(kj::mv(incomingRequest))); + incomingRequest->drain(waitUntilTasks, kj::mv(incomingRequest)); }); EntrypointJsRpcTarget target(ioctx, entrypointName, kj::mv(versionInfo), kj::mv(props), kj::mv(wrapperModule), mapAddRef(incomingRequest->getWorkerTracer()), isDynamicDispatch); - capnp::RevocableServer revcableTarget(target); + capnp::RevocableServer revocableTarget(target); + + KJ_DEFER({ + // If run() is canceled while a call is still in flight, then when the `RevocableServer` is + // destroyed, the in-flight request will be canceled with a not-very-friendly error message. + // If the cancellation occurred because the Actor or IoContext was aborted, we'd rather + // propagate the abort error. So check for one, and revoke with that if present. + KJ_IF_SOME(r, incomingRequest->getContext().getAbortReason()) { + revocableTarget.revoke(kj::mv(r)); + } else { + // silence bogus clang warning about dangling else + } + }); try { auto [donePromise, doneFulfiller] = kj::newPromiseAndFulfiller(); capFulfiller->fulfill(capnp::membrane( - revcableTarget.getClient(), kj::refcounted(kj::mv(doneFulfiller)))); + revocableTarget.getClient(), kj::refcounted(kj::mv(doneFulfiller)))); // `donePromise` resolves once there are no longer any capabilities pointing between the client // and server as part of this session. @@ -1936,7 +1953,7 @@ kj::Promise JsRpcSessionCustomEvent::run( // Make sure the top-level capability is revoked with the same exception that `run()` is // throwing, rather than some generic revocation exception. 
auto e = kj::getCaughtExceptionAsKj(); - revcableTarget.revoke(e.clone()); + revocableTarget.revoke(e.clone()); kj::throwFatalException(kj::mv(e)); } } diff --git a/src/workerd/api/worker-rpc.h b/src/workerd/api/worker-rpc.h index faddc717d38..5fc1b36da80 100644 --- a/src/workerd/api/worker-rpc.h +++ b/src/workerd/api/worker-rpc.h @@ -267,9 +267,16 @@ class JsRpcPromise: public JsRpcClientProvider { // Represents a property -- possibly, a method -- of a remote RPC object. class JsRpcProperty: public JsRpcClientProvider { public: - JsRpcProperty(jsg::Ref parent, kj::String name) + // MAX_PROPERTY_DEPTH is the hard limit on pipelined property chains; it prevents stack overflow + // when a chain of JsRpcProperty objects is destructed recursively. Chains that reach + // MAX_PROPERTY_WARNING_DEPTH, which is beyond any legitimate pipelining depth, log a warning. + static constexpr uint MAX_PROPERTY_DEPTH = 5120; + static constexpr uint MAX_PROPERTY_WARNING_DEPTH = 64; + + JsRpcProperty(jsg::Ref parent, kj::String name, uint depth = 0) : parent(kj::mv(parent)), - name(kj::mv(name)) {} + name(kj::mv(name)), + depth(depth) {} rpc::JsRpcTarget::Client getClientForOneCall( jsg::Lock& js, kj::Vector& path) override; @@ -319,6 +326,10 @@ class JsRpcProperty: public JsRpcClientProvider { // Name of this property within its immediate parent. kj::String name; + // Number of JsRpcProperty links above this one in the chain. Used to enforce + // MAX_PROPERTY_DEPTH and prevent native stack overflow on destruction.
+ uint depth; + void visitForGc(jsg::GcVisitor& visitor) { visitor.visit(parent); } diff --git a/src/workerd/api/workers-module.c++ b/src/workerd/api/workers-module.c++ index 3de9260235c..10b66292511 100644 --- a/src/workerd/api/workers-module.c++ +++ b/src/workerd/api/workers-module.c++ @@ -6,7 +6,6 @@ #include #include -#include namespace workerd::api { @@ -77,8 +76,4 @@ void EntrypointsModule::abortIsolate(jsg::Lock& js, jsg::Optional re js.terminateExecutionNow(); } -bool EntrypointsModule::getIsExperimental(jsg::Lock& js) { - return FeatureFlags::get(js).getWorkerdExperimental(); -} - } // namespace workerd::api diff --git a/src/workerd/api/workers-module.h b/src/workerd/api/workers-module.h index 305f440542e..15cb74df41f 100644 --- a/src/workerd/api/workers-module.h +++ b/src/workerd/api/workers-module.h @@ -90,11 +90,6 @@ class EntrypointsModule: public jsg::Object { // process. void abortIsolate(jsg::Lock& js, jsg::Optional reason); - // Returns whether the workerd_experimental compat flag is enabled. Exposed on the internal - // module so user-facing wrappers in cloudflare:workers can gate experimental APIs without - // relying on Cloudflare.compatibilityFlags (which filters out experimental flags themselves). - bool getIsExperimental(jsg::Lock& js); - JSG_RESOURCE_TYPE(EntrypointsModule, CompatibilityFlags::Reader flags) { JSG_NESTED_TYPE(WorkerEntrypoint); JSG_NESTED_TYPE(WorkflowEntrypoint); @@ -107,19 +102,7 @@ class EntrypointsModule: public jsg::Object { JSG_METHOD(waitUntil); JSG_METHOD(getCtxCache); - - // abortIsolate: - // - // From user code only usable with experimental set for now. - // The Python runtime wants to use it directly. - // - // So we always expose it to internal JS for the Python runtime, but the - // version exposed to user code checks this isExperimental flag and throws - // if it returns false. - // - // TODO: Clean up when we remove the experimental gate on abortIsolate. 
JSG_METHOD(abortIsolate); - JSG_READONLY_PROTOTYPE_PROPERTY(isExperimental, getIsExperimental); } }; diff --git a/src/workerd/io/BUILD.bazel b/src/workerd/io/BUILD.bazel index 43ca28d5262..44164336c4a 100644 --- a/src/workerd/io/BUILD.bazel +++ b/src/workerd/io/BUILD.bazel @@ -41,12 +41,14 @@ wd_cc_library( "io-context.c++", "io-own.c++", "io-util.c++", + "stored-value.c++", "trace-stream.c++", "tracer.c++", "worker.c++", "worker-fs.c++", ] + ["//src/workerd/api:srcs"], hdrs = [ + "access-info.h", "compatibility-date.h", "external-pusher.h", "hibernation-manager.h", @@ -54,6 +56,7 @@ wd_cc_library( "io-context.h", "io-own.h", "io-util.h", + "stored-value.h", "trace-stream.h", "tracer.h", "worker.h", @@ -401,6 +404,7 @@ wd_capnp_library( src = "container.capnp", deps = [ ":compatibility-date_capnp", + ":worker-interface_capnp", "@capnp-cpp//src/capnp/compat:byte-stream_capnp", ], ) @@ -419,6 +423,7 @@ kj_test( ":io-gate", "//src/workerd/util:test", "//src/workerd/util:test-util", + "//src/workerd/util:thread-scopes", ], ) @@ -514,10 +519,6 @@ wd_test( "//src/workerd/io/wasm:signal-preinit.wasm", "//src/workerd/io/wasm:signal-terminated-only.wasm", ], - # The WebAssembly.instantiate shim is behind the WASM_SHUTDOWN_SIGNAL_SHIM autogate, - # so this test only works when all autogates are enabled. - generate_all_compat_flags_variant = False, - generate_default_variant = False, ) kj_test( @@ -536,3 +537,11 @@ kj_test( "//src/workerd/tests:test-fixture", ], ) + +kj_test( + src = "hibernation-manager-test.c++", + deps = [ + ":io", + "//src/workerd/tests:test-fixture", + ], +) diff --git a/src/workerd/io/access-info.h b/src/workerd/io/access-info.h new file mode 100644 index 00000000000..57d1223dc83 --- /dev/null +++ b/src/workerd/io/access-info.h @@ -0,0 +1,40 @@ +// Copyright (c) 2026 Cloudflare, Inc. 
+// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +#pragma once + +#include +#include +#include + +namespace workerd { + +// Per-request Cloudflare Access authentication information. +// +// This is the I/O-side carrier for Access auth data. It is created by the embedding application +// (e.g. the production runtime) before invoking the worker, plumbed through `newWorkerEntrypoint()` +// into the `IoContext::IncomingRequest`, and surfaced to JavaScript by the concrete +// `api::AccessContext` wrapper as `ctx.access`. +// +// In standalone workerd this is never constructed; `ctx.access` evaluates to `undefined`. +// +// This type intentionally lives in `io/` rather than `api/` because: +// - It is the polymorphism boundary between embedders (workerd vs. production), not the +// JS-facing type. +// - It carries per-request data that flows through `newWorkerEntrypoint` → `IncomingRequest`, +// not through `Worker::Api` (which is per-isolate) or `IoChannelFactory`. +class AccessInfo: public kj::Refcounted { + public: + virtual ~AccessInfo() noexcept(false) = default; + + // The audience claim from the Access JWT. Stable for the lifetime of the request. + virtual kj::StringPtr getAudience() = 0; + + // Fetches the full identity information for the authenticated user, equivalent to calling + // /cdn-cgi/access/get-identity. The returned string is a JSON document; `kj::none` indicates + // no identity is available (e.g. service-token authentication). 
+ virtual kj::Promise> getIdentity() = 0; +}; + +} // namespace workerd diff --git a/src/workerd/io/actor-cache-test.c++ b/src/workerd/io/actor-cache-test.c++ index 6a2d39eb1d9..d391bc0035e 100644 --- a/src/workerd/io/actor-cache-test.c++ +++ b/src/workerd/io/actor-cache-test.c++ @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -1382,6 +1383,35 @@ KJ_TEST("ActorCache flush hard failure with output gate bypass") { test.put("bar", "456"); } +KJ_TEST("ActorCache flush hard failure includes internal error reference id") { + // A flush failure that is neither DISCONNECTED nor a tunneled JSG error falls into the + // "internal error" branch of flushImpl(), which should embed a reference id in the + // user-visible exception so it can be correlated with internal logs. + setPredictableModeForTest(); + ActorCacheTest test({.monitorOutputGate = false}); + auto& ws = test.ws; + auto& mockStorage = test.mockStorage; + + auto promise = test.gate.onBroken(); + + test.put("foo", "123"); + + KJ_ASSERT(!promise.poll(ws)); + + { + // FAILED + no "jsg." prefix => not tunneled and not retried, so we hit the wdErrId branch. 
+ mockStorage->expectCall("put", ws) + .withParams(CAPNP(entries = [(key = "foo", value = "123")])) + .thenThrow(KJ_EXCEPTION(FAILED, "raw storage failure")); + } + + KJ_EXPECT_LOG(ERROR, "raw storage failure"); + KJ_EXPECT_THROW_MESSAGE("broken.outputGateBroken; jsg.Error: Internal error in Durable " + "Object storage write caused object to be reset; " + "reference = 0123456789abcdefghijklmn", + promise.wait(ws)); +} + KJ_TEST("ActorCache read retry") { ActorCacheTest test; auto& ws = test.ws; @@ -5418,6 +5448,29 @@ KJ_TEST("ActorCache deleteAll() failure with deleteAlarm does not delete alarm") mockStorage->expectNoActivity(ws); } +KJ_TEST("ActorCache deleteAll() failure includes internal error reference id") { + // A deleteAll failure that is neither DISCONNECTED nor a tunneled JSG error falls into the + // "internal error" branch of flushImplDeleteAll(), which should embed a reference id in the + // user-visible exception so it can be correlated with internal logs. + setPredictableModeForTest(); + ActorCacheTest test({.monitorOutputGate = false}); + auto& ws = test.ws; + auto& mockStorage = test.mockStorage; + + auto brokenPromise = test.gate.onBroken(); + + auto deleteAll = test.cache.deleteAll({}, nullptr); + + // FAILED + no "jsg." prefix => not tunneled and not retried, so we hit the wdErrId branch. + mockStorage->expectCall("deleteAll", ws).thenThrow(KJ_EXCEPTION(FAILED, "raw storage failure")); + + KJ_EXPECT_LOG(ERROR, "raw storage failure"); + KJ_EXPECT_THROW_MESSAGE("broken.outputGateBroken; jsg.Error: Internal error in Durable " + "Object storage deleteAll() caused object to be reset; " + "reference = 0123456789abcdefghijklmn", + brokenPromise.wait(ws)); +} + KJ_TEST("ActorCache can wait for flush") { // This test confirms that `onNoPendingFlush()` will return a promise that resolves when any // scheduled or in-flight flush completes. 
diff --git a/src/workerd/io/actor-cache.c++ b/src/workerd/io/actor-cache.c++ index 9af5b266ebe..7ded6ccc279 100644 --- a/src/workerd/io/actor-cache.c++ +++ b/src/workerd/io/actor-cache.c++ @@ -2015,8 +2015,9 @@ kj::OneOf> ActorCache::delete_( [waiter = kj::mv(waiter)]() { return waiter->getCountedDelete().countDeleted; }); } -kj::Own ActorCache::startTransaction() { - return kj::heap(*this); +kj::OneOf, kj::Promise> ActorCache:: + startTransaction() { + return kj::Own(kj::heap(*this)); } ActorCache::DeleteAllResults ActorCache::deleteAll( @@ -2726,15 +2727,17 @@ kj::Promise ActorCache::flushImpl(uint retryCount) { } return kj::mv(e); } else { + auto wdErrId = makeInternalErrorId(); if (isInterestingException(e)) { - LOG_EXCEPTION("actorCacheFlush", e); + LOG_EXCEPTION_WITH_ID("actorCacheFlush", e, wdErrId); } else { - LOG_NOSENTRY(ERROR, "actor cache flush failed", e); + LOG_NOSENTRY(ERROR, "actor cache flush failed", e, wdErrId); } // Pass through exception type to convey appropriate retry behavior. return kj::Exception(e.getType(), __FILE__, __LINE__, kj::str("broken.outputGateBroken; jsg.Error: Internal error in Durable " - "Object storage write caused object to be reset.")); + "Object storage write caused object to be reset; reference = ", + wdErrId)); } }); } @@ -3122,15 +3125,17 @@ kj::Promise ActorCache::flushImplDeleteAll(uint retryCount) { e.setDescription(kj::str("broken.outputGateBroken; ", msg)); return kj::mv(e); } else { + auto wdErrId = makeInternalErrorId(); if (isInterestingException(e)) { - LOG_EXCEPTION("actorCacheDeleteAll", e); + LOG_EXCEPTION_WITH_ID("actorCacheDeleteAll", e, wdErrId); } else { - LOG_NOSENTRY(ERROR, "actorCacheDeleteAll failed", e); + LOG_NOSENTRY(ERROR, "actorCacheDeleteAll failed", e, wdErrId); } // Pass through exception type to convey appropriate retry behavior. 
return kj::Exception(e.getType(), __FILE__, __LINE__, - kj::str( - "broken.outputGateBroken; jsg.Error: Internal error in Durable Object storage deleteAll() caused object to be reset.")); + kj::str("broken.outputGateBroken; jsg.Error: Internal error in Durable " + "Object storage deleteAll() caused object to be reset; reference = ", + wdErrId)); } }); } diff --git a/src/workerd/io/actor-cache.h b/src/workerd/io/actor-cache.h index 3f108b10a6a..ae9c9f2d36b 100644 --- a/src/workerd/io/actor-cache.h +++ b/src/workerd/io/actor-cache.h @@ -194,13 +194,25 @@ class ActorCacheInterface: public ActorCacheOps { // old-style DOs have asyncronous storage. virtual kj::Maybe getSqliteKv() = 0; + // Prevents the current transaction from being committed until `promise` resolves. This is used + // when storing an external capability that requires performing some async RPC to obtain the + // token -- the transaction must be held open until the token is obtained and stored. + // + // This is only supported for SQLite-backed actor storage. For non-SQLite backends, calling this + // method is a programming error. + // + // See `ActorSqlite::blockTransaction()` for additional details on the semantics. + virtual void blockTransaction(kj::Promise promise) = 0; + class Transaction: public ActorCacheOps { public: // Write all changes to the underlying ActorCache. // // If commit() is not called before the Transaction is destroyed, nothing is written. // - // Returns a promise if backpressure needs to be applied (like ActorCache::put()). + // Returns a promise if backpressure needs to be applied (like ActorCache::put()) or if + // additional work needs to be done before the commit is actually complete. The caller must + // keep the input lock held until this promise completes. // // This will NOT detect conflicts, it will always just write blindly, because conflicts // inherently cannot happen. 
@@ -209,7 +221,11 @@ class ActorCacheInterface: public ActorCacheOps { virtual kj::Promise rollback() = 0; }; - virtual kj::Own startTransaction() = 0; + // Start an explicit async transaction. + // + // If this returns a Promise instead of a transaction, then we can't start a transaction right + // now. The caller must await the promise first, then try again. + virtual kj::OneOf, kj::Promise> startTransaction() = 0; // We split these up so client code that doesn't need the count doesn't have to // wait for it just to account for backpressure @@ -361,6 +377,9 @@ class ActorCache final: public ActorCacheInterface { kj::Maybe getSqliteKv() override { return kj::none; } + void blockTransaction(kj::Promise promise) override { + KJ_UNIMPLEMENTED("blockTransaction() is only supported on SQLite-backed actors"); + } kj::OneOf, kj::Promise>> get( Key key, ReadOptions options) override; kj::OneOf> get( @@ -383,7 +402,7 @@ class ActorCache final: public ActorCacheInterface { kj::Maybe newAlarmTime, WriteOptions options, SpanParent traceSpan) override; // See ActorCacheOps. 
- kj::Own startTransaction() override; + kj::OneOf, kj::Promise> startTransaction() override; DeleteAllResults deleteAll( WriteOptions options, SpanParent traceSpan, DeleteAllOptions deleteAllOptions = {}) override; kj::Maybe> evictStale(kj::Date now) override; diff --git a/src/workerd/io/actor-sqlite-test.c++ b/src/workerd/io/actor-sqlite-test.c++ index 5d5a9e7a63a..fb79f88b50f 100644 --- a/src/workerd/io/actor-sqlite-test.c++ +++ b/src/workerd/io/actor-sqlite-test.c++ @@ -165,9 +165,13 @@ struct ActorSqliteTest final { kj::Array pairs, ActorCache::WriteOptions options = {}) { return actor.put(kj::mv(pairs), options, nullptr); } + auto startTransaction() { + return KJ_ASSERT_NONNULL( + actor.startTransaction().tryGet>()); + } auto putMultipleExplicitTxn( kj::Array pairs, ActorCache::WriteOptions options = {}) { - auto txn = actor.startTransaction(); + auto txn = startTransaction(); txn->put(kj::mv(pairs), options, nullptr); return txn->commit(); } @@ -457,7 +461,7 @@ KJ_TEST("alarm scheduling starts synchronously before explicit local db commit") }; { - auto txn = test.actor.startTransaction(); + auto txn = test.startTransaction(); txn->setAlarm(oneMs, {}, nullptr); KJ_ASSERT(!startedScheduleRun); @@ -486,10 +490,10 @@ KJ_TEST("alarm scheduling does not start synchronously before nested explicit lo }; { - auto txn1 = test.actor.startTransaction(); + auto txn1 = test.startTransaction(); { - auto txn2 = test.actor.startTransaction(); + auto txn2 = test.startTransaction(); txn2->setAlarm(oneMs, {}, nullptr); txn2->commit(); @@ -1741,7 +1745,7 @@ KJ_TEST("rolling back transaction leaves alarm in expected state") { KJ_ASSERT(expectSync(test.getAlarm()) == twoMs); { - auto txn = test.actor.startTransaction(); + auto txn = test.startTransaction(); KJ_ASSERT(expectSync(txn->getAlarm({})) == twoMs); txn->setAlarm(oneMs, {}, nullptr); KJ_ASSERT(expectSync(txn->getAlarm({})) == oneMs); @@ -1764,7 +1768,7 @@ KJ_TEST("rolling back transaction leaves deferred alarm deletion in 
expected sta auto armResult = test.actor.armAlarmHandler(twoMs, nullptr, testCurrentTime); KJ_ASSERT(armResult.is()); - auto txn = test.actor.startTransaction(); + auto txn = test.startTransaction(); KJ_ASSERT(expectSync(test.getAlarm()) == kj::none); test.setAlarm(oneMs); KJ_ASSERT(expectSync(test.getAlarm()) == oneMs); @@ -1797,7 +1801,7 @@ KJ_TEST("committing transaction leaves deferred alarm deletion in expected state auto armResult = test.actor.armAlarmHandler(twoMs, nullptr, testCurrentTime); KJ_ASSERT(armResult.is()); - auto txn = test.actor.startTransaction(); + auto txn = test.startTransaction(); KJ_ASSERT(expectSync(test.getAlarm()) == kj::none); test.setAlarm(oneMs); KJ_ASSERT(expectSync(test.getAlarm()) == oneMs); @@ -1828,11 +1832,11 @@ KJ_TEST("rolling back nested transaction leaves deferred alarm deletion in expec auto armResult = test.actor.armAlarmHandler(twoMs, nullptr, testCurrentTime); KJ_ASSERT(armResult.is()); - auto txn1 = test.actor.startTransaction(); + auto txn1 = test.startTransaction(); KJ_ASSERT(expectSync(test.getAlarm()) == kj::none); { // Rolling back nested transaction change leaves deferred deletion in place. - auto txn2 = test.actor.startTransaction(); + auto txn2 = test.startTransaction(); KJ_ASSERT(expectSync(test.getAlarm()) == kj::none); test.setAlarm(oneMs); KJ_ASSERT(expectSync(test.getAlarm()) == oneMs); @@ -1842,7 +1846,7 @@ KJ_TEST("rolling back nested transaction leaves deferred alarm deletion in expec KJ_ASSERT(expectSync(test.getAlarm()) == kj::none); { // Committing nested transaction changes parent transaction state to dirty. 
- auto txn3 = test.actor.startTransaction(); + auto txn3 = test.startTransaction(); KJ_ASSERT(expectSync(test.getAlarm()) == kj::none); test.setAlarm(oneMs); KJ_ASSERT(expectSync(test.getAlarm()) == oneMs); @@ -1852,7 +1856,7 @@ KJ_TEST("rolling back nested transaction leaves deferred alarm deletion in expec KJ_ASSERT(expectSync(test.getAlarm()) == oneMs); { // Nested transaction of dirty transaction is dirty, rollback has no effect. - auto txn4 = test.actor.startTransaction(); + auto txn4 = test.startTransaction(); KJ_ASSERT(expectSync(test.getAlarm()) == oneMs); txn4->rollback().wait(test.ws); KJ_ASSERT(expectSync(test.getAlarm()) == oneMs); @@ -2576,7 +2580,7 @@ KJ_TEST("sync() throws after critical error in explicit transaction") { KJ_DEFER(sqlite3_hard_heap_limit64(heapLimit);); // Start an explicit transaction - auto txn = test.actor.startTransaction(); + auto txn = test.startTransaction(); // Do a write within the transaction txn->put(kj::str("foo"), kj::heapArray(kj::str("bar").asBytes()), {}, nullptr); @@ -2620,7 +2624,7 @@ KJ_TEST("allowUnconfirmed put in explicit transaction does not block output gate KJ_ASSERT(test.gate.wait(nullptr).poll(test.ws)); // Start an explicit transaction - auto txn = test.actor.startTransaction(); + auto txn = test.startTransaction(); // Do an unconfirmed put within the transaction txn->put( @@ -2652,7 +2656,7 @@ KJ_TEST("confirmed put in explicit transaction blocks output gate on commit") { KJ_ASSERT(test.gate.wait(nullptr).poll(test.ws)); // Start an explicit transaction - auto txn = test.actor.startTransaction(); + auto txn = test.startTransaction(); // Do a confirmed put (default behavior) txn->put(kj::str("foo"), kj::heapArray(kj::str("bar").asBytes()), {.allowUnconfirmed = false}, @@ -2684,7 +2688,7 @@ KJ_TEST("mixed confirmed and unconfirmed puts in explicit transaction use output KJ_ASSERT(test.gate.wait(nullptr).poll(test.ws)); // Start an explicit transaction - auto txn = test.actor.startTransaction(); + auto txn = 
test.startTransaction(); // Do an unconfirmed put followed by a confirmed put txn->put( @@ -2723,7 +2727,7 @@ KJ_TEST("allowUnconfirmed delete in explicit transaction does not block output g KJ_ASSERT(test.gate.wait(nullptr).poll(test.ws)); // Start an explicit transaction - auto txn = test.actor.startTransaction(); + auto txn = test.startTransaction(); // Perform an unconfirmed delete expectSync(txn->delete_(kj::str("foo"), {.allowUnconfirmed = true}, nullptr)); @@ -2754,7 +2758,7 @@ KJ_TEST("allowUnconfirmed putMultiple in explicit transaction does not block out KJ_ASSERT(test.gate.wait(nullptr).poll(test.ws)); // Start an explicit transaction - auto txn = test.actor.startTransaction(); + auto txn = test.startTransaction(); // Do an unconfirmed putMultiple auto pairs = kj::heapArrayBuilder(2); @@ -2794,7 +2798,7 @@ KJ_TEST("allowUnconfirmed deleteMultiple in explicit transaction does not block KJ_ASSERT(test.gate.wait(nullptr).poll(test.ws)); // Start an explicit transaction - auto txn = test.actor.startTransaction(); + auto txn = test.startTransaction(); // Perform an unconfirmed deleteMultiple auto keys = kj::heapArrayBuilder(2); @@ -2829,7 +2833,7 @@ KJ_TEST("allowUnconfirmed setAlarm in explicit transaction does not block output KJ_ASSERT(test.gate.wait(nullptr).poll(test.ws)); // Start an explicit transaction - auto txn = test.actor.startTransaction(); + auto txn = test.startTransaction(); // Set an alarm with allowUnconfirmed txn->setAlarm(oneMs, {.allowUnconfirmed = true}, nullptr); @@ -2861,7 +2865,7 @@ KJ_TEST("nested transaction: unconfirmed child commit does not block output gate KJ_ASSERT(test.gate.wait(nullptr).poll(test.ws)); // Start a parent transaction - auto parentTxn = test.actor.startTransaction(); + auto parentTxn = test.startTransaction(); // Do an unconfirmed put in the parent parentTxn->put(kj::str("parent"), kj::heapArray(kj::str("data").asBytes()), @@ -2869,7 +2873,7 @@ KJ_TEST("nested transaction: unconfirmed child commit does not block 
output gate { // Start a nested child transaction - auto childTxn = test.actor.startTransaction(); + auto childTxn = test.startTransaction(); // Do an unconfirmed put in the child childTxn->put(kj::str("child"), kj::heapArray(kj::str("data").asBytes()), @@ -2909,13 +2913,13 @@ KJ_TEST("nested transaction: confirmed child propagates to parent commit") { KJ_ASSERT(test.gate.wait(nullptr).poll(test.ws)); // Start a parent transaction with unconfirmed write - auto parentTxn = test.actor.startTransaction(); + auto parentTxn = test.startTransaction(); parentTxn->put(kj::str("parent"), kj::heapArray(kj::str("data").asBytes()), {.allowUnconfirmed = true}, nullptr); { // Start a nested child transaction - auto childTxn = test.actor.startTransaction(); + auto childTxn = test.startTransaction(); // Do a confirmed put in the child childTxn->put(kj::str("child"), kj::heapArray(kj::str("data").asBytes()), @@ -2955,13 +2959,13 @@ KJ_TEST("nested transaction: confirmed parent with unconfirmed child blocks outp KJ_ASSERT(test.gate.wait(nullptr).poll(test.ws)); // Start a parent transaction with confirmed write - auto parentTxn = test.actor.startTransaction(); + auto parentTxn = test.startTransaction(); parentTxn->put(kj::str("parent"), kj::heapArray(kj::str("data").asBytes()), {.allowUnconfirmed = false}, nullptr); { // Start a nested child transaction - auto childTxn = test.actor.startTransaction(); + auto childTxn = test.startTransaction(); // Do an unconfirmed put in the child childTxn->put(kj::str("child"), kj::heapArray(kj::str("data").asBytes()), @@ -3001,19 +3005,19 @@ KJ_TEST("nested transaction: deeply nested confirmed write propagates to root") KJ_ASSERT(test.gate.wait(nullptr).poll(test.ws)); // Start a parent transaction with unconfirmed write - auto txn1 = test.actor.startTransaction(); + auto txn1 = test.startTransaction(); txn1->put(kj::str("level1"), kj::heapArray(kj::str("data").asBytes()), {.allowUnconfirmed = true}, nullptr); { // Start a second level nested 
transaction with unconfirmed write - auto txn2 = test.actor.startTransaction(); + auto txn2 = test.startTransaction(); txn2->put(kj::str("level2"), kj::heapArray(kj::str("data").asBytes()), {.allowUnconfirmed = true}, nullptr); { // Start a third level nested transaction with confirmed write - auto txn3 = test.actor.startTransaction(); + auto txn3 = test.startTransaction(); txn3->put(kj::str("level3"), kj::heapArray(kj::str("data").asBytes()), {.allowUnconfirmed = false}, nullptr); @@ -3057,13 +3061,13 @@ KJ_TEST("nested transaction: rollback resets someWriteConfirmed flag") { KJ_ASSERT(test.gate.wait(nullptr).poll(test.ws)); // Start a parent transaction with unconfirmed write - auto parentTxn = test.actor.startTransaction(); + auto parentTxn = test.startTransaction(); parentTxn->put(kj::str("parent"), kj::heapArray(kj::str("data").asBytes()), {.allowUnconfirmed = true}, nullptr); { // Start a nested child transaction - auto childTxn = test.actor.startTransaction(); + auto childTxn = test.startTransaction(); // Do a confirmed put in the child childTxn->put(kj::str("child"), kj::heapArray(kj::str("data").asBytes()), @@ -3105,7 +3109,7 @@ KJ_TEST("explicit transaction: commit failure breaks output gate even for unconf KJ_ASSERT(test.gate.wait(nullptr).poll(test.ws)); // Start an explicit transaction - auto txn = test.actor.startTransaction(); + auto txn = test.startTransaction(); // Do an unconfirmed put txn->put( diff --git a/src/workerd/io/actor-sqlite.c++ b/src/workerd/io/actor-sqlite.c++ index 9788954aaad..b4059f912c6 100644 --- a/src/workerd/io/actor-sqlite.c++ +++ b/src/workerd/io/actor-sqlite.c++ @@ -51,6 +51,7 @@ ActorSqlite::ActorSqlite(kj::Own dbParam, kv(*db), metadata(*db), commitTasks(*this), + blockTasks(*this), debugAlarmSync(debugAlarmSyncParam) { db->onWrite(KJ_BIND_METHOD(*this, onWrite)); db->onCriticalError(KJ_BIND_METHOD(*this, onCriticalError)); @@ -94,6 +95,9 @@ void ActorSqlite::ImplicitTxn::commit() { void 
ActorSqlite::ImplicitTxn::rollback() { // Ignore redundant commit()s. if (!committed) { + // Cancel any blocking async tasks that were scheduled as part of the transaction. + parent.blockTasks.clear(); + // As of this writing, rollback() is only called when the database is about to be reset. // Preparing a statement for it would be a waste since that statement would never be executed // more than once, since resetting requires repreparing all statements anyway. So we don't @@ -111,15 +115,21 @@ bool ActorSqlite::ImplicitTxn::isSomeWriteConfirmed() const { return someWriteConfirmed; } +kj::Promise ActorSqlite::ImplicitTxn::waitForCompletion() { + KJ_IF_SOME(c, completionPaf) { + return c.promise.addBranch(); + } else { + return completionPaf.emplace().promise.addBranch(); + } +} + ActorSqlite::ExplicitTxn::ExplicitTxn(ActorSqlite& actorSqlite): actorSqlite(actorSqlite) { KJ_SWITCH_ONEOF(actorSqlite.currentTxn) { KJ_CASE_ONEOF(_, NoTxn) {} KJ_CASE_ONEOF(implicit, ImplicitTxn*) { - // An implicit transaction is open, commit it now because it would be weird if writes - // performed before the explicit transaction started were postponed until the transaction - // completes. Note that this isn't violating any atomicity guarantees because the transaction - // API is async, and atomicity is only guaranteed over synchronous code. - implicit->commit(); + // ActorSqlite::startTransaction() should have handled this case before constructing + // ExplicitTxn. 
+ KJ_FAIL_REQUIRE("can't create ExplicitTxn while ImplicitTxn is open"); } KJ_CASE_ONEOF(exp, ExplicitTxn*) { KJ_REQUIRE(!exp->hasChild, @@ -181,6 +191,27 @@ bool ActorSqlite::ExplicitTxn::isSomeWriteConfirmed() const { } kj::Maybe> ActorSqlite::ExplicitTxn::commit() { + if (!actorSqlite.blockTasks.isEmpty()) { + // Although the promise returned here was originally intended for "backpressure", it turns out + // if we return a promise here, the one call site (DurableObjectStorage::asyncTransactionImpl()) + // will actually keep the input gate locked until the commit finishes, which is what we need. + return actorSqlite.blockTasks.onEmpty().then([this]() { + commitImpl(); + }).catch_([self = kj::addRef(*this)](kj::Exception&& e) mutable { + if (self->actorSqlite.broken == kj::none) { + self->rollbackImpl(); + } + kj::throwFatalException(kj::mv(e)); + }); + } else { + commitImpl(); + + // No backpressure for SQLite. + return kj::none; + } +} + +void ActorSqlite::ExplicitTxn::commitImpl() { actorSqlite.requireNotBroken(); KJ_REQUIRE(!hasChild, "critical sections should have prevented committing transaction while " @@ -233,9 +264,6 @@ kj::Maybe> ActorSqlite::ExplicitTxn::commit() { actorSqlite.commitTasks.add(forkedPromise.addBranch()); actorSqlite.lastCommit = kj::mv(forkedPromise); } - - // No backpressure for SQLite. - return kj::none; } kj::Promise ActorSqlite::ExplicitTxn::rollback() { @@ -250,6 +278,9 @@ kj::Promise ActorSqlite::ExplicitTxn::rollback() { } void ActorSqlite::ExplicitTxn::rollbackImpl() noexcept(false) { + // Cancel any blocking async tasks that were scheduled as part of the transaction. 
+ actorSqlite.blockTasks.clear(); + actorSqlite.db->run( {.regulator = SqliteDatabase::TRUSTED}, kj::str("ROLLBACK TO _cf_savepoint_", depth)); actorSqlite.db->run( @@ -279,40 +310,31 @@ void ActorSqlite::onCriticalError( } } -void ActorSqlite::startImplicitTxn() { - auto txn = kj::heap(*this); - - // We implement the magic of accumulating all of the writes between JavaScript awaits in one - // transaction by evaluating by wrapping the commit function with kj::evalLater, which runs the - // function on the next turn of the event loop - auto commitPromise = - kj::evalLater([this, txn = kj::mv(txn)]() mutable -> kj::Promise { - // Don't commit if shutdown() has been called. - requireNotBroken(); - - // Start the schedule request before commit(), for correctness in workerd. - auto precommitAlarmState = startPrecommitAlarmScheduling(); +void ActorSqlite::blockTransaction(kj::Promise promise) { + requireNotBroken(); - try { - txn->commit(); - } catch (...) { - // HACK: If we became broken during `COMMIT TRANSACTION` then throw the broken exception - // instead of whatever SQLite threw. - requireNotBroken(); + // Start a transaction if one isn't already open. (You might argue that we should call onWrite(), + // but blockTransaction() itself isn't actually a write, though writes are expected to happen + // while we wait for the promise. We don't want to preempt those other writes from setting the + // `allowUnconfirmed` flag.) + if (currentTxn.is()) { + startImplicitTxn(); + } - // No, we're not broken, so propagate the exception as-is. - throw; - } + blockTasks.add(promise.catch_([this](kj::Exception&& e) { + // We didn't wrap the whole promise in the outputGate because we want to leave it up to the + // app to specify allowUnconfirmed on the actual write that contained the externals that + // required asynchronous handling. But if the external promise failed, we should probably + // go ahead and break the output gate!
(Also, `taskFailed()` expects us to have done this.) + return outputGate.lockWhile(kj::Promise(kj::mv(e)), nullptr); + })); +} - // The callback is only expected to commit writes up until this point. Any new writes that - // occur while the callback is in progress are NOT included, therefore require a new commit - // to be scheduled. So, we should drop `txn` to cause `currentTxn` to become NoTxn now, - // rather than after the callback. - { auto drop = kj::mv(txn); } +void ActorSqlite::startImplicitTxn() { + auto txn = kj::heap(*this); - // Move the commit span out immediately so new writes can capture a fresh span. - return commitImpl(kj::mv(precommitAlarmState), kj::mv(currentCommitSpan)); - }) + auto commitPromise = + startImplicitTxnImpl(kj::mv(txn)) // Unconditionally break the output gate if commit threw an error, no matter whether the // commit was confirmed or unconfirmed. .catch_([this](kj::Exception&& e) { @@ -327,6 +349,45 @@ void ActorSqlite::startImplicitTxn() { lastCommit = kj::mv(commitPromise); } +kj::Promise ActorSqlite::startImplicitTxnImpl(kj::Own txn) { + // We implement the magic of accumulating all of the writes between JavaScript awaits in one + // transaction by awaiting kj::yield() first, so that the rest of this function runs on the next + // turn of the event loop. + co_await kj::yield(); + + // If there were tasks blocking the transaction, wait for them. + if (!blockTasks.isEmpty()) { + co_await blockTasks.onEmpty(); + } + + // Don't commit if shutdown() has been called, or if one of the blockTasks threw, or we broke + // for any other reason before the transaction could complete. + requireNotBroken(); + + // Start the schedule request before commit(), for correctness in workerd. + auto precommitAlarmState = startPrecommitAlarmScheduling(); + + try { + txn->commit(); + } catch (...) { + // HACK: If we became broken during `COMMIT TRANSACTION` then throw the broken exception + // instead of whatever SQLite threw.
+ requireNotBroken(); + + // No, we're not broken, so propagate the exception as-is. + throw; + } + + // The callback is only expected to commit writes up until this point. Any new writes that + // occur while the callback is in progress are NOT included, therefore require a new commit + // to be scheduled. So, we should drop `txn` to cause `currentTxn` to become NoTxn now, + // rather than after the callback. + { auto drop = kj::mv(txn); } + + // Move the commit span out immediately so new writes can capture a fresh span. + co_await commitImpl(kj::mv(precommitAlarmState), kj::mv(currentCommitSpan)); +} + void ActorSqlite::onWrite(bool allowUnconfirmed) { requireNotBroken(); if (currentTxn.is()) { @@ -578,6 +639,9 @@ kj::Promise ActorSqlite::commitImpl( } void ActorSqlite::taskFailed(kj::Exception&& exception) { + // commitTasks and blockTasks both use this taskFailed callback. In either case, we just want + // to mark ourselves broken. + // The output gate should already have been broken since it wraps all commit tasks that can // throw. So, we don't have to report anything here, the exception will already propagate // elsewhere. We should block further operations, though. @@ -777,10 +841,21 @@ kj::Maybe> ActorSqlite::setAlarm( return kj::none; } -kj::Own ActorSqlite::startTransaction() { +kj::OneOf, kj::Promise> ActorSqlite:: + startTransaction() { requireNotBroken(); - return kj::refcounted(*this); + KJ_IF_SOME(itxn, currentTxn.tryGet()) { + return itxn->waitForCompletion(); + } else if (!blockTasks.isEmpty()) { + // We may be starting a nested async transaction (nested within another async transaction). + // We should wait for any blocking tasks to finish first, otherwise they might accidentally + // deliver their writes inside the nested transaction, leading to inconsistency if it is rolled + // back. 
+ return blockTasks.onEmpty(); + } else { + return kj::Own(kj::refcounted(*this)); + } } ActorCacheInterface::DeleteAllResults ActorSqlite::deleteAll( diff --git a/src/workerd/io/actor-sqlite.h b/src/workerd/io/actor-sqlite.h index d285720c815..30e27c3c412 100644 --- a/src/workerd/io/actor-sqlite.h +++ b/src/workerd/io/actor-sqlite.h @@ -60,6 +60,25 @@ class ActorSqlite final: public ActorCacheInterface, private kj::TaskSet::ErrorH return !currentTxn.is() || deleteAllCommitScheduled; } + // Prevents the current transaction from being committed until `promise` resolves. This is used + // when storing an external capability that requires performing some async RPC to obtain the + // token -- the transaction must be held open until the token is obtained and stored. + // + // For implicit transactions (or explicit synchronous transactions nested within an implicit + // transaction), extending the transaction lifetime may mean that several independent events get + // coalesced into a single transaction that normally wouldn't. That's fine, as long as the output + // gate stays closed until the commit actually happens. + // + // For explicit, asynchronous transactions, the input gate is locked until the transaction + // completes. This just means that the promise extends the input gate lock, preventing any other + // events from arriving until the transaction can finish. + // + // If no transaction is currently open, an implicit transaction is started. + // + // NOTE: It's important that canceling this promise early cancels all work as this means the + // transaction is being rolled back. + void blockTransaction(kj::Promise promise) override; + kj::Maybe getSqliteDatabase() override { return *db; } @@ -91,7 +110,8 @@ class ActorSqlite final: public ActorCacheInterface, private kj::TaskSet::ErrorH kj::Maybe newAlarmTime, WriteOptions options, SpanParent traceSpan) override; // See ActorCacheOps. 
- kj::Own startTransaction() override; + kj::OneOf, kj::Promise> startTransaction() + override; DeleteAllResults deleteAll( WriteOptions options, SpanParent traceSpan, DeleteAllOptions deleteAllOptions = {}) override; kj::Maybe> evictStale(kj::Date now) override; @@ -143,6 +163,8 @@ class ActorSqlite final: public ActorCacheInterface, private kj::TaskSet::ErrorH void setSomeWriteConfirmed(bool someWriteConfirmed); bool isSomeWriteConfirmed() const; + kj::Promise waitForCompletion(); + private: ActorSqlite& parent; @@ -150,6 +172,21 @@ class ActorSqlite final: public ActorCacheInterface, private kj::TaskSet::ErrorH // True if any of the writes in this commit are confirmed writes. bool someWriteConfirmed = false; + + struct CompletionPaf { + kj::Own> fulfiller; + kj::ForkedPromise promise; + + CompletionPaf(kj::PromiseFulfillerPair paf = kj::newPromiseAndFulfiller()) + : fulfiller(kj::mv(paf.fulfiller)), + promise(paf.promise.fork()) {} + ~CompletionPaf() noexcept(false) { + fulfiller->fulfill(); + } + }; + + // Initialized if waitForCompletion() is ever called. + kj::Maybe completionPaf; }; class ExplicitTxn: public ActorCacheInterface::Transaction, public kj::Refcounted { @@ -201,6 +238,7 @@ class ActorSqlite final: public ActorCacheInterface, private kj::TaskSet::ErrorH bool someWriteConfirmed = false; void rollbackImpl(); + void commitImpl(); }; // When set to NoTxn, there is no transaction outstanding. @@ -261,6 +299,9 @@ class ActorSqlite final: public ActorCacheInterface, private kj::TaskSet::ErrorH kj::TaskSet commitTasks; + // Tasks queued by blockTransaction(). + kj::TaskSet blockTasks; + // Trace span for the current commit operation. Captured from each write and used // for the output gate lock hold trace when a non-allowUnconfirmed write occurs. 
SpanParent currentCommitSpan = nullptr; @@ -289,6 +330,8 @@ class ActorSqlite final: public ActorCacheInterface, private kj::TaskSet::ErrorH void startImplicitTxn(); + kj::Promise startImplicitTxnImpl(kj::Own txn); + void onWrite(bool allowUnconfirmed); void onCriticalError(kj::StringPtr errorMessage, kj::Maybe maybeException); diff --git a/src/workerd/io/bundle-fs-test.c++ b/src/workerd/io/bundle-fs-test.c++ index e99ce4f104c..babd5b2b99f 100644 --- a/src/workerd/io/bundle-fs-test.c++ +++ b/src/workerd/io/bundle-fs-test.c++ @@ -62,7 +62,7 @@ KJ_TEST("The BundleDirectoryDelegate works") { // Iterating over the directory should work. size_t counter = 0; - for (auto& _ KJ_UNUSED: *dir.get()) { + for (auto& _ KJ_UNUSED: *dir) { counter++; } KJ_EXPECT(counter, 3); @@ -81,8 +81,8 @@ KJ_TEST("The BundleDirectoryDelegate works") { auto readText = file->readAllText(env.js).get(); KJ_EXPECT(readText == env.js.str("this is a commonjs module"_kj)); - auto readBytes = file->readAllBytes(env.js).get(); - KJ_EXPECT(readBytes.asArrayPtr() == "this is a commonjs module"_kjb); + auto readBytes = file->readAllBytes(env.js).get>(); + KJ_EXPECT(readBytes.getHandle(env.js).asArrayPtr() == "this is a commonjs module"_kjb); // Reading five bytes from offset 20 should return "odule". kj::byte buffer[5]{}; @@ -195,5 +195,76 @@ KJ_TEST("Guarding against deep non-circular symlink chains works") { }); } +KJ_TEST("Module names exceeding max bundle path depth are skipped") { + // Regression test for AUTOVULN-CLOUDFLARE-WORKERD-104: an attacker-controlled + // module name with deeply nested path segments (e.g. "a/".repeat(100000) + "x.txt") + // could cause stack exhaustion via recursive directory building. + TestFixture fixture; + + fixture.runInIoContext([&](const TestFixture::Environment& env) { + kj::Vector modules(3); + + // A normal module that should be included. 
+ modules.add(WorkerSource::Module{ + .name = "ok/module.js"_kj, + .content = WorkerSource::EsModule{.body = "export default 1;"_kj}, + }); + + kj::Vector atLimit; + for (size_t i = 0; i < 255; i++) { + atLimit.addAll(kj::StringPtr("d/")); + } + atLimit.addAll(kj::StringPtr("leaf.txt")); + atLimit.add('\0'); + kj::StringPtr atLimitName(atLimit.begin(), atLimit.size() - 1); + modules.add(WorkerSource::Module{ + .name = atLimitName, + .content = WorkerSource::EsModule{.body = "export default 2;"_kj}, + }); + + kj::Vector overLimit; + for (size_t i = 0; i < 2000; i++) { + overLimit.addAll(kj::StringPtr("x/")); + } + overLimit.addAll(kj::StringPtr("leaf.txt")); + overLimit.add('\0'); + kj::StringPtr overLimitName(overLimit.begin(), overLimit.size() - 1); + modules.add(WorkerSource::Module{ + .name = overLimitName, + .content = WorkerSource::EsModule{.body = "export default 3;"_kj}, + }); + + auto config = WorkerSource(WorkerSource::ModulesSource{ + .mainModule = "ok/module.js"_kj, + .modules = modules.releaseAsArray(), + }); + auto dir = getBundleDirectory(config); + + // The normal module should be accessible. + KJ_REQUIRE_NONNULL(dir->tryOpen(env.js, kj::Path({"ok", "module.js"}))); + + // The 256-segment module should be accessible — build the lookup path. + kj::Vector atLimitSegments; + for (size_t i = 0; i < 255; i++) { + atLimitSegments.add(kj::str("d")); + } + atLimitSegments.add(kj::str("leaf.txt")); + kj::Path atLimitPath(atLimitSegments.releaseAsArray()); + KJ_REQUIRE_NONNULL(dir->tryOpen(env.js, atLimitPath)); + + // The too deep module should have been skipped entirely. Verify that + // it is not reachable. We just need to check that the leaf doesn't exist — + // if the module was skipped, looking up any part of the deep path will + // return none. 
+ kj::Vector overLimitSegments; + for (size_t i = 0; i < 2000; i++) { + overLimitSegments.add(kj::str("x")); + } + overLimitSegments.add(kj::str("leaf.txt")); + kj::Path overLimitPath(overLimitSegments.releaseAsArray()); + KJ_EXPECT(dir->tryOpen(env.js, overLimitPath) == kj::none); + }); +} + } // namespace } // namespace workerd diff --git a/src/workerd/io/bundle-fs.c++ b/src/workerd/io/bundle-fs.c++ index c306db6e0af..868f5b482f5 100644 --- a/src/workerd/io/bundle-fs.c++ +++ b/src/workerd/io/bundle-fs.c++ @@ -68,7 +68,7 @@ kj::Rc getBundleDirectory(const WorkerSource& conf) { .data = pythonModule.body.asBytes(), }); } - KJ_CASE_ONEOF(pythonRequirement, WorkerSource::PythonRequirement) { + KJ_CASE_ONEOF(pythonRequirement, WorkerSource::ObsoletePythonRequirement) { // Just ignore it. } KJ_CASE_ONEOF(capnpModule, WorkerSource::CapnpModule) { @@ -83,6 +83,10 @@ kj::Rc getBundleDirectory(const WorkerSource& conf) { return getLazyDirectoryImpl([entries = entries.releaseAsArray()] { Directory::Builder builder; kj::Path kRoot{}; + // Defense-in-depth: reject module names whose parsed path exceeds a sane + // segment count. Legitimate module paths are short (e.g. "src/util/helpers.js"); + // pathologically deep names can never be addressed by node:fs anyway. + static constexpr size_t kMaxBundlePathDepth = 1024; for (auto& entry: entries) { auto url = KJ_ASSERT_NONNULL(jsg::Url::tryParse(entry.name, "file:///"_kj)); // If the name is not a valid file URL path, ignore it. 
@@ -91,6 +95,10 @@ kj::Rc getBundleDirectory(const WorkerSource& conf) { } auto pathStr = kj::str(url.getPathname().slice(1)); auto path = kRoot.eval(pathStr); + if (path.size() > kMaxBundlePathDepth) { + KJ_LOG(WARNING, "Skipping overly deep module path", path.size()); + continue; + } builder.addPath(path, File::newReadable(entry.data)); } return builder.finish(); diff --git a/src/workerd/io/compatibility-date-test.c++ b/src/workerd/io/compatibility-date-test.c++ index 8a4a528b9be..774409626ef 100644 --- a/src/workerd/io/compatibility-date-test.c++ +++ b/src/workerd/io/compatibility-date-test.c++ @@ -71,7 +71,8 @@ KJ_TEST("compatibility flag parsing") { [](kj::StringPtr compatDate, kj::ArrayPtr featureFlags, kj::StringPtr expectedOutput, kj::ArrayPtr expectedErrors = nullptr, CompatibilityDateValidation dateValidation = CompatibilityDateValidation::FUTURE_FOR_TEST, - bool r2InternalBetaApiSet = false, bool experimental = false) { + bool r2InternalBetaApiSet = false, bool experimental = false, + kj::ArrayPtr allowedExperimentalFlags = nullptr) { capnp::MallocMessageBuilder message; auto orphanage = message.getOrphanage(); @@ -85,8 +86,8 @@ KJ_TEST("compatibility flag parsing") { auto output = outputOrphan.get(); SimpleWorkerErrorReporter errorReporter; - compileCompatibilityFlags( - compatDate, flagList.asReader(), output, errorReporter, experimental, dateValidation); + compileCompatibilityFlags(compatDate, flagList.asReader(), output, errorReporter, experimental, + dateValidation, allowedExperimentalFlags); capnp::TextCodec codec; auto parsedExpectedOutput = codec.decode(expectedOutput, orphanage); @@ -164,6 +165,30 @@ KJ_TEST("compatibility flag parsing") { expectCompileCompatibilityFlags("2020-01-01", {"durable_object_rename"_kj}, "(obsolete19 = true)", {}, CompatibilityDateValidation::CODE_VERSION, false, true); + // An experimental flag may be individually permitted via the allowlist, even when experimental + // features are not generally allowed. 
+ expectCompileCompatibilityFlags("2020-01-01", {"durable_object_rename"_kj}, "(obsolete19 = true)", + {}, CompatibilityDateValidation::CODE_VERSION, false, false, {"durable_object_rename"_kj}); + + // Allowlisting an unrelated experimental flag does not grant access to a different one. + expectCompileCompatibilityFlags("2020-01-01", {"durable_object_rename"_kj}, "(obsolete19 = true)", + {"The compatibility flag durable_object_rename is experimental and may break or be removed " + "in a future version of workerd. To use this flag, you must pass --experimental on the " + "command line."_kj}, + CompatibilityDateValidation::CODE_VERSION, false, false, {"some_other_flag"_kj}); + + // The allowlist also applies under CURRENT_DATE_FOR_CLOUDFLARE validation. + expectCompileCompatibilityFlags("2020-01-01", {"durable_object_rename"_kj}, "(obsolete19 = true)", + {}, CompatibilityDateValidation::CURRENT_DATE_FOR_CLOUDFLARE, false, false, + {"durable_object_rename"_kj}); + + // Without the allowlist, CURRENT_DATE_FOR_CLOUDFLARE emits the Cloudflare-specific message. + expectCompileCompatibilityFlags("2020-01-01", {"durable_object_rename"_kj}, "(obsolete19 = true)", + {"The compatibility flag durable_object_rename is experimental and cannot yet be used in " + "Workers deployed to Cloudflare."_kj}, + CompatibilityDateValidation::CURRENT_DATE_FOR_CLOUDFLARE, false, false, + {"some_other_flag"_kj}); + // Test experimental requirement using the durable_object_alarms flag since we know this flag // is obsolete and will never have a date set. 
(Should always pass, even if experimental flags // aren't allowed) @@ -329,8 +354,8 @@ KJ_TEST("encode to flag list for FL") { SimpleWorkerErrorReporter errorReporter; - compileCompatibilityFlags( - compatDate, flagList.asReader(), output, errorReporter, experimental, dateValidation); + compileCompatibilityFlags(compatDate, flagList.asReader(), output, errorReporter, experimental, + dateValidation, nullptr); KJ_ASSERT(errorReporter.errors.empty()); return kj::mv(outputOrphan); diff --git a/src/workerd/io/compatibility-date.c++ b/src/workerd/io/compatibility-date.c++ index c20e169b9f2..8128802d85d 100644 --- a/src/workerd/io/compatibility-date.c++ +++ b/src/workerd/io/compatibility-date.c++ @@ -104,7 +104,8 @@ static void compileCompatibilityFlags(kj::StringPtr compatDate, CompatibilityFlags::Builder output, Worker::ValidationErrorReporter& errorReporter, bool allowExperimentalFeatures, - CompatibilityDateValidation dateValidation) { + CompatibilityDateValidation dateValidation, + kj::ArrayPtr allowedExperimentalFlags) { auto parsedCompatDate = CompatDate::parse(compatDate, errorReporter); switch (dateValidation) { @@ -235,14 +236,23 @@ static void compileCompatibilityFlags(kj::StringPtr compatDate, // set the flag early to make sure they don't forget later. } if (enableByFlag && isExperimental && !allowExperimentalFeatures) { - if (dateValidation == CompatibilityDateValidation::CURRENT_DATE_FOR_CLOUDFLARE) { - errorReporter.addError(kj::str("The compatibility flag ", enableFlagName, - " is experimental and cannot yet be used in Workers deployed to Cloudflare.")); - } else { - errorReporter.addError(kj::str("The compatibility flag ", enableFlagName, - " is experimental and may break or be " - "removed in a future version of workerd. To use this flag, you must pass --experimental " - "on the command line.")); + // Check whether this experimental flag is individually permitted via the allowlist. 
+ bool experimentalFlagAllowlisted = false; + for (auto& allowed: allowedExperimentalFlags) { + if (allowed == enableFlagName) { + experimentalFlagAllowlisted = true; + break; + } + } + if (!experimentalFlagAllowlisted) { + if (dateValidation == CompatibilityDateValidation::CURRENT_DATE_FOR_CLOUDFLARE) { + errorReporter.addError(kj::str("The compatibility flag ", enableFlagName, + " is experimental and cannot yet be used in Workers deployed to Cloudflare.")); + } else { + errorReporter.addError(kj::str("The compatibility flag ", enableFlagName, + " is experimental and may break or be removed in a future version of workerd. To use " + "this flag, you must pass --experimental on the command line.")); + } } } @@ -265,7 +275,8 @@ void compileCompatibilityFlags(kj::StringPtr compatDate, CompatibilityFlags::Builder output, Worker::ValidationErrorReporter& errorReporter, bool allowExperimentalFeatures, - CompatibilityDateValidation dateValidation) { + CompatibilityDateValidation dateValidation, + kj::ArrayPtr allowedExperimentalFlags) { kj::HashSet flagSet; flagSet.reserve(compatFlags.size()); for (auto flag: compatFlags) { @@ -275,7 +286,7 @@ void compileCompatibilityFlags(kj::StringPtr compatDate, } return compileCompatibilityFlags(compatDate, kj::mv(flagSet), output, errorReporter, - allowExperimentalFeatures, dateValidation); + allowExperimentalFeatures, dateValidation, allowedExperimentalFlags); } void compileCompatibilityFlags(kj::StringPtr compatDate, @@ -283,7 +294,8 @@ void compileCompatibilityFlags(kj::StringPtr compatDate, CompatibilityFlags::Builder output, Worker::ValidationErrorReporter& errorReporter, bool allowExperimentalFeatures, - CompatibilityDateValidation dateValidation) { + CompatibilityDateValidation dateValidation, + kj::ArrayPtr allowedExperimentalFlags) { kj::HashSet flagSet; flagSet.reserve(compatFlags.size()); for (auto& flag: compatFlags) { @@ -293,7 +305,7 @@ void compileCompatibilityFlags(kj::StringPtr compatDate, } return 
compileCompatibilityFlags(compatDate, kj::mv(flagSet), output, errorReporter, - allowExperimentalFeatures, dateValidation); + allowExperimentalFeatures, dateValidation, allowedExperimentalFlags); } namespace { diff --git a/src/workerd/io/compatibility-date.capnp b/src/workerd/io/compatibility-date.capnp index 56d8a6d08c8..c95237ec3d2 100644 --- a/src/workerd/io/compatibility-date.capnp +++ b/src/workerd/io/compatibility-date.capnp @@ -1540,4 +1540,19 @@ struct CompatibilityFlags @0x8f8c1b68151b6cef { # startup. This allows packages to extend `sys.path` declaratively (e.g. to # add subdirectories or register import hooks). Without this flag, `.pth` # files in `python_modules/` are ignored. + + throwOnNotImplementedTlsOptions @177 :Bool + $compatEnableFlag("throw_on_not_implemented_tls_options") + $compatDisableFlag("no_throw_on_not_implemented_tls_options") + $compatEnableDate("2026-06-16"); + # When enabled, passing unsupported TLS options (e.g. checkServerIdentity) + # to tls.connect() or new TLSSocket() throws ERR_OPTION_NOT_IMPLEMENTED + # instead of silently ignoring them + + autoGrpcConvert @178 :Bool + $compatEnableFlag("auto_grpc_convert") + $neededByFl + $experimental; + # When enabled, a Worker's outbound gRPC-web subrequest is converted to gRPC at + # the edge. 
} diff --git a/src/workerd/io/compatibility-date.h b/src/workerd/io/compatibility-date.h index d78961c8ac9..b7b62009657 100644 --- a/src/workerd/io/compatibility-date.h +++ b/src/workerd/io/compatibility-date.h @@ -36,13 +36,15 @@ void compileCompatibilityFlags(kj::StringPtr compatDate, CompatibilityFlags::Builder output, Worker::ValidationErrorReporter& errorReporter, bool allowExperimentalFeatures, - CompatibilityDateValidation dateValidation); + CompatibilityDateValidation dateValidation, + kj::ArrayPtr allowedExperimentalFlags); void compileCompatibilityFlags(kj::StringPtr compatDate, kj::ArrayPtr compatFlags, CompatibilityFlags::Builder output, Worker::ValidationErrorReporter& errorReporter, bool allowExperimentalFeatures, - CompatibilityDateValidation dateValidation); + CompatibilityDateValidation dateValidation, + kj::ArrayPtr allowedExperimentalFlags); // Return an array of compatibility enable-flags which express the given FeatureFlags. The returned // StringPtrs point to FeatureFlags annotation parameters, which live in static storage. diff --git a/src/workerd/io/container.capnp b/src/workerd/io/container.capnp index 7fdf41005af..deb6066704a 100644 --- a/src/workerd/io/container.capnp +++ b/src/workerd/io/container.capnp @@ -6,6 +6,7 @@ $Cxx.allowCancellation; using import "/capnp/compat/byte-stream.capnp".ByteStream; using CompatibilityFlags = import "/workerd/io/compatibility-date.capnp".CompatibilityFlags; +using SpanContext = import "/workerd/io/worker-interface.capnp".SpanContext; interface Container @0x9aaceefc06523bca { # RPC interface to talk to a container, for containers attached to Durable Objects. @@ -13,7 +14,7 @@ interface Container @0x9aaceefc06523bca { # When the actor shuts down, workerd will drop the `Container` capability, at which point # the container engine should implicitly destroy the container. - status @0 () -> (running :Bool); + status @0 (spanContext :SpanContext) -> (running :Bool); # Returns the container's current status. 
The runtime will always call this at DO startup. start @1 StartParams -> (); @@ -53,6 +54,8 @@ interface Container @0x9aaceefc06523bca { containerSnapshotId @7 :Text; # Id of the full container snapshot to restore before the container starts. + + spanContext @8 :SpanContext; } struct Label { @@ -122,6 +125,8 @@ interface Container @0x9aaceefc06523bca { combinedOutput @3 :Bool; # If true, stderr is combined into stdout. If stdout is not set, combined output is discarded. + + spanContext @4 :SpanContext; } struct Process { @@ -161,7 +166,7 @@ interface Container @0x9aaceefc06523bca { signal @4 (signo :UInt32); # Sends the given Linux signal number to the root process. - getTcpPort @5 (port :UInt16) -> (port :Port); + getTcpPort @5 (port :UInt16, spanContext :SpanContext) -> (port :Port); # Obtains an object which can be used to connect to the application inside the container on the # given TCP port (the application must be listening on this port). diff --git a/src/workerd/io/frankenvalue-test.c++ b/src/workerd/io/frankenvalue-test.c++ index b1ede17be11..3e8d0ea13b5 100644 --- a/src/workerd/io/frankenvalue-test.c++ +++ b/src/workerd/io/frankenvalue-test.c++ @@ -143,5 +143,50 @@ KJ_TEST("Frankenvalue") { }); } +KJ_TEST("Frankenvalue fromCapnp rejects capTableSize uint32 overflow") { + // Regression test for AUTOVULN-EW-EDGEWORKER-15: fromCapnpImpl() accumulated per-node + // UInt32 capTableSize fields into a 32-bit uint capCount with no overflow check. An attacker + // could craft capTableSize values that wrap around 2^32 so the final sum equals capTable.size() + // while individual Property::capTableOffset/capTableSize values are arbitrary, leading to OOB + // slice bounds in toJsImpl(). + // + // Construction: root capTableSize=0x80000000, one property with capTableSize=0x80000001. + // Walk: capCount=0 -> +=0x80000000 -> 0x80000000; record property offset=0x80000000; + // recurse: capCount += 0x80000001 -> wraps to 0x00000001 (mod 2^32). 
+ // Final KJ_REQUIRE(capTable.size()==1 == capCount==1) would pass without the fix. + + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + builder.setEmptyObject(); + builder.setCapTableSize(0x80000000u); + + auto props = builder.initProperties(1); + props[0].setName("p"); + props[0].setEmptyObject(); + props[0].setCapTableSize(0x80000001u); + + // Provide exactly 1 real cap table entry — the wrapped sum would equal 1. + kj::Vector> capTable; + capTable.add(kj::heap(42)); + + // The fix must reject this before the overflow can produce bogus slice bounds. + KJ_EXPECT_THROW_MESSAGE( + "capTableSize exceeds", Frankenvalue::fromCapnp(builder.asReader(), kj::mv(capTable))); +} + +KJ_TEST("Frankenvalue fromCapnp rejects capTableSize exceeding capTable") { + // Simpler case: a single node claims more caps than actually exist, without overflow. + capnp::MallocMessageBuilder message; + auto builder = message.initRoot(); + builder.setEmptyObject(); + builder.setCapTableSize(100); // Claims 100 caps but we only provide 1. 
+ + kj::Vector> capTable; + capTable.add(kj::heap(42)); + + KJ_EXPECT_THROW_MESSAGE( + "capTableSize exceeds", Frankenvalue::fromCapnp(builder.asReader(), kj::mv(capTable))); +} + } // namespace } // namespace workerd diff --git a/src/workerd/io/frankenvalue.c++ b/src/workerd/io/frankenvalue.c++ index f4d0dd0c31b..db10118c3c7 100644 --- a/src/workerd/io/frankenvalue.c++ +++ b/src/workerd/io/frankenvalue.c++ @@ -70,7 +70,10 @@ void Frankenvalue::toCapnp(rpc::Frankenvalue::Builder builder) { toCapnpImpl(builder, capTable.size()); } -void Frankenvalue::toCapnpImpl(rpc::Frankenvalue::Builder builder, uint capTableSize) { +void Frankenvalue::toCapnpImpl(rpc::Frankenvalue::Builder builder, size_t capTableSize) { + KJ_REQUIRE(capTableSize <= static_cast(kj::maxValue), + "Frankenvalue capTable is too large to serialize"); + KJ_SWITCH_ONEOF(value) { KJ_CASE_ONEOF(_, EmptyObject) { builder.setEmptyObject(); @@ -84,10 +87,10 @@ void Frankenvalue::toCapnpImpl(rpc::Frankenvalue::Builder builder, uint capTable } if (properties.empty()) { - builder.setCapTableSize(capTableSize); + builder.setCapTableSize(static_cast(capTableSize)); } else { - uint capTablePos = properties[0].capTableOffset; - builder.setCapTableSize(capTablePos); + size_t capTablePos = properties[0].capTableOffset; + builder.setCapTableSize(static_cast(capTablePos)); auto listBuilder = builder.initProperties(properties.size()); @@ -106,16 +109,16 @@ Frankenvalue Frankenvalue::fromCapnp( rpc::Frankenvalue::Reader reader, kj::Vector> capTable) { Frankenvalue result; - uint capCount = 0; - result.fromCapnpImpl(reader, capCount); + size_t capCount = result.fromCapnpImpl(reader, 0, capTable.size()); - KJ_REQUIRE(capTable.size() == capCount); + KJ_REQUIRE(capTable.size() == capCount, "Frankenvalue capTable size doesn't match contents"); result.capTable = kj::mv(capTable); return result; } -void Frankenvalue::fromCapnpImpl(rpc::Frankenvalue::Reader reader, uint& capCount) { +size_t Frankenvalue::fromCapnpImpl( + 
rpc::Frankenvalue::Reader reader, size_t capCount, size_t capTableTotal) { switch (reader.which()) { case rpc::Frankenvalue::EMPTY_OBJECT: this->value = EmptyObject(); @@ -128,7 +131,10 @@ void Frankenvalue::fromCapnpImpl(rpc::Frankenvalue::Reader reader, uint& capCoun break; } - capCount += reader.getCapTableSize(); + size_t nodeCaps = reader.getCapTableSize(); + // Security invariant: never create OOB cap table slices. + KJ_REQUIRE(nodeCaps <= capTableTotal - capCount, "Frankenvalue capTableSize exceeds capTable"); + capCount += nodeCaps; auto properties = reader.getProperties(); if (properties.size() > 0) { @@ -139,11 +145,13 @@ void Frankenvalue::fromCapnpImpl(rpc::Frankenvalue::Reader reader, uint& capCoun .name = kj::str(property.getName()), .capTableOffset = capCount, }; - result.value.fromCapnpImpl(property, capCount); + capCount = result.value.fromCapnpImpl(property, capCount, capTableTotal); result.capTableSize = capCount - result.capTableOffset; this->properties.add(kj::mv(result)); } } + + return capCount; } jsg::JsValue Frankenvalue::toJs(jsg::Lock& js) { @@ -163,7 +171,7 @@ jsg::JsValue Frankenvalue::toJsImpl(jsg::Lock& js, kj::ArrayPtr() && properties.empty(); } + // Returns an estimate of the in-memory size of the value, in bytes. This sums the size of the + // serialized/JSON content of this value plus, recursively, the sizes of any stitched-in + // properties (including their names). Intended for enforcing size limits, not for exact + // accounting. The cap table is not included. + size_t estimateSize() const; + Frankenvalue clone(); // This method only works if the `CapTableEntry`s in this `Frankenvalue` all implement @@ -178,8 +184,9 @@ class Frankenvalue { kj::Vector> capTable; Frankenvalue cloneImpl() const; - void fromCapnpImpl(rpc::Frankenvalue::Reader reader, uint& capTablePos); - void toCapnpImpl(rpc::Frankenvalue::Builder builder, uint capTableSize); + // `capTableTotal` is the real cap table size; `capCount` must never advance past it. 
+ size_t fromCapnpImpl(rpc::Frankenvalue::Reader reader, size_t capCount, size_t capTableTotal); + void toCapnpImpl(rpc::Frankenvalue::Builder builder, size_t capTableSize); jsg::JsValue toJsImpl(jsg::Lock& js, kj::ArrayPtr> capTable); }; @@ -190,8 +197,8 @@ struct Frankenvalue::Property { // `value.capTable` is always empty. Instead, these two values specify the slice of the parent's // capTable which this Frankenvalue refers into. - uint capTableOffset = 0; - uint capTableSize = 0; + size_t capTableOffset = 0; + size_t capTableSize = 0; }; } // namespace workerd diff --git a/src/workerd/io/hibernation-manager-test.c++ b/src/workerd/io/hibernation-manager-test.c++ new file mode 100644 index 00000000000..5db4df509f1 --- /dev/null +++ b/src/workerd/io/hibernation-manager-test.c++ @@ -0,0 +1,1061 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 +// +// Tests for HibernationManager behavior. The tests interact with the abstract +// HibernationManager interface so that the same suite can run against any +// concrete implementation that may exist over time (autogated in production). +// +// A note on the code comments throughout this file: they mix descriptions of +// the implementation as it stands today with motivations and references to +// in-progress refactor work. They may go stale relative to the current +// implementation as that work lands. The tests themselves are the source of +// truth for the contract; comments are best-effort context. +// +// A few tests use KJ_EXPECT_LOG to capture the production "another message +// send is already in progress" assertion as an expected ERROR log. They pass +// while the bug is present and fail loudly when the fix lands. Search the +// file for "regression test for EW-10817" to find them. 
+ +#include +#include +#include +#include +#include + +#include +#include + +#if KJ_HAS_COMPILER_FEATURE(address_sanitizer) || defined(__SANITIZE_ADDRESS__) +#include +#endif + +namespace workerd { + +namespace { + +// ============================================================================ +// Test fixtures +// ============================================================================ + +// Counts callbacks observed by StubLoopback / StubWorkerInterface so tests can +// assert dispatch behavior (e.g., auto-response should NOT dispatch). +struct DispatchStats { + uint getWorkerCalls = 0; + uint customEventCalls = 0; +}; + +// Minimal WorkerInterface for tests. Returns success on customEvent (so the HM's +// readLoop continues normally) and counts calls. All other methods are +// unimplemented — this is only suitable for tests that exercise the +// hibernation event dispatch path, which goes through customEvent(). +class StubWorkerInterface final: public WorkerInterface { + public: + explicit StubWorkerInterface(DispatchStats& stats): stats(stats) {} + + kj::Promise customEvent( + kj::Own event) override { + ++stats.customEventCalls; + return WorkerInterface::CustomEvent::Result{.outcome = EventOutcome::OK}; + } + + kj::Promise request(kj::HttpMethod, + kj::StringPtr, + const kj::HttpHeaders&, + kj::AsyncInputStream&, + kj::HttpService::Response&) override { + KJ_UNIMPLEMENTED("StubWorkerInterface::request not used"); + } + kj::Promise connect(kj::StringPtr, + const kj::HttpHeaders&, + kj::AsyncIoStream&, + ConnectResponse&, + kj::HttpConnectSettings) override { + KJ_UNIMPLEMENTED("StubWorkerInterface::connect not used"); + } + kj::Promise prewarm(kj::StringPtr) override { + KJ_UNIMPLEMENTED("StubWorkerInterface::prewarm not used"); + } + kj::Promise runScheduled(kj::Date, kj::StringPtr) override { + KJ_UNIMPLEMENTED("StubWorkerInterface::runScheduled not used"); + } + kj::Promise runAlarm(kj::Date, uint32_t) override { + 
KJ_UNIMPLEMENTED("StubWorkerInterface::runAlarm not used"); + } + + private: + DispatchStats& stats; +}; + +// Test loopback that hands out StubWorkerInterfaces and counts getWorker calls. +class StubLoopback final: public Worker::Actor::Loopback, public kj::Refcounted { + public: + explicit StubLoopback(DispatchStats& stats): stats(stats) {} + + kj::Own getWorker(IoChannelFactory::SubrequestMetadata) override { + ++stats.getWorkerCalls; + return kj::heap(stats); + } + + kj::Own addRef() override { + return kj::addRef(*this); + } + + private: + DispatchStats& stats; +}; + +// Helpers below are intentionally split so the HibernationManager can outlive any single +// IncomingRequest, which matters for tests that span multiple IRs. makeTestHm() needs no +// IoContext; acceptNewWebSocket() and sendFromDo() do (the api::WebSocket constructor stores +// IoOwn members, and ws.send() is delivered through the IoContext's pump). + +// SetupParams builder that installs a StubLoopback on the actor referencing `stats`. The +// caller MUST keep `stats` alive for the lifetime of the resulting TestFixture (declare it +// before the fixture). The same StubLoopback is later retrieved via actor.getLoopback() and +// handed to the HM, so actor and HM share a single Loopback (mirroring production). +TestFixture::SetupParams stubLoopbackParams(DispatchStats& stats, kj::String actorId) { + return { + .actorId = Worker::Actor::Id(kj::mv(actorId)), + .useRealTimers = true, + .actorLoopback = kj::refcounted(stats), + }; +} + +// Create a HibernationManager. The constructor (and setTimerChannel) don't need an IoContext; +// production typically constructs the HM inside one only because the trigger — a JS call to +// state.acceptWebSocket — runs in one. The HM itself is IoContext-independent and this test +// pattern keeps that explicit so any inadvertent dependency growth shows up. 
+kj::Own makeTestHm(TestFixture& fixture) { + auto hm = kj::refcounted(fixture.getActor().getLoopback(), 0); + hm->setTimerChannel(fixture.getTimerChannel()); + return hm; +} + +// Same, but configure auto-response. Both `autoRequest` and `autoResponse` are required. +kj::Own makeTestHm( + TestFixture& fixture, kj::StringPtr autoRequest, kj::StringPtr autoResponse) { + auto hm = makeTestHm(fixture); + hm->setWebSocketAutoResponse(autoRequest, autoResponse); + return hm; +} + +// Create an api::WebSocket, accept it into the HM under `tag` (or untagged if `tag` is empty), +// and return the eyeball end of the new pipe. Tests can call this multiple times to attach +// multiple concurrent WebSockets; pass distinct tags to identify them later via getWebSockets. +// +// Needs an IoContext (the api::WebSocket constructor stores IoOwn members), supplied by the +// IR. Test code should pick an IR whose IoContext should "own" this api::WebSocket. +kj::Own acceptNewWebSocket(TestFixture& fixture, + IoContext::IncomingRequest& request, + Worker::Actor::HibernationManager& hm, + kj::StringPtr tag = ""_kj) { + kj::Own eyeball; + fixture.enterContext(request, [&](const TestFixture::Environment& env) { + auto pipe = kj::newWebSocketPipe(); + eyeball = kj::mv(pipe.ends[0]); + // TODO(bug) EW-10817: leak a ref so the api::WebSocket survives the AsyncObject destructor + // issue (resolving EW-10817 will naturally remove the need for this). Tell LSan the leak + // is intentional so it doesn't fail tests under sanitizer builds. + auto apiWs = env.js.alloc(env.js, kj::mv(pipe.ends[1])); + auto* leaked = new jsg::Ref(apiWs.addRef()); +#if KJ_HAS_COMPILER_FEATURE(address_sanitizer) || defined(__SANITIZE_ADDRESS__) + __lsan_ignore_object(leaked); +#else + (void)leaked; +#endif + auto tags = kj::heapArray(tag.size() == 0 ? 
0 : 1); + if (tag.size() != 0) tags[0] = kj::str(tag); + hm.acceptWebSocket(kj::mv(apiWs), tags); + }); + return eyeball; +} + +// Send a string message from the DO side, on the WebSocket identified by `tag` (or the only +// accepted one if `tag` is empty). Enters the supplied IR's IoContext for the duration of +// the send setup; the actual ws.send happens asynchronously after the lock is released. +void sendFromDo(TestFixture& fixture, + IoContext::IncomingRequest& request, + Worker::Actor::HibernationManager& hm, + kj::StringPtr msg, + kj::StringPtr tag = ""_kj) { + fixture.enterContext(request, [&](const TestFixture::Environment& env) { + auto& js = env.js; + auto websockets = + hm.getWebSockets(js, tag.size() == 0 ? kj::Maybe(kj::none) : tag); + KJ_ASSERT( + websockets.size() == 1, "expected exactly one WebSocket for tag", tag, websockets.size()); + websockets[0]->send(js, kj::OneOf, kj::String>(kj::str(msg))); + }); +} + +KJ_TEST("HibernationManager: smoke (create, accept, query)") { + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("smoke"))); + auto hm = makeTestHm(fixture); + auto request = fixture.newIncomingRequest(); + auto end1 KJ_UNUSED = acceptNewWebSocket(fixture, *request, *hm); + + fixture.enterContext(*request, [&](const TestFixture::Environment& env) { + auto websockets = hm->getWebSockets(env.js, kj::none); + KJ_ASSERT(websockets.size() == 1); + }); + + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: DO sends string message to eyeball") { + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("do-send-string"))); + auto hm = makeTestHm(fixture); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + sendFromDo(fixture, *request, *hm, "hello"_kj); + + // Drive the pump; the message should arrive at the eyeball end.
+ auto msg = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg.is()); + KJ_ASSERT(msg.get() == "hello"_kj); + + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: eyeball sends non-auto-response message → dispatched to worker") { + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("eyeball-send"))); + auto hm = makeTestHm(fixture); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + // Eyeball sends a message that does NOT match any auto-response config. + end1->send("hello from eyeball"_kj).wait(fixture.getWaitScope()); + + // Give the HM's readLoop time to receive and dispatch. + fixture.pollEventLoop(); + + KJ_ASSERT(stats.customEventCalls == 1, "expected exactly one customEvent dispatch", + stats.customEventCalls); + + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: DO close sends close frame to eyeball") { + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("do-close"))); + auto hm = makeTestHm(fixture); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + fixture.enterContext(*request, [&](const TestFixture::Environment& env) { + auto& js = env.js; + auto websockets = hm->getWebSockets(js, kj::none); + KJ_ASSERT(websockets.size() == 1); + websockets[0]->close(js, 1001, jsg::USVString(kj::str("bye"))); + }); + + // The eyeball should receive a Close message. 
+ auto msg = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg.is()); + auto& close = msg.get(); + KJ_ASSERT(close.code == 1001, close.code); + KJ_ASSERT(close.reason == "bye"_kj, close.reason); + + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: eyeball close dispatches webSocketClose to worker") { + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("eyeball-close"))); + auto hm = makeTestHm(fixture); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + // Eyeball closes the WS. The HM's readLoop should observe the close and dispatch a + // webSocketClose event to the worker via customEvent. + end1->close(1001, "eyeball bye"_kj).wait(fixture.getWaitScope()); + + fixture.pollEventLoop(); + + KJ_ASSERT(stats.customEventCalls == 1, "expected exactly one customEvent dispatch", + stats.customEventCalls); + + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: DO sends binary message to eyeball") { + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("do-send-bin"))); + auto hm = makeTestHm(fixture); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + fixture.enterContext(*request, [&](const TestFixture::Environment& env) { + auto& js = env.js; + auto websockets = hm->getWebSockets(js, kj::none); + KJ_ASSERT(websockets.size() == 1); + auto bytes = kj::heapArray({0xde, 0xad, 0xbe, 0xef}); + websockets[0]->send(js, kj::OneOf, kj::String>(kj::mv(bytes))); + }); + + auto msg = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg.is>()); + auto& bytes = msg.get>(); + KJ_ASSERT(bytes.size() == 4); + KJ_ASSERT(bytes[0] == 0xde && bytes[1] == 0xad && bytes[2] == 0xbe && bytes[3] == 0xef); + + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: eyeball sends binary message → dispatched to worker") { + DispatchStats 
stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("eyeball-send-bin"))); + auto hm = makeTestHm(fixture); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + auto bytes = kj::heapArray({0xca, 0xfe, 0xba, 0xbe}); + end1->send(bytes.asPtr()).wait(fixture.getWaitScope()); + + fixture.pollEventLoop(); + KJ_ASSERT(stats.customEventCalls == 1, "expected exactly one customEvent dispatch", + stats.customEventCalls); + + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: multiple tagged WebSockets are addressable independently") { + // Accept two WebSockets under distinct tags. getWebSockets(js, tag) should return only the + // matching one; getWebSockets(js, kj::none) returns both. DO-side sends, scoped via tag, + // reach only the addressed eyeball. + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("multi-ws"))); + auto hm = makeTestHm(fixture); + auto request = fixture.newIncomingRequest(); + auto aliceEnd1 = acceptNewWebSocket(fixture, *request, *hm, "alice"_kj); + auto bobEnd1 = acceptNewWebSocket(fixture, *request, *hm, "bob"_kj); + + // The HM tracks both; getWebSockets without a tag returns the union. + fixture.enterContext(*request, [&](const TestFixture::Environment& env) { + auto& js = env.js; + KJ_ASSERT(hm->getWebSockets(js, kj::none).size() == 2); + KJ_ASSERT(hm->getWebSockets(js, "alice"_kj).size() == 1); + KJ_ASSERT(hm->getWebSockets(js, "bob"_kj).size() == 1); + }); + + // DO sends a message addressed to alice; only alice's eyeball gets it. + sendFromDo(fixture, *request, *hm, "for-alice"_kj, "alice"_kj); + auto msgA = aliceEnd1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msgA.is() && msgA.get() == "for-alice"_kj); + + // Bob should have received nothing yet. 
+ auto bobReceive = bobEnd1->receive(); + fixture.pollEventLoop(); + KJ_ASSERT(!bobReceive.poll(fixture.getWaitScope()), "bob should not have received anything yet"); + + // Now send to bob; the previous receive promise resolves. + sendFromDo(fixture, *request, *hm, "for-bob"_kj, "bob"_kj); + auto msgB = bobReceive.wait(fixture.getWaitScope()); + KJ_ASSERT(msgB.is() && msgB.get() == "for-bob"_kj); + + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: auto-response request not dispatched to worker (active)") { + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("autoresp-active"))); + auto hm = makeTestHm(fixture, "ping"_kj, "pong"_kj); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + // Eyeball sends a message that matches the auto-response request. + end1->send("ping"_kj).wait(fixture.getWaitScope()); + + // The HM should reply with the configured response and NOT dispatch to the worker. + auto msg = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg.is()); + KJ_ASSERT(msg.get() == "pong"_kj); + + // Wait for any potential dispatch (there shouldn't be one). + fixture.pollEventLoop(); + KJ_ASSERT(stats.customEventCalls == 0, "auto-response should not dispatch to worker", + stats.customEventCalls); + + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: auto-response not dispatched to worker (hibernated)") { + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("autoresp-hibernated"))); + auto hm = makeTestHm(fixture, "ping"_kj, "pong"_kj); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + // Hibernate before any messages flow. + fixture.enterWorkerLock([&](Worker::Lock& lock) { hm->hibernateWebSockets(lock); }); + + // Eyeball sends a ping. 
The HM's hibernated-mode readLoop should send pong directly + // (bypassing the pump, which has no IoContext during hibernation) and NOT dispatch. + end1->send("ping"_kj).wait(fixture.getWaitScope()); + auto msg = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg.is() && msg.get() == "pong"_kj); + + fixture.pollEventLoop(); + KJ_ASSERT( + stats.customEventCalls == 0, "auto-response should not dispatch", stats.customEventCalls); + + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: auto-response interleaved with DO sends (active)") { + // Verifies that, in active mode, auto-response pongs interleaved with DO-side sends all + // arrive at the eyeball without tripping the "another message send is already in progress" + // assertion. The pump and sendAutoResponse synchronize on ongoingAutoResponse in active + // mode; if that synchronization breaks this test will trip the bug class targeted by + // EW-10817 — but in active mode it should hold. + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("autoresp-interleaved"))); + auto hm = makeTestHm(fixture, "ping"_kj, "pong"_kj); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + sendFromDo(fixture, *request, *hm, "before"_kj); + end1->send("ping"_kj).wait(fixture.getWaitScope()); + sendFromDo(fixture, *request, *hm, "after"_kj); + + // Drain three messages from the eyeball. The order isn't guaranteed; verify the set. 
+ bool sawBefore = false, sawPong = false, sawAfter = false; + for (int i = 0; i < 3; ++i) { + auto msg = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg.is(), "expected string message", i); + auto& s = msg.get(); + if (s == "before"_kj) + sawBefore = true; + else if (s == "pong"_kj) + sawPong = true; + else if (s == "after"_kj) + sawAfter = true; + else + KJ_FAIL_ASSERT("unexpected message", s); + } + KJ_ASSERT(sawBefore && sawPong && sawAfter); + + KJ_ASSERT(stats.customEventCalls == 0, "auto-response should not dispatch to worker", + stats.customEventCalls); + + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: comm across multiple IncomingRequests sharing an IoContext") { + // The actor pattern: a single IoContext outlives any one IncomingRequest. The api::WebSocket + // is bound to the IoContext (via IoOwn members), not to any specific IR, so it must remain + // usable as IRs come and go. + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("multi-ir-serial"))); + auto hm = makeTestHm(fixture); + auto context = fixture.newIoContext(); + + // Request 1: accept a WS, send a message from the DO side, receive it on the eyeball. + // (We must read the message before draining; the pump's send blocks on a reader.) + auto request1 = fixture.newIncomingRequest(*context); + auto end1 = acceptNewWebSocket(fixture, *request1, *hm); + sendFromDo(fixture, *request1, *hm, "from-r1"_kj); + auto msg1 = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg1.is()); + KJ_ASSERT(msg1.get() == "from-r1"_kj); + fixture.drainAndDestroy(kj::mv(request1)); + + // Request 2: same IoContext, same WS; send another message and receive it. 
+ auto request2 = fixture.newIncomingRequest(*context); + sendFromDo(fixture, *request2, *hm, "from-r2"_kj); + auto msg2 = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg2.is()); + KJ_ASSERT(msg2.get() == "from-r2"_kj); + fixture.drainAndDestroy(kj::mv(request2)); +} + +KJ_TEST("HibernationManager: two concurrent IncomingRequests sharing an IoContext") { + // Two IncomingRequests delivered against the same IoContext, with overlapping lifetimes. + // This is a real production pattern: e.g. a chat-room DO might be handling a message from + // one user (one IR) and concurrently fan it out to another user, where the fan-out is + // structured as a second IR against the same actor. The IoContext model accommodates this + // — the second's delivered() just makes the first non-current — and work routed via either + // IR's enterContext lands on the single shared IoContext correctly. + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("multi-ir-parallel"))); + auto hm = makeTestHm(fixture); + auto context = fixture.newIoContext(); + + auto request1 = fixture.newIncomingRequest(*context); + auto end1 = acceptNewWebSocket(fixture, *request1, *hm); + auto request2 = fixture.newIncomingRequest(*context); // IR1 still alive at this point. + + // Send via IR1; the IoContext is shared, so this works even though IR2 is "current". + sendFromDo(fixture, *request1, *hm, "from-r1"_kj); + auto msg1 = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg1.is() && msg1.get() == "from-r1"_kj); + + // Send via IR2. + sendFromDo(fixture, *request2, *hm, "from-r2"_kj); + auto msg2 = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg2.is() && msg2.get() == "from-r2"_kj); + + // Destroy the older IR first; IR2 keeps working. 
+ fixture.drainAndDestroy(kj::mv(request1)); + sendFromDo(fixture, *request2, *hm, "from-r2-again"_kj); + auto msg3 = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg3.is() && msg3.get() == "from-r2-again"_kj); + + fixture.drainAndDestroy(kj::mv(request2)); +} + +// ---------- Same-IoContext hibernation flows ---------- + +KJ_TEST("HibernationManager: comm survives hibernation/revival within one IoContext") { + // The classic hibernation flow: the HM's activeOrPackage transitions from + // jsg::Ref to HibernationPackage, then a fresh api::WebSocket is + // materialized on demand by getWebSockets(). This works as long as no message is + // in-flight on the pipe at the moment hibernation runs (the in-flight cases are tested + // separately below). + // + // This test stays within a single IoContext. See the cross-IoContext variant further down + // for the production-style flow where the actor is also evicted and recreated. + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("hibernate-survive"))); + auto hm = makeTestHm(fixture); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + // Round-trip a message, fully drained, before hibernation. + sendFromDo(fixture, *request, *hm, "before-hib"_kj); + auto msg1 = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg1.is() && msg1.get() == "before-hib"_kj); + + // Hibernate. Replaces the active api::WebSocket on the HM with a HibernationPackage. + fixture.enterWorkerLock([&](Worker::Lock& lock) { hm->hibernateWebSockets(lock); }); + + // After hibernation, getWebSockets should rebuild a fresh api::WebSocket from the package. 
+ sendFromDo(fixture, *request, *hm, "after-hib"_kj); + auto msg2 = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg2.is() && msg2.get() == "after-hib"_kj); + + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: in-flight DO close survives hibernation within one IoContext") { + // The DO calls close() while the eyeball isn't reading; the pump queues the Close into + // outgoingMessages and blocks on a BlockedSend on the pipe. Hibernation runs. Verify that + // when the eyeball reads, it gets the Close. + // + // Mechanism: the OLD api::WebSocket is dropped from activeOrPackage during hibernation, but + // its pump task lives on (held alive via JSG_THIS in the pump's continuation, which is in + // the IoContext's tasks/waitUntilTasks list). The old pump's blocked ws.close(...) is still + // waiting on the pipe; once the eyeball reads, it delivers the Close. The Close is NOT + // dropped within a single IoContext — IoContext destruction is what loses it. + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("close-race-same-ioc"))); + auto hm = makeTestHm(fixture); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + fixture.enterContext(*request, [&](const TestFixture::Environment& env) { + auto& js = env.js; + auto websockets = hm->getWebSockets(js, kj::none); + websockets[0]->close(js, 1001, jsg::USVString(kj::str("queued-close"))); + }); + fixture.pollEventLoop(); // pump blocks on the close BlockedSend + + // Hibernate while the close is mid-send. activeOrPackage transitions; but we leave the + // IoContext alive (don't drainAndDestroy yet), so the OLD pump task keeps running. + fixture.enterWorkerLock([&](Worker::Lock& lock) { hm->hibernateWebSockets(lock); }); + + // Eyeball reads — should receive the Close that was queued before hibernation. 
+ auto msg = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg.is(), "expected Close"); + auto& close = msg.get(); + KJ_ASSERT(close.code == 1001, close.code); + KJ_ASSERT(close.reason == "queued-close"_kj, close.reason); + + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: in-flight auto-response orphans BlockedSend during hibernation") { + // Regression test for EW-10817. sendAutoResponse creates a BlockedSend on the pipe (held in + // a plain kj::Own outside any IoOwn — see web-socket.c++:874), then hibernation replaces + // activeOrPackage without carrying that state. The new api::WebSocket's pump skips the wait + // and trips on the orphaned BlockedSend. + // + // The KJ_EXPECT_LOG block below captures the bug's symptom (the assertion's ERROR log) so + // the test passes while EW-10817 is open. When the bug is fixed, the log won't fire and + // the KJ_EXPECT_LOG will fail — that's the signal to update this test (delete the + // EXPECT_LOG block and promote the receive() at the end to a positive assertion about the + // auto-response pong's content). + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("ew-10817-autoresp"))); + auto hm = makeTestHm(fixture, "ping"_kj, "pong"_kj); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + // Send ping → readLoop → sendAutoResponse → BlockedSend. + end1->send("ping"_kj).wait(fixture.getWaitScope()); + fixture.pollEventLoop(); + + // Hibernate. + fixture.enterWorkerLock([&](Worker::Lock& lock) { hm->hibernateWebSockets(lock); }); + + // Unhibernate + close → hits orphaned BlockedSend. 
+ { + KJ_EXPECT_LOG(ERROR, "another message send is already in progress"); + fixture.enterContext(*request, [&](const TestFixture::Environment& env) { + auto& js = env.js; + auto websockets = hm->getWebSockets(js, kj::none); + KJ_ASSERT(websockets.size() == 1); + websockets[0]->close(js, 1001, jsg::USVString(kj::str("stale"))); + }); + + fixture.pollEventLoop(); + } + + // Receive the orphaned pong from the pipe (held outside any IoOwn — the very thing this + // test is documenting). This unblocks the stuck pump so drainAndDestroy() below can + // complete cleanly. Once EW-10817 is fixed, the orphan won't exist; this becomes a + // positive assertion about the pong's content. + end1->receive().wait(fixture.getWaitScope()); + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: in-flight DO send orphans BlockedSend during hibernation") { + // Regression test for EW-10817. Same shape as the auto-response variant above but driven by + // a DO-side ws.send() — the pump creates a BlockedSend on the pipe (no BPT yet), hibernation + // orphans it, the next operation on the new api::WebSocket trips the assertion. See the + // auto-response variant above for the EXPECT_LOG / lifecycle details. + // + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("ew-10817-dosend"))); + auto hm = makeTestHm(fixture); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + sendFromDo(fixture, *request, *hm, "hello from DO"_kj); + + fixture.pollEventLoop(); + + // Hibernate. + fixture.enterWorkerLock([&](Worker::Lock& lock) { hm->hibernateWebSockets(lock); }); + + // Unhibernate + close → hits orphaned BlockedSend. 
+ { + KJ_EXPECT_LOG(ERROR, "another message send is already in progress"); + fixture.enterContext(*request, [&](const TestFixture::Environment& env) { + auto& js = env.js; + auto websockets = hm->getWebSockets(js, kj::none); + KJ_ASSERT(websockets.size() == 1); + websockets[0]->close(js, 1001, jsg::USVString(kj::str("stale"))); + }); + + fixture.pollEventLoop(); + } + + // Receive the orphaned "hello from DO" from the pipe — it was sent before hibernation but + // the pump is stuck on its BlockedSend. Consuming it unblocks the pump so drainAndDestroy() + // below can complete cleanly. Once EW-10817 is fixed, this becomes a positive assertion + // about the message content. + end1->receive().wait(fixture.getWaitScope()); + fixture.drainAndDestroy(kj::mv(request)); +} + +// ---------- Cross-IoContext hibernation flows (with actor eviction) ---------- + +KJ_TEST("HibernationManager: comm survives hibernation + actor eviction (cross-IoContext)") { + // Production-style hibernation: the actor is fully evicted and a new one is created on + // revival. The HM outlives any actor instance (in production, the namespace pulls the HM + // off the dying actor; in this test, the test holds it directly). After eviction, a brand + // new IoContext is built against the new actor, and the HM revives the WebSocket into it. + // + // This test exercises the no-in-flight-state cross-IoContext path: it round-trips a message + // before hibernating, so there's no pending BlockedSend on the pipe to orphan. It passes + // both before and after EW-10817 is fixed; its job is to ensure the unified-queue refactor + // doesn't break the basic eviction-and-revive flow. The actual bug-firing cross-IoContext + // case is the auto-response variant below. + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("hibernate-evict"))); + auto hm = makeTestHm(fixture); + + // Phase 1: accept WS under the original actor's IoContext, round-trip a message. 
+ auto request1 = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request1, *hm); + sendFromDo(fixture, *request1, *hm, "pre-evict"_kj); + auto msg1 = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg1.is() && msg1.get() == "pre-evict"_kj); + + // Hibernate, drain the IR, evict the actor. + fixture.enterWorkerLock([&](Worker::Lock& lock) { hm->hibernateWebSockets(lock); }); + fixture.drainAndDestroy(kj::mv(request1)); + fixture.resetActor(); + + // Phase 2: a brand new actor + IoContext. The HM (held by the test) is unchanged. + auto request2 = fixture.newIncomingRequest(); + sendFromDo(fixture, *request2, *hm, "post-evict"_kj); + auto msg2 = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg2.is() && msg2.get() == "post-evict"_kj); + + fixture.drainAndDestroy(kj::mv(request2)); +} + +KJ_TEST("HibernationManager: in-flight DO close lost across IoContext destruction") { + // Cross-IoContext variant of the in-flight-close test above: same setup, but we drop the + // IR (destroying the IoContext) before the eyeball reads. The IoContext destruction + // cancels the pump task, which cancels the in-flight ws.close(), cleaning up the + // BlockedSend. The Close is silently lost — the eyeball never sees a clean WebSocket close. + // + // This is the close-race version of the silent-message-drop bug. WebSockets are supposed + // to be reliable; losing close frames is its own bug class. The unified-queue refactor's + // design (queue lives on the adapter, persists across IoContexts) addresses this + // incidentally — the close stays queued until actually delivered. + // + // Dropping the IR below (without draining — the pump task is stuck in waitUntilTasks, so + // drain() would hang) triggers a "failed to invoke drain()" warning. The block scope + // around that drop captures the warning so the test output stays clean. 
+ DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("close-race-cross-ioc"))); + auto hm = makeTestHm(fixture); + auto request1 = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request1, *hm); + + fixture.enterContext(*request1, [&](const TestFixture::Environment& env) { + auto& js = env.js; + auto websockets = hm->getWebSockets(js, kj::none); + websockets[0]->close(js, 1001, jsg::USVString(kj::str("doomed-close"))); + }); + fixture.pollEventLoop(); + + // Hibernate, then drop the IR (destroying the IoContext). The pump's in-flight ws.close() + // is canceled. + fixture.enterWorkerLock([&](Worker::Lock& lock) { hm->hibernateWebSockets(lock); }); + { + KJ_EXPECT_LOG(WARNING, "failed to invoke drain() on IncomingRequest before destroying it"); + request1 = nullptr; + } + fixture.resetActor(); + + // The eyeball's receive promise should NOT resolve to a Close — the close was canceled + // mid-send. Verify by polling: receive should not be ready immediately. (We can't easily + // assert "never resolves" in a test, so we observe the not-yet-ready state and move on.) + auto receivePromise = end1->receive(); + fixture.pollEventLoop(); + KJ_ASSERT(!receivePromise.poll(fixture.getWaitScope()), + "close was silently dropped across IoContext destruction; eyeball receives nothing"); + + // The new api::WebSocket has closedOutgoing=true (from the package), so the DO can't + // re-issue the close even if it wanted to. The eyeball is stuck. +} + +KJ_TEST("HibernationManager: in-flight DO send lost across IoContext destruction") { + // Data-frame sibling of the close-race test above. Same physics: pump stuck on a BlockedSend + // → IoContext destruction cancels mid-send → bytes gone. Both are flavors of the + // silent-message-drop bug class, and both are incidentally fixed by the unified-queue + // refactor (queue lives on the adapter, persists across IoContexts). 
+ // + // Unlike the close case there's no closedOutgoing equivalent to prevent further sends — on + // revival the DO's new api::WebSocket has a fresh queue and ws.send() resumes working + // normally, but the doomed message is permanently lost and the DO has no error path + // indicating non-delivery. + // + // Dropping the IR below (without draining — the pump task is stuck in waitUntilTasks, so + // drain() would hang) triggers a "failed to invoke drain()" warning. The block scope + // around that drop captures the warning so the test output stays clean. + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("send-loss-cross-ioc"))); + auto hm = makeTestHm(fixture); + auto request1 = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request1, *hm); + + sendFromDo(fixture, *request1, *hm, "doomed-message"_kj); + fixture.pollEventLoop(); + + // Hibernate, then drop the IR (destroying the IoContext). The pump's in-flight ws.send() + // is canceled. + fixture.enterWorkerLock([&](Worker::Lock& lock) { hm->hibernateWebSockets(lock); }); + { + KJ_EXPECT_LOG(WARNING, "failed to invoke drain() on IncomingRequest before destroying it"); + request1 = nullptr; + } + fixture.resetActor(); + + // The eyeball's receive promise should NOT resolve — the data frame was canceled mid-send. + // Verify by polling: receive should not be ready immediately. (We can't easily assert + // "never resolves" in a test, so we observe the not-yet-ready state and move on.) + auto receivePromise = end1->receive(); + fixture.pollEventLoop(); + KJ_ASSERT(!receivePromise.poll(fixture.getWaitScope()), + "data frame was silently dropped across IoContext destruction; eyeball receives nothing"); +} + +KJ_TEST("HibernationManager: in-flight auto-response orphans BlockedSend across actor eviction") { + // Regression test for EW-10817 — the production failure mode. sendAutoResponse runs from + // the HM's readLoop (on the HM's TaskSet, NOT in an IoContext). 
It does a direct + // kj::WebSocket::send that creates a BlockedSend on the pipe. IoContext destruction cancels + // pump tasks but not sendAutoResponse, so the BlockedSend survives the IoContext's death. + // After actor eviction and revival, the new api::WebSocket's pump trips on the orphan. See + // the same-IoContext auto-response variant above for the EXPECT_LOG / lifecycle details. + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("ew-10817-cross-autoresp"))); + auto hm = makeTestHm(fixture, "ping"_kj, "pong"_kj); + + auto request1 = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request1, *hm); + + // Eyeball sends ping → HM readLoop → sendAutoResponse → BlockedSend on the pipe. + end1->send("ping"_kj).wait(fixture.getWaitScope()); + fixture.pollEventLoop(); + + // Hibernate, drop the IR (IoContext1 is destroyed; the BlockedSend survives because + // sendAutoResponse runs outside any IoContext). Then evict the actor. + // + // Unlike the in-flight DO-close / DO-send variants above, no "failed to invoke drain() + // on IncomingRequest" warning fires here: sendAutoResponse runs on the HM's TaskSet and + // does not enqueue a waitUntil task on the IR, so the IR has nothing to drain at + // destruction. If a regression were to plumb sendAutoResponse through the IR's + // waitUntilTasks, that warning would start firing and this test would need updating. + fixture.enterWorkerLock([&](Worker::Lock& lock) { hm->hibernateWebSockets(lock); }); + request1 = nullptr; + fixture.resetActor(); + + // Phase 3: under a brand-new actor + IoContext, do something that starts a fresh pump. + // The new pump trips on the orphaned BlockedSend. 
+ auto request2 = fixture.newIncomingRequest(); + { + KJ_EXPECT_LOG(ERROR, "another message send is already in progress"); + fixture.enterContext(*request2, [&](const TestFixture::Environment& env) { + auto& js = env.js; + auto websockets = hm->getWebSockets(js, kj::none); + KJ_ASSERT(websockets.size() == 1); + websockets[0]->close(js, 1001, jsg::USVString(kj::str("post-evict"))); + }); + fixture.pollEventLoop(); + } + + // Receive the orphaned pong (see same-IoContext variant above for why), then drain. + end1->receive().wait(fixture.getWaitScope()); + fixture.drainAndDestroy(kj::mv(request2)); +} + +KJ_TEST("HibernationManager: DO send waits for the actor's output gate") { + // The pump calls IoContext::waitForOutputLocksIfNecessary() before each kj::WebSocket::send. + // Locking the actor's OutputGate should hold a DO-side message until the gate releases. + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("output-gate-do-send"))); + auto hm = makeTestHm(fixture); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + // Lock the output gate. `blocker` is the wrapped promise; keep it in scope until we've + // either fulfilled the underlying promise or are otherwise done. + auto paf = kj::newPromiseAndFulfiller(); + auto blocker = fixture.getActor().getOutputGate().lockWhile(kj::mv(paf.promise), nullptr); + + // DO sends a message. The pump should block on the gate. + sendFromDo(fixture, *request, *hm, "gated"_kj); + + // Set up the eyeball's receive promise without waiting. + auto receivePromise = end1->receive(); + + // Drive the loop; receivePromise should NOT be ready (gate still locked). + fixture.pollEventLoop(); + KJ_ASSERT(!receivePromise.poll(fixture.getWaitScope()), + "message should not have arrived while output gate is locked"); + + // Release the gate. The pump should now flush the message. 
+ paf.fulfiller->fulfill(); + auto msg = receivePromise.wait(fixture.getWaitScope()); + KJ_ASSERT(msg.is() && msg.get() == "gated"_kj); + + // blocker must outlive the gate-locking promise; let it die naturally at end of scope. + blocker.wait(fixture.getWaitScope()); + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: DO close waits for the actor's output gate") { + // Like the DO-send-waits-for-gate test, but for close. close() goes through the same pump + // (it inserts a Close GatedMessage into outgoingMessages with the current output lock), so + // it must wait for the gate to release before the close frame reaches the eyeball. + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("output-gate-do-close"))); + auto hm = makeTestHm(fixture); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + auto paf = kj::newPromiseAndFulfiller(); + auto blocker = fixture.getActor().getOutputGate().lockWhile(kj::mv(paf.promise), nullptr); + + fixture.enterContext(*request, [&](const TestFixture::Environment& env) { + auto& js = env.js; + auto websockets = hm->getWebSockets(js, kj::none); + KJ_ASSERT(websockets.size() == 1); + websockets[0]->close(js, 1001, jsg::USVString(kj::str("gated-bye"))); + }); + + auto receivePromise = end1->receive(); + fixture.pollEventLoop(); + KJ_ASSERT(!receivePromise.poll(fixture.getWaitScope()), + "close should not have arrived while output gate is locked"); + + paf.fulfiller->fulfill(); + auto msg = receivePromise.wait(fixture.getWaitScope()); + KJ_ASSERT(msg.is()); + auto& close = msg.get(); + KJ_ASSERT(close.code == 1001, close.code); + KJ_ASSERT(close.reason == "gated-bye"_kj, close.reason); + + blocker.wait(fixture.getWaitScope()); + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: auto-response (active) waits when pump is gate-blocked on a DO send") { + // When the pump is already running (isPumping == 
true) and stalled on the output gate for a + // queued DO message, an arriving auto-response request causes sendAutoResponse to push the + // pong onto pendingAutoResponseDeque. The pump only drains that deque after it finishes the + // outer outgoingMessages loop, so the pong waits for the gate to release transitively. + // + // Order at the eyeball: the gated DO message arrives first (after the gate releases), and + // the pong follows immediately after (line 998 in web-socket.c++). + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("output-gate-autoresp-gated"))); + auto hm = makeTestHm(fixture, "ping"_kj, "pong"_kj); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + auto paf = kj::newPromiseAndFulfiller(); + auto blocker = fixture.getActor().getOutputGate().lockWhile(kj::mv(paf.promise), nullptr); + + // DO sends "msg1" — pump starts, blocks on gate. + sendFromDo(fixture, *request, *hm, "msg1"_kj); + + // Eyeball sends ping. sendAutoResponse sees isPumping=true and queues "pong". + end1->send("ping"_kj).wait(fixture.getWaitScope()); + + // Neither msg1 nor pong has arrived yet. + auto receivePromise = end1->receive(); + fixture.pollEventLoop(); + KJ_ASSERT(!receivePromise.poll(fixture.getWaitScope()), + "msg1 should not have arrived while output gate is locked"); + + // Release the gate. msg1 flushes, then pong follows. 
+ paf.fulfiller->fulfill(); + auto msg1 = receivePromise.wait(fixture.getWaitScope()); + KJ_ASSERT(msg1.is() && msg1.get() == "msg1"_kj); + auto msg2 = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg2.is() && msg2.get() == "pong"_kj); + + blocker.wait(fixture.getWaitScope()); + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: auto-response (active) bypasses the output gate") { + // Documents CURRENT behavior: in active mode, sendAutoResponse uses a direct kj::WebSocket::send + // that doesn't go through the pump, and therefore doesn't check waitForOutputLocksIfNecessary. + // The unified-queue refactor planned for EW-10817 should change this so auto-response respects + // the output gate in active mode; flip this assertion when that lands. + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("output-gate-autoresp-active"))); + auto hm = makeTestHm(fixture, "ping"_kj, "pong"_kj); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + auto paf = kj::newPromiseAndFulfiller(); + auto blocker = fixture.getActor().getOutputGate().lockWhile(kj::mv(paf.promise), nullptr); + + // Eyeball sends ping; auto-response should send pong despite the gate being locked. + end1->send("ping"_kj).wait(fixture.getWaitScope()); + auto msg = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg.is() && msg.get() == "pong"_kj); + + paf.fulfiller->fulfill(); + blocker.wait(fixture.getWaitScope()); + fixture.drainAndDestroy(kj::mv(request)); +} + +KJ_TEST("HibernationManager: auto-response (hibernated) bypasses the output gate") { + // Documents CURRENT behavior. The hibernated-mode readLoop sends the pong directly on the + // kj::WebSocket without an IoContext, so the actor's output gate never enters the picture + // (and couldn't be checked anyway, since waitForOutputLocksIfNecessary needs an IoContext). 
+ DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("output-gate-autoresp-hib"))); + auto hm = makeTestHm(fixture, "ping"_kj, "pong"_kj); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + fixture.enterWorkerLock([&](Worker::Lock& lock) { hm->hibernateWebSockets(lock); }); + + auto paf = kj::newPromiseAndFulfiller(); + auto blocker = fixture.getActor().getOutputGate().lockWhile(kj::mv(paf.promise), nullptr); + + end1->send("ping"_kj).wait(fixture.getWaitScope()); + auto msg = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg.is() && msg.get() == "pong"_kj); + + paf.fulfiller->fulfill(); + blocker.wait(fixture.getWaitScope()); + fixture.drainAndDestroy(kj::mv(request)); +} + +// Regression test for VULN-136638. +// +// In the hibernated branch of readLoop, an auto-response was sent by passing the borrowed +// kj::ArrayPtr from autoResponsePair->response directly into ws.send(), then suspending on +// co_await. kj::WebSocket::send() borrows that ArrayPtr across the suspension (http.h:633: +// "The underlying buffer must remain valid ... until the returned promise resolves"). A +// concurrent setWebSocketAutoResponse() call from JS would free the borrowed buffer mid-send. +// +// This test parks the readLoop at the co_await with the borrow in flight, then calls +// setWebSocketAutoResponse(kj::none, kj::none) to free the kj::String backing the borrowed +// pointer, then drains the eyeball. Under ASAN, the pipe's receive reading through the +// freed pointer trips a use-after-free report. With the fix in place (a coroutine-local +// kj::str(...) copy in the hibernated branch), the receive returns the original bytes +// cleanly and the assertion passes. +// +// Outside ASAN this test only catches the bug probabilistically — the freed bytes may still +// be readable. CI runs ASAN, so the regression is caught there. 
+KJ_TEST("HibernationManager: hibernated auto-response copies buffer before suspending send " + "(regression VULN-136638)") { + DispatchStats stats; + TestFixture fixture(stubLoopbackParams(stats, kj::str("vuln-136638-autoresp-uaf"))); + + // Use a distinct, non-trivial response so the comparison at the end is unambiguous and any + // partial overwrite under non-ASAN is more likely to be detectable. + constexpr kj::StringPtr kResponse = "AUTO-RESPONSE-PAYLOAD-VULN-136638"_kj; + + auto hm = makeTestHm(fixture, "ping"_kj, kResponse); + auto request = fixture.newIncomingRequest(); + auto end1 = acceptNewWebSocket(fixture, *request, *hm); + + fixture.enterWorkerLock([&](Worker::Lock& lock) { hm->hibernateWebSockets(lock); }); + + // Eyeball sends ping. end1->send().wait() returns once the readLoop's ws.receive() has + // consumed the message, but the readLoop may not yet have reached the + // ws.send(...).fork() / co_await p inside the hibernated branch — drive the event loop + // until it does. After this point, the readLoop is parked at co_await p with a + // BlockedSend on the pipe holding (under the bug) a borrowed pointer into + // autoResponsePair->response's heap buffer. + end1->send("ping"_kj).wait(fixture.getWaitScope()); + fixture.pollEventLoop(); + + // Free the borrowed buffer by clearing the auto-response pair. Production reaches this + // synchronously from actor-state.c++:setWebSocketAutoResponse, which would race with the + // parked readLoop. Here we call it directly while the readLoop is suspended — same effect, + // deterministic. + hm->setWebSocketAutoResponse(kj::none, kj::none); + + // Drain the eyeball. With the fix, the pipe reads the coroutine-local copy and we receive + // the original bytes. Without the fix and under ASAN, the pipe reads freed memory and ASAN + // fails the test with a use-after-free report. 
+ auto msg = end1->receive().wait(fixture.getWaitScope()); + KJ_ASSERT(msg.is(), "expected auto-response string message"); + KJ_ASSERT(msg.get() == kResponse, "auto-response bytes were corrupted", + msg.get(), kResponse); + + fixture.drainAndDestroy(kj::mv(request)); +} + +} // namespace +} // namespace workerd diff --git a/src/workerd/io/hibernation-manager.c++ b/src/workerd/io/hibernation-manager.c++ index e98a266faeb..cd5485d8f60 100644 --- a/src/workerd/io/hibernation-manager.c++ +++ b/src/workerd/io/hibernation-manager.c++ @@ -7,7 +7,6 @@ #include "io-channels.h" #include "io-context.h" -#include #include namespace workerd { @@ -115,11 +114,9 @@ void HibernationManagerImpl::acceptWebSocket( // TODO(mar): Improve accept span context capturing — route snapshotted user span context // to serialization point instead of capturing only the invocation root span here. - if (util::Autogate::isEnabled(util::AutogateKey::USER_SPAN_CONTEXT_PROPAGATION)) { - auto invCtx = IoContext::current().getInvocationSpanContext(); - refToHibernatable.userSpanContext = - tracing::SpanContext(invCtx.getTraceId(), invCtx.getSpanId()); - } + auto invCtx = IoContext::current().getInvocationSpanContext(); + refToHibernatable.userSpanContext = + tracing::SpanContext(invCtx.getTraceId(), invCtx.getSpanId()); allWs.push_front(kj::mv(hib)); refToHibernatable.node = allWs.begin(); @@ -329,6 +326,15 @@ kj::Promise HibernationManagerImpl::readLoop(HibernatableWebSocket& hib) { // We'll store the current timestamp in the HibernatableWebSocket to assure it gets // stored even if the WebSocket is currently hibernating. In that scenario, the timestamp // value will be loaded into the WebSocket during unhibernation. + // Copy autoResponsePair->response into a coroutine-local kj::String before either + // branch sends it. 
The hibernated branch's ws.send() borrows the underlying + // ArrayPtr across the co_await per kj::WebSocket::send()'s documented contract, + // and any concurrent JS call to state.setWebSocketAutoResponse() would reassign + // or clear autoResponsePair->response, freeing the buffer while the write is + // still in flight. The active branch's sendAutoResponse takes ownership of the + // kj::String anyway, so hoisting the copy serves both cases with a single + // allocation. + auto responseCopy = kj::str(KJ_REQUIRE_NONNULL(autoResponsePair->response)); KJ_SWITCH_ONEOF(hib.activeOrPackage) { KJ_CASE_ONEOF(apiWs, jsg::Ref) { // If the actor is not hibernated/If the WebSocket is active, we need to update @@ -337,8 +343,7 @@ kj::Promise HibernationManagerImpl::readLoop(HibernatableWebSocket& hib) { // Since we had a request set, we must have and response that's sent back using the // same websocket here. The sending of response is managed in web-socket to avoid // possible racing problems with regular websocket messages. - co_await apiWs->sendAutoResponse( - kj::str(KJ_REQUIRE_NONNULL(autoResponsePair->response).asArray()), ws); + co_await apiWs->sendAutoResponse(kj::mv(responseCopy), ws); } KJ_CASE_ONEOF(package, api::WebSocket::HibernationPackage) { if (!package.closedOutgoingConnection) { @@ -346,7 +351,7 @@ kj::Promise HibernationManagerImpl::readLoop(HibernatableWebSocket& hib) { // If we do that, we have to provide it with the promise to avoid races. This can // happen if we have a websocket hibernating, that unhibernates and sends a // message while ws.send() for auto-response is also sending. 
- auto p = ws.send(KJ_REQUIRE_NONNULL(autoResponsePair->response).asArray()).fork(); + auto p = ws.send(responseCopy.asArray()).fork(); hib.autoResponsePromise = p.addBranch(); co_await p; hib.autoResponsePromise = kj::READY_NOW; diff --git a/src/workerd/io/io-channels.c++ b/src/workerd/io/io-channels.c++ index a4015d2642d..7337ee65e2c 100644 --- a/src/workerd/io/io-channels.c++ +++ b/src/workerd/io/io-channels.c++ @@ -5,20 +5,7 @@ namespace workerd { -kj::Promise> IoChannelFactory::SubrequestChannel::getToken( - ChannelTokenUsage usage) { - KJ_SWITCH_ONEOF(getTokenMaybeSync(usage)) { - KJ_CASE_ONEOF(token, kj::Array) { - return kj::mv(token); - } - KJ_CASE_ONEOF(promise, kj::Promise>) { - return kj::mv(promise); - } - } - KJ_UNREACHABLE; -} - -kj::Promise> IoChannelFactory::ActorClassChannel::getToken( +kj::Promise> IoChannelFactory::TokenizableChannel::getToken( ChannelTokenUsage usage) { KJ_SWITCH_ONEOF(getTokenMaybeSync(usage)) { KJ_CASE_ONEOF(token, kj::Array) { @@ -44,26 +31,15 @@ kj::Own IoChannelFactory::actorClassFromTok namespace { -class PromisedSubrequestChannel final: public IoChannelFactory::SubrequestChannel { +template +class PromisedTokenizableChannel: public ChannelType { public: - PromisedSubrequestChannel(kj::Promise> promise) + PromisedTokenizableChannel(kj::Promise> promise) : readyPromise(waitForResolution(kj::mv(promise)).fork()) {} - kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { - KJ_IF_SOME(channel, inner) { - return channel->startRequest(kj::mv(metadata)); - } else { - return newPromisedWorkerInterface(readyPromise.addBranch().then( - [self = addRefToThis(), metadata = kj::mv(metadata)]() mutable { - return KJ_ASSERT_NONNULL(self->inner)->startRequest(kj::mv(metadata)); - })); - } - } - void requireAllowsTransfer() override { - // PromisedSubrequestChannel is used for channels initialized from a promised channel token. 
- // A SubrequestChannel created from a channel token should always support transfer, via channel - // tokens. + // PromisedTokenizableChannel is used for channels initialized from a promised channel token. + // A channel created from a channel token should always support transfer, via channel tokens. } kj::OneOf, kj::Promise>> getTokenMaybeSync( @@ -85,118 +61,73 @@ class PromisedSubrequestChannel final: public IoChannelFactory::SubrequestChanne } } - kj::OneOf, kj::Promise>> getResolved() - override { + kj::OneOf, + kj::Promise>> + getResolved() override { KJ_IF_SOME(channel, inner) { - return kj::addRef(*channel); + return kj::addRef(*channel); } else { - return readyPromise.addBranch().then( - [this]() mutable { return kj::addRef(*KJ_ASSERT_NONNULL(inner)); }); + return readyPromise.addBranch().then([this]() mutable { + return kj::addRef(*KJ_ASSERT_NONNULL(inner)); + }); } } - private: - kj::Maybe> inner; + protected: + kj::Maybe> inner; kj::ForkedPromise readyPromise; - kj::Promise waitForResolution(kj::Promise> promise) { - for (;;) { - auto resolution = co_await promise; - KJ_SWITCH_ONEOF(resolution->getResolved()) { - KJ_CASE_ONEOF(channel, kj::Own) { - inner = kj::mv(channel); - co_return; - } - KJ_CASE_ONEOF(deeperPromise, kj::Promise>) { - // Promise resolved to another promise, wait for it too. - promise = kj::mv(deeperPromise); - } + kj::Promise waitForResolution(kj::Promise> promise) { + kj::Own resolution = co_await promise; + + KJ_SWITCH_ONEOF(resolution->getResolved()) { + KJ_CASE_ONEOF(channel, kj::Own) { + inner = channel.template downcast(); + co_return; + } + KJ_CASE_ONEOF(deeperPromise, kj::Promise>) { + // Promise resolved to another promise, wait for it too. + // + // Note that a promise returned by `getResolved()` will always itself resolve to a + // fully-resolved channel object, so we don't need to loop here. 
+ inner = (co_await deeperPromise).template downcast(); } } } }; -class PromisedActorClassChannel final: public IoChannelFactory::ActorClassChannel { +class PromisedSubrequestChannel final + : public PromisedTokenizableChannel { public: - PromisedActorClassChannel(kj::Promise> promise) - : readyPromise(waitForResolution(kj::mv(promise)).fork()) {} - - void requireAllowsTransfer() override { - // PromisedActorClassChannel is used for channels initialized from a promised channel token. - // A ActorClassChannel created from a channel token should always support transfer, via channel - // tokens. - } - - kj::OneOf, kj::Promise>> getTokenMaybeSync( - IoChannelFactory::ChannelTokenUsage usage) override { - KJ_IF_SOME(channel, inner) { - return channel->getTokenMaybeSync(usage); - } else { - return readyPromise.addBranch().then([this, usage]() -> kj::Promise> { - KJ_SWITCH_ONEOF(KJ_ASSERT_NONNULL(inner)->getTokenMaybeSync(usage)) { - KJ_CASE_ONEOF(token, kj::Array) { - return kj::mv(token); - } - KJ_CASE_ONEOF(promise, kj::Promise>) { - return kj::mv(promise); - } - } - KJ_UNREACHABLE; - }); - } - } + using PromisedTokenizableChannel::PromisedTokenizableChannel; - kj::OneOf, kj::Promise>> getResolved() - override { + kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { KJ_IF_SOME(channel, inner) { - return kj::addRef(*channel); + return channel->startRequest(kj::mv(metadata)); } else { - return readyPromise.addBranch().then( - [this]() mutable { return kj::addRef(*KJ_ASSERT_NONNULL(inner)); }); + return newPromisedWorkerInterface(readyPromise.addBranch().then( + [self = addRefToThis(), metadata = kj::mv(metadata)]() mutable { + return KJ_ASSERT_NONNULL(self->inner)->startRequest(kj::mv(metadata)); + })); } } +}; - private: - kj::Maybe> inner; - kj::ForkedPromise readyPromise; - - kj::Promise waitForResolution(kj::Promise> promise) { - for (;;) { - auto resolution = co_await promise; - KJ_SWITCH_ONEOF(resolution->getResolved()) { - 
KJ_CASE_ONEOF(channel, kj::Own) { - inner = kj::mv(channel); - co_return; - } - KJ_CASE_ONEOF(deeperPromise, kj::Promise>) { - promise = kj::mv(deeperPromise); - } - } - } - } +class PromisedActorClassChannel final + : public PromisedTokenizableChannel { + public: + using PromisedTokenizableChannel::PromisedTokenizableChannel; }; kj::OneOf, kj::Promise>> resolveCap(kj::Own cap) { - KJ_IF_SOME(typed, kj::tryDowncast(*cap)) { - KJ_SWITCH_ONEOF(typed.getResolved()) { - KJ_CASE_ONEOF(channel, kj::Own) { - return kj::implicitCast>(kj::mv(channel)); - } - KJ_CASE_ONEOF(promise, kj::Promise>) { - return promise.then([](kj::Own channel) { - return kj::implicitCast>(kj::mv(channel)); - }); - } - } - KJ_UNREACHABLE; - } else KJ_IF_SOME(typed, kj::tryDowncast(*cap)) { + KJ_IF_SOME(typed, kj::tryDowncast(*cap)) { KJ_SWITCH_ONEOF(typed.getResolved()) { - KJ_CASE_ONEOF(channel, kj::Own) { + KJ_CASE_ONEOF(channel, kj::Own) { return kj::implicitCast>(kj::mv(channel)); } - KJ_CASE_ONEOF(promise, kj::Promise>) { - return promise.then([](kj::Own channel) { + KJ_CASE_ONEOF(promise, kj::Promise>) { + return promise.then([](kj::Own channel) { return kj::implicitCast>(kj::mv(channel)); }); } @@ -276,19 +207,6 @@ kj::Own IoChannelFactory::actorClassFromTok [this, usage](kj::Array token) { return actorClassFromToken(usage, token.asPtr()); })); } -void IoChannelFactory::ActorChannel::requireAllowsTransfer() { - JSG_FAIL_REQUIRE(DOMDataCloneError, - "Durable Object stubs cannot (yet) be transferred between Workers. This will change in " - "a future version."); -} - -kj::OneOf, kj::Promise>> IoChannelFactory::ActorChannel:: - getTokenMaybeSync(ChannelTokenUsage usage) { - JSG_FAIL_REQUIRE(DOMDataCloneError, - "Durable Object stubs cannot (yet) be transferred between Workers. 
This will change in " - "a future version."); -} - kj::Promise DynamicWorkerSource::ensureAllResolved() { kj::Vector> promises; @@ -298,12 +216,12 @@ kj::Promise DynamicWorkerSource::ensureAllResolved() { auto resolveChannelSlot = [&](kj::Own& slot) { KJ_SWITCH_ONEOF(slot->getResolved()) { - KJ_CASE_ONEOF(channel, kj::Own) { - slot = kj::mv(channel); + KJ_CASE_ONEOF(channel, kj::Own) { + slot = channel.downcast(); } - KJ_CASE_ONEOF(promise, kj::Promise>) { - promises.add(promise.then([&slot](kj::Own channel) { - slot = kj::mv(channel); + KJ_CASE_ONEOF(promise, kj::Promise>) { + promises.add(promise.then([&slot](kj::Own channel) { + slot = channel.downcast(); })); } } @@ -327,11 +245,11 @@ kj::Promise DynamicWorkerSource::ensureAllResolved() { kj::Promise Worker::Actor::FacetManager::StartInfo::ensureAllResolved() { KJ_SWITCH_ONEOF(actorClass->getResolved()) { - KJ_CASE_ONEOF(channel, kj::Own) { - actorClass = kj::mv(channel); + KJ_CASE_ONEOF(channel, kj::Own) { + actorClass = channel.downcast(); } - KJ_CASE_ONEOF(promise, kj::Promise>) { - actorClass = co_await promise; + KJ_CASE_ONEOF(promise, kj::Promise>) { + actorClass = (co_await promise).downcast(); } } } @@ -353,4 +271,18 @@ kj::Own IoChannelCapTableEntry::threadSafeClone() c return kj::heap(type, channel); } +template <> +kj::Own newPromisedChannel< + IoChannelFactory::SubrequestChannel>( + kj::Promise> promise) { + return kj::refcounted(kj::mv(promise)); +} + +template <> +kj::Own newPromisedChannel< + IoChannelFactory::ActorClassChannel>( + kj::Promise> promise) { + return kj::refcounted(kj::mv(promise)); +} + } // namespace workerd diff --git a/src/workerd/io/io-channels.h b/src/workerd/io/io-channels.h index a45b9693f39..27c26b75e49 100644 --- a/src/workerd/io/io-channels.h +++ b/src/workerd/io/io-channels.h @@ -167,26 +167,15 @@ class IoChannelFactory { STORAGE, }; - // Object representing somehere where generic workers subrequests can be sent. Multiple requests - // may be sent. 
This is an I/O type so it is only valid within the `IoContext` where it was - // created. - class SubrequestChannel: public kj::Refcounted, public Frankenvalue::CapTableEntry { + // Base class for all channel types that can be tokenized, e.g. SubrequestChannel, + // ActorClassChannel. + class TokenizableChannel: public kj::Refcounted, public Frankenvalue::CapTableEntry { public: - // Start a new request to this target. - // - // Note that not all `metadata` properties make sense here, but it didn't seem worth defining - // a new struct type. `cfBlobJson` and `parentSpan` make sense, but `featureFlagsForFl` and - // `dynamicDispatchTarget` do not. - // - // Note that the caller is expected to keep the SubrequestChannel alive until it is done with - // the returned WorkerInterface. - virtual kj::Own startRequest(SubrequestMetadata metadata) = 0; - kj::Own clone() override final { return kj::addRef(*this); } - // Throws a JSG error if a Fetcher backed by this channel should not be serialized and passed + // Throws a JSG error if an object backed by this channel should not be serialized and passed // to other workers. The default implementation throws a generic error, but subclasses may // specialize with better errror messages -- or override to just return in order to permit the // serialization. @@ -195,10 +184,13 @@ class IoChannelFactory { // in production, would be difficult or impossible to serialize. In particular, // dynamically-loaded workers cannot be serialized because the system does not know how to // reconstruct a dynamically-loaded worker from scratch. + // + // TODO(cleanup): Maybe we can remove this by having everyone call getToken() as a way to check + // transferability, even in cases where we don't necessarily use the token? virtual void requireAllowsTransfer() = 0; - // Get a token representing this SubrequestChannel which can be converted back into a - // SubrequestChannel using subrequestChannelFromToken(). 
This is a convenience wrapper around + // Get a token representing this TokenizableChannel which can be converted back into a + // channel object using `IoChannelFactory::*FromToken()`. This is a convenience wrapper around // getTokenMaybeSync() for callers that don't care about the synchronous optimization. kj::Promise> getToken(ChannelTokenUsage usage); @@ -209,10 +201,13 @@ class IoChannelFactory { virtual kj::OneOf, kj::Promise>> getTokenMaybeSync( ChannelTokenUsage usage) = 0; - // If this SubrequestChannel is just a wrapper around a promise for some later - // SubrequestChannel, return the inner channel -- synchronously if the promise has resolved + // If this TokenizableChannel is just a wrapper around a promise for some later + // TokenizableChannel, return the inner channel -- synchronously if the promise has resolved // already, otherwise asynchronously. // + // The resolved channel is *always* the same kind (e.g. SubrequestChannel) as this one, so can + // be safely downcast without a runtime check. + // // Note that the various `IoChannelFactory` methods that take `props` or `env` objects all // automatically resolve all channel objects *before* passing off to the underlying // implementation. In the internal codebase, implementations end up needing to downcast these @@ -220,12 +215,28 @@ class IoChannelFactory { // in every use case would be painful, so it is taken care of in this layer. // // Default implementation returns self. - virtual kj::OneOf, kj::Promise>> + virtual kj::OneOf, kj::Promise>> getResolved() { return kj::addRef(*this); } }; + // Object representing somewhere where generic workers subrequests can be sent. Multiple requests + // may be sent. This is an I/O type so it is only valid within the `IoContext` where it was + // created. + class SubrequestChannel: public TokenizableChannel { + public: + // Start a new request to this target. 
+ // + // Note that not all `metadata` properties make sense here, but it didn't seem worth defining + // a new struct type. `cfBlobJson` and `parentSpan` make sense, but `featureFlagsForFl` and + // `dynamicDispatchTarget` do not. + // + // Note that the caller is expected to keep the SubrequestChannel alive until it is done with + // the returned WorkerInterface. + virtual kj::Own startRequest(SubrequestMetadata metadata) = 0; + }; + // Obtain an object representing a particular subrequest channel. // // getSubrequestChannel(i).startRequest(meta) is exactly equivalent to startSubrequest(i, meta). @@ -249,18 +260,9 @@ class IoChannelFactory { virtual kj::Own getSubrequestChannelResolved( uint channel, kj::Maybe props, kj::Maybe versionRequest) = 0; - // Stub for a remote actor. Allows sending requests to the actor. - class ActorChannel: public SubrequestChannel { - public: - // At present there are no methods beyond what `SubrequestChannel` defines. However, it's - // easy to imagine that actor stubs may have more functionality than just sending requests - // someday, so we keep this as a separate type. - - // For now, actor stubs are not transferrable -- but we do intend to change that at some point. - void requireAllowsTransfer() override final; - kj::OneOf, kj::Promise>> getTokenMaybeSync( - ChannelTokenUsage usage) override final; - }; + // ActorChannel used to be its own type, but no longer is. + // TODO(cleanup): Update all references. + using ActorChannel = SubrequestChannel; // Get an actor stub from the given namespace for the actor with the given ID. // @@ -284,22 +286,8 @@ class IoChannelFactory { // ActorClassChannel is a reference to an actor class in another worker. This class acts as a // token which can be passed into other interfaces that might use the actor class, particularly // Worker::Actor::FacetManager. 
- class ActorClassChannel: public kj::Refcounted, public Frankenvalue::CapTableEntry { + class ActorClassChannel: public TokenizableChannel { public: - kj::Own clone() override final { - return kj::addRef(*this); - } - - // Same as the corresponding methods on SubrequestChannel. - virtual void requireAllowsTransfer() = 0; - kj::Promise> getToken(ChannelTokenUsage usage); - virtual kj::OneOf, kj::Promise>> getTokenMaybeSync( - ChannelTokenUsage usage) = 0; - virtual kj::OneOf, kj::Promise>> - getResolved() { - return kj::addRef(*this); - } - // This class has no functional methods, since it serves as a token to be passed to other // interfaces (namely the facets API). }; @@ -489,4 +477,20 @@ class IoChannelCapTableEntry final: public Frankenvalue::CapTableEntry { uint channel; }; +// Construct a channel based on a promise for a future channel. These channels' `getResolved()` +// methods will resolve to the underlying channel. `BaseChannelType` must be either +// `SubrequestChannel` or `ActorClassChannel`. 
+template +kj::Own newPromisedChannel(kj::Promise> promise); + +template <> +kj::Own newPromisedChannel< + IoChannelFactory::SubrequestChannel>( + kj::Promise> promise); + +template <> +kj::Own newPromisedChannel< + IoChannelFactory::ActorClassChannel>( + kj::Promise> promise); + } // namespace workerd diff --git a/src/workerd/io/io-context.c++ b/src/workerd/io/io-context.c++ index ac35d319934..1e138904ae8 100644 --- a/src/workerd/io/io-context.c++ +++ b/src/workerd/io/io-context.c++ @@ -4,16 +4,17 @@ #include "io-context.h" +#include #include #include #include #include #include -#include #include #include #include #include +#include #include @@ -218,11 +219,13 @@ IoContext::IncomingRequest::IoContext_IncomingRequest(kj::Own context kj::Own ioChannelFactoryParam, kj::Own metricsParam, kj::Maybe> workerTracer, - kj::Maybe maybeTriggerInvocationSpan) + kj::Maybe maybeTriggerInvocationSpan, + kj::Maybe> accessInfo) : context(kj::mv(contextParam)), metrics(kj::mv(metricsParam)), workerTracer(kj::mv(workerTracer)), ioChannelFactory(kj::mv(ioChannelFactoryParam)), + accessInfo(kj::mv(accessInfo)), maybeTriggerInvocationSpan(kj::mv(maybeTriggerInvocationSpan)) {} tracing::InvocationSpanContext& IoContext::IncomingRequest::getInvocationSpanContext() { @@ -268,13 +271,9 @@ void IoContext::IncomingRequest::delivered(kj::SourceLocation location) { // IoContext's delete queue) are safe: user-tracing SpanSubmitters hold only a // BaseTracer::WeakRef, so they cannot extend tracer lifetime. 
KJ_IF_SOME(workerTracer, workerTracer) { - if (util::Autogate::isEnabled(util::AutogateKey::USER_SPAN_CONTEXT_PROPAGATION)) { - auto& invCtx = getInvocationSpanContext(); - rootUserTraceSpan = - workerTracer->makeUserRequestSpan(invCtx.getTraceId(), invCtx.getTraceFlags()); - } else { - rootUserTraceSpan = workerTracer->makeUserRequestSpan(tracing::TraceId(nullptr), kj::none); - } + auto& invCtx = getInvocationSpanContext(); + rootUserTraceSpan = + workerTracer->makeUserRequestSpan(invCtx.getTraceId(), invCtx.getTraceFlags()); } KJ_IF_SOME(a, context->actor) { @@ -539,13 +538,55 @@ void IoContext::addWaitUntil(kj::Promise promise) { waitUntilTasks.add(kj::mv(promise)); } +namespace { + +#ifdef KJ_DEBUG + +void requestGc(const Worker& worker) { + TRACE_EVENT("workerd", "Debug: requestGc()"); + jsg::runInV8Stack([&](jsg::V8StackScope& stackScope) { + auto& isolate = worker.getIsolate(); + auto lock = isolate.getApi().lock(stackScope); + lock->requestGcForTesting(); + }); +} + +template +kj::Promise addGcPassForTest(IoContext& context, kj::Promise promise) { + TRACE_EVENT("workerd", "Debug: addGcPassForTest"); + auto worker = kj::atomicAddRef(context.getWorker()); + if constexpr (kj::isSameType()) { + co_await promise; + requestGc(*worker); + } else { + auto ret = co_await promise; + requestGc(*worker); + co_return kj::mv(ret); + } +} + +#endif // KJ_DEBUG + +} // namespace + +template +kj::Promise IoContext::IncomingRequest::maybeAddGcPassForTest(kj::Promise promise) { +#ifdef KJ_DEBUG + if (isPredictableModeForTest()) { + return addGcPassForTest(*context, kj::mv(promise)); + } +#endif + return kj::mv(promise); +} + // Mark ourselves so we know that we made a best effort attempt to wait for waitUntilTasks. 
-kj::Promise IoContext::IncomingRequest::drain() { +void IoContext::IncomingRequest::drain( + kj::TaskSet& waitUntilTasks, kj::Own&& self) { waitedForWaitUntil = true; if (&context->incomingRequests.front() != this) { // A newer request was received, so draining isn't our job. - return kj::READY_NOW; + return; } kj::Promise timeoutPromise = nullptr; @@ -571,12 +612,28 @@ kj::Promise IoContext::IncomingRequest::drain() { }; timeoutPromise = context->limitEnforcer->limitDrain().then(kj::mv(timeoutLogPromise)); } - return context->waitUntilTasks.onEmpty() - .exclusiveJoin(kj::mv(timeoutPromise)) - .exclusiveJoin(context->onAbort().catch_([](kj::Exception&&) {})); + auto result = context->waitUntilTasks.onEmpty() + .exclusiveJoin(kj::mv(timeoutPromise)) + .exclusiveJoin(context->onAbort()); + + result = result.attach(kj::mv(self)); + + KJ_IF_SOME(a, context->actor) { + // Make sure the drain is canceled and the IncomingRequest dropped on actor abort. + result = a.getAbortCanceler().wrap(kj::mv(result)); + } + + // We actually don't want the promise we put in `waitUntilTasks` to report errors when aborted. + // Abort errors are already propagated to any connected clients and other places. Note that + // `waitUntilTasks.onEmpty()` never throws, and `timeoutPromise` as constructed above also never + // throws, so this is just squelching abort errors. + result = result.catch_([](kj::Exception&&) {}); + + waitUntilTasks.add(maybeAddGcPassForTest(kj::mv(result))); } -kj::Promise IoContext::IncomingRequest::finishScheduled() { +kj::Promise IoContext::IncomingRequest::finishScheduled( + kj::Own&& self) { // TODO(someday): In principle we should be able to support delivering the "scheduled" event type // to an actor, and this may be important if we open up the whole of WorkerInterface to be // callable from any stub. However, the logic around async tasks would have to be different. 
We @@ -591,18 +648,28 @@ kj::Promise IoContext::IncomingRequest::finishScheduled() { context->incomingRequests.front().waitedForWaitUntil = true; auto timeoutPromise = context->limitEnforcer->limitScheduled().then([] { - // TODO(soon): The limit being hit here is a wall time limit. Can we report an - // "exceededWallTime" outcome instead? - return EventOutcome::EXCEEDED_CPU; + return EventOutcome::EXCEEDED_WALL_TIME; }); - return context->waitUntilTasks.onEmpty() - .then([]() { return EventOutcome::OK; }) - .exclusiveJoin(kj::mv(timeoutPromise)) - .exclusiveJoin(context->onAbort().then([] { + auto outcome = context->waitUntilTasks.onEmpty() + .then([this]() { return context->waitUntilStatus(); }) + .exclusiveJoin(kj::mv(timeoutPromise)) + .exclusiveJoin(context->onAbort().then([] { // abortFulfiller should only ever be rejected instead of being fulfilled, return an // internalError outcome if it does happen return EventOutcome::INTERNAL_ERROR; - }, [](kj::Exception&& e) { return RequestObserver::outcomeFromException(e); })); + }, [](kj::Exception&& e) { + KJ_LOG(INFO, "execution context aborted", e); // for tests + return RequestObserver::outcomeFromException(e); + })); + + auto result = outcome.then([this](EventOutcome outcome) { + return WorkerInterface::ScheduledResult{ + .retry = context->shouldRetryScheduled(), + .outcome = outcome, + }; + }); + + return maybeAddGcPassForTest(result.attach(kj::mv(self))); } class IoContext::PendingEvent: public kj::Refcounted { diff --git a/src/workerd/io/io-context.h b/src/workerd/io/io-context.h index 34300da17b9..4106ebffd8e 100644 --- a/src/workerd/io/io-context.h +++ b/src/workerd/io/io-context.h @@ -8,6 +8,7 @@ #include "worker.h" #include +#include #include #include #include @@ -29,6 +30,8 @@ #include #include +#include + namespace workerd { class WorkerTracer; class BaseTracer; @@ -71,7 +74,8 @@ class IoContext_IncomingRequest final { kj::Own ioChannelFactory, kj::Own metrics, kj::Maybe> workerTracer, - kj::Maybe 
maybeTriggerInvocationSpan); + kj::Maybe maybeTriggerInvocationSpan, + kj::Maybe> accessInfo = kj::none); KJ_DISALLOW_COPY_AND_MOVE(IoContext_IncomingRequest); ~IoContext_IncomingRequest() noexcept(false); @@ -90,13 +94,22 @@ class IoContext_IncomingRequest final { // If delivered() is never called, then drain() need not be called. void delivered(kj::SourceLocation = kj::SourceLocation()); - // Waits until the request is "done". For non-actor requests this means waiting until - // all "waitUntil" tasks finish, applying the "soft timeout" time limit from WorkerLimits. + // Continues running the request in the background until it is "done", scheduling the work into + // `waitUntilTasks` and keeping `self` alive until work is finished. + // + // For non-actor requests this means waiting until all "waitUntil" tasks finish, applying the + // "soft timeout" time limit from WorkerLimits. // // For actor requests, this means waiting until either all tasks have finished (not just // waitUntil, all tasks), or a new incoming request has been received (which then takes over // responsibility for waiting for tasks), or the actor is shut down. - kj::Promise drain(); + // + // Note: `self` is declared as an rvalue reference here to ensure that if you write something + // like `incomingRequest->drain(tasks, kj::mv(incomingRequest))`, the value of + // `incomingRequest` will not be moved away until after the invocation of `drain()`. Otherwise, + // the evaluation order would be unspecified and `incomingRequest->drain()` could be + // dereferencing a moved-away pointer. + void drain(kj::TaskSet& waitUntilTasks, kj::Own&& self); // Waits for all "waitUntil" tasks to finish, up to the time limit for scheduled events, as // defined by `scheduledTimeoutMs` in `WorkerLimits`. 
Returns an enum indicating the event outcome @@ -110,7 +123,12 @@ class IoContext_IncomingRequest final { // This method is also used by some custom event handlers (see WorkerInterface::CustomEvent) that // need similar behavior, as well as the test handler. TODO(cleanup): Rename to something more // generic? - kj::Promise finishScheduled(); + // + // Similar to drain(), the IncomingRequest self-reference needs to be passed into this method. + // This allows finishScheduled() to arrange for the IncomingRequest to be *synchronously* dropped + // in certain situations (such as when an Actor is aborted). + kj::Promise finishScheduled( + kj::Own&& self); // Access the event loop's current time point. This will remain constant between ticks. This is // used to implement IoContext::now(), which should be preferred so that time can be adjusted @@ -131,6 +149,12 @@ class IoContext_IncomingRequest final { return rootUserTraceSpan.addRef(); } + // The Cloudflare Access auth info for this request, if any was provided by the embedder. Used + // to populate `ctx.access` in JavaScript. + kj::Maybe getAccessInfo() { + return accessInfo.map([](kj::Own& p) -> AccessInfo& { return *p; }); + } + // The invocation span context is a unique identifier for a specific // worker invocation. tracing::InvocationSpanContext& getInvocationSpanContext(); @@ -140,6 +164,7 @@ class IoContext_IncomingRequest final { kj::Own metrics; kj::Maybe> workerTracer; kj::Own ioChannelFactory; + kj::Maybe> accessInfo; // Root user trace span for this request. Populated during delivered() via // BaseTracer::makeUserRequestSpan(); otherwise a null SpanParent. The tracer it references @@ -172,6 +197,9 @@ class IoContext_IncomingRequest final { // Tracks the location where delivered() was called for debugging. 
kj::Maybe deliveredLocation; + template + kj::Promise maybeAddGcPassForTest(kj::Promise promise); + friend class IoContext; }; @@ -240,6 +268,13 @@ class IoContext final: public kj::Refcounted, private kj::TaskSet::ErrorHandler return getCurrentIncomingRequest().getRootUserTraceSpan(); } + // The Cloudflare Access auth info for the current incoming request, if any was provided by + // the embedder. Used to populate `ctx.access` in JavaScript. + kj::Maybe getAccessInfo() { + if (incomingRequests.empty()) return kj::none; + return getCurrentIncomingRequest().getAccessInfo(); + } + LimitEnforcer& getLimitEnforcer() { return *limitEnforcer; } @@ -255,8 +290,19 @@ class IoContext final: public kj::Refcounted, private kj::TaskSet::ErrorHandler // throws, the input lock will break, resetting the actor. // // This can only be called when I/O gates are active, i.e. in an actor. + // + // Like run(), the callback can accept either (jsg::Lock&) or (jsg::Lock&, IoContext&). template - jsg::PromiseForResult blockConcurrencyWhile(jsg::Lock& js, Func&& callback); + auto blockConcurrencyWhile(jsg::Lock& js, Func&& callback) { + if constexpr (runFuncAcceptsIoContext) { + return blockConcurrencyWhileImpl( + js, [this, callback = kj::fwd(callback)](jsg::Lock& lock) mutable { + return callback(lock, *this); + }); + } else { + return blockConcurrencyWhileImpl(js, kj::fwd(callback)); + } + } // Returns true if output lock gating is necessary. // Can be used in optimizations to bypass wait* calls altogether. @@ -323,6 +369,11 @@ class IoContext final: public kj::Refcounted, private kj::TaskSet::ErrorHandler return abortPromise.addBranch(); } + // If this IoContext has been aborted already, return the abort reason. + kj::Maybe getAbortReason() { + return abortException.clone(); + } + // Force context abort now. // // Note that abort() is safe to call while the IoContext is current. 
Becaues of this, it cannot @@ -355,15 +406,34 @@ class IoContext final: public kj::Refcounted, private kj::TaskSet::ErrorHandler // // If `inputLock` is not provided, and this is an actor context, an input lock will be obtained // before executing the callback. + // + // The callback can accept either (Worker::Lock&) or (Worker::Lock&, IoContext&). When the + // two-argument form is used, *this is passed as the second argument. Existing single-argument + // call sites are unaffected. template - kj::PromiseForResult run( - Func&& func, kj::Maybe inputLock = kj::none) KJ_WARN_UNUSED_RESULT; + auto run(Func&& func, kj::Maybe inputLock = kj::none) KJ_WARN_UNUSED_RESULT { + if constexpr (runFuncAcceptsIoContext) { + return runSingle([this, func = kj::fwd(func)](Worker::Lock& lock) mutable { + return func(lock, *this); + }, kj::mv(inputLock)); + } else { + return runSingle(kj::fwd(func), kj::mv(inputLock)); + } + } // Like run() but executes within the given critical section, if it is non-null. If // `criticalSection` is null, then this just forwards to the other run() (with null inputLock). template - kj::PromiseForResult run(Func&& func, - kj::Maybe> criticalSection) KJ_WARN_UNUSED_RESULT; + auto run(Func&& func, + kj::Maybe> criticalSection) KJ_WARN_UNUSED_RESULT { + if constexpr (runFuncAcceptsIoContext) { + return runSingle([this, func = kj::fwd(func)](Worker::Lock& lock) mutable { + return func(lock, *this); + }, kj::mv(criticalSection)); + } else { + return runSingle(kj::fwd(func), kj::mv(criticalSection)); + } + } // Returns the current IoContext for the thread. // Throws an exception if there is no current context (see hasCurrent() below). @@ -592,6 +662,11 @@ class IoContext final: public kj::Refcounted, private kj::TaskSet::ErrorHandler template auto makeReentryCallback(Func func); + // Like makeReentryCallback(), but the existence of the callback doesn't hold open the IoContext + // at all, that is, it is NOT "treated as if a task were added using addTask()". 
+ template + auto makeReentryCallbackWeak(Func func); + // Returns the number of times addTask() has been called (even if the tasks have completed). uint taskCount() { return addTaskCounter; @@ -1070,6 +1145,25 @@ class IoContext final: public kj::Refcounted, private kj::TaskSet::ErrorHandler kj::Maybe inputLock, Runnable::Exceptional exceptional); + // Detect whether a callback accepts IoContext& as a second argument. + // Used by run() and blockConcurrencyWhile() to optionally pass *this. + template + static constexpr bool runFuncAcceptsIoContext = + std::invocable&, Worker::Lock&, IoContext&>; + + // Internal implementation of run(), always invoked with a single-arg callback. + template + kj::PromiseForResult runSingle( + Func&& func, kj::Maybe inputLock = kj::none); + + template + kj::PromiseForResult runSingle( + Func&& func, kj::Maybe> criticalSection); + + // Internal implementation of blockConcurrencyWhile(), always invoked with a single-arg callback. + template + jsg::PromiseForResult blockConcurrencyWhileImpl(jsg::Lock& js, Func&& callback); + void abortFromHang(Worker::AsyncLock& asyncLock); template @@ -1135,6 +1229,9 @@ class IoContext final: public kj::Refcounted, private kj::TaskSet::ErrorHandler template friend Result throwOrReturnResult( jsg::Lock& js, IoContext::ExceptionOr&& exceptionOrResult); + + template + auto makeReentryCallbackImpl(Func func, kj::Own attachment); }; // The SuppressIoContextScope utility is used to temporarily suppress the active IoContext @@ -1155,21 +1252,21 @@ kj::Promise IoContext::lockOutputWhile(kj::Promise promise) { } template -kj::PromiseForResult IoContext::run( +kj::PromiseForResult IoContext::runSingle( Func&& func, kj::Maybe> criticalSection) { KJ_IF_SOME(cs, criticalSection) { return cs.get() ->wait(getCurrentTraceSpan()) .then([this, func = kj::fwd(func)](InputGate::Lock&& inputLock) mutable { - return run(kj::fwd(func), kj::mv(inputLock)); + return runSingle(kj::fwd(func), kj::mv(inputLock)); }); } else { - 
return run(kj::fwd(func)); + return runSingle(kj::fwd(func)); } } template -kj::PromiseForResult IoContext::run( +kj::PromiseForResult IoContext::runSingle( Func&& func, kj::Maybe inputLock) { // Before we try running anything, let's make sure our IoContext hasn't been aborted. If it has // been aborted, there's likely not an active request so later operations will fail anyway. @@ -1183,7 +1280,7 @@ kj::PromiseForResult IoContext::run( return a.getInputGate() .wait(getCurrentTraceSpan()) .then([this, func = kj::fwd(func)](InputGate::Lock&& inputLock) mutable { - return run(kj::fwd(func), kj::mv(inputLock)); + return runSingle(kj::fwd(func), kj::mv(inputLock)); }); } @@ -1523,9 +1620,22 @@ auto IoContext::makeReentryCallback(Func func) { fulfiller->fulfill(); }); + return makeReentryCallbackImpl(kj::mv(func), kj::heap(kj::mv(releaseNotifier))); +} + +template +auto IoContext::makeReentryCallbackWeak(Func func) { + requireCurrent(); + + // Skip the addTask stuff but still do attach a pending event. + return makeReentryCallbackImpl(kj::mv(func), registerPendingEvent()); +} + +template +auto IoContext::makeReentryCallbackImpl(Func func, kj::Own attachment) { auto ioFunc = addObjectReverse(kj::heap(kj::fwd(func))); - return [self = getWeakRef(), cs = getCriticalSection(), releaseNotifier = kj::mv(releaseNotifier), + return [self = getWeakRef(), cs = getCriticalSection(), attachment = kj::mv(attachment), ioFunc = kj::mv(ioFunc)](auto&&... 
params) mutable { auto& ctx = JSG_REQUIRE_NONNULL(self->tryGet(), Error, "The execution context which hosts this callback is no longer running."); @@ -1586,7 +1696,7 @@ inline ReverseIoOwn IoContext::addObjectReverse(kj::Own obj) { } template -jsg::PromiseForResult IoContext::blockConcurrencyWhile( +jsg::PromiseForResult IoContext::blockConcurrencyWhileImpl( jsg::Lock& js, Func&& callback) { auto lock = getInputLock(); auto cs = lock.startCriticalSection(); @@ -1613,9 +1723,11 @@ jsg::PromiseForResult IoContext::blockConcurrencyWhile( // Arrange to time out if the critical section runs more than 30 seconds, so that objects // won't be hung forever if they have a critical section that deadlocks. auto timeout = afterLimitTimeout(30 * kj::SECONDS).then([]() -> T { - kj::throwFatalException(JSG_KJ_EXCEPTION(OVERLOADED, Error, + auto e = JSG_KJ_EXCEPTION(OVERLOADED, Error, "A call to blockConcurrencyWhile() in a Durable Object waited for " - "too long. The call was canceled and the Durable Object was reset.")); + "too long. 
The call was canceled and the Durable Object was reset."); + e.setDetail(WALL_TIME_LIMIT_DETAIL_ID, kj::heapArray(0)); + kj::throwFatalException(kj::mv(e)); }); return awaitJs(lock, kj::mv(promise)).exclusiveJoin(kj::mv(timeout)); diff --git a/src/workerd/io/io-own.h b/src/workerd/io/io-own.h index 4f3ac62aeb4..44982a07b4b 100644 --- a/src/workerd/io/io-own.h +++ b/src/workerd/io/io-own.h @@ -362,13 +362,13 @@ IoPtr& IoPtr::operator=(decltype(nullptr)) { template inline T* IoOwn::operator->() { - DeleteQueue::checkFarGet(*deleteQueue.get(), typeid(T)); + DeleteQueue::checkFarGet(*deleteQueue, typeid(T)); return item->ptr; } template inline IoOwn::operator kj::Own() && { - DeleteQueue::checkFarGet(*deleteQueue.get(), typeid(T)); + DeleteQueue::checkFarGet(*deleteQueue, typeid(T)); auto result = kj::mv(item->ptr); OwnedObjectList::unlink(*item); item = nullptr; @@ -378,7 +378,7 @@ inline IoOwn::operator kj::Own() && { template inline T* IoPtr::operator->() { - DeleteQueue::checkFarGet(*deleteQueue.get(), typeid(T)); + DeleteQueue::checkFarGet(*deleteQueue, typeid(T)); return ptr; } diff --git a/src/workerd/io/limit-enforcer.h b/src/workerd/io/limit-enforcer.h index 0598368d955..9d14f084a6e 100644 --- a/src/workerd/io/limit-enforcer.h +++ b/src/workerd/io/limit-enforcer.h @@ -27,6 +27,7 @@ class Lock; } // namespace jsg static constexpr size_t DEFAULT_MAX_PBKDF2_ITERATIONS = 100'000; +static constexpr uint64_t DEFAULT_MAX_SCRYPT_COST = 1u << 20; // Interface for an object that enforces resource limits on an Isolate level. // @@ -93,6 +94,16 @@ class IsolateLimitEnforcer: public kj::Refcounted { return kj::none; } + virtual kj::Maybe checkScryptCost( + jsg::Lock& js, uint32_t N, uint32_t r, uint32_t p) const { + // Saturate to avoid overflow in the product. 
+ if (N > DEFAULT_MAX_SCRYPT_COST || r > DEFAULT_MAX_SCRYPT_COST || p > DEFAULT_MAX_SCRYPT_COST) + return DEFAULT_MAX_SCRYPT_COST; + uint64_t cost = static_cast(N) * r * p; + if (cost > DEFAULT_MAX_SCRYPT_COST) return DEFAULT_MAX_SCRYPT_COST; + return kj::none; + } + // Called when a Blob is being created to determine the maximum allowed size of the Blob. virtual size_t getBlobSizeLimit() const { return 128 * 1024 * 1024; // 128 MB diff --git a/src/workerd/io/observer.c++ b/src/workerd/io/observer.c++ index 2fb2ef1ce44..afd14f015d1 100644 --- a/src/workerd/io/observer.c++ +++ b/src/workerd/io/observer.c++ @@ -55,6 +55,8 @@ EventOutcome RequestObserver::outcomeFromException(const kj::Exception& e, Failu return EventOutcome::EXCEEDED_MEMORY; } else if (e.getDetail(CPU_LIMIT_DETAIL_ID) != kj::none) { return EventOutcome::EXCEEDED_CPU; + } else if (e.getDetail(WALL_TIME_LIMIT_DETAIL_ID) != kj::none) { + return EventOutcome::EXCEEDED_WALL_TIME; } else if (e.getDetail(SCRIPT_KILLED_DETAIL_ID) != kj::none) { return EventOutcome::KILL_SWITCH; } else if (source == RequestObserver::FailureSource::DEFERRED_PROXY && diff --git a/src/workerd/io/observer.h b/src/workerd/io/observer.h index ad28668bd71..aa26340a7d5 100644 --- a/src/workerd/io/observer.h +++ b/src/workerd/io/observer.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -18,6 +19,10 @@ namespace workerd { class IoContext; + +// Whether an outgoing subrequest's request body can be rewound (e.g. a buffered or null body), and +// so the request could be re-sent. See RequestObserver::setNextSubrequestBodyRewindable(). +WD_STRONG_BOOL(SubrequestBodyRewindable); class WorkerInterface; class LimitEnforcer; class TimerChannel; @@ -126,6 +131,14 @@ class RequestObserver: public kj::Refcounted { return kj::mv(client); } + // Record whether the next outgoing subrequest's request body can be rewound (e.g. a buffered or + // null fetch body). 
Consumed when the subrequest client for that call is constructed. The + // set->consume window is synchronous, so the value always corresponds to the next call. This is + // intentionally target-agnostic: the signal is a property of the request body, not of the callee, + // so it applies equally to actor and (potentially, in the future) non-actor subrequests. No-op in + // the base observer; edgeworker overrides it to feed retry classification. + virtual void setNextSubrequestBodyRewindable(SubrequestBodyRewindable bodyRewindable) {} + // Used to record when a worker has used a dynamic dispatch binding. virtual void setHasDispatched() {}; diff --git a/src/workerd/io/outcome.capnp b/src/workerd/io/outcome.capnp index da7c0bf6ed1..1b0c4d753ad 100644 --- a/src/workerd/io/outcome.capnp +++ b/src/workerd/io/outcome.capnp @@ -28,4 +28,5 @@ enum EventOutcome { loadShed @9; responseStreamDisconnected @10; internalError @11; + exceededWallTime @12; } diff --git a/src/workerd/io/stored-value.c++ b/src/workerd/io/stored-value.c++ new file mode 100644 index 00000000000..22f7acb6048 --- /dev/null +++ b/src/workerd/io/stored-value.c++ @@ -0,0 +1,384 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +#include "stored-value.h" + +#include "io-context.h" + +#include + +namespace workerd { + +namespace { + +// Return the id of the current actor (or the empty string if there is no current actor). 
+kj::Maybe getCurrentActorId() { + KJ_IF_SOME(ioContext, IoContext::tryCurrent()) { + KJ_IF_SOME(actor, ioContext.getActor()) { + KJ_SWITCH_ONEOF(actor.getId()) { + KJ_CASE_ONEOF(s, kj::String) { + return kj::heapString(s); + } + KJ_CASE_ONEOF(actorId, kj::Own) { + return actorId->toString(); + } + } + KJ_UNREACHABLE; + } + } + return kj::none; +} + +} // namespace + +kj::Array serializeV8Value(jsg::Lock& js, kj::StringPtr key, const jsg::JsValue& value) { + StoredExternalHandler::Serializer externalHandler(key); + jsg::Serializer serializer(js, + jsg::Serializer::Options{ + .version = 15, + .omitHeader = false, + .externalHandler = externalHandler, + }); + serializer.write(js, value); + auto released = serializer.release(); + return kj::mv(released.data); +} + +jsg::JsValue deserializeV8Value( + jsg::Lock& js, kj::StringPtr key, kj::ArrayPtr buf) { + + KJ_ASSERT(buf.size() > 0, "unexpectedly empty value buffer", key); + try { + // The js.tryCatch will handle the normal exception path. We wrap this in an + // additional try/catch in case the js.tryCatch hits an exception that is + // terminal for the isolate, causing exception to be rethrown, in which case + // we throw a kj::Exception wrapping a jsg.Error. + return js.tryCatch([&]() -> jsg::JsValue { + StoredExternalHandler::Deserializer externalHandler(key); + jsg::Deserializer::Options options{ + .externalHandler = externalHandler, + }; + if (buf[0] != 0xFF) { + // When Durable Objects was first released, it did not properly write headers when serializing + // to storage. If we find that the header is missing (as indicated by the first byte not being + // 0xFF), it's safe to assume that the data was written at the only serialization version we + // used during that early time period, so we explicitly set that version here. 
+ options.version = 13; + options.readHeader = false; + } + + jsg::Deserializer deserializer(js, buf, kj::none, kj::none, options); + + auto result = deserializer.readValue(js); + externalHandler.assertDone(); + return result; + }, [&](jsg::Value&& exception) mutable -> jsg::JsValue { + // If we do hit a deserialization error, we log information that will be helpful in + // understanding the problem but that won't leak too much about the customer's data. We + // include the key (to help find the data in the database if it hasn't been deleted), the + // length of the value, and the first three bytes of the value (which is just the v8-internal + // version header and the tag that indicates the type of the value, but not its contents). + kj::String actorId = getCurrentActorId().orDefault([]() { return kj::String(); }); + KJ_FAIL_ASSERT("actor storage deserialization failed", "failed to deserialize stored value", + actorId, exception.getHandle(js), key, buf.size(), + buf.first(std::min(static_cast(3), buf.size()))); + }); + } catch (jsg::JsExceptionThrown&) { + // We can occasionally hit an isolate termination here -- we prefix the error with jsg to avoid + // counting it against our internal storage error metrics but also throw a KJ exception rather + // than a jsExceptionThrown error to avoid confusing the normal termination handling code. + // We don't expect users to ever actually see this error. + JSG_FAIL_REQUIRE(Error, + "isolate terminated while deserializing value from Durable Object " + "storage; contact us if you're wondering why you're seeing this"); + } +} + +void StoredExternalHandler::cancelAllPendingWrites() { + // cancelAllPendingWrites() is called on rollback, so we actually want to cancel tombstones too. + // We can just clean the map. 
+ pendingWrites.clear(); +} + +bool StoredExternalHandler::needsTombstone(kj::StringPtr key) { + kj::Maybe maybeTxn = currentSyncTxn; + for (;;) { + auto& txn = KJ_UNWRAP_OR(maybeTxn, break); + if (txn.savedWrites.find(key) != kj::none) { + // Key exists in a parent, so we need a tombstone if overwritten in children. + return true; + } + maybeTxn = txn.parent; + } + return false; +} + +void StoredExternalHandler::cancelPutExternals(kj::StringPtr key) { + if (needsTombstone(key)) { + pendingWrites.upsert(key.clone(), Tombstone(), + [](auto& existing, auto&& replacement) { existing = kj::mv(replacement); }); + } else { + pendingWrites.erase(key); + fulfillIfEmpty(); + } +} + +StoredExternalHandler::SyncNestedTransaction::SyncNestedTransaction(StoredExternalHandler& handler) + : handler(handler), + parent(handler.currentSyncTxn), + savedWrites(kj::mv(handler.pendingWrites)) { + handler.currentSyncTxn = this; +} + +StoredExternalHandler::SyncNestedTransaction::~SyncNestedTransaction() noexcept(false) { + // Cancel pending writes if they weren't already committed. (If commit() was called, we already + // merged pendingWrites into our own savedWrites.) + handler.cancelAllPendingWrites(); + + // Restore handler state. + handler.pendingWrites = kj::mv(savedWrites); + handler.currentSyncTxn = parent; + + handler.fulfillIfEmpty(); +} + +void StoredExternalHandler::SyncNestedTransaction::commit() { + // Merge all pending writes into the parent set. + for (auto& entry: handler.pendingWrites) { + if (parent == kj::none && entry.value.is()) { + // Parent is the root transaction, tombstones not needed anymore, just erase the entry. + savedWrites.erase(entry.key); + } else { + // In all other cases, just merge. + savedWrites.upsert(kj::mv(entry.key), kj::mv(entry.value), + [](auto& existing, auto&& replacement) { existing = kj::mv(replacement); }); + } + } + handler.pendingWrites.clear(); +} + +// Check if it's time to fulfill `onEmptyFulfiller` and if so, do so. 
+void StoredExternalHandler::fulfillIfEmpty() { + if (currentSyncTxn != kj::none) { + // No need to fulfill yet, we'll do it when the nested transactions unwind. + return; + } + + if (pendingWrites.size() > 0) { + // Not empty yet. Note that the top-level pendingWrites never contains tombstones so we don't + // have to check for those. + return; + } + + KJ_IF_SOME(f, onEmptyFulfiller) { + f->fulfill(); + onEmptyFulfiller = kj::none; + } +} + +// If there is a pending write for the given key, return its live channel objects. This is used +// when the app tries to read the key back before it is finished writing. +kj::Maybe>> StoredExternalHandler:: + findPendingWriteForRead(kj::StringPtr key) { + // Loop up the sync transaction stack to find a matching key. + decltype(pendingWrites)* nextMap = &pendingWrites; + kj::Maybe nextTxn = currentSyncTxn; + for (;;) { + KJ_IF_SOME(pending, nextMap->find(key)) { + KJ_SWITCH_ONEOF(pending) { + KJ_CASE_ONEOF(write, PendingWrite) { + return KJ_MAP(channel, write.channels) { return kj::addRef(*channel); }; + } + KJ_CASE_ONEOF(_, Tombstone) { + // In the current transaction, this key has been overwritten with something that has + // no channels. + return kj::Array>(); + } + } + } + + KJ_IF_SOME(txn, nextTxn) { + nextMap = &txn.savedWrites; + nextTxn = txn.parent; + } else { + break; + } + } + + return kj::none; +} + +StoredExternalHandler& StoredExternalHandler::current() { + // TODO(cleanup): It's a bit ugly that we're plucking the StoredExternalHandler out of the + // thread-local IoContext when needed. It really ought to be passed in by the caller of + // serializeV8Value() or deserializeV8Value(). However, stringing it through to all the call + // sites would be tedious. This tedium will probably be reduced if and when the legacy storage + // backend is removed. 
+ + auto& actor = KJ_REQUIRE_NONNULL( + IoContext::current().getActor(), "serializing/deserializing storage outside of an actor?"); + return actor.getOrCreateStoredExternalHandler(); +} + +StoredExternalHandler::Serializer::~Serializer() noexcept(false) { + KJ_IF_SOME(state, this->state) { + PendingWrite pendingWrite; + + pendingWrite.channels = kj::mv(state.channels); + + pendingWrite.writePromise = + kj::joinPromisesFailFast(state.tokenPromises.releaseAsArray()) + .then([key = key.clone(), &handler = state.handler](kj::Array> tokens) { + // We can't possibly have a sync transaction open at this point because we're in an async + // continuation. Hence we can assume `pendingWrites` has the final merged set of writes. + KJ_ASSERT(handler.currentSyncTxn == kj::none); + + // Write the tokens to storage. + handler.sqliteKv.putExternals(key, kj::mv(tokens)); + + // HACK: We're about to erase ourselves from the map, but this would result in + // self-cancellation. To avoid that, detach this promise first. Since we know the promise + // is about to be done, detaching it should be safe. + auto& entry = KJ_ASSERT_NONNULL(handler.pendingWrites.findEntry(key)); + auto& pendingWrite = KJ_ASSERT_NONNULL(entry.value.tryGet()); + pendingWrite.writePromise.detach([](kj::Exception&& e) {}); + + // Erase ourselves from the map. + handler.pendingWrites.erase(entry); + + // If that was the last pending write, fulfill the on-empty fulfiller. + handler.fulfillIfEmpty(); + }).eagerlyEvaluate([&handler = state.handler](kj::Exception&& e) { + KJ_IF_SOME(f, handler.onEmptyFulfiller) { + f->reject(e.clone()); + handler.onEmptyFulfiller = kj::none; + } + }); + + state.handler.pendingWrites.upsert(key.clone(), kj::mv(pendingWrite), + [](auto& existing, auto&& replacement) { existing = kj::mv(replacement); }); + + // If this was the first pending write, arrange to block the transaction until there are no + // more writes. 
+ if (state.handler.onEmptyFulfiller == kj::none) { + auto paf = kj::newPromiseAndFulfiller(); + state.handler.onEmptyFulfiller = kj::mv(paf.fulfiller); + state.handler.actorCache.blockTransaction( + paf.promise.attach(kj::defer([&handler = state.handler]() { + // If handler.pendingWrites is non-empty here, then either one of the writes failed or + // the promise was canceled due to a rollback. Either way, we should cancel all pending + // writes. + handler.cancelAllPendingWrites(); + }))); + } + } else { + // We didn't store any externals with this put, but we need to cancel any pending write + // from a previous put. + KJ_IF_SOME(ioctx, IoContext::tryCurrent()) { + KJ_IF_SOME(actor, ioctx.getActor()) { + KJ_IF_SOME(handler, actor.getStoredExternalHandler()) { + handler.cancelPutExternals(key); + } + } + } + } +} + +void StoredExternalHandler::Serializer::writeChannel( + kj::Own channel, + kj::Promise> tokenPromise) { + State& state = getState(); + state.channels.add(kj::mv(channel)); + state.tokenPromises.add(kj::mv(tokenPromise)); +} + +StoredExternalHandler::Serializer::State& StoredExternalHandler::Serializer::getState() { + KJ_IF_SOME(s, this->state) { + return s; + } else { + return this->state.emplace(StoredExternalHandler::current()); + } +} + +kj::Own StoredExternalHandler::Deserializer:: + readSubrequestChannel(IoChannelFactory& factory) { + KJ_SWITCH_ONEOF(readChannelImpl()) { + KJ_CASE_ONEOF(channel, kj::Own) { + return kj::addRef( + KJ_REQUIRE_NONNULL(kj::tryDowncast(*channel))); + } + KJ_CASE_ONEOF(token, kj::ArrayPtr) { + return factory.subrequestChannelFromToken( + IoChannelFactory::ChannelTokenUsage::STORAGE, token); + } + } + KJ_UNREACHABLE; +} + +kj::Own StoredExternalHandler::Deserializer:: + readActorClassChannel(IoChannelFactory& factory) { + KJ_SWITCH_ONEOF(readChannelImpl()) { + KJ_CASE_ONEOF(channel, kj::Own) { + return kj::addRef( + KJ_REQUIRE_NONNULL(kj::tryDowncast(*channel), + "serialized value doesn't match external type")); + } + 
KJ_CASE_ONEOF(token, kj::ArrayPtr) { + return factory.actorClassFromToken(IoChannelFactory::ChannelTokenUsage::STORAGE, token); + } + } + KJ_UNREACHABLE; +} + +StoredExternalHandler::Deserializer::State& StoredExternalHandler::Deserializer::getState() { + KJ_IF_SOME(s, this->state) { + return s; + } else { + auto& state = this->state.emplace(StoredExternalHandler::current()); + + // When first initializing the state, acquire the externals table. + KJ_IF_SOME(pending, state.handler.findPendingWriteForRead(key)) { + // A recent write to this key is still pending. Use the associated in-memory channels. + state.externals = kj::mv(pending); + } else { + // Read the tokens from the externals table in the database. + state.externals = state.handler.sqliteKv.getExternals(key); + } + + return state; + } +} + +kj::OneOf, kj::ArrayPtr> +StoredExternalHandler::Deserializer::readChannelImpl() { + auto& state = getState(); + uint idx = state.index++; + + KJ_SWITCH_ONEOF(state.externals) { + KJ_CASE_ONEOF(channels, kj::Array>) { + KJ_REQUIRE(idx < channels.size(), "serialized value doesn't match pending externals?"); + return kj::addRef(*channels[idx]); + } + KJ_CASE_ONEOF(tokens, kj::Array>) { + KJ_REQUIRE(idx < tokens.size(), "serialized value doesn't match stored externals?"); + return tokens[idx].asPtr().asConst(); + } + } + KJ_UNREACHABLE; +} + +void StoredExternalHandler::Deserializer::assertDone() { + KJ_IF_SOME(s, state) { + KJ_SWITCH_ONEOF(s.externals) { + KJ_CASE_ONEOF(channels, kj::Array>) { + KJ_REQUIRE(s.index == channels.size()); + } + KJ_CASE_ONEOF(tokens, kj::Array>) { + KJ_REQUIRE(s.index == tokens.size()); + } + } + } +} + +} // namespace workerd diff --git a/src/workerd/io/stored-value.h b/src/workerd/io/stored-value.h new file mode 100644 index 00000000000..ce6ea025467 --- /dev/null +++ b/src/workerd/io/stored-value.h @@ -0,0 +1,194 @@ +// Copyright (c) 2026 Cloudflare, Inc. 
+// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +#pragma once + +#include "io-channels.h" + +#include + +namespace workerd { + +class ActorCacheInterface; +class SqliteKv; + +kj::Array serializeV8Value(jsg::Lock& js, kj::StringPtr key, const jsg::JsValue& value); + +jsg::JsValue deserializeV8Value(jsg::Lock& js, kj::StringPtr key, kj::ArrayPtr buf); + +// Object that manages storing "externals" into DO KV storage such that writes appear synchronous +// even when they require waiting for async I/O. +// +// "Externals" are references to external resources (capabilities) stored in the DO's KV storage. +// +// Externals are stored as channel tokens. But, creating a channel token may require asynchronous +// I/O, while storing a value to DO KV storage (under sqlite) is intended to be synchronous. To +// handle this, the value is serialized and stored without the tokens, but with references to a +// list of external channels. Live objects representing those channels are kept in memory in the +// StoredExternalHandler while we wait for tokens to be created. Reads can be served in the +// meantime by using the live objects. Once the tokens are all obtained, they are written to +// storage and the live objects can be dropped. +// +// While pending externals exist, the current transaction must be held open, and the output gate +// held closed. If we fail to create any of the tokens, the transaction is rolled back and the +// output gate broken, just like any hard storage failure. This makes the writes appear synchronous +// from the application's point of view. +class StoredExternalHandler { + public: + explicit StoredExternalHandler(ActorCacheInterface& actorCache, SqliteKv& sqliteKv) + : actorCache(actorCache), + sqliteKv(sqliteKv) {} + + // Cancel any outstanding task that might call `putExternals()` on the given key in the future. 
+ // This must be called whenever the key has been invalidated by a new put or delete. + void cancelPutExternals(kj::StringPtr key); + + class SyncNestedTransaction; + + class Serializer; + class Deserializer; + + private: + ActorCacheInterface& actorCache; + SqliteKv& sqliteKv; + + struct PendingWrite { + kj::Vector> channels; + kj::Promise writePromise = nullptr; + }; + + struct Tombstone {}; + + // Maps keys to the list of pending externals for that key. When a write promise completes it + // removes itself from the map. If there are then no pending writes, `onEmptyFulfiller` is + // signaled. + // + // Entries in this map may also be `Tombstone`s. This is only relevant when in a nested sync + // transaction, i.e. `currentSyncTxn` is non-null. A tombstone indicates that, if the nested + // transaction manages to be committed, we need to cancel writes associated with the key in the + // parent. + kj::HashMap> pendingWrites; + + // When `pendingWrites` becomes empty, this should be fulfilled. + kj::Maybe>> onEmptyFulfiller; + + // If we're in a nested sync transaction (an instance of `SyncNestedTransaction` exists on the + // stack) then this points to it. + kj::Maybe currentSyncTxn; + + void fulfillIfEmpty(); + kj::Maybe>> findPendingWriteForRead( + kj::StringPtr key); + void cancelAllPendingWrites(); + bool needsTombstone(kj::StringPtr key); + + static StoredExternalHandler& current(); +}; + +// Construct this class on the stack while performing a synchronous nested transaction. It will +// move the pending external writes off to the side while the nested transaction performs its own +// writes, then the sets will be properly merged or canceled when it is known whether the nested +// transaction will be committed or rolled back. 
+class StoredExternalHandler::SyncNestedTransaction final { + public: + explicit SyncNestedTransaction(StoredExternalHandler& handler); + ~SyncNestedTransaction() noexcept(false); + KJ_DISALLOW_COPY_AND_MOVE(SyncNestedTransaction); + + // Call if the transaction completed successfully. Otherwise, rollback is assumed. + void commit(); + + private: + friend class StoredExternalHandler; + + StoredExternalHandler& handler; + kj::Maybe parent; + + // Set of writes that were pending before the nested transaction opened. We move these to the + // side so that if the nested transaction is rolled back we can restore them verbatim. On + // success, we'll instead cancel any writes that were overwritten by the nested transaction, + // and then merge the rest. + kj::HashMap> savedWrites; +}; + +// ExternalHandler used during serialization of stored values. +class StoredExternalHandler::Serializer final: public jsg::Serializer::ExternalHandler { + public: + explicit Serializer(kj::StringPtr key): key(key) {} + ~Serializer() noexcept(false); // inserts the pending externals into `handler` + + // Add an external to the list. + // + // TEMPORARY: The caller is expected to have called `getTokenMaybeSync()` already, and if the + // token is available synchronously, then it is serialized directly without using the externals + // mechanism. This is for backwards-compatibility as we roll out the new externals mechanism, to + // avoid writing backwards-incompatible data during the rollout. + // + // TODO(cleanup): Once rolled out, we should switch to always store tokens using the externals + // mechanism. We can remove the second parameter here at that time, and instead have + // writeChannel() make the call directly. 
+ void writeChannel(kj::Own channel, + kj::Promise> tokenPromise); + + private: + kj::StringPtr key; + + struct State { + StoredExternalHandler& handler; + + kj::Vector> channels; + kj::Vector>> tokenPromises; + + explicit State(StoredExternalHandler& handler): handler(handler) {} + }; + + // Initialized when the first external is written. + kj::Maybe state; + + State& getState(); +}; + +// ExternalHandler used during deserialization of stored values. +class StoredExternalHandler::Deserializer final: public jsg::Deserializer::ExternalHandler { + public: + explicit Deserializer(kj::StringPtr key): key(key) {} + + // Read an external. Externals are expected to be read in the same order they were written. + kj::Own readSubrequestChannel(IoChannelFactory& factory); + kj::Own readActorClassChannel(IoChannelFactory& factory); + + // Throw if we haven't read all channels. + void assertDone(); + + private: + kj::StringPtr key; + + struct State { + StoredExternalHandler& handler; + + // If there is an active PendingWrite, we hold a reference to it here. We hold Rc + // so that if *during deserialization* someone performs a new put() on this key, it won't + // disrupt deserialization, which can continue with the previous value. + // + // If there is no active PendingWrite, we hold an array of tokens instead, read directly from + // storage. + kj::OneOf>, kj::Array>> + externals; + + // Index of next external to be read. + uint index = 0; + + explicit State(StoredExternalHandler& handler): handler(handler) {} + }; + + // Initialized when the first external is read. 
+ kj::Maybe state; + + State& getState(); + + kj::OneOf, kj::ArrayPtr> + readChannelImpl(); +}; + +} // namespace workerd diff --git a/src/workerd/io/trace-stream.c++ b/src/workerd/io/trace-stream.c++ index a00414b21fa..e0ba5c56e7e 100644 --- a/src/workerd/io/trace-stream.c++ +++ b/src/workerd/io/trace-stream.c++ @@ -42,6 +42,7 @@ namespace { V(EVENT, "event") \ V(EXCEEDEDCPU, "exceededCpu") \ V(EXCEEDEDMEMORY, "exceededMemory") \ + V(EXCEEDEDWALLTIME, "exceededWallTime") \ V(EXCEPTION, "exception") \ V(EXECUTIONMODEL, "executionModel") \ V(FETCH, "fetch") \ @@ -337,6 +338,8 @@ jsg::JsValue ToJs(jsg::Lock& js, const EventOutcome& outcome, StringCache& cache return cache.get(js, SCRIPTNOTFOUND_STR); case EventOutcome::INTERNAL_ERROR: return cache.get(js, INTERNALERROR_STR); + case EventOutcome::EXCEEDED_WALL_TIME: + return cache.get(js, EXCEEDEDWALLTIME_STR); case EventOutcome::UNKNOWN: return cache.get(js, UNKNOWN_STR); } @@ -998,7 +1001,7 @@ kj::Promise TailStreamCustomEvent::run( KJ_DEFER({ // waitUntil() should allow extending execution on the server side even when the client // disconnects. 
- waitUntilTasks.add(incomingRequest->drain().attach(kj::mv(incomingRequest))); + incomingRequest->drain(waitUntilTasks, kj::mv(incomingRequest)); }); auto eventOutcome = co_await donePromise.exclusiveJoin(ioContext.onAbort()).then([&]() { diff --git a/src/workerd/io/trace.c++ b/src/workerd/io/trace.c++ index 5d06e9f037a..a786c5d38be 100644 --- a/src/workerd/io/trace.c++ +++ b/src/workerd/io/trace.c++ @@ -70,7 +70,7 @@ kj::Maybe TraceId::fromGoString(kj::ArrayPtr s) { return TraceId(low, 0); } } else { - KJ_IF_SOME(high, hexToUint64(s.slice(0, n - 16))) { + KJ_IF_SOME(high, hexToUint64(s.first(n - 16))) { KJ_IF_SOME(low, hexToUint64(s.slice(n - 16, n))) { return TraceId(low, high); } @@ -717,6 +717,7 @@ HibernatableWebSocketEventInfo::Type HibernatableWebSocketEventInfo::readFrom( return Error{}; } } + KJ_UNREACHABLE; } FetchResponseInfo::FetchResponseInfo(uint16_t statusCode): statusCode(statusCode) {} diff --git a/src/workerd/io/worker-entrypoint.c++ b/src/workerd/io/worker-entrypoint.c++ index 8afb03fda5f..3d8f3d8f26a 100644 --- a/src/workerd/io/worker-entrypoint.c++ +++ b/src/workerd/io/worker-entrypoint.c++ @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -61,7 +62,8 @@ class WorkerEntrypoint final: public WorkerInterface { kj::Maybe cfBlobJson, kj::Maybe versionInfo, kj::Maybe maybeTriggerInvocationSpan, - bool isDynamicDispatch); + bool isDynamicDispatch, + kj::Maybe> accessInfo); kj::Promise request(kj::HttpMethod method, kj::StringPtr url, @@ -87,6 +89,7 @@ class WorkerEntrypoint final: public WorkerInterface { ThreadContext& threadContext; kj::TaskSet& waitUntilTasks; + kj::Maybe canceler; kj::Maybe> incomingRequest; bool tunnelExceptions; bool isDynamicDispatch; @@ -110,20 +113,34 @@ class WorkerEntrypoint final: public WorkerInterface { kj::Own ioChannelFactory, kj::Own metrics, kj::Maybe> workerTracer, - kj::Maybe maybeTriggerInvocationSpan); + kj::Maybe maybeTriggerInvocationSpan, + kj::Maybe> accessInfo); - template 
- kj::Promise maybeAddGcPassForTest(IoContext& context, kj::Promise promise); + kj::Promise requestImpl(kj::HttpMethod method, + kj::StringPtr url, + const kj::HttpHeaders& headers, + kj::AsyncInputStream& requestBody, + Response& response); kj::Promise runAlarmImpl( kj::Own incomingRequest, kj::Date scheduledTime, uint32_t retryCount); + template + kj::Promise wrapWithCanceler(kj::Promise promise) { + KJ_IF_SOME(c, canceler) { + return c.wrap(kj::mv(promise)); + } else { + return kj::mv(promise); + } + } + public: // For kj::heap() only; pretend this is private. WorkerEntrypoint(kj::Badge badge, ThreadContext& threadContext, kj::TaskSet& waitUntilTasks, + kj::Maybe canceler, bool tunnelExceptions, bool isDynamicDispatch, kj::Maybe entrypointName, @@ -184,15 +201,22 @@ kj::Own WorkerEntrypoint::construct(ThreadContext& threadContex kj::Maybe cfBlobJson, kj::Maybe versionInfo, kj::Maybe maybeTriggerInvocationSpan, - bool isDynamicDispatch) { + bool isDynamicDispatch, + kj::Maybe> accessInfo) { TRACE_EVENT("workerd", "WorkerEntrypoint::construct()"); + // Arrange to forcefully cancel work when the Actor is aborted. 
+ kj::Maybe canceler; + KJ_IF_SOME(a, actor) { + canceler = a->getAbortCanceler(); + } + auto obj = kj::heap(kj::Badge(), threadContext, - waitUntilTasks, tunnelExceptions, isDynamicDispatch, entrypointName, kj::mv(props), + waitUntilTasks, canceler, tunnelExceptions, isDynamicDispatch, entrypointName, kj::mv(props), kj::mv(cfBlobJson), kj::mv(versionInfo)); obj->init(kj::mv(worker), kj::mv(actor), kj::mv(limitEnforcer), kj::mv(ioContextDependency), kj::mv(ioChannelFactory), kj::addRef(*metrics), kj::mv(workerTracer), - kj::mv(maybeTriggerInvocationSpan)); + kj::mv(maybeTriggerInvocationSpan), kj::mv(accessInfo)); auto& wrapper = metrics->wrapWorkerInterface(*obj); return kj::attachRef(wrapper, kj::mv(obj), kj::mv(metrics)); } @@ -200,6 +224,7 @@ kj::Own WorkerEntrypoint::construct(ThreadContext& threadContex WorkerEntrypoint::WorkerEntrypoint(kj::Badge badge, ThreadContext& threadContext, kj::TaskSet& waitUntilTasks, + kj::Maybe canceler, bool tunnelExceptions, bool isDynamicDispatch, kj::Maybe entrypointName, @@ -208,6 +233,7 @@ WorkerEntrypoint::WorkerEntrypoint(kj::Badge badge, kj::Maybe versionInfo) : threadContext(threadContext), waitUntilTasks(waitUntilTasks), + canceler(canceler), tunnelExceptions(tunnelExceptions), isDynamicDispatch(isDynamicDispatch), entrypointName(entrypointName), @@ -222,7 +248,8 @@ void WorkerEntrypoint::init(kj::Own worker, kj::Own ioChannelFactory, kj::Own metrics, kj::Maybe> workerTracer, - kj::Maybe maybeTriggerInvocationSpan) { + kj::Maybe maybeTriggerInvocationSpan, + kj::Maybe> accessInfo) { TRACE_EVENT("workerd", "WorkerEntrypoint::init()"); // We need to construct the IoContext -- unless this is an actor and it already has a // IoContext, in which case we reuse it. 
@@ -252,10 +279,37 @@ void WorkerEntrypoint::init(kj::Own worker, } incomingRequest = kj::heap(kj::mv(context), kj::mv(ioChannelFactory), - kj::mv(metrics), kj::mv(workerTracer), kj::mv(maybeTriggerInvocationSpan)) + kj::mv(metrics), kj::mv(workerTracer), kj::mv(maybeTriggerInvocationSpan), kj::mv(accessInfo)) .attach(kj::mv(actor)); } +// To match our historical behavior (when we used to pull the headers from the JavaScript object +// later on), headers are canonicalized: names are lower-cased and values with the same name are +// combined into a comma-delimited list. (This explicitly breaks the Set-Cookie header, +// incidentally, but should be equivalent for all other headers.) +tracing::FetchEventInfo buildFetchEventInfo(kj::HttpMethod method, + kj::StringPtr url, + const kj::HttpHeaders& headers, + kj::Maybe cfBlobJson) { + kj::String cfJson; + KJ_IF_SOME(c, cfBlobJson) { + cfJson = kj::str(c); + } + + kj::TreeMap> traceHeaders; + headers.forEach([&](kj::StringPtr name, kj::StringPtr value) { + kj::String lower = toLower(name); + auto& slot = traceHeaders.findOrCreate( + lower, [&]() { return decltype(traceHeaders)::Entry{kj::mv(lower), {}}; }); + slot.add(value); + }); + auto traceHeadersArray = KJ_MAP(entry, traceHeaders) { + return tracing::FetchEventInfo::Header(kj::mv(entry.key), kj::strArray(entry.value, ", ")); + }; + + return tracing::FetchEventInfo(method, kj::str(url), kj::mv(cfJson), kj::mv(traceHeadersArray)); +} + kj::Exception exceptionToPropagate(bool isInternalException, kj::Exception&& exception) { if (isInternalException) { // We've already logged it here, the only thing that matters to the client is that we failed @@ -285,47 +339,33 @@ kj::Promise WorkerEntrypoint::request(kj::HttpMethod method, const kj::HttpHeaders& headers, kj::AsyncInputStream& requestBody, Response& response) { + return wrapWithCanceler(requestImpl(method, url, headers, requestBody, response)); +} + +kj::Promise WorkerEntrypoint::requestImpl(kj::HttpMethod method, + 
kj::StringPtr url, + const kj::HttpHeaders& headers, + kj::AsyncInputStream& requestBody, + Response& response) { TRACE_EVENT("workerd", "WorkerEntrypoint::request()", "url", url.cStr(), PERFETTO_FLOW_FROM_POINTER(this)); + + // ----- Stage 1: Set up per-request state. ----- + auto incomingRequest = kj::mv(KJ_REQUIRE_NONNULL(this->incomingRequest, "request() can only be called once")); this->incomingRequest = kj::none; auto& context = incomingRequest->getContext(); - auto wrappedResponse = kj::heap(response); - bool isActor = context.getActor() != kj::none; + // HACK: Capture workerTracer directly, it's unclear how to acquire the right tracer from context // when we need it (for DOs, IoContext may point to a different WorkerTracer by the time we use // it). The tracer lives as long or longer than the IoContext (based on being co-owned // by IncomingRequest and PipelineTracer) so long enough. kj::Maybe workerTracer; - KJ_IF_SOME(t, incomingRequest->getWorkerTracer()) { - kj::String cfJson; - KJ_IF_SOME(c, cfBlobJson) { - cfJson = kj::str(c); - } - - // To match our historical behavior (when we used to pull the headers from the JavaScript - // object later on), we need to canonicalize the headers, including: - // - Lower-case the header name. - // - Combine multiple headers with the same name into a comma-delimited list. (This explicitly - // breaks the Set-Cookie header, incidentally, but should be equivalent for all other - // headers.) 
- kj::TreeMap> traceHeaders; - headers.forEach([&](kj::StringPtr name, kj::StringPtr value) { - kj::String lower = toLower(name); - auto& slot = traceHeaders.findOrCreate( - lower, [&]() { return decltype(traceHeaders)::Entry{kj::mv(lower), {}}; }); - slot.add(value); - }); - auto traceHeadersArray = KJ_MAP(entry, traceHeaders) { - return tracing::FetchEventInfo::Header(kj::mv(entry.key), kj::strArray(entry.value, ", ")); - }; - - t.setEventInfo(*incomingRequest, - tracing::FetchEventInfo(method, kj::str(url), kj::mv(cfJson), kj::mv(traceHeadersArray))); + t.setEventInfo(*incomingRequest, buildFetchEventInfo(method, url, headers, cfBlobJson)); workerTracer = t; } @@ -337,121 +377,138 @@ kj::Promise WorkerEntrypoint::request(kj::HttpMethod method, TRACE_EVENT_BEGIN("workerd", "WorkerEntrypoint::request() waiting on context", PERFETTO_TRACK_FROM_POINTER(&context), PERFETTO_FLOW_FROM_POINTER(this)); - return context - .run([this, &context, method, url, &headers, &requestBody, - &metrics = incomingRequest->getMetrics(), &wrappedResponse = *wrappedResponse, - entrypointName = entrypointName](Worker::Lock& lock) mutable { - TRACE_EVENT_END("workerd", PERFETTO_TRACK_FROM_POINTER(&context)); - TRACE_EVENT("workerd", "WorkerEntrypoint::request() run", PERFETTO_FLOW_FROM_POINTER(this)); - jsg::AsyncContextFrame::StorageScope traceScope = context.makeAsyncTraceScope(lock); - jsg::AsyncContextFrame::StorageScope userTraceScope = context.makeUserAsyncTraceScope(lock); - auto featureFlags = FeatureFlags::get(lock); + KJ_TRY { + // Cancel any in-flight deferred-proxy task on the way out, including on cancellation of this + // request and including when the fail-open fallback (in the outer KJ_CATCH) runs. This is the + // outermost cleanup so that the proxy task is never left pinning the IoContext past this + // function. (It's a no-op when `proxyTask` was never set, e.g. if Stage 2 threw.) 
+ KJ_DEFER({ proxyTask = kj::none; }); - kj::Maybe> signal; + // ----- Stage 2: Run the JS request handler. ----- - if (featureFlags.getEnableRequestSignal()) { - auto abortSignalFlag = featureFlags.getRequestSignalPassthrough() - ? api::AbortSignal::Flag::NONE - : api::AbortSignal::Flag::IGNORE_FOR_SUBREQUESTS; - jsg::Lock& js = lock; - signal.emplace(abortController.emplace(js.alloc(js, abortSignalFlag)) - ->getSignal()); - } + { + // Drain the incoming request and trigger the client-disconnect abort signal on scope exit + // (success, failure, or cancellation). This must run regardless of outcome so that the + // incoming request is always drained and the AbortController is released; it must also run + // before final error handling so that `failOpenService` is populated when needed. + KJ_DEFER({ + // The request has been canceled, but allow it to continue executing in the background. + if (context.isFailOpen()) { + // Fail-open behavior has been chosen, we'd better save an interface that we can use for + // that purpose later. 
+ failOpenService = context.getSubrequestChannelNoChecks( + IoContext::NEXT_CLIENT_CHANNEL, false, kj::mv(cfBlobJson)); + } - return lock.getGlobalScope().request(method, url, headers, requestBody, wrappedResponse, - cfBlobJson, lock, - lock.getExportedHandler(entrypointName, kj::mv(versionInfo), kj::mv(props), - context.getActor(), isDynamicDispatch), - kj::mv(signal)); - }) - .then([this, &context, &wrappedResponse = *wrappedResponse, workerTracer]( - api::DeferredProxy deferredProxy) { - TRACE_EVENT("workerd", "WorkerEntrypoint::request() deferred proxy step", - PERFETTO_FLOW_FROM_POINTER(this)); - proxyTask = kj::mv(deferredProxy.proxyTask); - KJ_IF_SOME(t, workerTracer) { - auto httpResponseStatus = wrappedResponse.getHttpResponseStatus(); - if (httpResponseStatus != 0) { - t.setReturn(context.now(), tracing::FetchResponseInfo(httpResponseStatus)); - } else { - t.setReturn(context.now()); - } - } - }) - .catch_([this, &context](kj::Exception&& exception) mutable -> kj::Promise { - TRACE_EVENT("workerd", "WorkerEntrypoint::request() catch", PERFETTO_FLOW_FROM_POINTER(this)); - // Log JS exceptions to the JS console, if inspector is attached. This also has the effect of - // logging internal errors to syslog. - loggedExceptionEarlier = true; - context.logUncaughtExceptionAsync(UncaughtExceptionSource::REQUEST_HANDLER, exception.clone()); + // When the client disconnects, trigger an abort on request.signal, unless the request has + // already completed normally, or failed with an exception. + // TODO(perf): Don't add a task to trigger the abort unless we know it has at least one + // listener. 
+ if (proxyTask == kj::none && !loggedExceptionEarlier && abortController != kj::none) { + auto ctrl = KJ_ASSERT_NONNULL(abortController).addRef(); + context.addWaitUntil(context.run([ctrl = kj::mv(ctrl)](Worker::Lock& lock) mutable { + ctrl->getSignal()->triggerAbort( + lock, JSG_KJ_EXCEPTION(DISCONNECTED, DOMAbortError, "The client has disconnected")); + })); + } - // Do not allow the exception to escape the isolate without waiting for the output gate to - // open. Note that in the success path, this is taken care of in `FetchEvent::respondWith()`. - return context.waitForOutputLocks().then( -#ifdef WORKERD_USE_PERFETTO - [exception = kj::mv(exception), - flow = PERFETTO_TERMINATING_FLOW_FROM_POINTER(this)]() mutable -> kj::Promise { - TRACE_EVENT("workerd", "WorkerEntrypoint::request() after output lock wait", flow); - return kj::mv(exception); - }); -#else - [exception = kj::mv(exception)]() mutable -> kj::Promise { - return kj::mv(exception); - }); -#endif // defined(WORKERD_USE_PERFETTO) - }) - .attach(kj::defer([this, incomingRequest = kj::mv(incomingRequest), &context]() mutable { - // The request has been canceled, but allow it to continue executing in the background. - if (context.isFailOpen()) { - // Fail-open behavior has been chosen, we'd better save an interface that we can use for - // that purpose later. - failOpenService = context.getSubrequestChannelNoChecks( - IoContext::NEXT_CLIENT_CHANNEL, false, kj::mv(cfBlobJson)); - } + // Release reference to the AbortController. + // Either the waitUntilTask holds a reference to it, or it will never be triggered at all. + abortController = kj::none; - if (proxyTask == kj::none && !loggedExceptionEarlier) { - // When the client disconnects, trigger an abort on request.signal, unless the request has - // already completed normally, or failed with an exception. - - // TODO(perf): Don't add a task to trigger the abort unless we know it has at least one - // listener. 
- KJ_IF_SOME(ctrl, abortController) { - context.addWaitUntil(context.run([ctrl = ctrl.addRef()](Worker::Lock& lock) mutable { - ctrl->getSignal()->triggerAbort( - lock, JSG_KJ_EXCEPTION(DISCONNECTED, DOMAbortError, "The client has disconnected")); - })); + incomingRequest->drain(waitUntilTasks, kj::mv(incomingRequest)); + }); + + KJ_TRY { + api::DeferredProxy deferredProxy = + co_await context.run([this, &context, method, url, &headers, &requestBody, + &wrappedResponse = *wrappedResponse, + entrypointName = entrypointName](Worker::Lock& lock) mutable { + TRACE_EVENT_END("workerd", PERFETTO_TRACK_FROM_POINTER(&context)); + TRACE_EVENT( + "workerd", "WorkerEntrypoint::request() run", PERFETTO_FLOW_FROM_POINTER(this)); + jsg::AsyncContextFrame::StorageScope traceScope = context.makeAsyncTraceScope(lock); + jsg::AsyncContextFrame::StorageScope userTraceScope = + context.makeUserAsyncTraceScope(lock); + + kj::Maybe> signal; + auto featureFlags = FeatureFlags::get(lock); + if (featureFlags.getEnableRequestSignal()) { + auto abortSignalFlag = featureFlags.getRequestSignalPassthrough() + ? api::AbortSignal::Flag::NONE + : api::AbortSignal::Flag::IGNORE_FOR_SUBREQUESTS; + jsg::Lock& js = lock; + signal.emplace( + abortController.emplace(js.alloc(js, abortSignalFlag)) + ->getSignal()); + } + + return lock.getGlobalScope().request(method, url, headers, requestBody, wrappedResponse, + cfBlobJson, lock, + lock.getExportedHandler(entrypointName, kj::mv(versionInfo), kj::mv(props), + context.getActor(), isDynamicDispatch), + kj::mv(signal)); + }); + + // Record the proxy task and the tracer return time on the success path. 
+ TRACE_EVENT("workerd", "WorkerEntrypoint::request() deferred proxy step", + PERFETTO_FLOW_FROM_POINTER(this)); + proxyTask = kj::mv(deferredProxy.proxyTask); + KJ_IF_SOME(t, workerTracer) { + auto httpResponseStatus = wrappedResponse->getHttpResponseStatus(); + if (httpResponseStatus != 0) { + t.setReturn(context.now(), tracing::FetchResponseInfo(httpResponseStatus)); + } else { + t.setReturn(context.now()); + } + } } - } + KJ_CATCH(exception) { + TRACE_EVENT( + "workerd", "WorkerEntrypoint::request() catch", PERFETTO_FLOW_FROM_POINTER(this)); + // Log JS exceptions to the JS console, if inspector is attached. This also has the effect + // of logging internal errors to syslog. + loggedExceptionEarlier = true; + context.logUncaughtExceptionAsync( + UncaughtExceptionSource::REQUEST_HANDLER, exception.clone()); + + // Do not allow the exception to escape the isolate without waiting for the output gate to + // open. Note that in the success path, this is taken care of in `FetchEvent::respondWith()`. + // If the gate is broken, that exception propagates and replaces the original. + co_await context.waitForOutputLocks(); + TRACE_EVENT("workerd", "WorkerEntrypoint::request() after output lock wait", + PERFETTO_TERMINATING_FLOW_FROM_POINTER(this)); + // Yield to give a pending cancellation (e.g., the caller dropping our promise because + // the upstream WebSocket was torn down) a chance to take effect before propagating to + // the final catch. The original `.then()` chain had an implicit yield point here where + // the chain crossed into the next `.then` after this catch; without it, downstream + // observers can mistake a canceled request for one that threw. + co_await kj::yield(); + kj::throwFatalException(kj::mv(exception)); + } + } // Above KJ_DEFER fires here: abort signal + drain. - // Release reference to the AbortController. - // Either the waitUntilTask holds a reference to it, or it will never be triggered at all. 
- abortController = kj::none; + // ----- Stage 3: Wait for the deferred-proxy task (if any). ----- - auto promise = incomingRequest->drain().attach(kj::mv(incomingRequest)); - waitUntilTasks.add(maybeAddGcPassForTest(context, kj::mv(promise))); - })) - .then([this, metrics = kj::mv(metricsForProxyTask)]() mutable -> kj::Promise { - TRACE_EVENT("workerd", "WorkerEntrypoint::request() finish proxying", - PERFETTO_TERMINATING_FLOW_FROM_POINTER(this)); - // Now that the IoContext is dropped (unless it had waitUntil()s), we can finish proxying - // without pinning it or the isolate into memory. KJ_IF_SOME(p, proxyTask) { - return p.catch_([metrics = kj::mv(metrics)](kj::Exception&& e) mutable -> kj::Promise { - metrics->reportFailure(e, RequestObserver::FailureSource::DEFERRED_PROXY); - return kj::mv(e); - }); - } else { - return kj::READY_NOW; + TRACE_EVENT("workerd", "WorkerEntrypoint::request() finish proxying", + PERFETTO_TERMINATING_FLOW_FROM_POINTER(this)); + // Now that the IoContext is dropped (unless it had waitUntil()s), we can finish proxying + // without pinning it or the isolate into memory. + KJ_TRY { + co_await p; + } + KJ_CATCH(e) { + metricsForProxyTask->reportFailure(e, RequestObserver::FailureSource::DEFERRED_PROXY); + // See the matching yield in stage 2's catch. + co_await kj::yield(); + kj::throwFatalException(kj::mv(e)); + } } - }) - .attach(kj::defer([this]() mutable { - // If we're being cancelled, we need to make sure `proxyTask` gets canceled. - proxyTask = kj::none; - })) - .catch_([this, wrappedResponse = kj::mv(wrappedResponse), isActor, method, url, &headers, - &requestBody, metrics = kj::mv(metricsForCatch), - workerTracer](kj::Exception&& exception) mutable -> kj::Promise { - // Don't return errors to end user. + } + KJ_CATCH(exception) { + // ----- Stage 4: Handle whatever exception escaped the stages above. 
----- + TRACE_EVENT("workerd", "WorkerEntrypoint::request() exception", PERFETTO_TERMINATING_FLOW_FROM_POINTER(this)); @@ -468,76 +525,76 @@ kj::Promise WorkerEntrypoint::request(kj::HttpMethod method, } if (wrappedResponse->isSent()) { - // We can't fail open if the response was already sent, so set `failOpenService` null so that - // that branch isn't taken below. + // Can't fail open if a response was already started. failOpenService = kj::none; } + auto sendSyntheticStatus = [&](uint statusCode, kj::StringPtr statusText) { + if (wrappedResponse->isSent()) return; + kj::HttpHeaders errorHeaders(threadContext.getHeaderTable()); + wrappedResponse->send(statusCode, statusText, errorHeaders, static_cast(0)); + KJ_IF_SOME(t, workerTracer) { + t.setReturn(kj::none, tracing::FetchResponseInfo(wrappedResponse->getHttpResponseStatus())); + } + }; + + // Decide what to do with the exception. Exactly one of these branches runs: + // 1. Actor -> tunnel exception back to the caller. + // 2. Fail-open service configured -> retry the request through it. + // 3. `tunnelExceptions` set (worker-to-worker) -> tunnel exception back to the caller. + // 4. Otherwise -> synthesize a 5xx response. + if (isActor) { - // We want to tunnel exceptions from actors back to the caller. // TODO(cleanup): We'd really like to tunnel exceptions any time a worker is calling another // worker, not just for actors (and W2W below), but getting that right will require cleaning // up error handling more generally. - return exceptionToPropagate(isInternalException, kj::mv(exception)); - } else KJ_IF_SOME(service, failOpenService) { - // Fall back to origin. + auto propagated = exceptionToPropagate(isInternalException, kj::mv(exception)); + // See the matching yield in stage 2's catch. + co_await kj::yield(); + kj::throwFatalException(kj::mv(propagated)); + } + KJ_IF_SOME(service, failOpenService) { // We're catching the exception, but metrics should still indicate an exception. 
- metrics->reportFailure(exception); + metricsForCatch->reportFailure(exception); - auto promise = kj::evalNow([&] { - auto promise = service.get()->request(method, url, headers, requestBody, *wrappedResponse); - metrics->setFailedOpen(true); - return promise.attach(kj::mv(service)); - }); - return promise.catch_([this, wrappedResponse = kj::mv(wrappedResponse), workerTracer, - metrics = kj::mv(metrics)](kj::Exception&& e) mutable { - metrics->setFailedOpen(false); + auto serviceOwn = kj::mv(service); + metricsForCatch->setFailedOpen(true); + KJ_TRY { + co_await serviceOwn->request(method, url, headers, requestBody, *wrappedResponse); + } + KJ_CATCH(e) { + metricsForCatch->setFailedOpen(false); + // Avoid logging recognized external errors here, such as invalid headers returned from + // the server. if (e.getType() != kj::Exception::Type::DISCONNECTED && - // Avoid logging recognized external errors here, such as invalid headers returned from - // the server. !jsg::isTunneledException(e.getDescription()) && !jsg::isDoNotLogException(e.getDescription())) { LOG_EXCEPTION("failOpenFallback", e); } - if (!wrappedResponse->isSent()) { - kj::HttpHeaders headers(threadContext.getHeaderTable()); - wrappedResponse->send(500, "Internal Server Error", headers, static_cast(0)); - KJ_IF_SOME(t, workerTracer) { - t.setReturn(kj::none, tracing::FetchResponseInfo(500)); - } - } - }); - } else if (tunnelExceptions) { - // Like with the isActor check, we want to return exceptions back to the caller. - // We don't want to handle this case the same as the isActor case though, since we want - // fail-open to operate normally, which means this case must happen after fail-open handling. - return exceptionToPropagate(isInternalException, kj::mv(exception)); - } else { - // Return error. - - // We're catching the exception and replacing it with 5xx, but metrics should still indicate - // an exception. 
- metrics->reportFailure(exception); - - // We can't send an error response if a response was already started; we can only drop the - // connection in that case. - if (!wrappedResponse->isSent()) { - kj::HttpHeaders headers(threadContext.getHeaderTable()); - if (exception.getType() == kj::Exception::Type::OVERLOADED) { - wrappedResponse->send(503, "Service Unavailable", headers, static_cast(0)); - } else { - wrappedResponse->send(500, "Internal Server Error", headers, static_cast(0)); - } - KJ_IF_SOME(t, workerTracer) { - t.setReturn( - kj::none, tracing::FetchResponseInfo(wrappedResponse->getHttpResponseStatus())); - } + sendSyntheticStatus(500, "Internal Server Error"_kj); } + co_return; + } - return kj::READY_NOW; + if (tunnelExceptions) { + // Like with the isActor check, we want to return exceptions back to the caller. This case + // must happen after fail-open handling so that fail-open continues to operate normally. + auto propagated = exceptionToPropagate(isInternalException, kj::mv(exception)); + // See the matching yield in stage 2's catch. + co_await kj::yield(); + kj::throwFatalException(kj::mv(propagated)); } - }); + + // We're catching the exception and replacing it with 5xx, but metrics should still indicate + // an exception. + metricsForCatch->reportFailure(exception); + if (exception.getType() == kj::Exception::Type::OVERLOADED) { + sendSyntheticStatus(503, "Service Unavailable"_kj); + } else { + sendSyntheticStatus(500, "Internal Server Error"_kj); + } + } } kj::Promise WorkerEntrypoint::connect(kj::StringPtr host, @@ -557,8 +614,7 @@ kj::Promise WorkerEntrypoint::connect(kj::StringPtr host, KJ_DEFER({ // Since we called incomingRequest->delivered, we are obliged to call `drain()`. 
- auto promise = incomingRequest->drain().attach(kj::mv(incomingRequest)); - waitUntilTasks.add(maybeAddGcPassForTest(context, kj::mv(promise))); + incomingRequest->drain(waitUntilTasks, kj::mv(incomingRequest)); }); // connect_pass_through feature flag means we should just forward the connect request on to // the global outbound. @@ -588,10 +644,11 @@ kj::Promise WorkerEntrypoint::connect(kj::StringPtr host, auto metricsForCatch = kj::addRef(incomingRequest->getMetrics()); - return context - .run( - [this, &headers, &context, &connection, &response, entrypointName = entrypointName, - versionInfo = kj::mv(versionInfo), host = kj::str(host)](Worker::Lock& lock) mutable { + return wrapWithCanceler( + context + .run([this, &headers, &context, &connection, &response, entrypointName = entrypointName, + versionInfo = kj::mv(versionInfo), + host = kj::str(host)](Worker::Lock& lock) mutable { jsg::AsyncContextFrame::StorageScope traceScope = context.makeAsyncTraceScope(lock); jsg::AsyncContextFrame::StorageScope userTraceScope = context.makeUserAsyncTraceScope(lock); @@ -599,12 +656,12 @@ kj::Promise WorkerEntrypoint::connect(kj::StringPtr host, lock.getExportedHandler(entrypointName, kj::mv(versionInfo), kj::mv(props), context.getActor(), isDynamicDispatch)); }) - .then([&context, workerTracer]() { + .then([&context, workerTracer]() { KJ_IF_SOME(t, workerTracer) { t.setReturn(context.now()); } }) - .catch_([this, &context](kj::Exception&& exception) mutable -> kj::Promise { + .catch_([this, &context](kj::Exception&& exception) mutable -> kj::Promise { // Log JS exceptions to the JS console, if inspector is attached. This also has the effect of // logging internal errors to syslog. 
loggedExceptionEarlier = true; @@ -617,13 +674,12 @@ kj::Promise WorkerEntrypoint::connect(kj::StringPtr host, return kj::mv(exception); }); }) - .attach(kj::defer([this, incomingRequest = kj::mv(incomingRequest), &context]() mutable { + .attach(kj::defer([this, incomingRequest = kj::mv(incomingRequest)]() mutable { // The request has been canceled, but allow it to continue executing in the background. - auto promise = incomingRequest->drain().attach(kj::mv(incomingRequest)); - waitUntilTasks.add(maybeAddGcPassForTest(context, kj::mv(promise))); + incomingRequest->drain(waitUntilTasks, kj::mv(incomingRequest)); })) - .catch_([this, isActor, &response, metrics = kj::mv(metricsForCatch), workerTracer]( - kj::Exception&& exception) mutable -> kj::Promise { + .catch_([this, isActor, &response, metrics = kj::mv(metricsForCatch), workerTracer]( + kj::Exception&& exception) mutable -> kj::Promise { // Don't return errors to end user. auto isInternalException = !jsg::isTunneledException(exception.getDescription()) && !jsg::isDoNotLogException(exception.getDescription()); @@ -663,7 +719,7 @@ kj::Promise WorkerEntrypoint::connect(kj::StringPtr host, return kj::READY_NOW; } - }); + })); } kj::Promise WorkerEntrypoint::prewarm(kj::StringPtr url) { @@ -717,19 +773,13 @@ kj::Promise WorkerEntrypoint::runScheduled( entrypointName, kj::mv(versionInfo), kj::mv(props), context.getActor())); })); - static auto constexpr waitForFinished = [](IoContext& context, - kj::Own request) + static auto constexpr waitForFinished = [](kj::Own request) -> kj::Promise { TRACE_EVENT("workerd", "WorkerEntrypoint::runScheduled() waitForFinished()"); - auto scheduledResult = co_await request->finishScheduled(); - bool completed = scheduledResult == EventOutcome::OK; - co_return WorkerInterface::ScheduledResult{.retry = context.shouldRetryScheduled(), - .outcome = completed ? 
context.waitUntilStatus() : scheduledResult}; + return request->finishScheduled(kj::mv(request)); }; - auto promise = waitForFinished(context, kj::mv(incomingRequest)); - - return maybeAddGcPassForTest(context, kj::mv(promise)); + return wrapWithCanceler(waitForFinished(kj::mv(incomingRequest))); } kj::Promise WorkerEntrypoint::runAlarmImpl( @@ -781,7 +831,7 @@ kj::Promise WorkerEntrypoint::runAlarmImpl( KJ_DEFER({ // The alarm has finished but allow the request to continue executing in the background. - waitUntilTasks.add(incomingRequest->drain().attach(kj::mv(incomingRequest))); + incomingRequest->drain(waitUntilTasks, kj::mv(incomingRequest)); }); try { @@ -849,8 +899,8 @@ kj::Promise WorkerEntrypoint::runAlarm( this->incomingRequest = kj::none; auto& context = incomingRequest->getContext(); - auto promise = runAlarmImpl(kj::mv(incomingRequest), scheduledTime, retryCount); - auto result = co_await maybeAddGcPassForTest(context, kj::mv(promise)); + auto result = + co_await wrapWithCanceler(runAlarmImpl(kj::mv(incomingRequest), scheduledTime, retryCount)); KJ_IF_SOME(t, context.getWorkerTracer()) { t.setReturn(context.now()); } @@ -897,34 +947,19 @@ kj::Promise WorkerEntrypoint::test() { })); static auto constexpr waitForFinished = - [](IoContext& context, kj::Own request) -> kj::Promise { + [](kj::Own request) -> kj::Promise { TRACE_EVENT("workerd", "WorkerEntrypoint::test() waitForFinished()"); - auto scheduledResult = co_await request->finishScheduled(); - - if (scheduledResult == EventOutcome::EXCEPTION) { - // If the test handler throws an exception (without aborting - just a regular exception), - // then `outcome` ends up being EventOutcome::EXCEPTION, which causes us to return false. - // But in that case we are separately relying on the exception being logged as an uncaught - // exception, rather than throwing it. - // This is why we don't rethrow the exception but rather log it as an uncaught exception. 
- try { - co_await context.onAbort(); - } catch (...) { - auto exception = kj::getCaughtExceptionAsKj(); - KJ_LOG(ERROR, exception); - } - } + + auto scheduledResult = co_await request->finishScheduled(kj::mv(request)); // Not adding a return event here – we only provide rudimentary tracing support for test events // (enough so that we can get logs/spans from them in wd-tests), so this is not needed in // practice. - bool completed = scheduledResult == EventOutcome::OK; - auto outcome = completed ? context.waitUntilStatus() : scheduledResult; - co_return outcome == EventOutcome::OK; + co_return scheduledResult.outcome == EventOutcome::OK; }; - return maybeAddGcPassForTest(context, waitForFinished(context, kj::mv(incomingRequest))); + return wrapWithCanceler(waitForFinished(kj::mv(incomingRequest))); } kj::Promise WorkerEntrypoint::customEvent( @@ -934,8 +969,6 @@ kj::Promise WorkerEntrypoint::customEvent( kj::mv(KJ_REQUIRE_NONNULL(this->incomingRequest, "customEvent() can only be called once")); this->incomingRequest = kj::none; - auto& context = incomingRequest->getContext(); - // Set event info BEFORE calling run() to ensure onset event is reported before // any user code executes (particularly important for actors whose constructors may run // during delivered()). @@ -943,52 +976,10 @@ kj::Promise WorkerEntrypoint::customEvent( t.setEventInfo(*incomingRequest, event->getEventInfo()); } - auto promise = event - ->run(kj::mv(incomingRequest), entrypointName, kj::mv(versionInfo), - kj::mv(props), waitUntilTasks, isDynamicDispatch) - .attach(kj::mv(event)); - - // TODO(cleanup): In theory `context` may have been destroyed by now if `event->run()` dropped - // the `incomingRequest` synchronously. No current implementation does that, and - // maybeAddGcPassForTest() is a no-op outside of tests, so I'm ignoring the theoretical problem - // for now. 
Otherwise we will need to `atomicAddRef()` the `Worker` at some point earlier on - // but I'd like to avoid that in the non-test case. - return maybeAddGcPassForTest(context, kj::mv(promise)); -} - -#ifdef KJ_DEBUG -void requestGc(const Worker& worker) { - TRACE_EVENT("workerd", "Debug: requestGc()"); - jsg::runInV8Stack([&](jsg::V8StackScope& stackScope) { - auto& isolate = worker.getIsolate(); - auto lock = isolate.getApi().lock(stackScope); - lock->requestGcForTesting(); - }); -} - -template -kj::Promise addGcPassForTest(IoContext& context, kj::Promise promise) { - TRACE_EVENT("workerd", "Debug: addGcPassForTest"); - auto worker = kj::atomicAddRef(context.getWorker()); - if constexpr (kj::isSameType()) { - co_await promise; - requestGc(*worker); - } else { - auto ret = co_await promise; - requestGc(*worker); - co_return kj::mv(ret); - } -} -#endif - -template -kj::Promise WorkerEntrypoint::maybeAddGcPassForTest(IoContext& context, kj::Promise promise) { -#ifdef KJ_DEBUG - if (isPredictableModeForTest()) { - return addGcPassForTest(context, kj::mv(promise)); - } -#endif - return kj::mv(promise); + return wrapWithCanceler(event + ->run(kj::mv(incomingRequest), entrypointName, kj::mv(versionInfo), + kj::mv(props), waitUntilTasks, isDynamicDispatch) + .attach(kj::mv(event))); } } // namespace @@ -1008,12 +999,13 @@ kj::Own newWorkerEntrypoint(ThreadContext& threadContext, kj::Maybe cfBlobJson, kj::Maybe versionInfo, kj::Maybe maybeTriggerInvocationSpan, - bool isDynamicDispatch) { + bool isDynamicDispatch, + kj::Maybe> accessInfo) { return WorkerEntrypoint::construct(threadContext, kj::mv(worker), kj::mv(entrypointName), kj::mv(props), kj::mv(actor), kj::mv(limitEnforcer), kj::mv(ioContextDependency), kj::mv(ioChannelFactory), kj::mv(metrics), waitUntilTasks, tunnelExceptions, kj::mv(workerTracer), kj::mv(cfBlobJson), kj::mv(versionInfo), - kj::mv(maybeTriggerInvocationSpan), isDynamicDispatch); + kj::mv(maybeTriggerInvocationSpan), isDynamicDispatch, 
kj::mv(accessInfo)); } } // namespace workerd diff --git a/src/workerd/io/worker-entrypoint.h b/src/workerd/io/worker-entrypoint.h index e7ed1c1dfa8..f0cfd1b6498 100644 --- a/src/workerd/io/worker-entrypoint.h +++ b/src/workerd/io/worker-entrypoint.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include @@ -47,6 +48,9 @@ kj::Own newWorkerEntrypoint(ThreadContext& threadContext, // subtask of another request. If it is kj::none, then this invocation is a top-level // invocation. kj::Maybe maybeTriggerInvocationSpan = kj::none, - bool isDynamicDispatch = false); + bool isDynamicDispatch = false, + // Per-request Cloudflare Access info. Supplied by the embedding application; standalone + // workerd passes kj::none, which causes `ctx.access` to be `undefined` in JS. + kj::Maybe> accessInfo = kj::none); } // namespace workerd diff --git a/src/workerd/io/worker-fs.c++ b/src/workerd/io/worker-fs.c++ index ae67afc757a..c718608d156 100644 --- a/src/workerd/io/worker-fs.c++ +++ b/src/workerd/io/worker-fs.c++ @@ -559,11 +559,11 @@ class DirectoryBase final: public Directory { for (auto& entry: entries) { KJ_SWITCH_ONEOF(entry.value) { KJ_CASE_ONEOF(file, kj::Rc) { - tracker.trackField("file", *file.get()); + tracker.trackField("file", *file); break; } KJ_CASE_ONEOF(dir, kj::Rc) { - tracker.trackField("directory", *dir.get()); + tracker.trackField("directory", *dir); break; } KJ_CASE_ONEOF(link, kj::Rc) { @@ -692,7 +692,7 @@ class FileImpl final: public File { auto src = data.slice(offset); KJ_DASSERT(src.size() > 0); if (buffer.size() > src.size()) { - buffer.first(src.size()).copyFrom(src); + buffer.write(src); return src.size(); } buffer.copyFrom(src.first(buffer.size())); @@ -741,8 +741,9 @@ class FileImpl final: public File { if (size > owned.data.size()) { // To grow the file, we need to allocate a new array, copy the old data over, // and replace the original. 
- newData.first(owned.data.size()).copyFrom(owned.data); - newData.slice(owned.data.size()).fill(0); + auto remaining = newData.asPtr(); + remaining.write(owned.data); + remaining.fill(0); } else { newData.asPtr().copyFrom(owned.data.first(size)); } @@ -1153,14 +1154,14 @@ kj::OneOf File::readAllText(jsg::Lock& js) { return js.str(data); } -kj::OneOf File::readAllBytes(jsg::Lock& js) { +kj::OneOf> File::readAllBytes(jsg::Lock& js) { auto info = stat(js); KJ_DASSERT(info.type == FsType::FILE); - auto backing = jsg::BackingStore::alloc(js, info.size); + auto u8 = jsg::JsUint8Array::create(js, info.size); if (info.size > 0) { - KJ_ASSERT(read(js, 0, backing) == info.size); + KJ_ASSERT(read(js, 0, u8.asArrayPtr()) == info.size); } - return jsg::BufferSource(js, kj::mv(backing)); + return u8.addRef(js); } void Directory::Builder::add( @@ -1758,27 +1759,35 @@ class DevRandomFile final: public File { // - inspector reporting // - structured logging // - stdio output otherwise +// +// SECURITY: The `bytes` parameter may alias mutable state (e.g. StdioFile::lineBuffer's +// heap backing store). We MUST copy the bytes into an owned kj::String before any JS +// property access, because property getters on globalThis.console / console.log can run +// arbitrary user code that may re-enter StdioFile::write and invalidate the buffer. void writeStdio(jsg::Lock& js, VirtualFileSystem::Stdio type, kj::ArrayPtr bytes) { auto chars = bytes.asChars(); size_t endPos = chars.size(); if (endPos > 0 && chars[endPos - 1] == '\n') endPos--; + // Own the line text up-front so that `bytes`/`chars` are no longer used after this point. + // This prevents use-after-free if a JS getter re-enters and reallocates the caller's buffer. 
+ kj::String line = kj::str(chars.first(endPos)); + KJ_IF_SOME(console, js.global().get(js, "console"_kj).tryCast()) { auto method = console.get(js, "log"_kj); if (method.isFunction()) { v8::Local methodVal(method); auto methodFunc = jsg::JsFunction(methodVal.As()); - kj::String outputStr; auto isolate = &Worker::Isolate::from(js); auto prefix = type == VirtualFileSystem::Stdio::OUT ? isolate->getStdoutPrefix() : isolate->getStderrPrefix(); - if (endPos == 0) { + if (line.size() == 0) { methodFunc.call(js, console, js.str(prefix)); } else if (prefix.size() > 0) { - methodFunc.call(js, console, js.str(kj::str(prefix, " "_kj, chars.first(endPos)))); + methodFunc.call(js, console, js.str(kj::str(prefix, " "_kj, line))); } else { - methodFunc.call(js, console, js.str(chars.first(endPos))); + methodFunc.call(js, console, js.str(line)); } return; } @@ -1864,10 +1873,14 @@ class StdioFile final: public File { auto lineData = buffer.slice(pos, newlinePos + 1); if (!lineBuffer.empty()) { - // We have buffered data - append the line data to it + // We have buffered data - append the line data to it. + // SECURITY: Move lineBuffer into a local before calling writeStdio so that + // re-entrant writes (via JS getters on console/console.log) operate on a + // fresh buffer and cannot free the backing store we pass to writeStdio. lineBuffer.addAll(lineData); - writeStdio(js, type, lineBuffer.asPtr()); - lineBuffer.clear(); + auto toFlush = kj::mv(lineBuffer); + lineBuffer = kj::Vector(); + writeStdio(js, type, toFlush.asPtr()); } else { // No buffered data - log this line directly writeStdio(js, type, lineData); @@ -1944,10 +1957,13 @@ class StdioFile final: public File { self.microtaskScheduled = false; if (!self.lineBuffer.empty()) { + // SECURITY: Move lineBuffer into a local before calling writeStdio so that + // re-entrant writes cannot free the backing store during the call. 
+ auto toFlush = kj::mv(self.lineBuffer); + self.lineBuffer = kj::Vector(); if (IoContext::hasCurrent()) { - writeStdio(js, self.type, self.lineBuffer.asPtr()); + writeStdio(js, self.type, toFlush.asPtr()); } - self.lineBuffer.clear(); } }); }); diff --git a/src/workerd/io/worker-fs.h b/src/workerd/io/worker-fs.h index 3bc929e8749..484d5c463f6 100644 --- a/src/workerd/io/worker-fs.h +++ b/src/workerd/io/worker-fs.h @@ -220,7 +220,8 @@ class File: public kj::Refcounted { kj::OneOf readAllText(jsg::Lock& js) KJ_WARN_UNUSED_RESULT; // Reads all the contents of the file as a Uint8Array. - kj::OneOf readAllBytes(jsg::Lock& js) KJ_WARN_UNUSED_RESULT; + kj::OneOf> readAllBytes( + jsg::Lock& js) KJ_WARN_UNUSED_RESULT; // Reads data from the file at the given offset into the given buffer. virtual uint32_t read(jsg::Lock& js, uint32_t offset, kj::ArrayPtr buffer) const = 0; diff --git a/src/workerd/io/worker-interface.c++ b/src/workerd/io/worker-interface.c++ index fc070fe3d4d..06f124c0946 100644 --- a/src/workerd/io/worker-interface.c++ +++ b/src/workerd/io/worker-interface.c++ @@ -288,7 +288,7 @@ kj::Promise RevocableWebSocketWorkerInterface::connect(kj::StringPtr host, return kj::READY_NOW; }).eagerlyEvaluate(nullptr); - return worker.connect(host, headers, *wrappedConnection.get(), response, kj::mv(settings)) + return worker.connect(host, headers, *wrappedConnection, response, kj::mv(settings)) .attach(kj::mv(wrappedConnection), kj::mv(revokeTask)); } diff --git a/src/workerd/io/worker-modules.c++ b/src/workerd/io/worker-modules.c++ index 53da48d359a..2c364469208 100644 --- a/src/workerd/io/worker-modules.c++ +++ b/src/workerd/io/worker-modules.c++ @@ -13,7 +13,6 @@ kj::Own createPyodideMetadataState( auto mainModule = kj::str(source.mainModule); auto modules = source.modules.asPtr(); int numFiles = 0; - int numRequirements = 0; for (auto& module: modules) { KJ_SWITCH_ONEOF(module.content) { KJ_CASE_ONEOF(content, Worker::Script::TextModule) { @@ -37,8 +36,8 @@ kj::Own 
createPyodideMetadataState( KJ_CASE_ONEOF(content, Worker::Script::PythonModule) { numFiles++; } - KJ_CASE_ONEOF(content, Worker::Script::PythonRequirement) { - numRequirements++; + KJ_CASE_ONEOF(content, Worker::Script::ObsoletePythonRequirement) { + // No longer supported; ignored. } KJ_CASE_ONEOF(content, Worker::Script::CapnpModule) { // Not exposed to Python. @@ -48,7 +47,6 @@ kj::Own createPyodideMetadataState( auto names = kj::heapArrayBuilder(numFiles); auto contents = kj::heapArrayBuilder>(numFiles); - auto requirements = kj::heapArrayBuilder(numRequirements); for (auto& module: modules) { KJ_SWITCH_ONEOF(module.content) { KJ_CASE_ONEOF(content, Worker::Script::TextModule) { @@ -77,8 +75,8 @@ kj::Own createPyodideMetadataState( names.add(kj::str(module.name)); contents.add(kj::heapArray(content.body.asBytes())); } - KJ_CASE_ONEOF(content, Worker::Script::PythonRequirement) { - requirements.add(kj::str(module.name)); + KJ_CASE_ONEOF(content, Worker::Script::ObsoletePythonRequirement) { + // No longer supported; ignored. } KJ_CASE_ONEOF(content, Worker::Script::CapnpModule) { // Not exposeud to Python. @@ -94,7 +92,6 @@ kj::Own createPyodideMetadataState( kj::mv(mainModule), names.finish(), contents.finish(), - requirements.finish(), kj::str(pythonRelease.getPyodide()), kj::str(pythonRelease.getPackages()), kj::mv(lock), diff --git a/src/workerd/io/worker-modules.h b/src/workerd/io/worker-modules.h index 74bb517c1d4..c0b695935c3 100644 --- a/src/workerd/io/worker-modules.h +++ b/src/workerd/io/worker-modules.h @@ -214,7 +214,7 @@ static kj::Arc newWorkerModuleRegistry( // bundleBuilder.addEsmModule(def.name, entry); // break; } - KJ_CASE_ONEOF(content, Worker::Script::PythonRequirement) { + KJ_CASE_ONEOF(content, Worker::Script::ObsoletePythonRequirement) { // Handled separately break; } @@ -362,7 +362,7 @@ kj::Maybe tryCompileLegacyModule(jsg::Lock& js, // Nothing to do. Handled elsewhere. 
return kj::none; } - KJ_CASE_ONEOF(content, Worker::Script::PythonRequirement) { + KJ_CASE_ONEOF(content, Worker::Script::ObsoletePythonRequirement) { // Nothing to do. Handled elsewhere. return kj::none; } @@ -416,7 +416,7 @@ kj::Array compileServiceWorkerGlobals(jsg::Lock& KJ_CASE_ONEOF(content, Worker::Script::PythonModule) { KJ_FAIL_REQUIRE("modules not supported with mainScript"); } - KJ_CASE_ONEOF(content, Worker::Script::PythonRequirement) { + KJ_CASE_ONEOF(content, Worker::Script::ObsoletePythonRequirement) { KJ_FAIL_REQUIRE("modules not supported with mainScript"); } KJ_CASE_ONEOF(content, Worker::Script::CapnpModule) { @@ -492,16 +492,17 @@ void registerPythonCommonModules(jsg::Lock& lock, kj::mv(maybeSnapshot), featureFlags)), jsg::ModuleRegistry::Type::INTERNAL); - // Inject packages tar file - modules.addBuiltinModule("pyodide-internal:packages_tar_reader", "export default { }"_kj, - workerd::jsg::ModuleRegistry::Type::INTERNAL, {}); - // Inject artifact bundler. modules.addBuiltinModule("pyodide-internal:artifacts", lock.alloc(kj::mv(artifacts).orDefault( []() { return api::pyodide::ArtifactBundler::makeDisabledBundler(); })), jsg::ModuleRegistry::Type::INTERNAL); + // Inject the Python stdlib packages that are extracted and embedded directly in the bundle. + modules.addBuiltinModule("pyodide-internal:packages", + api::pyodide::EmbeddedPackagesReader::fromBundle(lock, pyodideBundle), + jsg::ModuleRegistry::Type::INTERNAL); + // Inject disk cache module modules.addBuiltinModule("pyodide-internal:disk_cache", kj::mv(diskCache).orDefault([&lock]() { return lock.alloc(); }), diff --git a/src/workerd/io/worker-source.h b/src/workerd/io/worker-source.h index e29a477a57a..06a4c0e2610 100644 --- a/src/workerd/io/worker-source.h +++ b/src/workerd/io/worker-source.h @@ -56,9 +56,8 @@ struct WorkerSource { kj::StringPtr body; }; - // PythonRequirement is a variant of ModuleContent, but has no body. 
The module name specifies - // a Python package to be provided by the system. - struct PythonRequirement {}; + // This is no longer supported by Python, but it used to define built-in packages. + struct ObsoletePythonRequirement {}; // CapnpModule is a .capnp Cap'n Proto schema file. The original text of the file isn't provided; // instead, `ModulesSource::capnpSchemas` contains all the capnp schemas needed by the Worker, @@ -77,7 +76,7 @@ struct WorkerSource { WasmModule, JsonModule, PythonModule, - PythonRequirement, + ObsoletePythonRequirement, CapnpModule>; struct Module { @@ -116,7 +115,7 @@ struct WorkerSource { KJ_CASE_ONEOF(content, PythonModule) { result.content = content; } - KJ_CASE_ONEOF(content, PythonRequirement) { + KJ_CASE_ONEOF(content, ObsoletePythonRequirement) { result.content = content; } KJ_CASE_ONEOF(content, CapnpModule) { diff --git a/src/workerd/io/worker.c++ b/src/workerd/io/worker.c++ index 2394e065b64..8f9227a0a2e 100644 --- a/src/workerd/io/worker.c++ +++ b/src/workerd/io/worker.c++ @@ -3,11 +3,13 @@ // https://opensource.org/licenses/Apache-2.0 #include "actor-cache.h" +#include "stored-value.h" #include #include #include #include // for api::StreamEncoding +#include #include #include #include @@ -653,8 +655,8 @@ struct Worker::Isolate::Impl { void setupContext(v8::Local context) { // The V8Inspector implements the `console` object. 
KJ_IF_SOME(i, impl.inspector) { - i.get()->contextCreated( - v8_inspector::V8ContextInfo(context, 1, jsg::toInspectorStringView("Worker"))); + i.get()->contextCreated(v8_inspector::V8ContextInfo( + context, 1, jsg::toInspectorStringView("Worker").stringView)); } Worker::setupContext(*lock, context, loggingOptions); } @@ -986,7 +988,7 @@ struct Worker::Script::Impl { kj::Maybe getNewModuleRegistry() const { return maybeNewModuleRegistry.map( - [](auto& r) -> const workerd::jsg::modules::ModuleRegistry& { return *r.get(); }); + [](auto& r) -> const workerd::jsg::modules::ModuleRegistry& { return *r; }); } }; @@ -1085,6 +1087,12 @@ struct HeapSnapshotDeleter: public kj::Disposer { }; const HeapSnapshotDeleter HeapSnapshotDeleter::INSTANCE; +void messageCallback(v8::Local msg, v8::Local) { + auto scriptLocation = kj::str(msg->GetScriptResourceName(), ":", msg->GetStartPosition()); + auto message = kj::str(msg->Get()); + KJ_LOG(ERROR, "NOSENTRY V8 message callback", message, scriptLocation, kj::getStackTrace()); +} + } // namespace Worker::Isolate::Isolate(kj::Own apiParam, @@ -1176,6 +1184,10 @@ Worker::Isolate::Isolate(kj::Own apiParam, }); } + // If no message listeners are registered, then the default message reporter writes errors to + // stdout. Add a callback that instead writes the message to KJ_LOG + lock->v8Isolate->AddMessageListener(messageCallback); + // By default, V8's memory pressure level is "none". This tells V8 that no one else on the // machine is competing for memory so it might as well use all it wants and be lazy about GC. // @@ -1439,8 +1451,8 @@ Worker::Script::Script(kj::Own isolateParam, // (For modules, the context was already registered by `setupContext()`, above. 
KJ_IF_SOME(i, isolate->impl->inspector) { if (!modular) { - i.get()->contextCreated( - v8_inspector::V8ContextInfo(context, 1, jsg::toInspectorStringView("Compiler"))); + i.get()->contextCreated(v8_inspector::V8ContextInfo( + context, 1, jsg::toInspectorStringView("Compiler").stringView)); } } else { } // Here to squash a compiler warning @@ -1741,9 +1753,7 @@ void Worker::setupContext( setWebAssemblyModuleHasInstance(lock, context); // Shim WebAssembly.instantiate to detect modules exporting "__instance_signal". - if (util::Autogate::isEnabled(util::AutogateKey::WASM_SHUTDOWN_SIGNAL_SHIM)) { - shimWebAssemblyInstantiate(lock, context); - } + shimWebAssemblyInstantiate(lock, context); // We replace the default V8 console.log(), etc. methods, to give the worker access to // logged content, and log formatted values to stdout/stderr locally. @@ -2169,14 +2179,8 @@ void Worker::handleLog(jsg::Lock& js, // Determine whether `obj` is constructed using `{}` or `new Object()`. This ensures // we don't serialise values like Promises to JSON. -#if V8_MAJOR_VERSION >= 15 || (V8_MAJOR_VERSION == 14 && V8_MINOR_VERSION >= 7) if (obj->GetPrototype()->SameValue(freshObj->GetPrototype()) || obj->GetPrototype()->IsNull()) { -#else - // TODO(cleanup): Remove when unnecessary. - if (obj->GetPrototypeV2()->SameValue(freshObj->GetPrototypeV2()) || - obj->GetPrototypeV2()->IsNull()) { -#endif shouldSerialiseToJson = true; } @@ -2825,7 +2829,9 @@ kj::Promise Worker::AsyncLock::whenThreadIdle() { continue; } - co_await kj::yieldUntilQueueEmpty(); + // yieldUntilWouldSleep() waits for both the queue and event port signals, + // so cross-thread fulfiller wakeups are processed before we declare idle. 
+ co_await kj::yieldUntilWouldSleep(); if (currentWaiter == nullptr) { co_return; @@ -3022,7 +3028,7 @@ class Worker::Isolate::InspectorChannelImpl final: public v8_inspector::V8Inspec ExceptionOrDuration limitErrorOrTime = 0 * kj::NANOSECONDS; { auto limitScope = isolate.getLimitEnforcer().enterInspectorJs(*lock, limitErrorOrTime); - session.dispatchProtocolMessage(jsg::toInspectorStringView(message)); + session.dispatchProtocolMessage(jsg::toInspectorStringView(message).stringView); } // Run microtasks in case the user made an async call. @@ -3643,6 +3649,7 @@ struct Worker::Actor::Impl { kj::OneOf> transient; kj::Maybe> actorCache; + kj::Maybe storageExternalHandler; kj::Maybe> ctxObject; @@ -3691,9 +3698,11 @@ struct Worker::Actor::Impl { auto timeout = 30 * kj::SECONDS; co_await timerChannel.afterLimitTimeout(timeout); - kj::throwFatalException(KJ_EXCEPTION(OVERLOADED, + auto e = KJ_EXCEPTION(OVERLOADED, "broken.outputGateBroken; jsg.Error: Durable Object storage operation exceeded " - "timeout which caused object to be reset.")); + "timeout which caused object to be reset."); + e.setDetail(WALL_TIME_LIMIT_DETAIL_ID, kj::heapArray(0)); + kj::throwFatalException(kj::mv(e)); } // Implements OutputGate::Hooks. @@ -3737,6 +3746,10 @@ struct Worker::Actor::Impl { // Handles output locks. OutputGate outputGate; + // All incoming requests are registered with this, so that work can be forcefully canceled when + // the Actor is aborted. + kj::Canceler abortCanceler; + // `ioContext` is initialized upon delivery of the first request. kj::Maybe> ioContext; @@ -3831,6 +3844,11 @@ struct Worker::Actor::Impl { actorCache = makeActorCache(self.worker->getIsolate().impl->actorCacheLru, outputGate, hooks, *metrics); } + + ~Impl() noexcept(false) { + // Don't cancel anything if we weren't actually aborted. 
+ abortCanceler.release(); + } }; kj::Promise Worker::takeAsyncLockWhenActorCacheReady( @@ -3917,7 +3935,12 @@ kj::Promise Worker::Actor::ensureConstructedImpl(IoContext& context, Actor // with starting the script, and also if we could save the status across hibernations. But // that would require some refactoring, and this RPC should (eventally) be local, so it's // not a huge deal. - auto status = co_await c.statusRequest(capnp::MessageSize{4, 0}).send(); + auto statusRequest = + c.statusRequest(capnp::MessageSize{4 + capnp::sizeInWords(), 0}); + KJ_IF_SOME(spanContext, context.getCurrentTraceSpan().toSpanContext()) { + spanContext.toCapnp(statusRequest.initSpanContext()); + } + auto status = co_await statusRequest.send(); containerRunning = status.getRunning(); } @@ -4005,6 +4028,37 @@ void Worker::Actor::shutdownActorCache(kj::Maybe error) { } } +void Worker::Actor::abort(const kj::Exception& error) { + KJ_IF_SOME(ctx, impl->ioContext) { + impl->metrics->shutdown(0, ctx->getLimitEnforcer()); + ctx->abort(error.clone()); + } else { + shutdownActorCache(error); + } + impl->shutdownFulfiller->fulfill(); + + // Now hard-cancel everything that might be using the actor. + // + // Canceling tasks can queue more tasks (especially drain() tasks), so keep canceling until + // nothing more is queued. + while (!impl->abortCanceler.isEmpty()) { + impl->abortCanceler.cancel(error); + } + + KJ_IF_SOME(ctx, impl->ioContext) { + if (ctx->hasCurrentIncomingRequest()) { + // This should never happen, but if it does we'll defer killing the ioContext for fear of + // creating UaFs. + DEBUG_FATAL_RELEASE_LOG(ERROR, "abortCanceler wasn't able to cancel all IncomingRequests"); + } else { + // Eagerly kill off the IoContext itself to ensure that all tasks are canceled, reentry + // callbacks are dead, etc. 
+ impl->metricsFlushLoopTask = kj::none; + impl->ioContext = kj::none; + } + } +} + kj::Promise Worker::Actor::onShutdown() { return impl->shutdownPromise.addBranch(); } @@ -4025,6 +4079,10 @@ kj::Promise Worker::Actor::onBroken() { return abortPromise; } +kj::Canceler& Worker::Actor::getAbortCanceler() { + return impl->abortCanceler; +} + const Worker::Actor::Id& Worker::Actor::getId() { return impl->actorId; } @@ -4076,6 +4134,26 @@ kj::Maybe Worker::Actor::getPersistent() { return impl->actorCache; } +StoredExternalHandler& Worker::Actor::getOrCreateStoredExternalHandler() { + KJ_IF_SOME(handler, impl->storageExternalHandler) { + return handler; + } + + KJ_IF_SOME(ac, impl->actorCache) { + KJ_IF_SOME(kv, ac->getSqliteKv()) { + return impl->storageExternalHandler.emplace(*ac, kv); + } + } + + JSG_FAIL_REQUIRE(DOMDataCloneError, + "Storing RPC stubs in Durable Object KV storage is only supported when using the SQLite " + "storage backend."); +} + +kj::Maybe Worker::Actor::getStoredExternalHandler() { + return impl->storageExternalHandler; +} + kj::Own Worker::Actor::getLoopback() { return impl->loopback->addRef(); } diff --git a/src/workerd/io/worker.h b/src/workerd/io/worker.h index 626f6e87f29..cb87151d1e8 100644 --- a/src/workerd/io/worker.h +++ b/src/workerd/io/worker.h @@ -63,6 +63,8 @@ class IoContext; class InputGate; class OutputGate; +class StoredExternalHandler; + // Type signature of an entrypoint implementation class (Durable Object or stateless service). 
using ExecutionContextOrState = kj::OneOf, jsg::Ref>; @@ -289,7 +291,7 @@ class Worker::Script: public kj::AtomicRefcounted { using WasmModule = WorkerSource::WasmModule; using JsonModule = WorkerSource::JsonModule; using PythonModule = WorkerSource::PythonModule; - using PythonRequirement = WorkerSource::PythonRequirement; + using ObsoletePythonRequirement = WorkerSource::ObsoletePythonRequirement; using CapnpModule = WorkerSource::CapnpModule; using ModuleContent = WorkerSource::ModuleContent; using Module = WorkerSource::Module; @@ -927,6 +929,7 @@ class Worker::Actor final: public kj::Refcounted { kj::StringPtr name, kj::Function()> getStartInfo) = 0; virtual void abortFacet(kj::StringPtr name, kj::Exception reason) = 0; virtual void deleteFacet(kj::StringPtr name) = 0; + virtual void cloneFacet(kj::StringPtr src, kj::StringPtr dst) = 0; }; // Create a new Actor hosted by this Worker. Note that this Actor object may only be manipulated @@ -969,6 +972,11 @@ class Worker::Actor final: public kj::Refcounted { // interactions between `onAbort` and `onShutdown` promises. void shutdownActorCache(kj::Maybe error); + // Immediately, synchronously abort all work going on in the actor. All requests throw the + // given exception. All background work stops. Any async task that holds a strong reference on + // the Actor is canceled, so that there should be no more references floating around. + void abort(const kj::Exception& error); + // Get a promise that resolves when `shutdown()` has been called. kj::Promise onShutdown(); @@ -979,6 +987,11 @@ class Worker::Actor final: public kj::Refcounted { // This method can only be called once. kj::Promise onBroken(); + // Get a canceler which will be canceled when `abort()` is called. All incoming requests to + // the actor and all background work should be wrapped in this canceler. (worker-entrypoint.c++ + // takes care of this.) 
+ kj::Canceler& getAbortCanceler(); + const Id& getId(); Id cloneId(); static Id cloneId(Id& id); @@ -986,6 +999,13 @@ class Worker::Actor final: public kj::Refcounted { kj::Maybe getPersistent(); kj::Own getLoopback(); + // Get the StoredExternalHandler, creating it if it doesn't already exist. Fails with a + // DOMDataCloneError if the actor's storage is not SQLite-backed, since externals cannot be stored. + StoredExternalHandler& getOrCreateStoredExternalHandler(); + + // Get the StoredExternalHandler if it has been created previously. + kj::Maybe getStoredExternalHandler(); + // Make the storage object for use in Service Workers syntax. This should not be used for // modules-syntax workers. (Note that Service-Workers-syntax actors are not supported publicly.) kj::Maybe> makeStorageForSwSyntax(Worker::Lock& lock); diff --git a/src/workerd/jsg/AGENTS.md b/src/workerd/jsg/AGENTS.md index 645b9bdc5ec..bb15c6ed13c 100644 --- a/src/workerd/jsg/AGENTS.md +++ b/src/workerd/jsg/AGENTS.md @@ -26,6 +26,7 @@ Macro-driven C++/V8 binding layer: declares C++ types as JS-visible resources/st | `jsvalue.h` | `JsValue`, `JsObject`, `JsString`, etc. — typed wrappers over `v8::Value` | | `type-wrapper.h` | `TypeWrapper` template: compile-time dispatch for C++ ↔ V8 conversions | | `meta.h` | Argument unwrapping, `ArgumentContext`, parameter pack metaprogramming | +| `unwrap-args.h` | `UnwrappedArgs` helper: deterministic left-to-right argument unwrapping in V8 callbacks | | `fast-api.h` | V8 Fast API call optimizations | | `ser.h` | Structured clone: `Serializer`/`Deserializer` | | `web-idl.h` | Web IDL types: `NonCoercible`, `Sequence`, etc.
| diff --git a/src/workerd/jsg/BUILD.bazel b/src/workerd/jsg/BUILD.bazel index b829dfbf0c8..9dd1cc31702 100644 --- a/src/workerd/jsg/BUILD.bazel +++ b/src/workerd/jsg/BUILD.bazel @@ -103,6 +103,7 @@ wd_cc_library( "resource.h", "ser.h", "setup.h", + "unwrap-args.h", "util.h", "v8-platform-wrapper.h", "web-idl.h", @@ -202,6 +203,7 @@ wd_cc_library( "url.h", ], implementation_deps = [ + "//src/workerd/util:sentry", "//src/workerd/util:strings", "@ada-url", ], diff --git a/src/workerd/jsg/README.md b/src/workerd/jsg/README.md index 822451b060f..fc21fa37926 100644 --- a/src/workerd/jsg/README.md +++ b/src/workerd/jsg/README.md @@ -49,13 +49,16 @@ For file map and coding invariants, see [AGENTS.md](AGENTS.md). | `jsg::AsyncGenerator` | `Symbol.asyncIterator` | Async per-item iteration | | `jsg::Dict` | `Object` | Record type; string keys, uniform value type | | `kj::OneOf` | Union | Web IDL validated at compile time | -| `jsg::Function` | `Function` | Bidirectional: JS↔C++ callable | +| `jsg::Function` | `Function` | Bidirectional: JS↔C++ callable | | `jsg::Promise` | `Promise` | Full `.then()`/`.catch_()` API | | `jsg::Name` | `string` or `Symbol` | Property name wrapper | | `jsg::BufferSource` | `ArrayBuffer`/`TypedArray` | Type-preserving; supports detach | | `jsg::V8Ref` | Any V8 type | Persistent strong reference | | `jsg::Value` | Any | Alias for `V8Ref` | | `jsg::Ref` | Resource wrapper | Strong ref to JSG Resource Type | +| `jsg::WeakRef` | — | Non-owning weak ref to JSG Resource Type | +| `jsg::WeakV8Ref` | Any V8 type | Non-owning weak ref to V8 value | +| `jsg::WeakJsRef` | Any JsValue type | Non-owning weak ref to JsValue type | | `jsg::HashableV8Ref` | Any V8 type | `V8Ref` + `hashCode()` | | `jsg::MemoizedIdentity` | Any | Preserves JS object identity across round-trips | | `jsg::Identified` | Any | Captures JS object identity + unwrapped value | @@ -215,6 +218,105 @@ All types that must be visited in `visitForGc()` if held as Resource Type member | 
`jsg::AsyncGenerator` | Async generator | | `kj::Maybe` | When `T` is GC-visitable | +**Not GC-visitable** (compile error if visited): +`jsg::WeakRef`. +This is intentionally weak and does NOT keep its target alive during GC. +Attempting to `visitor.visit()` a weak ref field is a compile error — the correct +signal that weak references should not be traced. Do not include them in `visitForGc()`. + +## Weak References + +`jsg::WeakRef` provides a non-owning, automatically-invalidated reference +to a JSG resource type. Unlike `workerd::WeakRef` from `util/weak-refs.h`, +it integrates with the JSG lifecycle and requires **no manual invalidation** — +it becomes invalid automatically when the target is destroyed. + +### `jsg::WeakRef` — Weak reference to a JSG Resource Type + +Created from any `jsg::Ref` via `getWeakRef()`. Becomes invalid when the +underlying `Wrappable` is destroyed (all `Ref`s dropped and JS wrapper collected). +The invalidation happens in `Wrappable::~Wrappable()`. + +```cpp +jsg::Ref strong = js.alloc(); +jsg::WeakRef weak = strong.getWeakRef(js); + +weak->doSomething(); // OK if alive; throws kj::Exception if dead +KJ_ASSERT(weak.isAlive()); // true + +// Safe check-and-use: +KJ_IF_SOME(ref, weak.tryGet()) { + ref.doSomething(); +} + +// Promote to strong reference: +KJ_IF_SOME(ref, weak.tryAddRef(js)) { + // ref is a jsg::Ref that keeps the object alive +} +``` + +| Method | Returns | Behavior when dead | +| --------------- | -------------------- | ------------------------------ | +| `operator->()` | `T*` | Throws `kj::Exception` | +| `isAlive()` | `bool` | Returns `false` | +| `tryGet()` | `kj::Maybe` | Returns `kj::none` | +| `tryAddRef()` | `kj::Maybe>` | Returns `kj::none` | +| `addRef()` | `WeakRef` | Returns null-constructed copy | + +**Not GC-traced** — attempting to visit in `visitForGc()` is a compile error. +Does not hold V8 handles. Safe to drop outside the isolate lock. 
+ +`operator*()` is deliberately omitted to prevent storing dangling references. + +Supports converting moves: `WeakRef` → `WeakRef`. + +### `jsg::WeakV8Ref` — Weak reference to a V8 value + +Created from any `jsg::V8Ref` via `getWeakRef(isolate)` or `getWeakRef(js)`. +Uses `v8::Global::SetWeak()` internally — V8 automatically clears the handle +when the target is garbage collected. + +Note: `WeakV8Ref` is provided mostly to support the implementation of `WeakJsRef`. +Newer code should prefer the `jsg::Js*` types and `jsg::JsRef` +rather than using `v8::Value` types and `jsg::V8Ref` directly. + +```cpp +jsg::V8Ref strong = js.v8Ref(someLocal); +jsg::WeakV8Ref weak = strong.getWeakRef(js); + +KJ_IF_SOME(local, weak.tryGetHandle(js)) { + // value is still alive +} + +KJ_IF_SOME(ref, weak.tryAddRef(js)) { + // ref is a jsg::V8Ref that keeps the value alive +} +``` + +| Method | Returns | Behavior when collected | +| ------------------------- | ------------------------- | ------------------------- | +| `getHandle(isolate/js)` | `v8::Local` | Throws `kj::Exception` | +| `tryGetHandle(isolate/js)`| `kj::Maybe>` | Returns `kj::none` | +| `isAlive()` | `bool` | Returns `false` | +| `tryAddRef(isolate/js)` | `kj::Maybe>` | Returns `kj::none` | + +**Not GC-traced** — attempting to visit in `visitForGc()` is a compile error. +Safe to drop outside the isolate lock (uses deferred destruction). + +### `jsg::WeakJsRef` — Weak reference to a JsValue type + +Created from any `jsg::JsRef` via `getWeakRef(js)`. Wraps `WeakV8Ref` +with typed `JsValue` access. + +```cpp +jsg::JsRef strong(js, someJsObj); +jsg::WeakJsRef weak = strong.getWeakRef(js); + +KJ_IF_SOME(handle, weak.tryGetHandle(js)) { + // handle is a JsObject +} +``` + ## Error Type Catalog | JSG Error Name | JS Exception Type | When to Use | @@ -403,6 +505,7 @@ Both may take additional `TypeHandler&` trailing parameters. new wrapper on next JS access | 7.
C++ destroyed → + WeakRef anchors invalidated; detachWrapper(); JS wrapper = empty shell ``` diff --git a/src/workerd/jsg/arg-order-test.c++ b/src/workerd/jsg/arg-order-test.c++ new file mode 100644 index 00000000000..9c6e21151ca --- /dev/null +++ b/src/workerd/jsg/arg-order-test.c++ @@ -0,0 +1,306 @@ +// Copyright (c) 2017-2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Integration tests verifying that JSG-generated V8 callbacks unwrap their +// arguments in deterministic left-to-right order, regardless of the host +// compiler's chosen argument-evaluation order. +// +// Each test passes 3+ arguments to a JSG-exposed entry point, where each +// argument is a JS object with a `toString` that records its label into a +// shared array. The C++ entry point coerces each argument to `kj::String` +// (triggering `.toString()`), which appends to the array. We then assert +// the array's contents match the declaration order. +// +// Before this fix, on toolchains that evaluated function-call arguments +// right-to-left (e.g. MSVC), the order would reverse. After the fix, all +// toolchains produce `a,b,c` regardless of evaluation choice. + +#include "jsg-test.h" + +namespace workerd::jsg::test { +namespace { + +V8System v8System; +class ContextGlobalObject: public Object, public ContextGlobal {}; + +// Shared JS snippet that creates a function `record(label)` returning an +// object whose `toString` pushes `label` onto a global `order` array, then +// returns `label` so the C++ side receives a normal string. Each test +// concatenates this prelude with its actual invocation. 
+constexpr kj::StringPtr kRecordPrelude = + "let order = [];" + "let record = (label) => ({ toString() { order.push(label); return label; } });"_kj; + +// ===================================================================================== +// MethodCallback (plain) — instance method that takes 3 stringifiable args. + +struct PlainMethodContext: public ContextGlobalObject { + kj::String orderTest(kj::String a, kj::String b, kj::String c) { + return kj::str(a, ",", b, ",", c); + } + JSG_RESOURCE_TYPE(PlainMethodContext) { + JSG_METHOD(orderTest); + } +}; +JSG_DECLARE_ISOLATE_TYPE(PlainMethodIsolate, PlainMethodContext); + +KJ_TEST("Argument evaluation order: instance method (plain)") { + Evaluator e(v8System); + e.expectEval(kj::str(kRecordPrelude, + "orderTest(record('a'), record('b'), record('c'));" + "order.join(',')"), + "string", "a,b,c"); +} + +// ===================================================================================== +// MethodCallback (Lock& first) — same shape, with Lock& as first parameter. + +struct LockFirstMethodContext: public ContextGlobalObject { + kj::String orderTest(Lock& js, kj::String a, kj::String b, kj::String c) { + return kj::str(a, ",", b, ",", c); + } + JSG_RESOURCE_TYPE(LockFirstMethodContext) { + JSG_METHOD(orderTest); + } +}; +JSG_DECLARE_ISOLATE_TYPE(LockFirstMethodIsolate, LockFirstMethodContext); + +KJ_TEST("Argument evaluation order: instance method (Lock& first)") { + Evaluator e(v8System); + e.expectEval(kj::str(kRecordPrelude, + "orderTest(record('a'), record('b'), record('c'));" + "order.join(',')"), + "string", "a,b,c"); +} + +// ===================================================================================== +// MethodCallback (FunctionCallbackInfo& first) — info-receiving method. 
+ +struct InfoFirstMethodContext: public ContextGlobalObject { + kj::String orderTest( + const v8::FunctionCallbackInfo& info, kj::String a, kj::String b, kj::String c) { + return kj::str(a, ",", b, ",", c); + } + JSG_RESOURCE_TYPE(InfoFirstMethodContext) { + JSG_METHOD(orderTest); + } +}; +JSG_DECLARE_ISOLATE_TYPE(InfoFirstMethodIsolate, InfoFirstMethodContext); + +KJ_TEST("Argument evaluation order: instance method (FunctionCallbackInfo& first)") { + Evaluator e(v8System); + e.expectEval(kj::str(kRecordPrelude, + "orderTest(record('a'), record('b'), record('c'));" + "order.join(',')"), + "string", "a,b,c"); +} + +// ===================================================================================== +// ConstructorCallback (plain / Lock& first / FunctionCallbackInfo& first). +// +// A constructor's job is to produce a Ref; the body just records and +// discards its args. The JS test then re-uses `order` to assert that the +// constructor arguments were evaluated left-to-right before the C++ body +// ran. 
+ +struct PlainConstructible: public Object { + static Ref constructor(kj::String a, kj::String b, kj::String c) { + return jsg::alloc(); + } + JSG_RESOURCE_TYPE(PlainConstructible) {} +}; + +struct PlainCtorContext: public ContextGlobalObject { + JSG_RESOURCE_TYPE(PlainCtorContext) { + JSG_NESTED_TYPE(PlainConstructible); + } +}; +JSG_DECLARE_ISOLATE_TYPE(PlainCtorIsolate, PlainCtorContext, PlainConstructible); + +KJ_TEST("Argument evaluation order: constructor (plain)") { + Evaluator e(v8System); + e.expectEval(kj::str(kRecordPrelude, + "new PlainConstructible(record('a'), record('b'), record('c'));" + "order.join(',')"), + "string", "a,b,c"); +} + +struct LockFirstConstructible: public Object { + static Ref constructor( + Lock& js, kj::String a, kj::String b, kj::String c) { + return jsg::alloc(); + } + JSG_RESOURCE_TYPE(LockFirstConstructible) {} +}; + +struct LockFirstCtorContext: public ContextGlobalObject { + JSG_RESOURCE_TYPE(LockFirstCtorContext) { + JSG_NESTED_TYPE(LockFirstConstructible); + } +}; +JSG_DECLARE_ISOLATE_TYPE(LockFirstCtorIsolate, LockFirstCtorContext, LockFirstConstructible); + +KJ_TEST("Argument evaluation order: constructor (Lock& first)") { + Evaluator e(v8System); + e.expectEval(kj::str(kRecordPrelude, + "new LockFirstConstructible(record('a'), record('b'), record('c'));" + "order.join(',')"), + "string", "a,b,c"); +} + +struct InfoFirstConstructible: public Object { + static Ref constructor( + const v8::FunctionCallbackInfo& info, kj::String a, kj::String b, kj::String c) { + return jsg::alloc(); + } + JSG_RESOURCE_TYPE(InfoFirstConstructible) {} +}; + +struct InfoFirstCtorContext: public ContextGlobalObject { + JSG_RESOURCE_TYPE(InfoFirstCtorContext) { + JSG_NESTED_TYPE(InfoFirstConstructible); + } +}; +JSG_DECLARE_ISOLATE_TYPE(InfoFirstCtorIsolate, InfoFirstCtorContext, InfoFirstConstructible); + +KJ_TEST("Argument evaluation order: constructor (FunctionCallbackInfo& first)") { + Evaluator e(v8System); + 
e.expectEval(kj::str(kRecordPrelude, + "new InfoFirstConstructible(record('a'), record('b'), record('c'));" + "order.join(',')"), + "string", "a,b,c"); +} + +// ===================================================================================== +// StaticMethodCallback (plain / Lock& first / FunctionCallbackInfo& first). + +struct PlainStaticHost: public Object { + static kj::String orderTest(kj::String a, kj::String b, kj::String c) { + return kj::str(a, ",", b, ",", c); + } + JSG_RESOURCE_TYPE(PlainStaticHost) { + JSG_STATIC_METHOD(orderTest); + } +}; + +struct PlainStaticContext: public ContextGlobalObject { + JSG_RESOURCE_TYPE(PlainStaticContext) { + JSG_NESTED_TYPE(PlainStaticHost); + } +}; +JSG_DECLARE_ISOLATE_TYPE(PlainStaticIsolate, PlainStaticContext, PlainStaticHost); + +KJ_TEST("Argument evaluation order: static method (plain)") { + Evaluator e(v8System); + e.expectEval(kj::str(kRecordPrelude, + "PlainStaticHost.orderTest(record('a'), record('b'), record('c'));" + "order.join(',')"), + "string", "a,b,c"); +} + +struct LockFirstStaticHost: public Object { + static kj::String orderTest(Lock& js, kj::String a, kj::String b, kj::String c) { + return kj::str(a, ",", b, ",", c); + } + JSG_RESOURCE_TYPE(LockFirstStaticHost) { + JSG_STATIC_METHOD(orderTest); + } +}; + +struct LockFirstStaticContext: public ContextGlobalObject { + JSG_RESOURCE_TYPE(LockFirstStaticContext) { + JSG_NESTED_TYPE(LockFirstStaticHost); + } +}; +JSG_DECLARE_ISOLATE_TYPE(LockFirstStaticIsolate, LockFirstStaticContext, LockFirstStaticHost); + +KJ_TEST("Argument evaluation order: static method (Lock& first)") { + Evaluator e(v8System); + e.expectEval(kj::str(kRecordPrelude, + "LockFirstStaticHost.orderTest(record('a'), record('b'), record('c'));" + "order.join(',')"), + "string", "a,b,c"); +} + +struct InfoFirstStaticHost: public Object { + static kj::String orderTest( + const v8::FunctionCallbackInfo& info, kj::String a, kj::String b, kj::String c) { + return kj::str(a, ",", b, ",", c); 
+ } + JSG_RESOURCE_TYPE(InfoFirstStaticHost) { + JSG_STATIC_METHOD(orderTest); + } +}; + +struct InfoFirstStaticContext: public ContextGlobalObject { + JSG_RESOURCE_TYPE(InfoFirstStaticContext) { + JSG_NESTED_TYPE(InfoFirstStaticHost); + } +}; +JSG_DECLARE_ISOLATE_TYPE(InfoFirstStaticIsolate, InfoFirstStaticContext, InfoFirstStaticHost); + +KJ_TEST("Argument evaluation order: static method (FunctionCallbackInfo& first)") { + Evaluator e(v8System); + e.expectEval(kj::str(kRecordPrelude, + "InfoFirstStaticHost.orderTest(record('a'), record('b'), record('c'));" + "order.join(',')"), + "string", "a,b,c"); +} + +// ===================================================================================== +// FunctorCallback — jsg::Function invoked from JS. +// +// JSG exposes Function<...> via methods that return one — the JS side then +// calls the returned function. This exercises FunctorCallback's argument +// unwrap path. + +struct FunctorContext: public ContextGlobalObject { + Function makeFn() { + return [](Lock& js, kj::String a, kj::String b, kj::String c) -> kj::String { + return kj::str(a, ",", b, ",", c); + }; + } + JSG_RESOURCE_TYPE(FunctorContext) { + JSG_METHOD(makeFn); + } +}; +JSG_DECLARE_ISOLATE_TYPE(FunctorIsolate, FunctorContext); + +KJ_TEST("Argument evaluation order: jsg::Function (plain)") { + Evaluator e(v8System); + e.expectEval(kj::str(kRecordPrelude, + "makeFn()(record('a'), record('b'), record('c'));" + "order.join(',')"), + "string", "a,b,c"); +} + +// Specialisation for callable signatures that take +// `const v8::FunctionCallbackInfo&` as the parameter after `Lock&`. +// In production this shape is used by `jsg::Function&)>` (see e.g. `jsg::Lock`'s test-only +// `simpleFunction` callback in setup.h / jsg.h). 
+ +struct InfoFirstFunctorContext: public ContextGlobalObject { + Function&, kj::String, kj::String, kj::String)> + makeFn() { + return [](Lock& js, const v8::FunctionCallbackInfo& info, kj::String a, kj::String b, + kj::String c) -> kj::String { return kj::str(a, ",", b, ",", c); }; + } + JSG_RESOURCE_TYPE(InfoFirstFunctorContext) { + JSG_METHOD(makeFn); + } +}; +JSG_DECLARE_ISOLATE_TYPE(InfoFirstFunctorIsolate, InfoFirstFunctorContext); + +KJ_TEST("Argument evaluation order: jsg::Function (FunctionCallbackInfo& first)") { + Evaluator e(v8System); + e.expectEval(kj::str(kRecordPrelude, + "makeFn()(record('a'), record('b'), record('c'));" + "order.join(',')"), + "string", "a,b,c"); +} + +} // namespace +} // namespace workerd::jsg::test diff --git a/src/workerd/jsg/buffersource-test.c++ b/src/workerd/jsg/buffersource-test.c++ index 4505ef362b9..079cb568b95 100644 --- a/src/workerd/jsg/buffersource-test.c++ +++ b/src/workerd/jsg/buffersource-test.c++ @@ -11,6 +11,10 @@ namespace { V8System v8System; +v8::Local unusedBufferSourceConstructor(Lock& js, BackingStore&) { + return v8::Undefined(js.v8Isolate); +} + struct BufferSourceContext: public jsg::Object, public jsg::ContextGlobal { BufferSource takeBufferSource(BufferSource buf) { auto ptr = buf.asArrayPtr(); @@ -177,5 +181,23 @@ KJ_TEST("BackingStore const asArrayPtr handles byteOffset correctly") { "boolean", "true"); } +KJ_TEST("BackingStore rejects byteOffset outside backing store") { + Evaluator e(v8System); + + e.run([](Lock& js) { + KJ_EXPECT_THROW(FAILED, + BackingStore(js.allocBackingStore(8), 0, 9, 1, unusedBufferSourceConstructor, true)); + }); +} + +KJ_TEST("BackingStore rejects byteLength extending outside backing store") { + Evaluator e(v8System); + + e.run([](Lock& js) { + KJ_EXPECT_THROW(FAILED, + BackingStore(js.allocBackingStore(8), 2, 7, 1, unusedBufferSourceConstructor, true)); + }); +} + } // namespace } // namespace workerd::jsg::test diff --git a/src/workerd/jsg/buffersource.c++ 
b/src/workerd/jsg/buffersource.c++ index 987e28c452a..c9df1dcf232 100644 --- a/src/workerd/jsg/buffersource.c++ +++ b/src/workerd/jsg/buffersource.c++ @@ -68,7 +68,9 @@ BackingStore::BackingStore(std::shared_ptr backingStore, ctor(ctor), integerType(integerType) { KJ_REQUIRE(this->backingStore != nullptr); - KJ_REQUIRE(this->byteLength <= this->backingStore->ByteLength()); + auto backingStoreByteLength = this->backingStore->ByteLength(); + KJ_REQUIRE(this->byteOffset <= backingStoreByteLength); + KJ_REQUIRE(this->byteLength <= backingStoreByteLength - this->byteOffset); KJ_REQUIRE(this->byteLength % this->elementSize == 0, kj::str("byteLength must be a multiple of ", this->elementSize, ".")); } diff --git a/src/workerd/jsg/buffersource.h b/src/workerd/jsg/buffersource.h index aa7bf9d61a7..4d5c633a65a 100644 --- a/src/workerd/jsg/buffersource.h +++ b/src/workerd/jsg/buffersource.h @@ -495,8 +495,4 @@ class BufferSourceWrapper { } }; -inline BufferSource Lock::arrayBuffer(kj::Array data) { - return BufferSource(*this, BackingStore::from(*this, kj::mv(data))); -} - } // namespace workerd::jsg diff --git a/src/workerd/jsg/function.h b/src/workerd/jsg/function.h index c26b40084b7..729f359adc0 100644 --- a/src/workerd/jsg/function.h +++ b/src/workerd/jsg/function.h @@ -11,6 +11,7 @@ #include "wrappable.h" #include +#include #include #include @@ -87,15 +88,14 @@ struct FunctorCallback> { auto& func = extractInternalPointer, false>( context, args.Data().As()); + auto unwrapped = _::unwrapArgs(wrapper, js, context, args, + []() { return TypeErrorContext::callbackArgument(i); }); + if constexpr (isVoid()) { - func(Lock::from(isolate), - wrapper.template unwrap( - js, context, args, indexes, TypeErrorContext::callbackArgument(indexes))...); + func(js, kj::mv(unwrapped).template take()...); } else { - return wrapper.wrap(js, context, args.This(), - func(Lock::from(isolate), - wrapper.template unwrap( - js, context, args, indexes, 
TypeErrorContext::callbackArgument(indexes))...)); + return wrapper.wrap( + js, context, args.This(), func(js, kj::mv(unwrapped).template take()...)); } }); } @@ -117,15 +117,14 @@ struct FunctorCallback&, Args...)>, false>( context, args.Data().As()); + auto unwrapped = _::unwrapArgs(wrapper, js, context, args, + []() { return TypeErrorContext::callbackArgument(i); }); + if constexpr (isVoid()) { - func(js, args, - wrapper.template unwrap( - js, context, args, indexes, TypeErrorContext::callbackArgument(indexes))...); + func(js, args, kj::mv(unwrapped).template take()...); } else { return wrapper.wrap(js, context, args.This(), - func(js, args, - wrapper.template unwrap( - js, context, args, indexes, TypeErrorContext::callbackArgument(indexes))...)); + func(js, args, kj::mv(unwrapped).template take()...)); } }); } diff --git a/src/workerd/jsg/inspector.c++ b/src/workerd/jsg/inspector.c++ index 25d0cacafc7..a434696e136 100644 --- a/src/workerd/jsg/inspector.c++ +++ b/src/workerd/jsg/inspector.c++ @@ -35,29 +35,18 @@ kj::String KJ_STRINGIFY(const v8_inspector::StringView& view) { } // namespace v8_inspector namespace workerd::jsg { -namespace { -class StringViewWithScratch: public v8_inspector::StringView { - public: - StringViewWithScratch(v8_inspector::StringView text, kj::Array&& scratch) - : v8_inspector::StringView(text), - scratch(kj::mv(scratch)) {} - - private: - kj::Array scratch; -}; -} // namespace - -v8_inspector::StringView toInspectorStringView(kj::StringPtr text) { + +StringViewWithScratch toInspectorStringView(kj::StringPtr text) { bool isAscii = simdutf::validate_ascii(text.begin(), text.size()); if (isAscii) { return StringViewWithScratch( - v8_inspector::StringView(text.asBytes().begin(), text.size()), nullptr); + nullptr, v8_inspector::StringView(text.asBytes().begin(), text.size())); } else { kj::Array scratch = kj::encodeUtf16(text); - return StringViewWithScratch( - v8_inspector::StringView(reinterpret_cast(scratch.begin()), scratch.size()), - 
kj::mv(scratch)); + auto stringView = + v8_inspector::StringView(reinterpret_cast(scratch.begin()), scratch.size()); + return StringViewWithScratch(kj::mv(scratch), kj::mv(stringView)); } } @@ -67,7 +56,8 @@ v8_inspector::StringView toInspectorStringView(kj::StringPtr text) { void sendExceptionToInspector( jsg::Lock& js, v8_inspector::V8Inspector& inspector, kj::StringPtr description) { inspector.exceptionThrown(js.v8Context(), v8_inspector::StringView(), v8::Local(), - jsg::toInspectorStringView(description), v8_inspector::StringView(), 0, 0, nullptr, 0); + jsg::toInspectorStringView(description).stringView, v8_inspector::StringView(), 0, 0, nullptr, + 0); } void sendExceptionToInspector(jsg::Lock& js, @@ -101,9 +91,10 @@ void sendExceptionToInspector(jsg::Lock& js, // TODO(soon): EW-2636 Pass a real "script ID" as the last parameter instead of 0. I suspect this // has something to do with the incorrect links in the console when it logs uncaught exceptions. - inspector.exceptionThrown(context, jsg::toInspectorStringView(source), exception, - jsg::toInspectorStringView(detailedMessage), jsg::toInspectorStringView(scriptResourceName), - lineNumber, startColumn, inspector.createStackTrace(stackTrace), 0); + inspector.exceptionThrown(context, jsg::toInspectorStringView(source).stringView, exception, + jsg::toInspectorStringView(detailedMessage).stringView, + jsg::toInspectorStringView(scriptResourceName).stringView, lineNumber, startColumn, + inspector.createStackTrace(stackTrace), 0); } } // namespace workerd::jsg diff --git a/src/workerd/jsg/inspector.h b/src/workerd/jsg/inspector.h index 37746c6afb8..9a30bb47bcd 100644 --- a/src/workerd/jsg/inspector.h +++ b/src/workerd/jsg/inspector.h @@ -1,5 +1,9 @@ #pragma once +#include + +#include + namespace kj { class String; class StringPtr; @@ -16,7 +20,18 @@ class Lock; class JsValue; class JsMessage; -v8_inspector::StringView toInspectorStringView(kj::StringPtr text); +struct StringViewWithScratch { + 
StringViewWithScratch(kj::Array scratch, v8_inspector::StringView stringView) + : scratch(kj::mv(scratch)), + stringView(kj::mv(stringView)) {} + + kj::Array scratch; + v8_inspector::StringView stringView; +}; + +// Converts the given text pointer to a StringView, backed either by the memory of the string +// itself or a scratch buffer, if conversion was needed to handle non-ascii content. +StringViewWithScratch toInspectorStringView(kj::StringPtr text); // Inform the inspector of a problem not associated with any particular exception object. // diff --git a/src/workerd/jsg/iterator.h b/src/workerd/jsg/iterator.h index ee8f314668f..8cbb00498a3 100644 --- a/src/workerd/jsg/iterator.h +++ b/src/workerd/jsg/iterator.h @@ -174,10 +174,11 @@ class AsyncGenerator final { template AsyncGenerator(Lock& js, JsObject object, TypeWrapper*) : maybeActive(Active(js, object, static_cast(nullptr))), - maybeSelfRef(kj::rc>(kj::Badge{}, *this)) {} + maybeSelfRef(kj::rc>(kj::Badge{}, *this)) { + } AsyncGenerator(AsyncGenerator&& other) noexcept : maybeActive(kj::mv(other.maybeActive)), - maybeSelfRef(kj::rc>(kj::Badge{}, *this)) { + maybeSelfRef(kj::rc>(kj::Badge{}, *this)) { // Invalidate the old WeakRef since it's being moved. 
KJ_IF_SOME(selfRef, other.maybeSelfRef) { selfRef->invalidate(); @@ -192,7 +193,7 @@ class AsyncGenerator final { selfRef->invalidate(); } maybeActive = kj::mv(other.maybeActive); - maybeSelfRef = kj::rc>(kj::Badge{}, *this); + maybeSelfRef = kj::rc>(kj::Badge{}, *this); } return *this; } @@ -349,7 +350,7 @@ class AsyncGenerator final { } }; kj::Maybe maybeActive; - kj::Maybe>> maybeSelfRef; + kj::Maybe>> maybeSelfRef; }; template diff --git a/src/workerd/jsg/jsg-test.h b/src/workerd/jsg/jsg-test.h index c480f7e454c..6afc4211ecc 100644 --- a/src/workerd/jsg/jsg-test.h +++ b/src/workerd/jsg/jsg-test.h @@ -236,6 +236,10 @@ struct NumberBox: public Object { JSG_INSTANCE_PROPERTY(boxed, getBoxed, setBoxed); JSG_READONLY_INSTANCE_PROPERTY(boxedFromTypeHandler, getBoxedFromTypeHandler); } + + WeakRef getWeakRefToSelf(jsg::Lock& js) { + return JSG_THIS_WEAK(js); + } }; class BoxBox: public Object { diff --git a/src/workerd/jsg/jsg.c++ b/src/workerd/jsg/jsg.c++ index 03e0b221d74..940079418a5 100644 --- a/src/workerd/jsg/jsg.c++ +++ b/src/workerd/jsg/jsg.c++ @@ -42,6 +42,11 @@ const char* JsExceptionThrown::what() const noexcept { return whatBuffer.cStr(); } +void Data::deferGlobalDestruction(v8::Isolate* isolate, v8::Global handle) { + auto& jsgIsolate = IsolateBase::from(isolate); + jsgIsolate.deferDestruction(kj::mv(handle)); +} + void Data::destroy() { assertInvariant(); if (isolate != nullptr) { @@ -76,8 +81,7 @@ void Data::destroy() { // // Note that only the v8::Global part of `handle` needs to be destroyed under isolate lock. // The `tracedRef` part has a trivial destructor so can be destroyed on any thread. 
- auto& jsgIsolate = *reinterpret_cast(isolate->GetData(SET_DATA_ISOLATE_BASE)); - jsgIsolate.deferDestruction(v8::Global(kj::mv(handle))); + deferGlobalDestruction(isolate, kj::mv(handle)); } isolate = nullptr; } @@ -191,6 +195,14 @@ void Lock::setAllowEval(bool allow) { IsolateBase::from(v8Isolate).setAllowEval({}, allow); } +void Lock::setDisallowJavascriptExecution(bool allow) { + IsolateBase::from(v8Isolate).setDisallowJavascriptExecution({}, allow); +} + +bool Lock::isJavascriptExecutionDisallowed() const { + return IsolateBase::from(v8Isolate).getDisallowJavascriptExecution(); +} + void Lock::setUsingEnhancedErrorSerialization() { IsolateBase::from(v8Isolate).setUsingEnhancedErrorSerialization(); } diff --git a/src/workerd/jsg/jsg.h b/src/workerd/jsg/jsg.h index a32c277a169..eb7f773bd31 100644 --- a/src/workerd/jsg/jsg.h +++ b/src/workerd/jsg/jsg.h @@ -7,6 +7,7 @@ // // Any files declaring an API to export to JavaScript will need to include this header. +#include "kj/common.h" #include "util.h" #include "wrappable.h" @@ -540,7 +541,7 @@ using HasGetTemplateOverload = decltype(kj::instance().getTemplate( // Declares the type serializable. See jsg::Serializer for usage. #define JSG_SERIALIZABLE(TAG, ...) \ - static_assert(static_cast(jsgSuper::jsgSerializeTag) != static_cast(TAG)); \ + static constexpr auto jsgSerializeLevel = jsgSuper::jsgSerializeLevel + 1; \ static constexpr auto jsgSerializeTag = TAG; \ static constexpr decltype(jsgSerializeTag) jsgSerializeOldTags[] = {__VA_ARGS__}; \ static constexpr auto jsgSerializeOneway = false @@ -551,7 +552,7 @@ using HasGetTemplateOverload = decltype(kj::instance().getTemplate( // // Used e.g. for JsRpcTarget, which becomes JsRpcStub after serialization. 
#define JSG_ONEWAY_SERIALIZABLE(TAG) \ - static_assert(static_cast(jsgSuper::jsgSerializeTag) != static_cast(TAG)); \ + static constexpr auto jsgSerializeLevel = jsgSuper::jsgSerializeLevel + 1; \ static constexpr auto jsgSerializeTag = TAG; \ static constexpr decltype(jsgSerializeTag) jsgSerializeOldTags[] = {}; \ static constexpr auto jsgSerializeOneway = true @@ -803,6 +804,9 @@ enum SetDataIndex { class Lock; WD_STRONG_BOOL(RequireEsm); +template +class WeakV8Ref; + // Arbitrary V8 data, wrapped for storage from C++. You can't do much with it, so instead you // should probably use V8Ref, a version of this that's strongly typed. // @@ -904,6 +908,13 @@ class Data { // garbage collection. void moveFromTraced(Data& other, v8::TracedReference& otherTracedRef) noexcept; + // Defers destruction of a v8::Global handle to the next time the isolate is locked. + // Used by Data::destroy() and WeakV8Ref::destroy(). + static void deferGlobalDestruction(v8::Isolate* isolate, v8::Global handle); + + template + friend class WeakV8Ref; + friend class MemoryTracker; }; @@ -952,6 +963,13 @@ class V8Ref: private Data { template V8Ref cast(jsg::Lock& js); + // Create a weak reference to the held V8 value. The weak reference does not prevent the + // value from being garbage collected and is not traced by GC. + WeakV8Ref getWeakRef(v8::Isolate* isolate) const { + return WeakV8Ref(isolate, getHandle(isolate)); + } + WeakV8Ref getWeakRef(jsg::Lock& js) const; + private: friend class GcVisitor; friend class MemoryTracker; @@ -995,6 +1013,106 @@ class HashableV8Ref: public V8Ref { identityHash(identityHash) {} }; +// A weak reference to a V8 value (where T is a v8::Value subtype). +// +// Unlike V8Ref, a WeakV8Ref does NOT prevent the referenced value from being garbage +// collected and is NOT traced by V8's GC. Internally it holds a v8::Global with SetWeak() +// applied, which V8 automatically clears when the target is collected. 
+// +// Use tryGetHandle() to safely access the value: +// +// KJ_IF_SOME(local, weakRef.tryGetHandle(js)) { +// // value is still alive, use local +// } +// +// Use tryAddRef() to promote to a strong V8Ref: +// +// KJ_IF_SOME(strong, weakRef.tryAddRef(js.v8Isolate)) { +// // strong keeps the value alive +// } +// +// It is safe to destroy a WeakV8Ref outside the isolate lock (handles are deferred for later +// cleanup, like V8Ref). +template +class WeakV8Ref final { + public: + WeakV8Ref(decltype(nullptr)) {} + + WeakV8Ref(v8::Isolate* isolate, v8::Local handle): isolate(isolate), handle(isolate, handle) { + this->handle.SetWeak(); + } + + ~WeakV8Ref() noexcept(false) { + destroy(); + } + + WeakV8Ref(WeakV8Ref&& other) noexcept: isolate(other.isolate), handle(kj::mv(other.handle)) { + other.isolate = nullptr; + } + + WeakV8Ref& operator=(WeakV8Ref&& other) { + if (this != &other) { + auto tmp = kj::mv(other.handle); + auto tmpIsolate = other.isolate; + other.handle = kj::mv(handle); + other.isolate = isolate; + handle = kj::mv(tmp); + isolate = tmpIsolate; + other.destroy(); + } + return *this; + } + KJ_DISALLOW_COPY(WeakV8Ref); + + // Check if the referenced value is still alive (not yet garbage collected). + bool isAlive() const { + return !handle.IsEmpty(); + } + + // Try to get the handle. Returns kj::none if the value has been garbage collected. + kj::Maybe> tryGetHandle(v8::Isolate* isolate) const { + if (handle.IsEmpty()) return kj::none; + if constexpr (std::is_base_of_v) { + auto local = handle.Get(isolate).template As().template As(); + if (local.IsEmpty()) return kj::none; + return local; + } else { + auto local = handle.Get(isolate).template As(); + if (local.IsEmpty()) return kj::none; + return local; + } + } + kj::Maybe> tryGetHandle(Lock& js) const; + + // Get the handle, throwing kj::Exception if collected. 
+ v8::Local getHandle(v8::Isolate* isolate) const { + return KJ_ASSERT_NONNULL( + tryGetHandle(isolate), "attempt to access collected jsg::WeakV8Ref target"); + } + v8::Local getHandle(Lock& js) const; + + // Try to promote to a strong V8Ref. Returns kj::none if collected. + kj::Maybe> tryAddRef(v8::Isolate* isolate) const { + return tryGetHandle(isolate).map([&](v8::Local local) { return V8Ref(isolate, local); }); + } + kj::Maybe> tryAddRef(Lock& js) const; + + private: + v8::Isolate* isolate = nullptr; + v8::Global handle; + + void destroy() { + if (isolate != nullptr && !handle.IsEmpty()) { + if (v8::Locker::IsLocked(isolate)) { + handle.Reset(); + } else { + Data::deferGlobalDestruction(isolate, kj::mv(handle)); + } + isolate = nullptr; + } + } +}; + template void MemoryTracker::trackField( kj::StringPtr edgeName, const V8Ref& value, kj::Maybe nodeName) { @@ -1167,6 +1285,10 @@ constexpr bool resourceNeedsGcTracing(); template void visitSubclassForGc(T* obj, GcVisitor& visitor); +// Forward declaration for weak reference types. +template +class WeakRef; + // All resource types must inherit from this. class Object: private Wrappable { public: @@ -1205,9 +1327,12 @@ class Object: private Wrappable { template inline void jsgInitReflection(TypeWrapper& wrapper) {} - // Dummy invalid serialization tag. This is only used to detect when a subclass has defined their - // own tag. - static constexpr uint jsgSerializeTag = kj::maxValue; + // This is used to detect when a subclass has defined a custom serializer. + static constexpr uint jsgSerializeLevel = 0; + + protected: + template + WeakRef getWeakRefToThis(Lock& js); private: inline void visitForMemoryInfo(MemoryTracker& tracker) const {} @@ -1346,6 +1471,12 @@ class Ref { inner->Wrappable::attachWrapper(isolate, object, resourceNeedsGcTracing()); } + // Obtain a weak reference to the referenced object. The weak reference does not keep the + // object alive and does not participate in GC tracing. 
It becomes invalid when the underlying + // Wrappable is destroyed (all Refs dropped and JS wrapper collected). + WeakRef getWeakRef(Lock& js) &; + WeakRef getWeakRef(Lock& js) && = delete; // Don't weaken an expiring ref + private: kj::Own inner; @@ -1377,6 +1508,8 @@ class Ref { template friend class ObjectWrapper; friend class GcVisitor; + template + friend class WeakRef; }; template @@ -1400,6 +1533,142 @@ Ref _jsgThis(T* obj) { } #define JSG_THIS (::workerd::jsg::_jsgThis(this)) +#define JSG_THIS_WEAK(js) (getWeakRefToThis>(js)) + +// A non-owning weak reference to a resource type (a type with a JSG_RESOURCE_TYPE block). +// +// Unlike Ref, a WeakRef does NOT keep the referenced object alive and is NOT traced by +// V8's GC. When the underlying Wrappable is destroyed (all Refs are dropped and the JS +// wrapper is collected), the WeakRef automatically becomes invalid — no manual invalidation +// is required. +// +// Use operator->() for convenient single-expression access that asserts liveness: +// +// weakFoo->doSomething(); // throws kj::Exception if dead +// +// Use tryGet() or tryAddRef() when the target might legitimately be dead: +// +// KJ_IF_SOME(strong, weakFoo.tryAddRef(js)) { +// strong->doSomething(); +// } +// +// operator*() is deliberately omitted to discourage storing dangling references. +// +// Safe to drop outside of the isolate lock, but requires the isolate lock to +// acquire more references. 
+template +class WeakRef { + public: + WeakRef(decltype(nullptr)) {} + + WeakRef(WeakRef&& other) noexcept: impl(kj::mv(other.impl)) {} + template + requires(kj::canConvert()) + WeakRef(WeakRef&& other) noexcept { + KJ_IF_SOME(o, kj::mv(other.impl)) { + impl = Impl{ + .isolate = o.isolate, + .target = o.target, + .anchor = kj::mv(o.anchor), + }; + } + } + KJ_DISALLOW_COPY(WeakRef); + + WeakRef& operator=(WeakRef&& other) noexcept { + if (this != &other) { + auto otherImpl = kj::mv(other.impl); + destroy(); + impl = kj::mv(otherImpl); + } + return *this; + } + + template + requires(kj::canConvert()) + WeakRef& operator=(WeakRef&& other) noexcept { + auto otherImpl = kj::mv(other.impl); + destroy(); + KJ_IF_SOME(o, otherImpl) { + impl = Impl{ + .isolate = o.isolate, + .target = o.target, + .anchor = kj::mv(o.anchor), + }; + } + return *this; + } + + ~WeakRef() noexcept(false) { + destroy(); + } + + // Dereference. Asserts if the target has been destroyed. + // Safe for single-expression use: weakFoo->doSomething() + T* operator->() const KJ_LIFETIMEBOUND { + auto& i = KJ_ASSERT_NONNULL(impl, "attempt to access destroyed jsg::WeakRef target"); + KJ_ASSERT(i.anchor->isAlive(), "attempt to access invalidated jsg::WeakRef target"); + return &i.target; + } + + // Deliberately omitted: operator*() + // This prevents: T& ref = *weakRef; (dangling reference risk) + + // Check if the referenced object is still alive. + bool isAlive() const { + KJ_IF_SOME(i, impl) { + return i.anchor->isAlive(); + } + return false; + } + + // Try to get a raw reference. Returns kj::none if the target has been destroyed. + // Use of tryGet is discouraged because it does return a raw reference that can + // dangle. Use it only for single-expression access, essentially as a non-asserting + // version of operator->(). 
+ kj::Maybe tryGet() const KJ_LIFETIMEBOUND { + KJ_IF_SOME(i, impl) { + if (i.anchor->isAlive()) { + return i.target; + } + } + return kj::none; + } + + // Try to promote to a strong Ref. Returns kj::none if the target has been destroyed. + kj::Maybe> tryAddRef(Lock&) const { + return tryGet().map([](T& t) { return Ref(kj::addRef(t)); }); + } + + // Create another weak ref to the same target. + WeakRef addRef(jsg::Lock& js) &; + WeakRef addRef(jsg::Lock& js) && = delete; // Redundant, just move. + + private: + struct Impl { + v8::Isolate* isolate; + T& target; + kj::Rc anchor; + }; + + kj::Maybe impl; + + WeakRef(v8::Isolate* isolate, T& target, kj::Rc anchor) + : impl(Impl{ + .isolate = isolate, + .target = target, + .anchor = kj::mv(anchor), + }) {} + + // Arranges to have the anchor always dropped under isolate lock + void destroy(); + + template + friend class WeakRef; + template + friend class Ref; + friend class Object; +}; // Holds a value of type `T` and allows it to be passed to JavaScript multiple times, resulting // in exactly the same JavaScript object each time (will compare equal using `===`). You may @@ -1852,6 +2121,10 @@ class GcVisitor { } } + // No visit() overload for WeakRef. + // WeakRef is intentionally NOT GC-visitable — attempting to visit one is a compile error, + // which is the correct signal that weak references should not be traced. + void visit() {} template @@ -2200,7 +2473,8 @@ class JsMessage; V(Function) \ V(Uint8Array) \ V(ArrayBuffer) \ - V(ArrayBufferView) + V(ArrayBufferView) \ + V(SharedArrayBuffer) #define V(Name) class Js##Name; JS_TYPE_CLASSES(V) @@ -2665,6 +2939,11 @@ class Lock { // Use to enable/disable dynamic code evaluation (via eval(), new Function(), or WebAssembly). 
void setAllowEval(bool allow); + // Use to choose the safe path in unwrap() when under a `DisallowJavascriptExecution` scope + // TODO(cleanup): replace with scope guard if we need to use this in multiple places + void setDisallowJavascriptExecution(bool allow); + bool isJavascriptExecutionDisallowed() const; + void setCaptureThrowsAsRejections(bool capture); void setUsingEnhancedErrorSerialization(); void setUsingFastJsgStruct(); @@ -2773,14 +3052,6 @@ class Lock { template JsObject opaque(T&& inner) KJ_WARN_UNUSED_RESULT; - // Returns a jsg::BufferSource whose underlying JavaScript handle is a Uint8Array. - BufferSource bytes(kj::Array data) KJ_WARN_UNUSED_RESULT; - - // Returns a jsg::BufferSource whose underlying JavaScript handle is an ArrayBuffer - // as opposed to the default Uint8Array. May copy and move the bytes if they are - // not in the right sandbox. - BufferSource arrayBuffer(kj::Array data) KJ_WARN_UNUSED_RESULT; - enum class AllocOption { ZERO_INITIALIZED, UNINITIALIZED }; // Utility method to safely allocate a v8::BackingStore with allocation failure handling. @@ -2919,6 +3190,26 @@ class Lock { virtual v8::Local getPrototypeFor(const std::type_info& type) = 0; }; +template +inline WeakV8Ref V8Ref::getWeakRef(jsg::Lock& js) const { + return getWeakRef(js.v8Isolate); +} + +template +inline kj::Maybe> WeakV8Ref::tryGetHandle(Lock& js) const { + return tryGetHandle(js.v8Isolate); +} + +template +inline v8::Local WeakV8Ref::getHandle(Lock& js) const { + return getHandle(js.v8Isolate); +} + +template +inline kj::Maybe> WeakV8Ref::tryAddRef(Lock& js) const { + return tryAddRef(js.v8Isolate); +} + // Ensures that the given fn is run within both a handlescope and the context scope. // The lock must be assignable to a jsg::Lock, and the context must be or be assignable // to a v8::Local. The context will be evaluated within the handle scope. 
diff --git a/src/workerd/jsg/jsvalue.c++ b/src/workerd/jsg/jsvalue.c++ index 0b8d744cf8d..22b133a100e 100644 --- a/src/workerd/jsg/jsvalue.c++ +++ b/src/workerd/jsg/jsvalue.c++ @@ -155,7 +155,7 @@ JsValue JsObject::getPrototype(Lock& js) { continue; // unwrap one layer iteratively, no native recursion } JSG_REQUIRE(trap.isFunction(), TypeError, "Proxy getPrototypeOf trap is not a function"); - v8::Local fn = ((v8::Local)trap).As(); + v8::Local fn = (v8::Local(trap)).As(); v8::Local args[] = {target}; auto ret = JsValue(check(fn->Call(js.v8Context(), jsHandler.inner, 1, args))); JSG_REQUIRE(ret.isObject() || ret.isNull(), TypeError, @@ -169,12 +169,7 @@ JsValue JsObject::getPrototype(Lock& js) { // given how we are currently using this function. return ret; } -#if V8_MAJOR_VERSION >= 15 || (V8_MAJOR_VERSION == 14 && V8_MINOR_VERSION >= 7) return JsValue(current->GetPrototype()); -#else - // TODO(cleanup): Remove when unnecessary. - return JsValue(current->GetPrototypeV2()); -#endif } kj::String JsSymbol::description(Lock& js) const { @@ -425,13 +420,17 @@ bool JsString::containsOnlyOneByte() const { kj::Maybe JsRegExp::operator()(Lock& js, const JsString& input) const { auto result = check(inner->Exec(js.v8Context(), input)); - if (result->IsNullOrUndefined()) return kj::none; + // v8::RegExp::Exec dispatches to the current RegExp.prototype.exec, which + // user code can override to return anything. Reject non-Array results so a + // monkey-patched exec cannot feed arbitrary objects into native code. + if (result->IsNullOrUndefined() || !result->IsArray()) return kj::none; return JsArray(result.As()); } kj::Maybe JsRegExp::operator()(Lock& js, kj::StringPtr input) const { auto result = check(inner->Exec(js.v8Context(), js.str(input))); - if (result->IsNull()) return kj::none; + // Same hardening as above: reject non-null, non-Array results. 
+ if (result->IsNullOrUndefined() || !result->IsArray()) return kj::none; return JsArray(result.As()); } @@ -660,13 +659,18 @@ uint JsFunction::hashCode() const { return kj::hashCode(obj->GetIdentityHash()); } -BufferSource Lock::bytes(kj::Array data) { - return BufferSource(*this, BackingStore::from(*this, kj::mv(data))); -} - // ====================================================================================== // JsArrayBuffer +kj::Maybe JsArrayBuffer::tryCreate(Lock& js, size_t length) { + JSG_REQUIRE(length < v8::ArrayBuffer::kMaxByteLength, RangeError, "The length is too large"); + auto backing = v8::ArrayBuffer::NewBackingStore(js.v8Isolate, length, + v8::BackingStoreInitializationMode::kZeroInitialized, + v8::BackingStoreOnFailureMode::kReturnNull); + if (backing == nullptr) return kj::none; + return create(js, kj::mv(backing)); +} + JsArrayBuffer JsArrayBuffer::create(Lock& js, size_t length) { JSG_REQUIRE(length < v8::ArrayBuffer::kMaxByteLength, RangeError, "The length is too large"); auto backing = v8::ArrayBuffer::NewBackingStore(js.v8Isolate, length, @@ -686,7 +690,12 @@ JsArrayBuffer JsArrayBuffer::create(Lock& js, std::unique_ptr return JsArrayBuffer(v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backingStore))); } +JsArrayBuffer JsArrayBuffer::create(Lock& js, std::shared_ptr backingStore) { + return JsArrayBuffer(v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backingStore))); +} + kj::ArrayPtr JsArrayBuffer::asArrayPtr() { + JSG_REQUIRE(!isImmutable(), TypeError, "ArrayBuffer is immutable"); v8::Local inner = *this; if (inner->WasDetached()) [[unlikely]] { return nullptr; @@ -708,15 +717,9 @@ kj::ArrayPtr JsArrayBuffer::asArrayPtr() const { JsArrayBuffer JsArrayBuffer::slice(Lock& js, size_t newLength) const { JSG_REQUIRE(newLength <= size(), RangeError, "New length exceeds buffer length"); - auto backing = v8::ArrayBuffer::NewBackingStore(js.v8Isolate, newLength, - v8::BackingStoreInitializationMode::kUninitialized, - 
v8::BackingStoreOnFailureMode::kReturnNull); - JSG_REQUIRE(backing != nullptr, RangeError, "Failed to allocate memory for ArrayBuffer"); - auto dest = kj::ArrayPtr(static_cast(backing->Data()), newLength); - v8::Local inner = *this; - dest.copyFrom( - kj::ArrayPtr(static_cast(inner->GetBackingStore()->Data()), newLength)); - return JsArrayBuffer(v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backing))); + auto dest = create(js, newLength); + dest.asArrayPtr().copyFrom(asArrayPtr().slice(0, newLength)); + return dest; } size_t JsArrayBuffer::size() const { @@ -729,6 +732,268 @@ kj::Array JsArrayBuffer::copy() { return kj::heapArray(ptr); } +JsArrayBuffer::operator JsBufferSource() const { + v8::Local inner = *this; + return jsg::JsBufferSource(inner); +} + +bool JsArrayBuffer::isDetachable() const { + v8::Local inner = *this; + return inner->IsDetachable(); +} + +bool JsArrayBuffer::isDetached() const { + v8::Local inner = *this; + return inner->WasDetached(); +} + +void JsArrayBuffer::detachInPlace(Lock& js) { + JSG_REQUIRE(!isImmutable(), TypeError, "ArrayBuffer is immutable"); + JSG_REQUIRE(isDetachable(), TypeError, "ArrayBuffer is not detachable"); + v8::Local inner = *this; + check(inner->Detach({})); +} + +JsArrayBuffer JsArrayBuffer::detachAndTake(Lock& js) { + JSG_REQUIRE(!isImmutable(), TypeError, "ArrayBuffer is immutable"); + JSG_REQUIRE(isDetachable(), TypeError, "ArrayBuffer is not detachable"); + v8::Local inner = *this; + auto backing = inner->GetBackingStore(); + check(inner->Detach({})); + return JsArrayBuffer(v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backing))); +} + +namespace { +template +void checkViewBounds(size_t offset, size_t numElements, size_t bufferSize) { + JSG_REQUIRE(offset % N == 0, RangeError, "Byte offset is not a multiple of ", N); + JSG_REQUIRE(offset <= bufferSize && numElements <= (bufferSize - offset) / N, RangeError, + "Typed array view extends beyond the ArrayBuffer bounds"); +} +} // namespace + +JsUint8Array 
JsArrayBuffer::newUint8View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsUint8Array(v8::Uint8Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsArrayBuffer::newInt8View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Int8Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsArrayBuffer::newUint8ClampedView(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Uint8ClampedArray::New(inner, offset, numElements)); +} +JsArrayBufferView JsArrayBuffer::newUint16View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Uint16Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsArrayBuffer::newInt16View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Int16Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsArrayBuffer::newUint32View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Uint32Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsArrayBuffer::newInt32View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Int32Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsArrayBuffer::newFloat16View(size_t offset, size_t numElements) const { + static constexpr size_t kFloat16Size = 2; + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Float16Array::New(inner, offset, numElements)); +} 
+JsArrayBufferView JsArrayBuffer::newFloat32View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Float32Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsArrayBuffer::newFloat64View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Float64Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsArrayBuffer::newBigInt64View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::BigInt64Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsArrayBuffer::newBigUint64View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::BigUint64Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsArrayBuffer::newDataView(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::DataView::New(inner, offset, numElements)); +} + +bool JsArrayBuffer::isResizable() const { + v8::Local inner = *this; + return inner->IsResizableByUserJavaScript(); +} + +JsArrayBuffer::operator JsUint8Array() const { + return newUint8View(0, size()); +} + +// ====================================================================================== +// JsSharedArrayBuffer + +kj::Maybe JsSharedArrayBuffer::tryCreate(Lock& js, size_t length) { + JSG_REQUIRE(length < v8::ArrayBuffer::kMaxByteLength, RangeError, "The length is too large"); + auto backing = v8::SharedArrayBuffer::NewBackingStore(js.v8Isolate, length, + v8::BackingStoreInitializationMode::kZeroInitialized, + v8::BackingStoreOnFailureMode::kReturnNull); + if (backing == nullptr) return kj::none; + return create(js, kj::mv(backing)); 
+} + +JsSharedArrayBuffer JsSharedArrayBuffer::create(Lock& js, size_t length) { + JSG_REQUIRE(length < v8::ArrayBuffer::kMaxByteLength, RangeError, "The length is too large"); + auto backing = v8::SharedArrayBuffer::NewBackingStore(js.v8Isolate, length, + v8::BackingStoreInitializationMode::kZeroInitialized, + v8::BackingStoreOnFailureMode::kReturnNull); + JSG_REQUIRE(backing != nullptr, RangeError, "Failed to allocate memory for ArrayBuffer"); + return create(js, kj::mv(backing)); +} + +JsSharedArrayBuffer JsSharedArrayBuffer::create(Lock& js, kj::ArrayPtr data) { + auto buf = create(js, data.size()); + buf.asArrayPtr().copyFrom(data); + return buf; +} + +JsSharedArrayBuffer JsSharedArrayBuffer::create( + Lock& js, std::unique_ptr backingStore) { + return JsSharedArrayBuffer(v8::SharedArrayBuffer::New(js.v8Isolate, kj::mv(backingStore))); +} + +JsSharedArrayBuffer JsSharedArrayBuffer::create( + Lock& js, std::shared_ptr backingStore) { + return JsSharedArrayBuffer(v8::SharedArrayBuffer::New(js.v8Isolate, kj::mv(backingStore))); +} + +kj::ArrayPtr JsSharedArrayBuffer::asArrayPtr() { + // No immutability check here because SharedArrayBuffers are always mutable. 
+ v8::Local inner = *this; + auto data = static_cast(inner->Data()); + size_t length = inner->ByteLength(); + return kj::ArrayPtr(data, length); +} + +kj::ArrayPtr JsSharedArrayBuffer::asArrayPtr() const { + v8::Local inner = *this; + auto data = static_cast(inner->Data()); + size_t length = inner->ByteLength(); + return kj::ArrayPtr(data, length); +} + +JsSharedArrayBuffer JsSharedArrayBuffer::slice(Lock& js, size_t newLength) const { + JSG_REQUIRE(newLength <= size(), RangeError, "New length exceeds buffer length"); + auto dest = create(js, newLength); + dest.asArrayPtr().copyFrom(asArrayPtr().slice(0, newLength)); + return dest; +} + +size_t JsSharedArrayBuffer::size() const { + v8::Local inner = *this; + return inner->ByteLength(); +} + +kj::Array JsSharedArrayBuffer::copy() { + auto ptr = asArrayPtr(); + return kj::heapArray(ptr); +} + +JsSharedArrayBuffer::operator JsBufferSource() const { + v8::Local inner = *this; + return jsg::JsBufferSource(inner); +} + +JsUint8Array JsSharedArrayBuffer::newUint8View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsUint8Array(v8::Uint8Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsSharedArrayBuffer::newInt8View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Int8Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsSharedArrayBuffer::newUint8ClampedView( + size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Uint8ClampedArray::New(inner, offset, numElements)); +} +JsArrayBufferView JsSharedArrayBuffer::newUint16View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Uint16Array::New(inner, offset, numElements)); +} 
+JsArrayBufferView JsSharedArrayBuffer::newInt16View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Int16Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsSharedArrayBuffer::newUint32View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Uint32Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsSharedArrayBuffer::newInt32View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Int32Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsSharedArrayBuffer::newFloat16View(size_t offset, size_t numElements) const { + static constexpr size_t kFloat16Size = 2; + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Float16Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsSharedArrayBuffer::newFloat32View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Float32Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsSharedArrayBuffer::newFloat64View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::Float64Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsSharedArrayBuffer::newBigInt64View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::BigInt64Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsSharedArrayBuffer::newBigUint64View(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + 
return JsArrayBufferView(v8::BigUint64Array::New(inner, offset, numElements)); +} +JsArrayBufferView JsSharedArrayBuffer::newDataView(size_t offset, size_t numElements) const { + checkViewBounds(offset, numElements, size()); + v8::Local inner = *this; + return JsArrayBufferView(v8::DataView::New(inner, offset, numElements)); +} + +JsSharedArrayBuffer::operator JsUint8Array() const { + return newUint8View(0, size()); +} + // ====================================================================================== // JsArrayBufferView @@ -737,6 +1002,11 @@ size_t JsArrayBufferView::size() const { return inner->ByteLength(); } +size_t JsArrayBufferView::getOffset() const { + v8::Local inner = *this; + return inner->ByteOffset(); +} + bool JsArrayBufferView::isIntegerType() const { v8::Local inner = *this; return inner->IsUint8Array() || inner->IsUint8ClampedArray() || inner->IsInt8Array() || @@ -744,10 +1014,257 @@ bool JsArrayBufferView::isIntegerType() const { inner->IsInt32Array() || inner->IsBigInt64Array() || inner->IsBigUint64Array(); } +bool JsArrayBufferView::isUint8Array() const { + v8::Local inner = *this; + return inner->IsUint8Array(); +} + +bool JsArrayBufferView::isInt8Array() const { + v8::Local inner = *this; + return inner->IsInt8Array(); +} + +bool JsArrayBufferView::isUint8ClampedArray() const { + v8::Local inner = *this; + return inner->IsUint8ClampedArray(); +} + +bool JsArrayBufferView::isUint16Array() const { + v8::Local inner = *this; + return inner->IsUint16Array(); +} + +bool JsArrayBufferView::isInt16Array() const { + v8::Local inner = *this; + return inner->IsInt16Array(); +} + +bool JsArrayBufferView::isUint32Array() const { + v8::Local inner = *this; + return inner->IsUint32Array(); +} + +bool JsArrayBufferView::isInt32Array() const { + v8::Local inner = *this; + return inner->IsInt32Array(); +} + +bool JsArrayBufferView::isFloat16Array() const { + v8::Local inner = *this; + return inner->IsFloat16Array(); +} + +bool 
JsArrayBufferView::isFloat32Array() const { + v8::Local inner = *this; + return inner->IsFloat32Array(); +} + +bool JsArrayBufferView::isFloat64Array() const { + v8::Local inner = *this; + return inner->IsFloat64Array(); +} + +bool JsArrayBufferView::isBigInt64Array() const { + v8::Local inner = *this; + return inner->IsBigInt64Array(); +} + +bool JsArrayBufferView::isBigUint64Array() const { + v8::Local inner = *this; + return inner->IsBigUint64Array(); +} + +bool JsArrayBufferView::isDataView() const { + v8::Local inner = *this; + return inner->IsDataView(); +} + +size_t JsArrayBufferView::getElementSize() const { + v8::Local inner = *this; + if (inner->IsUint8Array() || inner->IsInt8Array() || inner->IsUint8ClampedArray()) { + return 1; + } else if (inner->IsUint16Array() || inner->IsInt16Array() || inner->IsFloat16Array()) { + return 2; + } else if (inner->IsUint32Array() || inner->IsInt32Array() || inner->IsFloat32Array()) { + return 4; + } else if (inner->IsFloat64Array() || inner->IsBigInt64Array() || inner->IsBigUint64Array()) { + return 8; + } else if (inner->IsDataView()) { + return 1; // DataView is byte-addressable + } + KJ_UNREACHABLE; // Not a valid ArrayBufferView type +} + +JsArrayBuffer JsArrayBufferView::getBuffer() const { + v8::Local inner = *this; + return JsArrayBuffer(inner->Buffer()); +} + +bool JsArrayBufferView::isDetachable() const { + v8::Local inner = *this; + return inner->Buffer()->IsDetachable(); +} + +bool JsArrayBufferView::isDetached() const { + v8::Local inner = *this; + return inner->Buffer()->WasDetached(); +} + +void JsArrayBufferView::detachInPlace(Lock& js) { + JSG_REQUIRE(!isImmutable(), TypeError, "ArrayBufferView is immutable"); + JSG_REQUIRE(isDetachable(), TypeError, "ArrayBuffer is not detachable"); + v8::Local inner = *this; + check(inner->Buffer()->Detach({})); +} + +JsArrayBufferView JsArrayBufferView::detachAndTake(Lock& js) { + JSG_REQUIRE(!isImmutable(), TypeError, "ArrayBufferView is immutable"); + 
JSG_REQUIRE(isDetachable(), TypeError, "ArrayBuffer is not detachable"); + v8::Local inner = *this; + auto length = inner->ByteLength(); + auto offset = inner->ByteOffset(); + auto ab = getBuffer().detachAndTake(js); + + // We have to return the same type of view + size_t size = length / getElementSize(); + if (inner->IsUint8Array()) { + return ab.newUint8View(offset, size); + } else if (inner->IsInt8Array()) { + return ab.newInt8View(offset, size); + } else if (inner->IsUint8ClampedArray()) { + return ab.newUint8ClampedView(offset, size); + } else if (inner->IsUint16Array()) { + return ab.newUint16View(offset, size); + } else if (inner->IsInt16Array()) { + return ab.newInt16View(offset, size); + } else if (inner->IsUint32Array()) { + return ab.newUint32View(offset, size); + } else if (inner->IsInt32Array()) { + return ab.newInt32View(offset, size); + } else if (inner->IsFloat16Array()) { + return ab.newFloat16View(offset, size); + } else if (inner->IsFloat32Array()) { + return ab.newFloat32View(offset, size); + } else if (inner->IsFloat64Array()) { + return ab.newFloat64View(offset, size); + } else if (inner->IsBigInt64Array()) { + return ab.newBigInt64View(offset, size); + } else if (inner->IsBigUint64Array()) { + return ab.newBigUint64View(offset, size); + } else if (inner->IsDataView()) { + return ab.newDataView(offset, size); + } + + KJ_UNREACHABLE; +} + +JsArrayBufferView JsArrayBufferView::slice(Lock& js, size_t offset, size_t length) const { + v8::Local inner = *this; + auto byteOffset = inner->ByteOffset(); + JSG_REQUIRE(offset <= SIZE_MAX - byteOffset, RangeError, "offset overflow"); + offset = byteOffset + offset; + auto buffer = inner->Buffer(); + auto bufSize = buffer->ByteLength(); + JSG_REQUIRE(offset <= bufSize && length <= bufSize - offset, RangeError, + "Typed array view extends beyond the ArrayBuffer bounds"); + + size_t size = length / getElementSize(); + + if (inner->IsUint8Array()) { + return JsArrayBufferView(v8::Uint8Array::New(buffer, 
offset, size)); + } else if (inner->IsInt8Array()) { + return JsArrayBufferView(v8::Int8Array::New(buffer, offset, size)); + } else if (inner->IsUint8ClampedArray()) { + return JsArrayBufferView(v8::Uint8ClampedArray::New(buffer, offset, size)); + } else if (inner->IsUint16Array()) { + return JsArrayBufferView(v8::Uint16Array::New(buffer, offset, size)); + } else if (inner->IsInt16Array()) { + return JsArrayBufferView(v8::Int16Array::New(buffer, offset, size)); + } else if (inner->IsUint32Array()) { + return JsArrayBufferView(v8::Uint32Array::New(buffer, offset, size)); + } else if (inner->IsInt32Array()) { + return JsArrayBufferView(v8::Int32Array::New(buffer, offset, size)); + } else if (inner->IsFloat16Array()) { + return JsArrayBufferView(v8::Float16Array::New(buffer, offset, size)); + } else if (inner->IsFloat32Array()) { + return JsArrayBufferView(v8::Float32Array::New(buffer, offset, size)); + } else if (inner->IsFloat64Array()) { + return JsArrayBufferView(v8::Float64Array::New(buffer, offset, size)); + } else if (inner->IsBigInt64Array()) { + return JsArrayBufferView(v8::BigInt64Array::New(buffer, offset, size)); + } else if (inner->IsBigUint64Array()) { + return JsArrayBufferView(v8::BigUint64Array::New(buffer, offset, size)); + } else if (inner->IsDataView()) { + return JsArrayBufferView(v8::DataView::New(buffer, offset, size)); + } + + KJ_UNREACHABLE; +} + +bool JsArrayBufferView::isResizable() const { + v8::Local inner = *this; + return inner->Buffer()->IsResizableByUserJavaScript(); +} + +JsArrayBufferView::operator JsBufferSource() const { + v8::Local inner = *this; + return jsg::JsBufferSource(inner); +} + +JsArrayBufferView::operator JsUint8Array() const { + v8::Local inner = *this; + if (inner->IsUint8Array()) { + return jsg::JsUint8Array(inner.As()); + } + + auto buf = inner->Buffer(); + return jsg::JsUint8Array(v8::Uint8Array::New(buf, inner->ByteOffset(), inner->ByteLength())); +} + +JsArrayBufferView JsArrayBufferView::clone(jsg::Lock& js) { + 
v8::Local inner = *this; + auto backing = inner->Buffer()->GetBackingStore(); + auto ab = jsg::JsArrayBuffer::create(js, kj::mv(backing)); + + auto offset = getOffset(); + auto length = size(); + auto size = length / getElementSize(); + + if (inner->IsUint8Array()) { + return ab.newUint8View(offset, size); + } else if (inner->IsInt8Array()) { + return ab.newInt8View(offset, size); + } else if (inner->IsUint8ClampedArray()) { + return ab.newUint8ClampedView(offset, size); + } else if (inner->IsUint16Array()) { + return ab.newUint16View(offset, size); + } else if (inner->IsInt16Array()) { + return ab.newInt16View(offset, size); + } else if (inner->IsUint32Array()) { + return ab.newUint32View(offset, size); + } else if (inner->IsInt32Array()) { + return ab.newInt32View(offset, size); + } else if (inner->IsFloat16Array()) { + return ab.newFloat16View(offset, size); + } else if (inner->IsFloat32Array()) { + return ab.newFloat32View(offset, size); + } else if (inner->IsFloat64Array()) { + return ab.newFloat64View(offset, size); + } else if (inner->IsBigInt64Array()) { + return ab.newBigInt64View(offset, size); + } else if (inner->IsBigUint64Array()) { + return ab.newBigUint64View(offset, size); + } else if (inner->IsDataView()) { + return ab.newDataView(offset, size); + } + KJ_UNREACHABLE; +} + // ====================================================================================== // JsBufferSource kj::ArrayPtr JsBufferSource::asArrayPtr() { + JSG_REQUIRE(!isImmutable(), TypeError, "BufferSource is immutable"); v8::Local inner = *this; if (inner->IsArrayBuffer()) { auto buf = inner.As(); @@ -765,8 +1282,45 @@ kj::ArrayPtr JsBufferSource::asArrayPtr() { if (buf->WasDetached()) [[unlikely]] { return nullptr; } - kj::byte* data = static_cast(buf->Data()) + view->ByteOffset(); - return kj::ArrayPtr(data, view->ByteLength()); + auto byteOffset = view->ByteOffset(); + auto byteLength = view->ByteLength(); + // Sandbox hardening: validate view's byte range against trusted 
backing store size. + auto bufSize = buf->ByteLength(); + if (byteOffset > bufSize || byteLength > bufSize - byteOffset) [[unlikely]] { + return nullptr; + } + kj::byte* data = static_cast(buf->Data()) + byteOffset; + return kj::ArrayPtr(data, byteLength); + } +} + +kj::ArrayPtr JsBufferSource::asArrayPtr() const { + v8::Local inner = *this; + if (inner->IsArrayBuffer()) { + auto buf = inner.As(); + if (buf->WasDetached()) [[unlikely]] { + return nullptr; + } + return kj::ArrayPtr(static_cast(buf->Data()), buf->ByteLength()); + } else if (inner->IsSharedArrayBuffer()) { + auto buf = inner.As(); + return kj::ArrayPtr(static_cast(buf->Data()), buf->ByteLength()); + } else { + KJ_DASSERT(inner->IsArrayBufferView()); + auto view = inner.As(); + auto buf = view->Buffer(); + if (buf->WasDetached()) [[unlikely]] { + return nullptr; + } + auto byteOffset = view->ByteOffset(); + auto byteLength = view->ByteLength(); + // Sandbox hardening: validate view's byte range against trusted backing store size. 
+ auto bufSize = buf->ByteLength(); + if (byteOffset > bufSize || byteLength > bufSize - byteOffset) [[unlikely]] { + return nullptr; + } + const kj::byte* data = static_cast(buf->Data()) + byteOffset; + return kj::ArrayPtr(data, byteLength); } } @@ -829,9 +1383,137 @@ bool JsBufferSource::isResizable() const { return false; } +bool JsBufferSource::isDetachable() const { + v8::Local inner = *this; + if (inner->IsArrayBuffer()) { + return inner.As()->IsDetachable(); + } else if (inner->IsSharedArrayBuffer()) { + return false; // SharedArrayBuffers are never detachable + } else { + KJ_DASSERT(inner->IsArrayBufferView()); + return inner.As()->Buffer()->IsDetachable(); + } +} + +bool JsBufferSource::isDetached() const { + v8::Local inner = *this; + if (inner->IsArrayBuffer()) { + return inner.As()->WasDetached(); + } else if (inner->IsSharedArrayBuffer()) { + return false; // SharedArrayBuffers are never detachable + } else { + KJ_DASSERT(inner->IsArrayBufferView()); + return inner.As()->Buffer()->WasDetached(); + } +} + +void JsBufferSource::detachInPlace(Lock& js) { + JSG_REQUIRE(!isImmutable(), TypeError, "BufferSource is immutable"); + JSG_REQUIRE(isDetachable(), TypeError, "BufferSource is not detachable"); + v8::Local inner = *this; + if (inner->IsArrayBuffer()) { + auto buf = inner.As(); + check(buf->Detach({})); + } else if (inner->IsSharedArrayBuffer()) { + KJ_UNREACHABLE; // SharedArrayBuffers are never detachable + } else { + KJ_DASSERT(inner->IsArrayBufferView()); + auto view = inner.As(); + check(view->Buffer()->Detach({})); + } +} + +JsBufferSource JsBufferSource::detachAndTake(Lock& js) { + JSG_REQUIRE(!isImmutable(), TypeError, "BufferSource is immutable"); + JSG_REQUIRE(isDetachable(), TypeError, "BufferSource is not detachable"); + v8::Local inner = *this; + if (inner->IsArrayBuffer()) { + JsArrayBuffer ab(inner.As()); + return JsBufferSource(ab.detachAndTake(js)); + } else if (inner->IsSharedArrayBuffer()) { + KJ_UNREACHABLE; // SharedArrayBuffers 
are never detachable + } + + KJ_DASSERT(inner->IsArrayBufferView()); + JsArrayBufferView view(inner.As()); + return JsBufferSource(view.detachAndTake(js)); +} + +JsBufferSource::operator JsUint8Array() const { + v8::Local inner = *this; + if (inner->IsArrayBuffer()) { + JsArrayBuffer ab(inner.As()); + return ab; + } + if (inner->IsSharedArrayBuffer()) { + JsSharedArrayBuffer ab(inner.As()); + return ab; + } + if (inner->IsUint8Array()) { + return jsg::JsUint8Array(inner.As()); + } + JsArrayBufferView view(inner.As()); + return view; +} + +size_t JsBufferSource::getOffset() const { + v8::Local inner = *this; + if (inner->IsArrayBuffer() || inner->IsSharedArrayBuffer()) { + return 0; + } + KJ_DASSERT(inner->IsArrayBufferView()); + auto view = inner.As(); + return view->ByteOffset(); +} + +size_t JsBufferSource::underlyingArrayBufferSize(Lock& js) const { + v8::Local inner = *this; + if (inner->IsArrayBuffer()) { + auto buf = inner.As(); + if (buf->WasDetached()) [[unlikely]] { + return 0; + } + return buf->ByteLength(); + } else if (inner->IsSharedArrayBuffer()) { + auto buf = inner.As(); + return buf->ByteLength(); + } else { + KJ_DASSERT(inner->IsArrayBufferView()); + auto view = inner.As(); + auto buf = view->Buffer(); + if (buf->WasDetached()) [[unlikely]] { + return 0; + } + return buf->ByteLength(); + } +} + +bool JsBufferSource::isImmutable() const { + v8::Local inner = *this; + if (inner->IsArrayBuffer()) { + v8::Local buf = inner.As(); + return buf->IsImmutable(); + } else if (inner->IsSharedArrayBuffer()) { + return false; // SharedArrayBuffers are never immutable + } else if (inner->IsArrayBufferView()) { + v8::Local view = inner.As(); + return view->Buffer()->IsImmutable(); + } + KJ_UNREACHABLE; +} + // ====================================================================================== // JsUint8Array +kj::Maybe JsUint8Array::tryCreate(Lock& js, size_t length) { + JSG_REQUIRE(length < v8::ArrayBuffer::kMaxByteLength, RangeError, "The length is too 
large"); + auto backing = v8::ArrayBuffer::NewBackingStore(js.v8Isolate, length, + v8::BackingStoreInitializationMode::kZeroInitialized, + v8::BackingStoreOnFailureMode::kReturnNull); + if (backing == nullptr) return kj::none; + return create(js, kj::mv(backing), 0, length); +} + JsUint8Array JsUint8Array::create(Lock& js, size_t length) { JSG_REQUIRE(length < v8::ArrayBuffer::kMaxByteLength, RangeError, "The length is too large"); auto backing = v8::ArrayBuffer::NewBackingStore(js.v8Isolate, length, @@ -852,26 +1534,21 @@ JsUint8Array JsUint8Array::create(Lock& js, JsArrayBuffer& buffer) { return JsUint8Array(v8::Uint8Array::New(ab, 0, ab->ByteLength())); } +JsUint8Array JsUint8Array::create(Lock& js, JsSharedArrayBuffer& buffer) { + v8::Local ab = buffer; + return JsUint8Array(v8::Uint8Array::New(ab, 0, ab->ByteLength())); +} + JsUint8Array JsUint8Array::create( Lock& js, std::unique_ptr backingStore, size_t byteOffset, size_t length) { + checkViewBounds(byteOffset, length, backingStore->ByteLength()); return JsUint8Array(v8::Uint8Array::New( v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backingStore)), byteOffset, length)); } JsUint8Array JsUint8Array::slice(Lock& js, size_t newLength) const { JSG_REQUIRE(newLength <= size(), RangeError, "New length exceeds array length"); - auto u8 = v8::Uint8Array::New(inner->Buffer(), inner->ByteOffset(), newLength); - return JsUint8Array(u8); -} - -kj::ArrayPtr JsUint8Array::asArrayPtr() const { - auto buf = inner->Buffer(); - if (buf->WasDetached()) [[unlikely]] { - return nullptr; - } - const kj::byte* data = static_cast(buf->Data()) + inner->ByteOffset(); - size_t length = inner->ByteLength(); - return kj::ArrayPtr(data, length); + return slice(js, 0, newLength); } size_t JsUint8Array::size() const { @@ -883,4 +1560,69 @@ kj::Array JsUint8Array::copy() { return kj::heapArray(ptr); } +JsArrayBuffer JsUint8Array::getBuffer() const { + auto buf = inner->Buffer(); + return JsArrayBuffer(buf); +} + +bool 
JsUint8Array::isDetachable() const { + auto buf = inner->Buffer(); + return buf->IsDetachable(); +} + +bool JsUint8Array::isDetached() const { + auto buf = inner->Buffer(); + return buf->WasDetached(); +} + +void JsUint8Array::detachInPlace(Lock& js) { + JSG_REQUIRE(!isImmutable(), TypeError, "Uint8Array is immutable"); + JSG_REQUIRE(isDetachable(), TypeError, "Uint8Array is not detachable"); + auto buf = inner->Buffer(); + check(buf->Detach({})); +} + +JsUint8Array JsUint8Array::detachAndTake(Lock& js) { + JSG_REQUIRE(!isImmutable(), TypeError, "Uint8Array is immutable"); + JSG_REQUIRE(isDetachable(), TypeError, "Uint8Array is not detachable"); + v8::Local inner = *this; + auto length = inner->ByteLength(); + auto offset = inner->ByteOffset(); + auto ab = getBuffer().detachAndTake(js); + return JsUint8Array(v8::Uint8Array::New(ab, offset, length)); +} + +JsUint8Array JsUint8Array::slice(Lock& js, size_t offset, size_t length) const { + auto buf = inner->Buffer(); + auto byteOffset = inner->ByteOffset(); + JSG_REQUIRE(offset <= SIZE_MAX - byteOffset, RangeError, "offset overflow"); + checkViewBounds(byteOffset + offset, length, buf->ByteLength()); + return JsUint8Array(v8::Uint8Array::New(buf, byteOffset + offset, length)); +} + +bool JsUint8Array::isResizable() const { + auto buf = inner->Buffer(); + return buf->IsResizableByUserJavaScript(); +} + +JsUint8Array::operator JsArrayBufferView() const { + v8::Local inner = *this; + return jsg::JsArrayBufferView(inner); +} + +JsUint8Array::operator JsBufferSource() const { + v8::Local inner = *this; + return jsg::JsBufferSource(inner); +} + +JsUint8Array JsUint8Array::clone(jsg::Lock& js) { + // Creates a new Uint8Array view over the same underlying backing store. The ArrayBuffer + // instance is different but the backing store is shared. So "clone" here is really + // "shallow clone". Intentionally does not copy the data. 
+ auto buf = inner->Buffer(); + auto backing = buf->GetBackingStore(); + auto ab = jsg::JsArrayBuffer::create(js, kj::mv(backing)); + return JsUint8Array(v8::Uint8Array::New(ab, inner->ByteOffset(), inner->ByteLength())); +} + } // namespace workerd::jsg diff --git a/src/workerd/jsg/jsvalue.h b/src/workerd/jsg/jsvalue.h index f6d6647e733..ea744a4a6f4 100644 --- a/src/workerd/jsg/jsvalue.h +++ b/src/workerd/jsg/jsvalue.h @@ -58,7 +58,6 @@ inline void requireOnStack(void* self) { V(BigInt64Array) \ V(BigUint64Array) \ V(DataView) \ - V(SharedArrayBuffer) \ V(WasmMemoryObject) \ V(WasmModuleObject) \ JS_TYPE_CLASSES(V) @@ -79,6 +78,9 @@ enum IndexFilter { INCLUDE_INDICES, SKIP_INDICES }; enum PromiseState { PENDING, FULFILLED, REJECTED }; +template +class WeakJsRef; + // A JsValue is an abstraction for a JavaScript value that has not been mapped // to a C++ type. It wraps an underlying v8::Local in order to avoid direct // use of the v8 API in many cases. The JsValue (and JsRef) are meant to @@ -153,6 +155,7 @@ class JsValue final { static JsValue fromJson(Lock& js, const JsValue& input) KJ_WARN_UNUSED_RESULT; JsRef addRef(Lock& js) KJ_WARN_UNUSED_RESULT; + WeakJsRef getWeakRef(Lock& js) KJ_WARN_UNUSED_RESULT; JsValue structuredClone( Lock& js, kj::Maybe> maybeTransfers = kj::none) KJ_WARN_UNUSED_RESULT; @@ -202,6 +205,7 @@ class JsBase { requireOnStack(this); } JsRef addRef(Lock& js) KJ_WARN_UNUSED_RESULT; + WeakJsRef getWeakRef(Lock& js) KJ_WARN_UNUSED_RESULT; private: v8::Local inner; @@ -234,12 +238,15 @@ class JsArray final: public JsBase { class JsArrayBuffer final: public JsBase { public: + static kj::Maybe tryCreate(Lock& js, size_t length); + static JsArrayBuffer create(Lock& js, size_t length); // Allocate and copy data from the given ArrayPtr in a single step. 
static JsArrayBuffer create(Lock& js, kj::ArrayPtr data); static JsArrayBuffer create(Lock& js, std::unique_ptr backingStore); + static JsArrayBuffer create(Lock& js, std::shared_ptr backingStore); JsArrayBuffer slice(Lock& js, size_t newLength) const; @@ -251,35 +258,194 @@ class JsArrayBuffer final: public JsBase { // Return a copy of this buffer's data as a kj::Array. kj::Array copy(); + // A JsArrayBuffer can be used as a JsBufferSource, which is a more general type that + // also includes JsArrayBufferView. + operator JsBufferSource() const; + + // A JsArrayBuffer might be detachable. + bool isDetachable() const; + bool isDetached() const; + void detachInPlace(Lock& js); + JsArrayBuffer detachAndTake(Lock& js) KJ_WARN_UNUSED_RESULT; + + // Set up for later when immutable arraybuffer is a thing + bool isImmutable() const { + return inner->IsImmutable(); + } + + // Return a view over this buffer + JsUint8Array newUint8View(size_t offset, size_t numElements) const; + JsArrayBufferView newInt8View(size_t offset, size_t numElements) const; + JsArrayBufferView newUint8ClampedView(size_t offset, size_t numElements) const; + JsArrayBufferView newUint16View(size_t offset, size_t numElements) const; + JsArrayBufferView newInt16View(size_t offset, size_t numElements) const; + JsArrayBufferView newUint32View(size_t offset, size_t numElements) const; + JsArrayBufferView newInt32View(size_t offset, size_t numElements) const; + JsArrayBufferView newFloat16View(size_t offset, size_t numElements) const; + JsArrayBufferView newFloat32View(size_t offset, size_t numElements) const; + JsArrayBufferView newFloat64View(size_t offset, size_t numElements) const; + JsArrayBufferView newBigInt64View(size_t offset, size_t numElements) const; + JsArrayBufferView newBigUint64View(size_t offset, size_t numElements) const; + JsArrayBufferView newDataView(size_t offset, size_t numElements) const; + + bool isResizable() const; + + operator JsUint8Array() const; + using JsBase::JsBase; }; 
+class JsSharedArrayBuffer final: public JsBase { + public: + static kj::Maybe tryCreate(Lock& js, size_t length); + + static JsSharedArrayBuffer create(Lock& js, size_t length); + + // Allocate and copy data from the given ArrayPtr in a single step. + static JsSharedArrayBuffer create(Lock& js, kj::ArrayPtr data); + + // Take ownership of the given backing store. + static JsSharedArrayBuffer create(Lock& js, std::unique_ptr backingStore); + static JsSharedArrayBuffer create(Lock& js, std::shared_ptr backingStore); + + JsSharedArrayBuffer slice(Lock& js, size_t newLength) const; + + kj::ArrayPtr asArrayPtr(); + kj::ArrayPtr asArrayPtr() const; + + size_t size() const; + + // Return a copy of this buffer's data as a kj::Array. + kj::Array copy(); + + // A JsSharedArrayBuffer can be used as a JsBufferSource, which is a more general type that + // also includes JsArrayBufferView. + operator JsBufferSource() const; + + // Return a view over this buffer + JsUint8Array newUint8View(size_t offset, size_t numElements) const; + JsArrayBufferView newInt8View(size_t offset, size_t numElements) const; + JsArrayBufferView newUint8ClampedView(size_t offset, size_t numElements) const; + JsArrayBufferView newUint16View(size_t offset, size_t numElements) const; + JsArrayBufferView newInt16View(size_t offset, size_t numElements) const; + JsArrayBufferView newUint32View(size_t offset, size_t numElements) const; + JsArrayBufferView newInt32View(size_t offset, size_t numElements) const; + JsArrayBufferView newFloat16View(size_t offset, size_t numElements) const; + JsArrayBufferView newFloat32View(size_t offset, size_t numElements) const; + JsArrayBufferView newFloat64View(size_t offset, size_t numElements) const; + JsArrayBufferView newBigInt64View(size_t offset, size_t numElements) const; + JsArrayBufferView newBigUint64View(size_t offset, size_t numElements) const; + JsArrayBufferView newDataView(size_t offset, size_t numElements) const; + + operator JsUint8Array() const; + + using 
JsBase::JsBase; +}; + class JsArrayBufferView final: public JsBase { public: template kj::ArrayPtr asArrayPtr() { + JSG_REQUIRE(!isImmutable(), TypeError, "ArrayBufferView is immutable"); v8::Local inner = *this; auto buf = inner->Buffer(); if (buf->WasDetached()) [[unlikely]] { return nullptr; } auto byteLength = inner->ByteLength(); - T* data = reinterpret_cast(static_cast(buf->Data()) + inner->ByteOffset()); + auto byteOffset = inner->ByteOffset(); + + // Sandbox hardening: validate that the view's byte range falls within the + // backing store's trusted size. In-cage ByteOffset/ByteLength fields can be + // corrupted by an attacker; buf->ByteLength() is the trusted out-of-cage value. + auto bufSize = buf->ByteLength(); + if (byteOffset > bufSize || byteLength > bufSize - byteOffset) [[unlikely]] { + return nullptr; + } + T* data = reinterpret_cast(static_cast(buf->Data()) + byteOffset); + + return kj::ArrayPtr(data, byteLength / sizeof(T)); + } + + template + kj::ArrayPtr asArrayPtr() const { + v8::Local inner = *this; + auto buf = inner->Buffer(); + if (buf->WasDetached()) [[unlikely]] { + return nullptr; + } + auto byteLength = inner->ByteLength(); + auto byteOffset = inner->ByteOffset(); + + // Sandbox hardening: validate that the view's byte range falls within the + // backing store's trusted size. In-cage ByteOffset/ByteLength fields can be + // corrupted by an attacker; buf->ByteLength() is the trusted out-of-cage value. + auto bufSize = buf->ByteLength(); + if (byteOffset > bufSize || byteLength > bufSize - byteOffset) [[unlikely]] { + return nullptr; + } + const T* data = + reinterpret_cast(static_cast(buf->Data()) + byteOffset); + return kj::ArrayPtr(data, byteLength / sizeof(T)); } size_t size() const; + size_t getOffset() const; // Returns true if the underlying view is an integer-typed TypedArray // (e.g. Uint8Array, Int32Array, BigUint64Array) as opposed to a float-typed // TypedArray or DataView. 
bool isIntegerType() const; + bool isUint8Array() const; + bool isInt8Array() const; + bool isUint8ClampedArray() const; + bool isUint16Array() const; + bool isInt16Array() const; + bool isUint32Array() const; + bool isInt32Array() const; + bool isFloat16Array() const; + bool isFloat32Array() const; + bool isFloat64Array() const; + bool isBigInt64Array() const; + bool isBigUint64Array() const; + bool isDataView() const; + + size_t getElementSize() const; + + JsArrayBuffer getBuffer() const; + + bool isDetachable() const; + bool isDetached() const; + void detachInPlace(Lock& js); + JsArrayBufferView detachAndTake(Lock& js) KJ_WARN_UNUSED_RESULT; + + // Get a new view of the same type over the same buffer. offset and length are in bytes, + // with offset relative to the start of this view. For multi-byte views, length is + // truncated to a multiple of getElementSize(). + JsArrayBufferView slice(Lock& js, size_t offset, size_t length) const; + + bool isResizable() const; + + bool isImmutable() const { + return inner->Buffer()->IsImmutable(); + } + + operator JsBufferSource() const; + + // Regardless of what kind of typed array view this is, we can always get it as a Uint8Array + operator JsUint8Array() const; + + JsArrayBufferView clone(jsg::Lock& js); + using JsBase::JsBase; }; class JsUint8Array final: public JsBase { public: + static kj::Maybe tryCreate(Lock& js, size_t length); + static JsUint8Array create(Lock& js, size_t length); // Allocate and copy data from the given ArrayPtr in a single step. @@ -287,6 +453,7 @@ class JsUint8Array final: public JsBase { // Create a Uint8Array view over the given ArrayBuffer. 
static JsUint8Array create(Lock& js, JsArrayBuffer& buffer); + static JsUint8Array create(Lock& js, JsSharedArrayBuffer& buffer); static JsUint8Array create( Lock& js, std::unique_ptr backingStore, size_t byteOffset, size_t length); @@ -295,23 +462,78 @@ class JsUint8Array final: public JsBase { template kj::ArrayPtr asArrayPtr() { + JSG_REQUIRE(!isImmutable(), TypeError, "ArrayBufferView is immutable"); v8::Local inner = *this; auto buf = inner->Buffer(); if (buf->WasDetached()) [[unlikely]] { return nullptr; } auto byteLength = inner->ByteLength(); - T* data = reinterpret_cast(static_cast(buf->Data()) + inner->ByteOffset()); + + auto byteOffset = inner->ByteOffset(); + // Sandbox hardening: validate that the view's byte range falls within the + // backing store's trusted size. In-cage ByteOffset/ByteLength fields can be + // corrupted by an attacker; buf->ByteLength() is the trusted out-of-cage value. + auto bufSize = buf->ByteLength(); + if (byteOffset > bufSize || byteLength > bufSize - byteOffset) [[unlikely]] { + return nullptr; + } + T* data = reinterpret_cast(static_cast(buf->Data()) + byteOffset); + return kj::ArrayPtr(data, byteLength / sizeof(T)); } - kj::ArrayPtr asArrayPtr() const; + template + kj::ArrayPtr asArrayPtr() const { + v8::Local inner = *this; + auto buf = inner->Buffer(); + if (buf->WasDetached()) [[unlikely]] { + return nullptr; + } + auto byteLength = inner->ByteLength(); + + auto byteOffset = inner->ByteOffset(); + // Sandbox hardening: validate that the view's byte range falls within the + // backing store's trusted size. In-cage ByteOffset/ByteLength fields can be + // corrupted by an attacker; buf->ByteLength() is the trusted out-of-cage value. 
+ auto bufSize = buf->ByteLength(); + if (byteOffset > bufSize || byteLength > bufSize - byteOffset) [[unlikely]] { + return nullptr; + } + const T* data = + reinterpret_cast(static_cast(buf->Data()) + byteOffset); + + return kj::ArrayPtr(data, byteLength / sizeof(T)); + } size_t size() const; // Return a copy of this buffer's data as a kj::Array. kj::Array copy(); + JsArrayBuffer getBuffer() const; + + bool isDetachable() const; + bool isDetached() const; + void detachInPlace(Lock& js); + JsUint8Array detachAndTake(Lock& js) KJ_WARN_UNUSED_RESULT; + + // Get a new view of the same type over the same buffer. offset and length are in bytes, + // with offset relative to the start of this view. + JsUint8Array slice(Lock& js, size_t offset, size_t length) const; + + bool isResizable() const; + + // Set up for later when immutable arraybuffer is a thing + bool isImmutable() const { + return inner->Buffer()->IsImmutable(); + } + + operator JsArrayBufferView() const; + operator JsBufferSource() const; + + JsUint8Array clone(jsg::Lock& js); + using JsBase::JsBase; }; @@ -324,15 +546,17 @@ class JsUint8Array final: public JsBase { // JS_TYPE_CLASSES; instead, JsValue::tryCast and JsValueWrapper handle it specially. 
class JsBufferSource final: public JsBase { public: - JsBufferSource(JsArrayBuffer& buffer): JsBase(static_cast>(buffer)) {} - JsBufferSource(JsUint8Array& buffer): JsBase(static_cast>(buffer)) {} - JsBufferSource(JsArrayBufferView& buffer): JsBase(static_cast>(buffer)) {} - JsBufferSource(v8::Local buffer) - : JsBase(static_cast>(buffer)) {} + JsBufferSource(JsArrayBuffer buffer): JsBase(static_cast>(buffer)) {} + JsBufferSource(JsUint8Array buffer): JsBase(static_cast>(buffer)) {} + JsBufferSource(JsArrayBufferView buffer): JsBase(static_cast>(buffer)) {} + JsBufferSource(JsSharedArrayBuffer buffer): JsBase(static_cast>(buffer)) {} kj::ArrayPtr asArrayPtr(); + kj::ArrayPtr asArrayPtr() const; size_t size() const; + size_t getOffset() const; + size_t underlyingArrayBufferSize(Lock& js) const; // Returns true if the underlying value is an integer-typed TypedArray. bool isIntegerType() const; @@ -342,9 +566,20 @@ class JsBufferSource final: public JsBase { bool isArrayBufferView() const; bool isResizable() const; + // Set up for later when immutable arraybuffer is a thing + bool isImmutable() const; + + bool isDetachable() const; + bool isDetached() const; + void detachInPlace(Lock& js); + JsBufferSource detachAndTake(Lock& js) KJ_WARN_UNUSED_RESULT; + // Return a copy of this buffer's data as a kj::Array. 
kj::Array copy(); + // Regardless of what kind of typed array view this is, we can always get it as a Uint8Array + operator JsUint8Array() const; + using JsBase::JsBase; }; @@ -529,6 +764,10 @@ class JsObject final: public JsBase { void setReadOnly(Lock& js, kj::StringPtr name, const JsValue& value); void setNonEnumerable(Lock& js, const JsSymbol& name, const JsValue& value); + // Like set but uses the createDataProperty API instead to avoid invoking + // user-defined Object.prototype setters + void createDataProperty(Lock& js, const JsValue& name, const JsValue& value); + // Like set but uses the defineProperty API instead in order to override // the default property attributes. This is useful for defining properties // that otherwise would not be normally settable, such as the name of an @@ -783,6 +1022,12 @@ class JsRef final { return kj::mv(value).template cast(jsg::Lock::current()); } + // Create a weak reference to the held JS value. The weak reference does not prevent the + // value from being garbage collected. + WeakJsRef getWeakRef(Lock& js) const { + return WeakJsRef(js, getHandle(js)); + } + JSG_MEMORY_INFO(JsRef) { tracker.trackField("value", value); } @@ -797,11 +1042,65 @@ class JsRef final { friend class MemoryTracker; }; +// A weak reference to a JsValue type (JsObject, JsString, etc.). +// +// Mirrors jsg::JsRef but does not prevent the value from being garbage collected. +// Automatically becomes invalid when V8's GC collects the underlying value. +// +// Usage: +// WeakJsRef weak(js, jsObj); +// KJ_IF_SOME(handle, weak.tryGetHandle(js)) { ... } +// KJ_IF_SOME(strong, weak.tryAddRef(js)) { ... 
} +template +class WeakJsRef final { + static_assert( + std::is_assignable_v, "WeakJsRef, T must be assignable to type JsValue"); + + public: + WeakJsRef(): WeakJsRef(nullptr) {} + WeakJsRef(decltype(nullptr)): value(nullptr) {} + WeakJsRef(Lock& js, const T& val): value(js.v8Isolate, v8::Local(val)) {} + WeakJsRef(WeakJsRef&& other) = default; + WeakJsRef& operator=(WeakJsRef&& other) = default; + KJ_DISALLOW_COPY(WeakJsRef); + + bool isAlive() const { + return value.isAlive(); + } + + kj::Maybe tryGetHandle(Lock& js) const { + return value.tryGetHandle(js.v8Isolate).map([](v8::Local local) -> T { + JsValue handle(local); + return KJ_ASSERT_NONNULL(handle.tryCast()); + }); + } + + T getHandle(Lock& js) const { + return KJ_ASSERT_NONNULL(tryGetHandle(js), "attempt to access collected jsg::WeakJsRef target"); + } + + kj::Maybe> tryAddRef(Lock& js) const { + return tryGetHandle(js).map([&](T handle) { return JsRef(js, handle); }); + } + + private: + WeakV8Ref value; +}; + +inline WeakJsRef JsValue::getWeakRef(Lock& js) { + return WeakJsRef(js, *this); +} + template inline JsRef JsBase::addRef(Lock& js) { return JsRef(js, *static_cast(this)); } +template +inline WeakJsRef JsBase::getWeakRef(Lock& js) { + return WeakJsRef(js, *static_cast(this)); +} + inline kj::String KJ_STRINGIFY(const JsValue& value) { return value.toString(jsg::Lock::current()); } @@ -1077,6 +1376,11 @@ inline void JsObject::set(Lock& js, kj::StringPtr name, const JsValue& value) { set(js, js.strIntern(name), value); } +inline void JsObject::createDataProperty(Lock& js, const JsValue& name, const JsValue& value) { + KJ_ASSERT(name.inner->IsName()); + check(inner->CreateDataProperty(js.v8Context(), name.inner.As(), value.inner)); +} + inline JsValue JsObject::get(Lock& js, const JsValue& name) { return JsValue(check(inner->Get(js.v8Context(), name.inner))); } diff --git a/src/workerd/jsg/modules-new.c++ b/src/workerd/jsg/modules-new.c++ index 73f3d5dd5c0..ac18ad81d99 100644 --- 
a/src/workerd/jsg/modules-new.c++ +++ b/src/workerd/jsg/modules-new.c++ @@ -1984,10 +1984,7 @@ Module::EvaluateCallback Module::newDataModuleHandler(kj::ArrayPtr bool { JSG_TRY(js) { - auto backing = jsg::BackingStore::alloc(js, data.size()); - backing.asArrayPtr().copyFrom(data); - auto buffer = jsg::BufferSource(js, kj::mv(backing)); - return ns.setDefault(js, JsValue(buffer.getHandle(js))); + return ns.setDefault(js, jsg::JsArrayBuffer::create(js, data)); } JSG_CATCH(exception) { js.v8Isolate->ThrowException(exception.getHandle(js)); diff --git a/src/workerd/jsg/promise-test.c++ b/src/workerd/jsg/promise-test.c++ index d43c5c3a873..afa37f216e4 100644 --- a/src/workerd/jsg/promise-test.c++ +++ b/src/workerd/jsg/promise-test.c++ @@ -98,6 +98,10 @@ struct PromiseContext: public jsg::Object, public jsg::ContextGlobal { return result; } + void prototypePolution(jsg::Lock& js) { + js.resolvedPromise(js.num(1)); + } + JSG_RESOURCE_TYPE(PromiseContext) { JSG_READONLY_PROTOTYPE_PROPERTY(promise, makePromise); JSG_METHOD(resolvePromise); @@ -111,6 +115,7 @@ struct PromiseContext: public jsg::Object, public jsg::ContextGlobal { JSG_METHOD(whenResolved); JSG_METHOD(thenable); + JSG_METHOD(prototypePolution); } kj::Maybe::Resolver> resolver; @@ -192,5 +197,23 @@ KJ_TEST("thenable") { e.expectEval("thenable({ then(res) { res(123) } })", "number", "123"); } +KJ_TEST("prototype polution") { + Evaluator e(v8System); + + e.expectEval(R"A( + let m = false; + Object.defineProperty(Object.prototype, 'then', { + configurable: true, + get() { + m = true; + return undefined; + }, + }); + prototypePolution(); + m; + )A", + "boolean", "false"); +} + } // namespace } // namespace workerd::jsg::test diff --git a/src/workerd/jsg/resource.h b/src/workerd/jsg/resource.h index 36407038646..c8597d95b18 100644 --- a/src/workerd/jsg/resource.h +++ b/src/workerd/jsg/resource.h @@ -17,6 +17,7 @@ // JSG has very entrenched include cycles // NOLINTNEXTLINE(misc-header-include-cycle) #include 
+#include #include #include @@ -29,11 +30,6 @@ #include #include -// TODO(cleanup): Remove when unnecessary. -#if V8_MAJOR_VERSION >= 15 || (V8_MAJOR_VERSION == 14 && V8_MINOR_VERSION >= 7) -#define HolderV2 Holder -#endif - namespace std { inline auto KJ_HASHCODE(const std::type_index& idx) { // Make std::type_index (which points to std::type_info) usable as a kj::HashMap key. @@ -119,8 +115,10 @@ struct ConstructorCallback(Args...), kj::_::Indexes ptr = T::constructor(wrapper.template unwrap(js, context, args, indexes, - TypeErrorContext::constructorArgument(typeid(T), indexes))...); + auto unwrapped = _::unwrapArgs(wrapper, js, context, args, + []() { return TypeErrorContext::constructorArgument(typeid(T), i); }); + + Ref ptr = T::constructor(kj::mv(unwrapped).template take()...); if constexpr (T::jsgHasReflection) { ptr->jsgInitReflection(wrapper); } @@ -145,9 +143,10 @@ struct ConstructorCallback(Lock&, Args...), kj::_::Indexe auto& wrapper = TypeWrapper::from(isolate); - Ref ptr = T::constructor(Lock::from(isolate), - wrapper.template unwrap(js, context, args, indexes, - TypeErrorContext::constructorArgument(typeid(T), indexes))...); + auto unwrapped = _::unwrapArgs(wrapper, js, context, args, + []() { return TypeErrorContext::constructorArgument(typeid(T), i); }); + + Ref ptr = T::constructor(js, kj::mv(unwrapped).template take()...); if constexpr (T::jsgHasReflection) { ptr->jsgInitReflection(wrapper); } @@ -176,9 +175,10 @@ struct ConstructorCallback ptr = T::constructor(args, - wrapper.template unwrap(js, context, args, indexes, - TypeErrorContext::constructorArgument(typeid(T), indexes))...); + auto unwrapped = _::unwrapArgs(wrapper, js, context, args, + []() { return TypeErrorContext::constructorArgument(typeid(T), i); }); + + Ref ptr = T::constructor(args, kj::mv(unwrapped).template take()...); if constexpr (T::jsgHasReflection) { ptr->jsgInitReflection(wrapper); } @@ -233,13 +233,13 @@ struct MethodCallback(context, obj); + auto unwrapped = 
_::unwrapArgs(wrapper, lock, context, args, + []() { return TypeErrorContext::methodArgument(typeid(T), methodName, i); }); if constexpr (isVoid()) { - (self.*method)(wrapper.template unwrap(lock, context, args, indexes, - TypeErrorContext::methodArgument(typeid(T), methodName, indexes))...); + (self.*method)(kj::mv(unwrapped).template take()...); } else { - return wrapper.wrap(lock, context, obj, - (self.*method)(wrapper.template unwrap(lock, context, args, indexes, - TypeErrorContext::methodArgument(typeid(T), methodName, indexes))...)); + return wrapper.wrap( + lock, context, obj, (self.*method)(kj::mv(unwrapped).template take()...)); } }); } @@ -259,6 +259,10 @@ struct MethodCallback(isolate, [&]() { + // Pack expansion order is unspecified by [expr.call], but ordering + // is safe here: `unwrapFastApi` is invoked only with parameter types + // that pass `isFastApiCompatible` (FastApiPrimitive or v8::Local), and + // neither path fires JS-observable side effects. See unwrap-args.h. return (self.*method)(wrapper.template unwrapFastApi(js, context, fastArgs, TypeErrorContext::methodArgument(typeid(T), methodName, indexes))...); }); @@ -293,15 +297,13 @@ struct MethodCallback(context, obj); auto& lock = Lock::from(isolate); + auto unwrapped = _::unwrapArgs(wrapper, lock, context, args, + []() { return TypeErrorContext::methodArgument(typeid(T), methodName, i); }); if constexpr (isVoid()) { - (self.*method)(lock, - wrapper.template unwrap(lock, context, args, indexes, - TypeErrorContext::methodArgument(typeid(T), methodName, indexes))...); + (self.*method)(lock, kj::mv(unwrapped).template take()...); } else { return wrapper.wrap(lock, context, obj, - (self.*method)(lock, - wrapper.template unwrap(lock, context, args, indexes, - TypeErrorContext::methodArgument(typeid(T), methodName, indexes))...)); + (self.*method)(lock, kj::mv(unwrapped).template take()...)); } }); } @@ -321,6 +323,7 @@ struct MethodCallback(isolate, [&]() { + // See note on fast-API ordering in 
the plain-method specialization above. return (self.*method)(lock, wrapper.template unwrapFastApi(lock, context, fastArgs, TypeErrorContext::methodArgument(typeid(T), methodName, indexes))...); @@ -354,15 +357,13 @@ struct MethodCallback(context, obj); + auto unwrapped = _::unwrapArgs(wrapper, lock, context, args, + []() { return TypeErrorContext::methodArgument(typeid(T), methodName, i); }); if constexpr (isVoid()) { - (self.*method)(args, - wrapper.template unwrap(lock, context, args, indexes, - TypeErrorContext::methodArgument(typeid(T), methodName, indexes))...); + (self.*method)(args, kj::mv(unwrapped).template take()...); } else { return wrapper.wrap(lock, context, obj, - (self.*method)(args, - wrapper.template unwrap(lock, context, args, indexes, - TypeErrorContext::methodArgument(typeid(T), methodName, indexes))...)); + (self.*method)(args, kj::mv(unwrapped).template take()...)); } }); } @@ -428,13 +429,13 @@ struct StaticMethodCallbackGetCurrentContext(); auto& wrapper = TypeWrapper::from(isolate); auto& lock = Lock::from(isolate); + auto unwrapped = _::unwrapArgs(wrapper, lock, context, args, + []() { return TypeErrorContext::methodArgument(typeid(T), methodName, i); }); if constexpr (isVoid()) { - (*method)(wrapper.template unwrap(lock, context, args, indexes, - TypeErrorContext::methodArgument(typeid(T), methodName, indexes))...); + (*method)(kj::mv(unwrapped).template take()...); } else { - return wrapper.wrap(lock, context, kj::none, - (*method)(wrapper.template unwrap(lock, context, args, indexes, - TypeErrorContext::methodArgument(typeid(T), methodName, indexes))...)); + return wrapper.wrap( + lock, context, kj::none, (*method)(kj::mv(unwrapped).template take()...)); } }); } @@ -453,6 +454,7 @@ struct StaticMethodCallback(isolate, [&]() { + // See note on fast-API ordering in MethodCallback above. 
return (*method)(wrapper.template unwrapFastApi(lock, context, fastArgs, TypeErrorContext::methodArgument(typeid(T), methodName, indexes))...); }); @@ -483,15 +485,13 @@ struct StaticMethodCallbackGetCurrentContext(); auto& wrapper = TypeWrapper::from(isolate); auto& lock = Lock::from(isolate); + auto unwrapped = _::unwrapArgs(wrapper, lock, context, args, + []() { return TypeErrorContext::methodArgument(typeid(T), methodName, i); }); if constexpr (isVoid()) { - (*method)(lock, - wrapper.template unwrap(lock, context, args, indexes, - TypeErrorContext::methodArgument(typeid(T), methodName, indexes))...); + (*method)(lock, kj::mv(unwrapped).template take()...); } else { return wrapper.wrap(lock, context, kj::none, - (*method)(lock, - wrapper.template unwrap(lock, context, args, indexes, - TypeErrorContext::methodArgument(typeid(T), methodName, indexes))...)); + (*method)(lock, kj::mv(unwrapped).template take()...)); } }); } @@ -510,6 +510,7 @@ struct StaticMethodCallback(isolate, [&]() { + // See note on fast-API ordering in MethodCallback above. 
return (*method)(lock, wrapper.template unwrapFastApi(lock, context, fastArgs, TypeErrorContext::methodArgument(typeid(T), methodName, indexes))...); @@ -539,15 +540,13 @@ struct StaticMethodCallbackGetCurrentContext(); auto& lock = Lock::from(isolate); auto& wrapper = TypeWrapper::from(isolate); + auto unwrapped = _::unwrapArgs(wrapper, lock, context, args, + []() { return TypeErrorContext::methodArgument(typeid(T), methodName, i); }); if constexpr (isVoid()) { - (*method)(args, - wrapper.template unwrap(lock, context, args, indexes, - TypeErrorContext::methodArgument(typeid(T), methodName, indexes))...); + (*method)(args, kj::mv(unwrapped).template take()...); } else { return wrapper.wrap(lock, context, kj::none, - (*method)(args, - wrapper.template unwrap(lock, context, args, indexes, - TypeErrorContext::methodArgument(typeid(T), methodName, indexes))...)); + (*method)(args, kj::mv(unwrapped).template take()...)); } }); } @@ -643,7 +642,7 @@ struct GetterCallback; liftKj(info, [&]() { \ auto isolate = info.GetIsolate(); \ auto context = isolate->GetCurrentContext(); \ - auto obj = info.HolderV2(); \ + auto obj = info.Holder(); \ auto& js = Lock::from(isolate); \ auto& wrapper = TypeWrapper::from(isolate); \ /* V8 no longer supports AccessorSignature, so we must manually verify `this`'s type. */ \ @@ -690,7 +689,7 @@ struct GetterCallback; auto isolate = info.GetIsolate(); \ auto context = isolate->GetCurrentContext(); \ auto& js = Lock::from(isolate); \ - auto obj = info.HolderV2(); \ + auto obj = info.Holder(); \ auto& wrapper = TypeWrapper::from(isolate); \ /* V8 no longer supports AccessorSignature, so we must manually verify `this`'s type. 
*/ \ if (!isContext && \ @@ -882,13 +881,14 @@ template struct SetterCallback { - static void callback( - v8::Local, v8::Local value, const v8::PropertyCallbackInfo& info) { + static void callback(v8::Local, + v8::Local value, + const v8::PropertyCallbackInfo& info) { liftKj(info, [&]() { auto isolate = info.GetIsolate(); auto context = isolate->GetCurrentContext(); auto& js = Lock::from(isolate); - auto obj = info.HolderV2(); + auto obj = info.Holder(); auto& wrapper = TypeWrapper::from(isolate); // V8 no longer supports AccessorSignature, so we must manually verify `this`'s type. if (!isContext && !wrapper.getTemplate(isolate, static_cast(nullptr))->HasInstance(obj)) { @@ -909,12 +909,13 @@ template struct SetterCallback { - static void callback( - v8::Local, v8::Local value, const v8::PropertyCallbackInfo& info) { + static void callback(v8::Local, + v8::Local value, + const v8::PropertyCallbackInfo& info) { liftKj(info, [&]() { auto isolate = info.GetIsolate(); auto context = isolate->GetCurrentContext(); - auto obj = info.HolderV2(); + auto obj = info.Holder(); auto& wrapper = TypeWrapper::from(isolate); // V8 no longer supports AccessorSignature, so we must manually verify `this`'s type. 
if (!isContext && !wrapper.getTemplate(isolate, static_cast(nullptr))->HasInstance(obj)) { @@ -1203,7 +1204,7 @@ struct WildcardPropertyCallbacks v8::Local { auto isolate = info.GetIsolate(); auto context = isolate->GetCurrentContext(); - auto obj = info.HolderV2(); + auto obj = info.Holder(); auto& wrapper = TypeWrapper::from(isolate); if (!wrapper.getTemplate(isolate, static_cast(nullptr))->HasInstance(obj)) { throwTypeError(isolate, kIllegalInvocation); @@ -1230,7 +1231,7 @@ struct WildcardPropertyCallbacks v8::Local { auto isolate = info.GetIsolate(); auto context = isolate->GetCurrentContext(); - auto obj = info.HolderV2(); + auto obj = info.Holder(); auto& wrapper = TypeWrapper::from(isolate); if (!wrapper.getTemplate(isolate, static_cast(nullptr))->HasInstance(obj)) { throwTypeError(isolate, kIllegalInvocation); @@ -1777,8 +1778,8 @@ class ResourceWrapper { return {wrapper.getTemplate(isolate, static_cast(nullptr)), rinit}; }); - if constexpr (static_cast(T::jsgSerializeTag) != - static_cast(T::jsgSuper::jsgSerializeTag)) { + if constexpr (static_cast(T::jsgSerializeLevel) != + static_cast(T::jsgSuper::jsgSerializeLevel)) { // This type is declared JSG_SERIALIZABLE. // HACK: The type of `serializer` should be `Serializer&`, not `auto&`, but Clang complains // about the `writeRawUint32()` call being made on an incomplete type if `ser.h` hasn't been diff --git a/src/workerd/jsg/ser.c++ b/src/workerd/jsg/ser.c++ index 9797fea26cd..2638cc54f27 100644 --- a/src/workerd/jsg/ser.c++ +++ b/src/workerd/jsg/ser.c++ @@ -230,12 +230,7 @@ v8::Maybe Serializer::IsHostObject(v8::Isolate* isolate, v8::Local= 15 || (V8_MAJOR_VERSION == 14 && V8_MINOR_VERSION >= 7) return v8::Just(object->GetPrototype() != prototypeOfObject); -#else - // TODO(cleanup): Remove when unnecessary. 
- return v8::Just(object->GetPrototypeV2() != prototypeOfObject); -#endif } v8::Maybe Serializer::WriteHostObject(v8::Isolate* isolate, v8::Local object) { @@ -434,6 +429,8 @@ void Deserializer::init(Lock& js, } JsValue Deserializer::readValue(Lock& js) { + js.setDisallowJavascriptExecution(true); + KJ_DEFER(js.setDisallowJavascriptExecution(false)); return JsValue(check(deser.ReadValue(js.v8Context()))); } @@ -527,7 +524,13 @@ v8::MaybeLocal Deserializer::ReadHostObject(v8::Isolate* isolate) { // serialized output. if (!preserveStackInErrors && name.strictEquals(stack)) continue; auto value = serObj.get(js, name); - obj.set(js, name, value); + // Use createDataProperty instead of ordinary set to avoid + // invoking prototype-chain setters. This code runs inside V8's + // DisallowJavascriptExecution scope; an ordinary Set() that hits + // a tenant-installed Error.prototype setter would trigger + // V8_Fatal -> abort(). CreateDataProperty defines an own data + // property directly, matching the HTML structured-clone spec. 
+ obj.createDataProperty(js, name, value); } } diff --git a/src/workerd/jsg/setup.c++ b/src/workerd/jsg/setup.c++ index 606c6af3331..6992028cb0d 100644 --- a/src/workerd/jsg/setup.c++ +++ b/src/workerd/jsg/setup.c++ @@ -73,7 +73,8 @@ static kj::Own userPlatform(v8::Platform& platform) { return kj::Own(&platform, kj::NullDisposer::instance); } -V8System::V8System(kj::ArrayPtr flags) { +V8System::V8System( + kj::ArrayPtr flags, JitCodeEventTracking jitCodeEventTracking) { auto platform = defaultPlatform(0); auto defaultPlatformPtr = platform.get(); init(kj::mv(platform), flags, [defaultPlatformPtr](v8::Isolate* isolate) { @@ -81,36 +82,41 @@ V8System::V8System(kj::ArrayPtr flags) { defaultPlatformPtr, isolate, v8::platform::MessageLoopBehavior::kDoNotWait); }, [defaultPlatformPtr](v8::Isolate* isolate) { v8::platform::NotifyIsolateShutdown(defaultPlatformPtr, isolate); - }); + }, jitCodeEventTracking); } V8System::V8System(v8::Platform& platformParam, kj::ArrayPtr flags, - v8::Platform* defaultPlatformPtr) { + v8::Platform* defaultPlatformPtr, + JitCodeEventTracking jitCodeEventTracking) { KJ_REQUIRE_NONNULL(defaultPlatformPtr); init(userPlatform(platformParam), flags, [defaultPlatformPtr](v8::Isolate* isolate) { return v8::platform::PumpMessageLoop( defaultPlatformPtr, isolate, v8::platform::MessageLoopBehavior::kDoNotWait); }, [defaultPlatformPtr](v8::Isolate* isolate) { v8::platform::NotifyIsolateShutdown(defaultPlatformPtr, isolate); - }); + }, jitCodeEventTracking); } V8System::V8System(v8::Platform& platformParam, kj::ArrayPtr flags, PumpMsgLoopType pumpMsgLoopFn, - ShutdownIsolateType shutdownIsolateFn) { - init(userPlatform(platformParam), flags, kj::mv(pumpMsgLoopFn), kj::mv(shutdownIsolateFn)); + ShutdownIsolateType shutdownIsolateFn, + JitCodeEventTracking jitCodeEventTracking) { + init(userPlatform(platformParam), flags, kj::mv(pumpMsgLoopFn), kj::mv(shutdownIsolateFn), + jitCodeEventTracking); } void V8System::init(kj::Own platformParam, kj::ArrayPtr 
flags, PumpMsgLoopType pumpMsgLoopFn, - ShutdownIsolateType shutdownIsolateFn) { + ShutdownIsolateType shutdownIsolateFn, + JitCodeEventTracking jitCodeEventTrackingParam) { platformInner = kj::mv(platformParam); platformWrapper = kj::heap(*platformInner); pumpMsgLoop = kj::mv(pumpMsgLoopFn); shutdownIsolate = kj::mv(shutdownIsolateFn); + jitCodeEventTracking = jitCodeEventTrackingParam; #if V8_HAS_STACK_START_MARKER v8::StackStartMarker::EnableForProcess(); @@ -407,7 +413,9 @@ IsolateBase::IsolateBase(V8System& system, // attacks. ptr->SetAllowAtomicsWait(false); - ptr->SetJitCodeEventHandler(v8::kJitCodeEventDefault, &jitCodeEvent); + if (system.jitCodeEventTracking) { + ptr->SetJitCodeEventHandler(v8::kJitCodeEventDefault, &jitCodeEvent); + } // V8 10.5 introduced this API which is used to resolve the promise returned by // WebAssembly.compile(). For some reason, the default implementation of the callback does not @@ -439,6 +447,7 @@ IsolateBase::IsolateBase(V8System& system, // Create opaqueTemplate auto opaqueTemplate = v8::FunctionTemplate::New(ptr, &throwIllegalConstructor); + opaqueTemplate->ReadOnlyPrototype(); opaqueTemplate->InstanceTemplate()->SetInternalFieldCount(Wrappable::INTERNAL_FIELD_COUNT); this->opaqueTemplate.Reset(ptr, opaqueTemplate); } @@ -573,7 +582,6 @@ void IsolateBase::jitCodeEvent(const v8::JitCodeEvent* event) noexcept { // code locations, which we use when reporting stack traces during crashes. IsolateBase* self = static_cast(event->isolate->GetData(SET_DATA_ISOLATE_BASE)); - auto& codeMap = self->codeMap; // Pointer comparison between pointers not from the same array is UB so we'd better operate on // uintptr_t instead. 
@@ -585,12 +593,23 @@ void IsolateBase::jitCodeEvent(const v8::JitCodeEvent* event) noexcept { kj::Vector mapping; }; + // NOTE: `codeMap` is intentionally NOT protected by a mutex, even though + // V8 14.9+ can in principle deliver these events from background + // compilation threads (concurrent sparkplug, maglev, turbofan, etc.). + // See the comment on `IsolateBase::codeMap` in `setup.h` for the full + // rationale: `getJsStackTrace()` reads this map from a signal handler, + // and acquiring a mutex from a signal handler is not async-signal-safe. + // Embedders that enable concurrent V8 JIT compilation must accept the + // race or provide a different stack-tracing implementation. + auto& codeMap = self->codeMap; + using CodeMapEntry = kj::TreeMap::Entry; + switch (event->type) { case v8::JitCodeEvent::CODE_ADDED: { // Usually CODE_ADDED comes after CODE_END_LINE_INFO_RECORDING, but sometimes it doesn't, // particularly in the case of Wasm where it appears no line info is provided. auto& info = codeMap.findOrCreate( - startAddr, [&]() { return decltype(self->codeMap)::Entry{startAddr, CodeBlockInfo()}; }); + startAddr, [&]() { return CodeMapEntry{startAddr, CodeBlockInfo()}; }); info.size = event->code_len; info.name = kj::str(kj::arrayPtr(event->name.str, event->name.len)); info.type = event->code_type; @@ -660,7 +679,7 @@ void IsolateBase::jitCodeEvent(const v8::JitCodeEvent* event) noexcept { // Sometimes CODE_END_LINE_INFO_RECORDING comes after CODE_ADDED, in particular with // modules. 
auto& info = codeMap.findOrCreate( - startAddr, [&]() { return decltype(self->codeMap)::Entry{startAddr, CodeBlockInfo()}; }); + startAddr, [&]() { return CodeMapEntry{startAddr, CodeBlockInfo()}; }); UserData* data = static_cast(event->user_data); info.mapping = data->mapping.releaseAsArray(); @@ -775,6 +794,10 @@ kj::Maybe getJsStackTrace(void* ucontext, kj::ArrayPtr scra } appendText("js: (", vmState, ")"); + // Read `codeMap` without locking: this function runs from a signal handler + // and acquiring a mutex from a signal handler is not async-signal-safe. See + // the comment on `IsolateBase::codeMap` in `setup.h` for the implications + // when V8 is configured with concurrent JIT compilation. auto& codeMap = static_cast(isolate->GetData(SET_DATA_ISOLATE_BASE))->codeMap; for (auto i: kj::zeroTo(sampleInfo.frames_count)) { diff --git a/src/workerd/jsg/setup.h b/src/workerd/jsg/setup.h index 471fde5dbbc..7a36436a844 100644 --- a/src/workerd/jsg/setup.h +++ b/src/workerd/jsg/setup.h @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -26,6 +27,13 @@ namespace workerd::jsg { class Deserializer; class Serializer; +// Whether to register a JIT code event handler on each isolate created through a V8System, +// to build a mapping from compiled code addresses to JavaScript source locations. This +// mapping is consumed by `jsg::getJsStackTrace()` to produce signal-handler-safe stack +// traces during crash reporting. Adds overhead (V8 invokes a callback on every JIT code +// event), so it is opt-in. +WD_STRONG_BOOL(JitCodeEventTracking); + // Construct a default V8 platform, with the given background thread pool size. // // Passing zero for `backgroundThreadCount` causes V8 to ask glibc how many processors there are. @@ -51,18 +59,21 @@ class V8System { // auto v8System = V8System(*v8Platform, flags); // (Optional) `flags` is a list of command-line flags to pass to V8, like "--expose-gc" or // "--single_threaded_gc". 
An exception will be thrown if any flags are not recognized. - explicit V8System(kj::ArrayPtr flags = nullptr); + explicit V8System(kj::ArrayPtr flags = nullptr, + JitCodeEventTracking jitCodeEventTracking = JitCodeEventTracking::NO); // Use a possibly-custom v8::Platform wrapper over default v8::Platform, and apply flags. explicit V8System(v8::Platform& platform, kj::ArrayPtr flags, - v8::Platform* defaultPlatformPtr); + v8::Platform* defaultPlatformPtr, + JitCodeEventTracking jitCodeEventTracking = JitCodeEventTracking::NO); // Use a possibly-custom v8::Platform implementation with custom task queue, and apply flags. explicit V8System(v8::Platform& platform, kj::ArrayPtr flags, PumpMsgLoopType, - ShutdownIsolateType); + ShutdownIsolateType, + JitCodeEventTracking jitCodeEventTracking = JitCodeEventTracking::NO); ~V8System() noexcept(false); @@ -74,12 +85,14 @@ class V8System { kj::Own platformWrapper; PumpMsgLoopType pumpMsgLoop; ShutdownIsolateType shutdownIsolate; + JitCodeEventTracking jitCodeEventTracking = JitCodeEventTracking::NO; friend class IsolateBase; void init(kj::Own, kj::ArrayPtr, PumpMsgLoopType, - ShutdownIsolateType); + ShutdownIsolateType, + JitCodeEventTracking); }; // Base class of Isolate containing parts that don't need to be templated, to avoid code @@ -152,6 +165,19 @@ class IsolateBase { evalAllowed = allow; } + inline void setDisallowJavascriptExecution(kj::Badge, bool allow) { + if (allow) { + javascriptExecutionDisallowed++; + } else { + KJ_ASSERT(javascriptExecutionDisallowed > 0); + javascriptExecutionDisallowed--; + } + } + + inline bool getDisallowJavascriptExecution() const { + return javascriptExecutionDisallowed != 0; + } + inline void setAllowsAllowEval() { alwaysAllowEval = true; evalAllowed = true; @@ -374,6 +400,12 @@ class IsolateBase { bool alwaysAllowEval = false; bool evalAllowed = false; + // When > 0, we take the "safe" path in unwrap() to avoid calling Get() which can invoke + // user-defined getters, triggering the 
`DisallowJavascriptExecution` scope constructed + // as part of `Deserializer::readValue` + // This is a counter instead of a boolean as `readValue` calls can be nested + uint javascriptExecutionDisallowed = 0; + // The Web Platform API specifications require that any API that returns a JavaScript Promise // should never throw errors synchronously. Rather, they are supposed to capture any synchronous // throws and return a rejected Promise. Historically, Workers did not follow that guideline @@ -452,6 +484,38 @@ class IsolateBase { }; // Maps instructions to source code locations. + // + // WARNING: This map is read by `getJsStackTrace()` from a signal handler, + // so this field is deliberately NOT protected by a mutex. Two consequences: + // + // 1. If V8 is configured to compile JS on background threads, V8 may + // invoke `jitCodeEvent()` (which mutates `codeMap`) concurrently with + // a `getJsStackTrace()` read on another thread — a data race that can + // crash the process during stack walking. + // + // 2. We can't fix (1) by adding a mutex here, because `getJsStackTrace()` + // runs inside a signal handler and POSIX disallows acquiring a mutex + // from a signal handler (it is not async-signal-safe and may deadlock + // if the signal interrupted a thread already holding the same mutex). + // + // Callers who configure V8 with any of the following flags MUST account + // for this themselves: + // --concurrent_recompilation + // --concurrent_sparkplug + // --maglev_build_code_on_background + // --maglev_deopt_data_on_background + // --lazy_compile_dispatcher + // --parallel_compile_tasks_for_eager_toplevel + // --parallel_compile_tasks_for_lazy + // --stress_concurrent_inlining + // + // Our internal embedder disables all of the above, so `jitCodeEvent` + // only ever fires on the main thread and the race cannot occur in that + // configuration. 
+ // + // Wasm tier-up compilation runs concurrently but emits its own + // JitCodeEvents and does not race with JS stack traces in practice + // because Wasm code does not appear in JS stack traces. kj::TreeMap codeMap; explicit IsolateBase(V8System& system, @@ -951,4 +1015,36 @@ class Isolate: public IsolateBase { bool hasExtraWrappers = false; }; +template +WeakRef Object::getWeakRefToThis(Lock& js) { + return WeakRef(js.v8Isolate, static_cast(*this), getOrCreateWeakRefAnchor()); +} + +template +WeakRef Ref::getWeakRef(Lock& js) & { + return WeakRef(js.v8Isolate, static_cast(*inner.get()), inner->getOrCreateWeakRefAnchor()); +} + +template +WeakRef WeakRef::addRef(jsg::Lock& js) & { + KJ_IF_SOME(i, impl) { + return WeakRef(i.isolate, i.target, i.anchor.addRef()); + } + return WeakRef(nullptr); +} + +template +void WeakRef::destroy() { + KJ_IF_SOME(i, impl) { + if (v8::Locker::IsLocked(i.isolate)) { + impl = kj::none; + } else { + auto& base = IsolateBase::from(i.isolate); + kj::Own dropIt = kj::mv(i.anchor).toOwn(); + base.destroyUnderLock(kj::mv(dropIt)); + impl = kj::none; + } + } +} + } // namespace workerd::jsg diff --git a/src/workerd/jsg/struct.h b/src/workerd/jsg/struct.h index bcc3b03967a..819ba3dbbed 100644 --- a/src/workerd/jsg/struct.h +++ b/src/workerd/jsg/struct.h @@ -138,8 +138,23 @@ class FieldWrapper { v8::Local context, v8::Local in) { static_assert(NotV8Local); - v8::Local jsValue = check(in->Get(context, nameHandle.Get(isolate))); auto& js = Lock::from(isolate); + auto fieldName = nameHandle.Get(isolate); + v8::Local jsValue = v8::Undefined(isolate); + if (!js.isJavascriptExecutionDisallowed()) { + jsValue = check(in->Get(context, fieldName)); + } else { + // Safe path to get a v8::Value under the `DisallowJavascriptExecution` scope without + // walking the prototype chain, hence skipping any Object.prototype getters + // NOTE: GetRealNamedProperty() can technically execute user-defined getters on the object + // itself, but this path only 
deals with plain objects produced by ValueDeserializer + // NOTE: We must check HasRealNamedProperty() first because GetRealNamedProperty() + // returns an empty v8::Maybe both in case of an error and in case the property + // does not exist + if (check(in->HasRealNamedProperty(context, fieldName))) { + jsValue = check(in->GetRealNamedProperty(context, fieldName)); + } + } return wrapper.template unwrap( js, context, jsValue, TypeErrorContext::structField(typeid(Struct), exportedName), in); } diff --git a/src/workerd/jsg/unwrap-args-test.c++ b/src/workerd/jsg/unwrap-args-test.c++ new file mode 100644 index 00000000000..59fb019e5a2 --- /dev/null +++ b/src/workerd/jsg/unwrap-args-test.c++ @@ -0,0 +1,187 @@ +// Copyright (c) 2017-2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +#include + +#include +#include +#include +#include + +namespace workerd::jsg::test { +namespace { + +// A probe whose constructor records the value of a global counter, then +// increments it. When N probes are constructed in sequence, their `order` +// fields equal 0, 1, 2, ..., N-1 in the order they were constructed. +struct OrderProbe { + int order; + OrderProbe(): order(counter++) {} + static inline int counter = 0; +}; + +KJ_TEST("UnwrappedArgs constructs elements left-to-right") { + using Indexes = kj::_::Indexes<0, 1, 2, 3>; + + OrderProbe::counter = 0; + auto makeProbe = []() -> OrderProbe { return OrderProbe{}; }; + + jsg::_::UnwrappedArgs args(makeProbe); + + // After construction, each slot's `order` field records the sequential + // value the counter had when that slot's probe was constructed. + // Left-to-right construction means slot 0 was first. Locals avoid + // unparenthesised `<` inside the KJ_EXPECT macro expansion.
+ int order0 = static_cast&>(args).value.order; + int order1 = static_cast&>(args).value.order; + int order2 = static_cast&>(args).value.order; + int order3 = static_cast&>(args).value.order; + KJ_EXPECT(order0 == 0); + KJ_EXPECT(order1 == 1); + KJ_EXPECT(order2 == 2); + KJ_EXPECT(order3 == 3); +} + +KJ_TEST("UnwrappedArgs invokes callable with correct compile-time Index and Type") { + using Indexes = kj::_::Indexes<0, 1, 2>; + + // Record each (index, type) the unwrap callable is invoked with. + struct Call { + size_t index; + kj::StringPtr typeName; + }; + kj::Vector calls; + + auto recordingUnwrap = [&calls]() -> int { + if constexpr (kj::isSameType()) { + calls.add(Call{I, "int"_kj}); + } else if constexpr (kj::isSameType()) { + calls.add(Call{I, "double"_kj}); + } else if constexpr (kj::isSameType()) { + calls.add(Call{I, "bool"_kj}); + } else { + calls.add(Call{I, "unknown"_kj}); + } + return static_cast(I); + }; + + jsg::_::UnwrappedArgs args(recordingUnwrap); + + KJ_ASSERT(calls.size() == 3); + KJ_EXPECT(calls[0].index == 0); + KJ_EXPECT(calls[0].typeName == "int"); + KJ_EXPECT(calls[1].index == 1); + KJ_EXPECT(calls[1].typeName == "double"); + KJ_EXPECT(calls[2].index == 2); + KJ_EXPECT(calls[2].typeName == "bool"); +} + +KJ_TEST("UnwrappedArgs take() moves values out of the I'th slot") { + using Indexes = kj::_::Indexes<0, 1, 2>; + + auto unwrap = []() -> kj::String { return kj::str("slot-", I); }; + + jsg::_::UnwrappedArgs args(unwrap); + + kj::String a = kj::mv(args).template take<0>(); + kj::String b = kj::mv(args).template take<1>(); + kj::String c = kj::mv(args).template take<2>(); + + KJ_EXPECT(a == "slot-0"); + KJ_EXPECT(b == "slot-1"); + KJ_EXPECT(c == "slot-2"); +} + +KJ_TEST("UnwrappedArgs take() forwards rvalue-ref parameter types as rvalue refs") { + // Rvalue-ref parameters (e.g. 
`JsgStruct&&` for move-in, as + // `HTMLRewriter::on` does with `ElementContentHandlers&&`) need to come + // out as rvalue references that bind to T&& method parameters. Because + // `RemoveRvalueRef` strips the `&&`, the stored value is held by value + // (owned by the helper) rather than as a dangling rvalue-ref member. + // `take()` then forwards it back as an rvalue ref via reference + // collapsing on `kj::fwd`. + using Indexes = kj::_::Indexes<0>; + + auto unwrap = []() -> kj::String { return kj::str("moved"); }; + + jsg::_::UnwrappedArgs args(unwrap); + + // take<0>() must return `kj::String&&` so it binds to a T&& method + // parameter, allowing move-in semantics at the call site. + static_assert(kj::isSameType()), kj::String&&>(), + "take() of T&& parameter must return T&&, enabling move into the call site"); + + kj::String s = kj::mv(args).template take<0>(); + KJ_EXPECT(s == "moved"); +} + +KJ_TEST("UnwrappedArgs take() preserves reference parameter types as lvalue refs") { + // For reference-typed parameters (e.g. `Lock&`, `TypeHandler&`), the + // stored value is a reference member. `take()` must return an lvalue + // reference, not an rvalue reference — otherwise the value would not bind + // to a non-const lvalue-ref parameter at the JSG call site. + using Indexes = kj::_::Indexes<0, 1>; + + int x = 10; + int y = 20; + auto unwrap = [&]() -> int& { return I == 0 ? x : y; }; + + jsg::_::UnwrappedArgs args(unwrap); + + // take<0>() must return `int&` so we can mutate through it. + static_assert(kj::isSameType()), int&>(), + "take() of int& parameter must return int&, not int&&"); + + int& a = kj::mv(args).template take<0>(); + int& b = kj::mv(args).template take<1>(); + a = 100; + b = 200; + KJ_EXPECT(x == 100); + KJ_EXPECT(y == 200); +} + +// Records its destruction in a shared vector. Used to verify that when +// construction of one slot throws, earlier slots are destroyed in reverse +// order — standard C++ subobject unwinding behavior. 
+struct DestructionProbe { + kj::Vector& destructions; + int order; + DestructionProbe(kj::Vector& d, int o): destructions(d), order(o) {} + ~DestructionProbe() { + destructions.add(order); + } + // Disallow copy and move so each slot's destructor fires exactly once. + // Returning a DestructionProbe by value is still legal thanks to + // guaranteed copy elision for prvalues (C++17+). + KJ_DISALLOW_COPY_AND_MOVE(DestructionProbe); +}; + +KJ_TEST("UnwrappedArgs unwinds partially-constructed bases in reverse on throw") { + using Indexes = kj::_::Indexes<0, 1, 2>; + + kj::Vector unwound; + + auto unwrapOrThrow = [&]() -> DestructionProbe { + if constexpr (I == 2) { + KJ_FAIL_REQUIRE("construction failure at slot 2"); + } else { + return DestructionProbe{unwound, static_cast(I)}; + } + }; + + KJ_EXPECT_THROW_MESSAGE("construction failure at slot 2", + (jsg::_::UnwrappedArgs( + unwrapOrThrow))); + + // Slot 0 and slot 1 were constructed; slot 2 threw before its body + // produced a probe. C++ destroys already-constructed base subobjects + // in reverse declaration order, so slot 1 is destroyed first, then + // slot 0. + KJ_ASSERT(unwound.size() == 2); + KJ_EXPECT(unwound[0] == 1); + KJ_EXPECT(unwound[1] == 0); +} + +} // namespace +} // namespace workerd::jsg::test diff --git a/src/workerd/jsg/unwrap-args.h b/src/workerd/jsg/unwrap-args.h new file mode 100644 index 00000000000..3727983944b --- /dev/null +++ b/src/workerd/jsg/unwrap-args.h @@ -0,0 +1,157 @@ +// Copyright (c) 2017-2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +#pragma once + +// INTERNAL IMPLEMENTATION FILE +// +// Deterministic left-to-right argument unwrapping for JSG-generated V8 +// callbacks. 
+// +// Background +// ---------- +// JSG-generated method/constructor/static-method/functor callbacks need to +// convert each `v8::Local` argument in a `FunctionCallbackInfo` +// into a typed C++ value via `TypeWrapper::unwrap(...)`. The natural +// way to write this is a pack expansion inside the function-call argument +// list: +// +// (self.*method)(lock, +// wrapper.template unwrap(lock, context, args, indexes, ...)...); +// +// Per the C++ standard ([expr.call]), the order in which a function call's +// argument expressions are evaluated is *unsequenced*. Different toolchains +// make different choices: Clang and GCC evaluate left-to-right on Linux; +// MSVC evaluates right-to-left on Windows. Since `unwrap` can fire +// user-defined JS code (e.g. `toString`, getters, `Symbol.iterator`), the +// order in which the unwraps run is observable from JavaScript. This +// contradicts Web IDL's requirement that operation arguments be evaluated +// left-to-right. +// +// Approach +// -------- +// `UnwrappedArgs, Args...>` inherits from +// `UnwrappedArg...`. Each `UnwrappedArg` constructor invokes a +// caller-supplied callable to produce its value. Per the C++ standard +// ([class.base.init]), non-virtual base subobjects are initialized in their +// declaration order — which, for a pack-expanded base list, is +// left-to-right. This is the same trick `kj::Tuple` uses to initialize its +// `TupleElement` bases in order, generalised in two ways: (1) the +// per-element constructor invokes a user-supplied callable to produce its +// value, rather than receiving an already-evaluated value; (2) for +// rvalue-reference parameters (e.g. `JsgStruct&&` arguments for move-in), +// `RemoveRvalueRef` is used as the storage type so the produced value +// is owned by `UnwrappedArg` and forwarded back as an rvalue reference on +// extraction. 
`kj::Tuple` instead declares a dangling `T&&` member +// with no extension-of-lifetime, which would clash with the +// produce-and-own semantics here. +// +// Usage +// ----- +// auto unwrapped = _::unwrapArgs(wrapper, lock, context, args, +// []() { return TypeErrorContext::methodArgument(typeid(T), methodName, i); }); +// (self.*method)(lock, kj::mv(unwrapped).template take()...); +// +// The second pack expansion (`take()...`) is safe: each `take` +// call is a `kj::fwd` of an already-initialized member, with no side +// effects, so the outer call's argument-evaluation order is irrelevant. + +#include // for RemoveRvalueRef + +#include +#include // for kj::_::Indexes, kj::_::TypeByIndex + +#include + +namespace workerd::jsg::_ { // private + +// Holds the unwrapped value for argument slot `Index` of an +// `UnwrappedArgs, Args...>`. `U` is the original method +// parameter type; we store `RemoveRvalueRef` which is exactly what +// `TypeWrapper::unwrap(...)` returns. This preserves lvalue-reference +// parameter types (e.g. `Lock&`, `TypeHandler&`) — reference members +// bind during mem-init — and stores other parameter types by value. +template +struct UnwrappedArg { + using Type = RemoveRvalueRef; + Type value; + + template + explicit UnwrappedArg(Unwrap& unwrap): value(unwrap.template operator()()) {} +}; + +template +struct UnwrappedArgs; + +template +struct UnwrappedArgs, Args...>: UnwrappedArg... { + // Non-virtual base subobjects are initialized in declaration order + // ([class.base.init]), which — because the base list is a pack expansion + // of `UnwrappedArg` — is left-to-right. This is the guarantee + // that fixes the unsequenced-function-argument-evaluation hazard at the + // original JSG call sites. + // + // The `unwrap` callable is taken by value so its lifetime spans the + // construction of all base subobjects. Each `UnwrappedArg` ctor + // receives it by lvalue reference. 
+ template + explicit UnwrappedArgs(Unwrap unwrap): UnwrappedArg(unwrap)... {} + + KJ_DISALLOW_COPY_AND_MOVE(UnwrappedArgs); + + // Forward the value out of the Idx'th argument slot. Intended to be + // called exactly once per slot, on an rvalue `UnwrappedArgs`. + // + // We use `kj::fwd` (a.k.a. `std::forward`) rather than `kj::mv` + // so that reference-typed parameters (e.g. `Lock&`, `TypeHandler&`) + // come out as lvalue references that bind to non-const lvalue-ref + // parameters. Reference collapsing on `kj::fwd(value)`: + // - T = ValueType → rvalue ref T&& (move into value param) + // - T = T& → lvalue ref T& (binds to lvalue-ref param) + // - T = T&& → rvalue ref T&& (binds to rvalue-ref param) + template + decltype(auto) take() && { + using T = kj::_::TypeByIndex; + return kj::fwd(static_cast&>(*this).value); + } +}; + +// Convenience factory that constructs an UnwrappedArgs, deducing the +// index pack internally from sizeof...(Args). The error-context factory +// `makeEC` is invoked once per slot with the slot index as a compile-time +// template parameter, and should return a TypeErrorContext describing that +// argument position. +// +// C++17 guaranteed copy elision allows returning the non-movable +// UnwrappedArgs by prvalue. + +// Implementation: needs the indexes as a deduced pack to construct the +// UnwrappedArgs return type. +template +UnwrappedArgs, Args...> unwrapArgsImpl(kj::_::Indexes, + TypeWrapper& wrapper, + Lock& js, + v8::Local context, + const v8::FunctionCallbackInfo& args, + MakeErrorContext& makeErrorContext) { + auto doUnwrap = [&]() -> decltype(auto) { + return wrapper.template unwrap( + js, context, args, I, makeErrorContext.template operator()()); + }; + return UnwrappedArgs, Args...>(doUnwrap); +} + +// Public entry point — callers only need to supply `Args...`; the index +// pack is reconstructed from `sizeof...(Args)`. 
+template +auto unwrapArgs(TypeWrapper& wrapper, + Lock& js, + v8::Local context, + const v8::FunctionCallbackInfo& args, + MakeErrorContext makeErrorContext) { + return unwrapArgsImpl( + kj::_::MakeIndexes{}, wrapper, js, context, args, makeErrorContext); +} + +} // namespace workerd::jsg::_ diff --git a/src/workerd/jsg/url-test.c++ b/src/workerd/jsg/url-test.c++ index 8c97c33ea6b..763ebe56ca5 100644 --- a/src/workerd/jsg/url-test.c++ +++ b/src/workerd/jsg/url-test.c++ @@ -1514,5 +1514,43 @@ KJ_TEST("Normalize path for comparison and cloning") { KJ_ASSERT(url9.getHref() == "file:///foo%2F%2F/bar"_kj); } +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-387: deeply nested non-capturing groups in a +// URLPattern protocol component must not crash the process via stack exhaustion in libc++ +// std::regex. Pre-fix, protocolComponentMatchesSpecialScheme() passed the attacker-controlled +// regex string to std::regex without any size bound, causing unbounded recursion in libc++'s +// recursive-descent parser (__parse_ecma_exp → __parse_atom cycle) and a SIGSEGV. +KJ_TEST("UrlPattern protocol regex stack exhaustion regression (AUTOVULN-CLOUDFLARE-WORKERD-387)") { + // Build a protocol pattern with deeply nested non-capturing groups. + // The URLPattern syntax `(...)` embeds a regex group; the inner text is passed verbatim + // into the generated component regex string by generateRegexAndNameList(). + // A depth of 50000 produces a regex string of ~250 KB — well above the 256-byte safety + // limit added by the fix, and deep enough to overflow the thread stack pre-fix. 
+ constexpr size_t depth = 50000; + kj::Vector buf; + buf.add('('); + for (size_t i = 0; i < depth; ++i) { + buf.add('('); + buf.add('?'); + buf.add(':'); + } + buf.add('h'); + for (size_t i = 0; i < depth; ++i) { + buf.add(')'); + } + buf.add(')'); + buf.add('\0'); + kj::String protocol(buf.releaseAsArray()); + + UrlPattern::Init init; + init.protocol = kj::mv(protocol); + + // This must complete without crashing. The result may be a successfully compiled pattern + // (with the protocol treated as non-special due to the length guard) or an error string — + // either is acceptable. A SIGSEGV here means the fix is missing. + auto result = UrlPattern::tryCompile(kj::mv(init)); + // Silence unused-variable warning; we only care that we survived the call. + (void)result; +} + } // namespace } // namespace workerd::jsg::test diff --git a/src/workerd/jsg/url.c++ b/src/workerd/jsg/url.c++ index 2f28572e8d5..553f8a60b6f 100644 --- a/src/workerd/jsg/url.c++ +++ b/src/workerd/jsg/url.c++ @@ -1,5 +1,6 @@ #include "url.h" +#include #include #include @@ -1672,11 +1673,31 @@ UrlPattern::Result tryCompileComponent(kj::Maybe kMaxProtocolRegexLen) { + return false; + } + if (regex.size() > kMaxProtocolRegexWarnLen) { + LOG_PERIODICALLY(WARNING, "NOSENTRY VULN-136606 Used large regex in urlpattern", regex.size()); + } + try { + std::regex rx(regex.begin(), regex.size()); + std::cmatch cmatch; + return std::regex_match("http", cmatch, rx) || std::regex_match("https", cmatch, rx) || + std::regex_match("ws", cmatch, rx) || std::regex_match("wss", cmatch, rx) || + std::regex_match("ftp", cmatch, rx); + } catch (const std::regex_error&) { + // Invalid regex per libc++ -- treat as non-special. The component will be + // re-validated by V8's regex engine later and a proper TypeError thrown. 
+ return false; + } } UrlPattern::Result tryParseConstructorString( diff --git a/src/workerd/jsg/util-test.c++ b/src/workerd/jsg/util-test.c++ index 6f247679a8a..279ff2c053d 100644 --- a/src/workerd/jsg/util-test.c++ +++ b/src/workerd/jsg/util-test.c++ @@ -422,5 +422,60 @@ KJ_TEST("isTunneledException") { } } +// ======================================================================================== +// Regression test for asBytes() with resizable ArrayBuffers. +// resize(0) decommits pages even while the BackingStore shared_ptr is held, so asBytes() +// must deep-copy. Ref: AUTOVULN-CLOUDFLARE-WORKERD-73 + +struct AsBytesResizableContext: public ContextGlobalObject { + // sumBytes(data, extra): sum all bytes in data, ignore extra. + // The extra parameter (kj::Maybe) exists so that a hostile valueOf() on + // the second argument can resize the ArrayBuffer during argument unwrapping. + double sumBytes(kj::Array data, kj::Maybe extra) { + double sum = 0; + for (auto b: data) sum += b; + return sum; + } + JSG_RESOURCE_TYPE(AsBytesResizableContext) { + JSG_METHOD(sumBytes); + } +}; +JSG_DECLARE_ISOLATE_TYPE(AsBytesResizableIsolate, AsBytesResizableContext); + +KJ_TEST("asBytes copies data from resizable ArrayBuffer") { + Evaluator e(v8System); + + // Baseline: normal (non-resizable) buffer works. + e.expectEval("var buf = new ArrayBuffer(4);\n" + "var view = new Uint8Array(buf);\n" + "view[0] = 1; view[1] = 2; view[2] = 3; view[3] = 4;\n" + "sumBytes(view, 0);\n", + "number", "10"); + + // Resizable buffer, no hostile resize -- should work normally. + e.expectEval("var rab = new ArrayBuffer(4, { maxByteLength: 4 });\n" + "var u8 = new Uint8Array(rab);\n" + "u8[0] = 10; u8[1] = 20; u8[2] = 30; u8[3] = 40;\n" + "sumBytes(u8, 0);\n", + "number", "100"); + + // Resizable buffer with hostile valueOf that resizes to 0 during argument unwrap. 
+ // The evaluation order of arguments is indeterminate, so we can't predict whether + // the data is captured before or after the resize. Either way, it must not crash. + // We pass a Uint8Array (exercises the ArrayBufferView overload). + e.expectEval("var rab2 = new ArrayBuffer(4, { maxByteLength: 4 });\n" + "var u8b = new Uint8Array(rab2);\n" + "u8b[0] = 10; u8b[1] = 20; u8b[2] = 30; u8b[3] = 40;\n" + "typeof sumBytes(u8b, { valueOf() { rab2.resize(0); return 0; } });\n", + "string", "number"); + + // Same, but pass the raw ArrayBuffer (exercises the ArrayBuffer overload). + e.expectEval("var rab3 = new ArrayBuffer(3, { maxByteLength: 3 });\n" + "var v3 = new Uint8Array(rab3);\n" + "v3[0] = 5; v3[1] = 15; v3[2] = 25;\n" + "typeof sumBytes(rab3, { valueOf() { rab3.resize(0); return 0; } });\n", + "string", "number"); +} + } // namespace } // namespace workerd::jsg::test diff --git a/src/workerd/jsg/util.c++ b/src/workerd/jsg/util.c++ index a75f3fdd3cd..cf998174b57 100644 --- a/src/workerd/jsg/util.c++ +++ b/src/workerd/jsg/util.c++ @@ -8,7 +8,6 @@ #include "setup.h" #include -#include #include #include @@ -101,36 +100,6 @@ kj::String typeName(const std::type_info& type) { namespace { -// For internal errors, we generate an ID to include when rendering user-facing "internal error" -// exceptions and writing internal exception logs, to make it easier to search for logs -// corresponding to "internal error" exceptions reported by users. -// -// We'll use an ID of 24 base-32 encoded characters, just because its relatively simple to -// generate from random bytes. This should give us a value with 120 bits of uniqueness, which is -// about as good as a UUID. -// -// (We're not using base-64 encoding to avoid issues with case insensitive search, as well as -// ensuring that the id is easy to select and copy via double-clicking.) 
-using InternalErrorId = kj::FixedArray; - -constexpr char BASE32_DIGITS[] = "0123456789abcdefghijklmnopqrstuv"; - -InternalErrorId makeInternalErrorId() { - InternalErrorId id; - if (isPredictableModeForTest()) { - // In testing mode, use content that generates a "0123456789abcdefghijklm" ID: - for (auto i: kj::indices(id)) { - id[i] = i; - } - } else { - getEntropy(kj::asBytes(id)); - } - for (auto i: kj::indices(id)) { - id[i] = BASE32_DIGITS[static_cast(id[i]) % 32]; - } - return id; -} - kj::String renderInternalError(InternalErrorId& internalErrorId) { return kj::str("internal error; reference = ", internalErrorId); } @@ -657,26 +626,53 @@ static kj::Array getEmptyArray() { } kj::Array asBytes(v8::Local arrayBuffer) { + if (arrayBuffer->IsResizableByUserJavaScript() || arrayBuffer->IsImmutable()) { + // For resizable ArrayBuffers, resize(0) decommits pages (PROT_NONE) even while the + // BackingStore shared_ptr is held. Deep-copy to prevent SIGSEGV if JS shrinks the + // buffer after we capture the pointer. We use arrayBuffer->ByteLength() (the live + // length) rather than backing->ByteLength() (which returns the max reservation size). + // Ref: AUTOVULN-CLOUDFLARE-WORKERD-73 + // + // We also want to copy for immutable ArrayBuffers. Since the expectation might + // be that the memory buffer returned from asBytes() is mutable, we don't want + // to violate the expectation. 
+ auto byteLength = arrayBuffer->ByteLength(); + if (byteLength == 0) { + return getEmptyArray(); + } + kj::ArrayPtr bytes(static_cast(arrayBuffer->Data()), byteLength); + return kj::heapArray(bytes); + } auto backing = arrayBuffer->GetBackingStore(); kj::ArrayPtr bytes(static_cast(backing->Data()), backing->ByteLength()); if (bytes == nullptr) { return getEmptyArray(); - } else { - return bytes.attach(kj::mv(backing)); } + return bytes.attach(kj::mv(backing)); } kj::Array asBytes(v8::Local arrayBufferView) { - auto backing = arrayBufferView->Buffer()->GetBackingStore(); - kj::ArrayPtr buffer(static_cast(backing->Data()), backing->ByteLength()); + auto buffer = arrayBufferView->Buffer(); + if (buffer->IsResizableByUserJavaScript() || buffer->IsImmutable()) { + // Deep-copy for resizable or immutable ArrayBuffers -- see comment above. + // CopyContents handles bounds checking internally for out-of-bounds views. + auto len = arrayBufferView->ByteLength(); + if (len == 0) { + return getEmptyArray(); + } + auto copy = kj::heapArray(len); + arrayBufferView->CopyContents(copy.begin(), copy.size()); + return copy; + } + auto backing = buffer->GetBackingStore(); + kj::ArrayPtr bufferBytes(static_cast(backing->Data()), backing->ByteLength()); auto sliceStart = arrayBufferView->ByteOffset(); auto sliceEnd = sliceStart + arrayBufferView->ByteLength(); - KJ_ASSERT(buffer.size() >= sliceEnd); - auto bytes = buffer.slice(sliceStart, sliceEnd); + KJ_ASSERT(bufferBytes.size() >= sliceEnd); + auto bytes = bufferBytes.slice(sliceStart, sliceEnd); if (bytes == nullptr) { return getEmptyArray(); - } else { - return bytes.attach(kj::mv(backing)); } + return bytes.attach(kj::mv(backing)); } // TODO(soon): If the returned kj::Array is used outside of the isolate lock, diff --git a/src/workerd/jsg/util.h b/src/workerd/jsg/util.h index 57b3e1558c9..1bf773ec2ce 100644 --- a/src/workerd/jsg/util.h +++ b/src/workerd/jsg/util.h @@ -342,7 +342,12 @@ struct LiftKj_ { } else { if constexpr 
(isVoid()) { func(); - if constexpr (!kj::canConvert&>()) { + if constexpr (!kj::canConvert&>() && + !kj::canConvert&>()) { + // Skip `SetUndefined` for `PropertyCallbackInfo` (the V2 native data + // property setter signature): `ReturnValue::SetUndefined` does not compile + // (its `static_assert` rejects `Boolean`), and per V8's contract leaving the return + // value unset is interpreted as setter success. info.GetReturnValue().SetUndefined(); } } else { diff --git a/src/workerd/jsg/v8-platform-wrapper.h b/src/workerd/jsg/v8-platform-wrapper.h index 08837bece2d..26660d9b107 100644 --- a/src/workerd/jsg/v8-platform-wrapper.h +++ b/src/workerd/jsg/v8-platform-wrapper.h @@ -71,6 +71,15 @@ class V8PlatformWrapper: public v8::Platform { return inner.GetTracingController(); } + v8::ThreadIsolatedAllocator* GetThreadIsolatedAllocator() override { + // Forward to the inner platform so that V8's ThreadIsolation can use PKU + // (Memory Protection Keys) to enforce W^X on JIT code pages and + // write-protect the code pointer tables. Without this, the + // DefaultPlatform's allocator (which calls pkey_alloc) was silently + // dropped and ThreadIsolation was disabled. + return inner.GetThreadIsolatedAllocator(); + } + private: v8::Platform& inner; diff --git a/src/workerd/jsg/weakref-test.c++ b/src/workerd/jsg/weakref-test.c++ new file mode 100644 index 00000000000..33df02e5f98 --- /dev/null +++ b/src/workerd/jsg/weakref-test.c++ @@ -0,0 +1,390 @@ +// Copyright (c) 2017-2022 Cloudflare, Inc. 
+// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +#include "jsg-test.h" + +namespace workerd::jsg::test { +namespace { + +V8System v8System({"--expose-gc"_kj}); +class ContextGlobalObject: public Object, public ContextGlobal {}; + +struct WeakRefContext: public ContextGlobalObject { + JSG_RESOURCE_TYPE(WeakRefContext) { + JSG_NESTED_TYPE(NumberBox); + } +}; +JSG_DECLARE_ISOLATE_TYPE(WeakRefIsolate, WeakRefContext, NumberBox); + +// ======================================================================================== +// jsg::WeakRef tests + +KJ_TEST("WeakRef: basic creation and access") { + Evaluator e(v8System); + e.run([](Lock& js) { + auto strong = js.alloc(42); + auto weak = strong.getWeakRef(js); + + // Weak ref should be alive. + KJ_ASSERT(weak.isAlive()); + + // operator->() should work. + KJ_ASSERT(weak->value == 42); + + // tryGet() should return the object. + KJ_IF_SOME(ref, weak.tryGet()) { + KJ_ASSERT(ref.value == 42); + } else { + KJ_FAIL_ASSERT("expected alive WeakRef"); + } + }); +} + +KJ_TEST("WeakRef: tryAddRef promotes to strong Ref") { + Evaluator e(v8System); + e.run([](Lock& js) { + auto strong = js.alloc(7); + auto weak = strong.getWeakRef(js); + + // Promote to strong reference. + auto promoted = KJ_ASSERT_NONNULL(weak.tryAddRef(js)); + KJ_ASSERT(promoted->value == 7); + + // Both refs refer to the same object. + KJ_ASSERT(&*strong == &*promoted); + }); +} + +KJ_TEST("WeakRef: becomes invalid when all Refs dropped") { + Evaluator e(v8System); + e.run([](Lock& js) { + WeakRef weak(nullptr); + + { + auto strong = js.alloc(99); + weak = strong.getWeakRef(js); + KJ_ASSERT(weak.isAlive()); + } + // strong is destroyed, Wrappable refcount hits 0, destructor invalidates anchor. + KJ_ASSERT(!weak.isAlive()); + + // tryGet returns none. + KJ_ASSERT(weak.tryGet() == kj::none); + + // tryAddRef returns none. 
+ KJ_ASSERT(weak.tryAddRef(js) == kj::none); + + // operator->() throws. + KJ_EXPECT_THROW_MESSAGE("invalidated", weak->value); + + // operator->() throws a different message when weak itself is destroyed/moved + auto weak2 = kj::mv(weak); + KJ_EXPECT_THROW_MESSAGE("destroyed", weak->value); + }); +} + +KJ_TEST("WeakRef: addRef creates independent weak ref") { + Evaluator e(v8System); + e.run([](Lock& js) { + auto strong = js.alloc(5); + auto weak1 = strong.getWeakRef(js); + auto weak2 = weak1.addRef(js); + + // Both alive. + KJ_ASSERT(weak1.isAlive()); + KJ_ASSERT(weak2.isAlive()); + + // Both refer to same object. + auto& ref1 = KJ_ASSERT_NONNULL(weak1.tryGet()); + auto& ref2 = KJ_ASSERT_NONNULL(weak2.tryGet()); + KJ_ASSERT(&ref1 == &ref2); + }); +} + +KJ_TEST("WeakRef: null-constructed is not alive") { + WeakRef weak(nullptr); + KJ_ASSERT(!weak.isAlive()); + KJ_ASSERT(weak.tryGet() == kj::none); +} + +KJ_TEST("WeakRef: move semantics") { + Evaluator e(v8System); + e.run([](Lock& js) { + auto strong = js.alloc(3); + auto weak1 = strong.getWeakRef(js); + auto weak2 = kj::mv(weak1); + + // weak2 should be alive, weak1 should be null. + KJ_ASSERT(weak2.isAlive()); + KJ_ASSERT(!weak1.isAlive()); + KJ_ASSERT(weak2->value == 3); + }); +} + +KJ_TEST("WeakRef: promote keeps object alive") { + Evaluator e(v8System); + e.run([](Lock& js) { + WeakRef weak(nullptr); + kj::Maybe> maybePromoted; + + { + auto strong = js.alloc(11); + weak = strong.getWeakRef(js); + maybePromoted = weak.tryAddRef(js); + // strong goes out of scope, but promoted keeps object alive. + } + + // Object is still alive because promoted ref exists. 
+ KJ_ASSERT(weak.isAlive()); + + auto& promoted = KJ_ASSERT_NONNULL(maybePromoted); + KJ_ASSERT(promoted->value == 11); + }); +} + +KJ_TEST("WeakRef: drop out of lock") { + Evaluator e(v8System); + kj::Maybe> weak; + e.run([&weak](Lock& js) { + auto strong = js.alloc(11); + weak = strong.getWeakRef(js); + KJ_ASSERT(KJ_ASSERT_NONNULL(weak).isAlive()); + }); + + // We are now outside the isolate lock. The + // strong object is no longer alive. + KJ_ASSERT(!KJ_ASSERT_NONNULL(weak).isAlive()); + weak = kj::none; + + e.run([](Lock& js) { + // The weak should be destroyed finally when + // we entered this lock. Don't crash! + }); +} + +KJ_TEST("WeakRef: drop out of lock (drop in any order)") { + Evaluator e(v8System); + kj::Maybe> weak; + kj::Maybe> strong; + e.run([&](Lock& js) { + strong = js.alloc(11); + weak = KJ_ASSERT_NONNULL(strong).getWeakRef(js); + KJ_ASSERT(KJ_ASSERT_NONNULL(weak).isAlive()); + }); + + // The order in which the items are dropped outside of the + // isolate lock determines the order in which they are + // added to the deferred destruction queue. + weak = kj::none; + strong = kj::none; + + e.run([](Lock& js) { + // The weak should be destroyed finally when + // we entered this lock. Don't crash! + }); +} + +KJ_TEST("WeakRef: drop out of lock (drop in any order 2)") { + Evaluator e(v8System); + kj::Maybe> weak; + kj::Maybe> strong; + e.run([&](Lock& js) { + strong = js.alloc(11); + weak = KJ_ASSERT_NONNULL(strong).getWeakRef(js); + KJ_ASSERT(KJ_ASSERT_NONNULL(weak).isAlive()); + }); + + // The order in which the items are dropped outside of the + // isolate lock determines the order in which they are + // added to the deferred destruction queue. + strong = kj::none; + weak = kj::none; + + e.run([](Lock& js) { + // The weak should be destroyed finally when + // we entered this lock. Don't crash! 
+ }); +} + +class NumberBox2: public NumberBox { + public: + using NumberBox::NumberBox; + JSG_RESOURCE_TYPE(NumberBox2) { + JSG_INHERIT(NumberBox); + } +}; + +KJ_TEST("Moving WeakRefs") { + Evaluator e(v8System); + + e.run([](Lock& js) { + auto strong = js.alloc(123); + auto weak1 = strong.getWeakRef(js); + auto weak2 = kj::mv(weak1); + KJ_ASSERT(weak2.isAlive()); + KJ_ASSERT(!weak1.isAlive()); + + auto strong2 = js.alloc(456); + auto weak3 = strong2.getWeakRef(js); + jsg::WeakRef weak4 = kj::mv(weak3); + jsg::WeakRef weak5(strong2.getWeakRef(js)); + KJ_ASSERT(KJ_ASSERT_NONNULL(weak4.tryGet()).value == 456); + KJ_ASSERT(KJ_ASSERT_NONNULL(weak5.tryGet()).value == 456); + }); +} + +KJ_TEST("Getting weakref from self") { + Evaluator e(v8System); + + e.run([](Lock& js) { + auto strong = js.alloc(123); + // Uses JSG_THIS_WEAK internally + auto weak = strong->getWeakRefToSelf(js); + KJ_ASSERT(weak.isAlive()); + auto strong2 = KJ_ASSERT_NONNULL(weak.tryAddRef(js)); + KJ_ASSERT(strong.get() == strong2.get()); + }); +} + +// ======================================================================================== +// jsg::WeakV8Ref tests + +KJ_TEST("WeakV8Ref: basic creation and access") { + Evaluator e(v8System); + e.run([](Lock& js) { + // Create a V8 value and a weak ref to it. 
+ auto strong = js.v8Ref(v8Str(js.v8Isolate, "hello"_kj)); + auto weak = strong.getWeakRef(js); + + KJ_ASSERT(weak.isAlive()); + + auto local = KJ_ASSERT_NONNULL(weak.tryGetHandle(js.v8Isolate)); + v8::String::Utf8Value utf8(js.v8Isolate, local); + KJ_ASSERT(kj::StringPtr(*utf8, utf8.length()) == "hello"); + }); +} + +KJ_TEST("WeakV8Ref: tryAddRef promotes to strong V8Ref") { + Evaluator e(v8System); + e.run([](Lock& js) { + auto strong = js.v8Ref(v8Str(js.v8Isolate, "world"_kj)); + auto weak = strong.getWeakRef(js); + + auto promoted = KJ_ASSERT_NONNULL(weak.tryAddRef(js.v8Isolate)); + auto local = promoted.getHandle(js.v8Isolate); + v8::String::Utf8Value utf8(js.v8Isolate, local); + KJ_ASSERT(kj::StringPtr(*utf8, utf8.length()) == "world"); + }); +} + +KJ_TEST("WeakV8Ref: null-constructed is not alive") { + WeakV8Ref weak(nullptr); + KJ_ASSERT(!weak.isAlive()); +} + +KJ_TEST("WeakV8Ref: move semantics") { + Evaluator e(v8System); + e.run([](Lock& js) { + auto strong = js.v8Ref(v8Str(js.v8Isolate, "test"_kj)); + auto weak1 = strong.getWeakRef(js); + auto weak2 = kj::mv(weak1); + + KJ_ASSERT(weak2.isAlive()); + KJ_ASSERT(!weak1.isAlive()); + }); +} + +KJ_TEST("WeakV8Ref: not alive after drop") { + setPredictableModeForTest(); + Evaluator e(v8System); + e.run([](Lock& js) { + // A nested handle scope is required to ensure that the object is collected + // and not being held alive by the outer handle scope. 
+ auto weak = js.withinHandleScope([&] { + auto strong = js.v8Ref(v8::Object::New(js.v8Isolate)); + auto weak = strong.getWeakRef(js); + KJ_ASSERT(weak.isAlive()); + return kj::mv(weak); + }); + js.requestGcForTesting(); + KJ_ASSERT(!weak.isAlive()); + }); +} + +// ======================================================================================== +// jsg::WeakJsRef tests + +KJ_TEST("WeakJsRef: basic creation and access") { + Evaluator e(v8System); + e.run([](Lock& js) { + auto obj = js.obj(); + JsRef strong(js, obj); + auto weak = strong.getWeakRef(js); + + KJ_ASSERT(weak.isAlive()); + + auto handle = KJ_ASSERT_NONNULL(weak.tryGetHandle(js)); + // Should be the same object. + KJ_ASSERT(handle == obj); + }); +} + +KJ_TEST("WeakJsRef: tryAddRef promotes to strong JsRef") { + Evaluator e(v8System); + e.run([](Lock& js) { + auto str = js.str("test"_kj); + JsRef strong(js, str); + auto weak = strong.getWeakRef(js); + + auto promoted = KJ_ASSERT_NONNULL(weak.tryAddRef(js)); + auto handle = promoted.getHandle(js); + KJ_ASSERT(handle == str); + }); +} + +KJ_TEST("WeakJsRef: null-constructed is not alive") { + WeakJsRef weak(nullptr); + KJ_ASSERT(!weak.isAlive()); +} + +KJ_TEST("WeakJsRef: move semantics") { + Evaluator e(v8System); + e.run([](Lock& js) { + auto obj = js.obj(); + JsRef strong(js, obj); + auto weak1 = strong.getWeakRef(js); + auto weak2 = kj::mv(weak1); + + KJ_ASSERT(weak2.isAlive()); + KJ_ASSERT(!weak1.isAlive()); + }); +} + +KJ_TEST("WeakJsRef: getHandle asserts when dead") { + Evaluator e(v8System); + e.run([](Lock& js) { + WeakJsRef weak(nullptr); + KJ_EXPECT_THROW_MESSAGE("collected", weak.getHandle(js)); + }); +} + +KJ_TEST("WeakJsRef: not alive after drop") { + setPredictableModeForTest(); + Evaluator e(v8System); + e.run([](Lock& js) { + // A nested handle scope is required to ensure that the object is collected + // and not being held alive by the outer handle scope. 
+ auto weak = js.withinHandleScope([&] { + auto obj = js.obj(); + auto weak = obj.getWeakRef(js); + KJ_ASSERT(weak.isAlive()); + return kj::mv(weak); + }); + js.requestGcForTesting(); + KJ_ASSERT(!weak.isAlive()); + }); +} + +} // namespace +} // namespace workerd::jsg::test diff --git a/src/workerd/jsg/wrappable.c++ b/src/workerd/jsg/wrappable.c++ index 65be7e8631c..61dbabac400 100644 --- a/src/workerd/jsg/wrappable.c++ +++ b/src/workerd/jsg/wrappable.c++ @@ -404,6 +404,7 @@ v8::Local Wrappable::attachOpaqueWrapper( auto isolate = v8::Isolate::GetCurrent(); auto object = jsg::check(IsolateBase::getOpaqueTemplate(isolate)->InstanceTemplate()->NewInstance(context)); + jsg::check(object->SetPrototype(context, v8::Null(isolate))); attachWrapper(isolate, object, needsGcTracing); return object; } diff --git a/src/workerd/jsg/wrappable.h b/src/workerd/jsg/wrappable.h index 088a8414eb7..75ecd6cd29e 100644 --- a/src/workerd/jsg/wrappable.h +++ b/src/workerd/jsg/wrappable.h @@ -85,6 +85,31 @@ using kj::uint; class GcVisitor; class HeapTracer; +class Wrappable; // Forward declaration for WeakRefAnchor. + +// Shared alive/dead flag for weak references to Wrappable objects. Allocated lazily in +// Wrappable when a weak reference is first requested via getOrCreateWeakRefAnchor(). +// Automatically invalidated in Wrappable's destructor, so derived types never need to +// manage invalidation. +// +// The anchor itself does NOT store the target pointer — each jsg::WeakRef stores its +// own typed T* alongside a reference to this anchor. This avoids downcasting from the +// privately-inherited Wrappable base class. +class WeakRefAnchor final: public kj::Refcounted { + public: + bool isAlive() const { + return alive; + } + + private: + bool alive = true; + + void invalidate() { + alive = false; + } + + friend class Wrappable; +}; // Base class for C++ objects which can be "wrapped" for JavaScript consumption. 
A JavaScript // "wrapper" object is created, and then the JS wrapper and C++ Wrappable are "attached" to each @@ -108,6 +133,15 @@ class HeapTracer; // Wrappable and are not visible to GC tracing. class Wrappable: public kj::Refcounted { public: + ~Wrappable() noexcept(false) { + // Invalidate all outstanding jsg::WeakRefs before any derived state is accessed again. + // This is safe in single-threaded JSG context because no other code can call tryGet() during + // the destructor call chain. + KJ_IF_SOME(a, weakRefAnchor) { + a->invalidate(); + } + } + enum InternalFields : int { // Field must contain a pointer to `WORKERD_WRAPPABLE_TAG`. This is a workerd-specific // tag that helps us to identify a v8 API object as one of our own. @@ -246,9 +280,26 @@ class Wrappable: public kj::Refcounted { // When `wrapperRef` is non-empty, the Wrappable is a member of the list `HeapTracer::wrappers`. kj::ListLink link; + // Lazy-allocated shared state for jsg::WeakRef. Zero overhead for objects that never + // have weak references taken. Created on first call to getOrCreateWeakRefAnchor(). + kj::Maybe> weakRefAnchor; + + // Returns (or creates) the shared WeakRefAnchor for this object. Used by Ref::getWeakRef(). + kj::Rc getOrCreateWeakRefAnchor() { + KJ_IF_SOME(a, weakRefAnchor) { + return a.addRef(); + } + auto a = kj::rc(); + weakRefAnchor = a.addRef(); + return a; + } + + friend class Object; friend class GcVisitor; friend class HeapTracer; friend class MemoryTracker; + template + friend class Ref; }; // For historical reasons, this is actually implemented in setup.c++. 
@@ -339,9 +390,10 @@ T& extractInternalPointer( getAlignedPointerFromEmbedderData(context, ContextPointerSlot::GLOBAL_WRAPPER)); } else { KJ_ASSERT(object->InternalFieldCount() == Wrappable::INTERNAL_FIELD_COUNT); - return *reinterpret_cast( - object->GetAlignedPointerFromInternalField(Wrappable::WRAPPED_OBJECT_FIELD_INDEX, - static_cast(Wrappable::WRAPPED_OBJECT_FIELD_INDEX))); + auto* ptr = object->GetAlignedPointerFromInternalField(Wrappable::WRAPPED_OBJECT_FIELD_INDEX, + static_cast(Wrappable::WRAPPED_OBJECT_FIELD_INDEX)); + KJ_ASSERT(ptr != nullptr, "EPT type-tag mismatch: internal field returned nullptr"); + return *reinterpret_cast(ptr); } } diff --git a/src/workerd/server/actor-id-impl.c++ b/src/workerd/server/actor-id-impl.c++ index de5a99305f0..e65a557ab66 100644 --- a/src/workerd/server/actor-id-impl.c++ +++ b/src/workerd/server/actor-id-impl.c++ @@ -87,16 +87,22 @@ kj::Own ActorIdFactoryImpl::idFromString(kj::String str JSG_REQUIRE(str.size() == SHA256_DIGEST_LENGTH * 2 && !decoded.hadErrors && decoded.size() == SHA256_DIGEST_LENGTH, TypeError, "Invalid Durable Object ID: must be 64 hex digits"); + return idFromRaw(decoded, kj::none); +} + +kj::Own ActorIdFactoryImpl::idFromRaw( + kj::ArrayPtr bytes, kj::Maybe name) { + KJ_REQUIRE(bytes.size() == SHA256_DIGEST_LENGTH, "Invalid Durable Object ID: must be 32 bytes"); kj::byte id[BASE_LENGTH + SHA256_DIGEST_LENGTH]{}; - memcpy(id, decoded.begin(), BASE_LENGTH); + memcpy(id, bytes.begin(), BASE_LENGTH); computeMac(id); // Verify that the computed mac matches the input. 
- JSG_REQUIRE(kj::arrayPtr(id).slice(BASE_LENGTH).startsWith(decoded.asPtr().slice(BASE_LENGTH)), - TypeError, "Durable Object ID is not valid for this namespace."); + JSG_REQUIRE(kj::arrayPtr(id).slice(BASE_LENGTH).startsWith(bytes.slice(BASE_LENGTH)), TypeError, + "Durable Object ID is not valid for this namespace."); - return kj::heap(id, kj::none); + return kj::heap(id, kj::mv(name)); } kj::Own ActorIdFactoryImpl::cloneWithJurisdiction( diff --git a/src/workerd/server/actor-id-impl.h b/src/workerd/server/actor-id-impl.h index 9b53c75f4f7..983c4138983 100644 --- a/src/workerd/server/actor-id-impl.h +++ b/src/workerd/server/actor-id-impl.h @@ -5,6 +5,9 @@ #include namespace workerd::server { + +using kj::byte; + class ActorIdFactoryImpl final: public ActorIdFactory { public: ActorIdFactoryImpl(kj::StringPtr uniqueKey); @@ -14,6 +17,10 @@ class ActorIdFactoryImpl final: public ActorIdFactory { public: ActorIdImpl(const kj::byte idParam[SHA256_DIGEST_LENGTH], kj::Maybe name); + kj::ArrayPtr getRaw() const { + return id; + } + kj::String toString() const override; kj::Maybe getName() const override; kj::Maybe getJurisdiction() const override; @@ -29,6 +36,8 @@ class ActorIdFactoryImpl final: public ActorIdFactory { kj::Maybe name; }; + kj::Own idFromRaw(kj::ArrayPtr bytes, kj::Maybe name); + kj::Own newUniqueId(kj::Maybe jurisdiction) override; kj::Own idFromName(kj::String name) override; kj::Own idFromString(kj::String str) override; diff --git a/src/workerd/server/channel-token-test.c++ b/src/workerd/server/channel-token-test.c++ index 97e28a66138..927b7092f27 100644 --- a/src/workerd/server/channel-token-test.c++ +++ b/src/workerd/server/channel-token-test.c++ @@ -124,6 +124,54 @@ class MockActorClassChannel: public IoChannelFactory::ActorClassChannel { } }; +class MockActorChannel: public IoChannelFactory::ActorChannel { + public: + MockActorChannel( + kj::StringPtr namespaceKey, kj::ArrayPtr id, kj::Maybe name) + : namespaceKey(kj::str(namespaceKey)), + 
id(kj::heapArray(id)), + name(name.map([](kj::StringPtr s) { return kj::str(s); })) {} + + MockActorChannel(ChannelTokenHandler& handler, + kj::StringPtr namespaceKey, + kj::ArrayPtr id, + kj::Maybe name, + kj::Maybe> readyPromise = kj::none) + : handler(handler), + namespaceKey(kj::str(namespaceKey)), + id(kj::heapArray(id)), + name(name.map([](kj::StringPtr s) { return kj::str(s); })), + readyPromise(kj::mv(readyPromise)) {} + + kj::Maybe handler; + kj::String namespaceKey; + kj::Array id; + kj::Maybe name; + kj::Maybe> readyPromise; + + kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { + KJ_UNREACHABLE; + } + void requireAllowsTransfer() override { + KJ_UNREACHABLE; + } + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + auto& h = KJ_ASSERT_NONNULL(handler, "this mock was not constructed with a handler ref"); + KJ_IF_SOME(p, readyPromise) { + auto promise = kj::mv(p); + readyPromise = kj::none; + return promise.then([&h, usage, this]() mutable -> kj::Array { + return h.encodeActorChannelToken( + usage, namespaceKey, id, name.map([](kj::String& s) -> kj::StringPtr { return s; })); + }); + } else { + return h.encodeActorChannelToken( + usage, namespaceKey, id, name.map([](kj::String& s) -> kj::StringPtr { return s; })); + } + } +}; + class MockResolver: public ChannelTokenHandler::Resolver { public: kj::Own resolveEntrypoint( @@ -137,6 +185,12 @@ class MockResolver: public ChannelTokenHandler::Resolver { return kj::refcounted( ServiceTriplet(serviceName, entrypoint, kj::mv(props))); } + + kj::Own resolveActor(kj::StringPtr namespaceKey, + kj::ArrayPtr id, + kj::Maybe name) override { + return kj::refcounted(namespaceKey, id, name); + } }; using Usage = IoChannelFactory::ChannelTokenUsage; @@ -152,6 +206,18 @@ Frankenvalue propsWithCaps(kj::Vector> caps return Frankenvalue::fromCapnp(builder.asReader(), kj::mv(caps)); } +void expectActorChannel(MockActorChannel& channel, + 
kj::StringPtr namespaceKey, + kj::ArrayPtr id, + kj::Maybe name) { + KJ_EXPECT(channel.namespaceKey == namespaceKey); + KJ_ASSERT(channel.id.size() == id.size()); + for (auto i: kj::indices(id)) { + KJ_EXPECT(channel.id[i] == id[i]); + } + KJ_EXPECT(channel.name.map([](kj::String& s) -> kj::StringPtr { return s; }) == name); +} + KJ_TEST("channel token basics") { MockResolver resolver; ChannelTokenHandler handler(resolver); @@ -259,12 +325,41 @@ KJ_TEST("actor class channel tokens") { "channel token type mismatch", handler.decodeSubrequestChannelToken(Usage::RPC, token)); } +KJ_TEST("actor channel tokens") { + MockResolver resolver; + ChannelTokenHandler handler(resolver); + + const byte actorId[] = {12, 34, 56, 78}; + + auto token = handler.encodeActorChannelToken(Usage::RPC, "foo-namespace", actorId, "my-actor"_kj); + + { + auto channel = + handler.decodeSubrequestChannelToken(Usage::RPC, token).downcast(); + expectActorChannel(*channel, "foo-namespace", actorId, "my-actor"_kj); + } + + auto storageToken = handler.encodeActorChannelToken( + Usage::STORAGE, "foo-namespace", actorId, kj::Maybe(kj::none)); + + { + auto channel = handler.decodeSubrequestChannelToken(Usage::STORAGE, storageToken) + .downcast(); + expectActorChannel(*channel, "foo-namespace", actorId, kj::Maybe(kj::none)); + } + + KJ_EXPECT_THROW_MESSAGE( + "channel token type mismatch", handler.decodeActorClassChannelToken(Usage::RPC, token)); +} + KJ_TEST("channel token with nested channels (all synchronous)") { MockResolver resolver; ChannelTokenHandler handler(resolver); - // Build a props cap table containing a SubrequestChannel and an ActorClassChannel, both of - // which produce their tokens synchronously. + const byte actorId[] = {90, 91, 92, 93}; + + // Build a props cap table containing a SubrequestChannel, an ActorClassChannel, and an + // ActorChannel, all of which produce their tokens synchronously. 
kj::Vector> caps; caps.add(kj::refcounted(handler, ServiceTriplet( @@ -272,6 +367,8 @@ KJ_TEST("channel token with nested channels (all synchronous)") { caps.add(kj::refcounted(handler, ServiceTriplet("nested-actor", kj::Maybe(kj::none), Frankenvalue::fromJson(kj::str("{\"inner\": 2}"))))); + caps.add(kj::refcounted( + handler, "nested-namespace", actorId, "nested-actor-name"_kj)); auto props = propsWithCaps(kj::mv(caps)); // Encoding is synchronous. @@ -287,7 +384,7 @@ KJ_TEST("channel token with nested channels (all synchronous)") { "OuterEntry"_kj); auto capTable = channel->triplet.props.getCapTable(); - KJ_ASSERT(capTable.size() == 2); + KJ_ASSERT(capTable.size() == 3); auto& nestedSub = KJ_ASSERT_NONNULL(kj::tryDowncast(*capTable[0]), "expected nested cap 0 to be a SubrequestChannel"); @@ -300,6 +397,10 @@ KJ_TEST("channel token with nested channels (all synchronous)") { KJ_EXPECT(nestedActor.triplet == ServiceTriplet("nested-actor", kj::Maybe(kj::none), Frankenvalue::fromJson(kj::str("{\"inner\": 2}")))); + + auto& nestedActorStub = KJ_ASSERT_NONNULL(kj::tryDowncast(*capTable[2]), + "expected nested cap 2 to be an ActorChannel"); + expectActorChannel(nestedActorStub, "nested-namespace", actorId, "nested-actor-name"_kj); } // Also works with STORAGE usage. @@ -308,7 +409,7 @@ KJ_TEST("channel token with nested channels (all synchronous)") { { auto channel = handler.decodeSubrequestChannelToken(Usage::STORAGE, storageToken) .downcast(); - KJ_EXPECT(channel->triplet.props.getCapTable().size() == 2); + KJ_EXPECT(channel->triplet.props.getCapTable().size() == 3); } // And the outer channel can itself be an ActorClassChannel. 
@@ -317,7 +418,7 @@ KJ_TEST("channel token with nested channels (all synchronous)") { { auto channel = handler.decodeActorClassChannelToken(Usage::RPC, actorToken) .downcast(); - KJ_EXPECT(channel->triplet.props.getCapTable().size() == 2); + KJ_EXPECT(channel->triplet.props.getCapTable().size() == 3); } } diff --git a/src/workerd/server/channel-token.c++ b/src/workerd/server/channel-token.c++ index 58620ba8405..c18b1fef5ba 100644 --- a/src/workerd/server/channel-token.c++ +++ b/src/workerd/server/channel-token.c++ @@ -47,14 +47,15 @@ kj::OneOf, kj::Promise>> ChannelTokenHandler:: builder.setType(type); - builder.setName(serviceName); + auto service = builder.getService(); + service.setName(serviceName); KJ_IF_SOME(e, entrypoint) { - builder.setEntrypoint(e); + service.setEntrypoint(e); } { - auto propsBuilder = builder.initProps(); + auto propsBuilder = service.initProps(); props.toCapnp(propsBuilder); auto capTable = props.getCapTable(); @@ -184,6 +185,26 @@ kj::OneOf, kj::Promise>> ChannelTokenHandler:: ChannelToken::Type::ACTOR_CLASS, usage, serviceName, entrypoint, props); } +kj::Array ChannelTokenHandler::encodeActorChannelToken( + IoChannelFactory::ChannelTokenUsage usage, + kj::StringPtr namespaceKey, + kj::ArrayPtr id, + kj::Maybe name) { + capnp::word scratch[128]{}; + capnp::MallocMessageBuilder message(scratch); + auto builder = message.getRoot(); + builder.setType(ChannelToken::Type::SUBREQUEST); + + auto actor = builder.initActor(); + actor.setNamespaceKey(namespaceKey); + actor.setId(id); + KJ_IF_SOME(n, name) { + actor.setName(n); + } + + return serializeTokenImpl(usage, message); +} + kj::Own ChannelTokenHandler::decodeChannelTokenImpl( ChannelToken::Type type, IoChannelFactory::ChannelTokenUsage usage, @@ -257,52 +278,73 @@ kj::Own ChannelTokenHandler::decodeChannelTokenImpl KJ_REQUIRE(reader.getType() == type, "channel token type mismatch"); - kj::Maybe entrypoint; - if (reader.hasEntrypoint()) { - entrypoint = reader.getEntrypoint(); - } + switch 
(reader.which()) { + case ChannelToken::SERVICE: { + auto service = reader.getService(); + + kj::Maybe entrypoint; + if (service.hasEntrypoint()) { + entrypoint = service.getEntrypoint(); + } - Frankenvalue props; - if (reader.hasProps()) { - auto propsReader = reader.getProps(); - auto tableReader = propsReader.getCapTable().getAs(); - - kj::Vector> capTable; - if (tableReader.hasCaps()) { - auto caps = tableReader.getCaps(); - capTable.reserve(caps.size()); - - for (auto cap: caps) { - switch (cap.which()) { - case ChannelToken::FrankenvalueCapTable::Cap::UNKNOWN: - break; - case ChannelToken::FrankenvalueCapTable::Cap::SUBREQUEST_CHANNEL: - capTable.add(decodeSubrequestChannelToken(usage, cap.getSubrequestChannel())); - continue; - case ChannelToken::FrankenvalueCapTable::Cap::ACTOR_CLASS_CHANNEL: - capTable.add(decodeActorClassChannelToken(usage, cap.getActorClassChannel())); - continue; + Frankenvalue props; + if (service.hasProps()) { + auto propsReader = service.getProps(); + auto tableReader = propsReader.getCapTable().getAs(); + + kj::Vector> capTable; + if (tableReader.hasCaps()) { + auto caps = tableReader.getCaps(); + capTable.reserve(caps.size()); + + for (auto cap: caps) { + switch (cap.which()) { + case ChannelToken::FrankenvalueCapTable::Cap::UNKNOWN: + break; + case ChannelToken::FrankenvalueCapTable::Cap::SUBREQUEST_CHANNEL: + capTable.add(decodeSubrequestChannelToken(usage, cap.getSubrequestChannel())); + continue; + case ChannelToken::FrankenvalueCapTable::Cap::ACTOR_CLASS_CHANNEL: + capTable.add(decodeActorClassChannelToken(usage, cap.getActorClassChannel())); + continue; + } + KJ_FAIL_REQUIRE("unknown cap table type", cap.which()); + } } - KJ_FAIL_REQUIRE("unknown cap table type", cap.which()); + + props = Frankenvalue::fromCapnp(propsReader, kj::mv(capTable)); + } + + // HACK: It would be more type-safe for us to return the (name, entrypoint, props) triplet and + // let the caller call the appropriate resolver method. 
However, this would require making + // heap string copies of the name and entrypoint which would just be thrown way immediately. + // Since both types happen to subclass Frankenvalue::CapTableEntry, we just make the resolver + // call here, return either type, and let the caller downcast to the right type. + switch (type) { + case ChannelToken::Type::SUBREQUEST: + return resolver.resolveEntrypoint(service.getName(), entrypoint, kj::mv(props)); + case ChannelToken::Type::ACTOR_CLASS: + return resolver.resolveActorClass(service.getName(), entrypoint, kj::mv(props)); } + + KJ_UNREACHABLE; } - props = Frankenvalue::fromCapnp(propsReader, kj::mv(capTable)); - } + case ChannelToken::ACTOR: { + auto actor = reader.getActor(); - // HACK: It would be more type-safe for us to return the (name, entrypoint, props) triplet and - // let the caller call the appropriate resolver method. However, this would require making - // heap string copies of the name and entrypoint which would just be thrown way immediately. - // Since both types happen to subclass Frankenvalue::CapTableEntry, we just make the resolver - // call here, return either type, and let the caller downcast to the right type. 
- switch (type) { - case ChannelToken::Type::SUBREQUEST: - return resolver.resolveEntrypoint(reader.getName(), entrypoint, kj::mv(props)); - case ChannelToken::Type::ACTOR_CLASS: - return resolver.resolveActorClass(reader.getName(), entrypoint, kj::mv(props)); + KJ_REQUIRE(type == ChannelToken::Type::SUBREQUEST, "channel token type mismatch"); + + kj::Maybe name; + if (actor.hasName()) { + name = actor.getName(); + } + + return resolver.resolveActor(actor.getNamespaceKey(), actor.getId(), name); + } } - KJ_UNREACHABLE; + KJ_FAIL_REQUIRE("unknown channel token kind", reader.which()); } kj::Own ChannelTokenHandler::decodeSubrequestChannelToken( diff --git a/src/workerd/server/channel-token.capnp b/src/workerd/server/channel-token.capnp index b5b0ac3ad50..004699c6ec5 100644 --- a/src/workerd/server/channel-token.capnp +++ b/src/workerd/server/channel-token.capnp @@ -59,15 +59,38 @@ struct ChannelToken { actorClass @1; # token for IoChannelFactory::ActorClassChannel } - name @1 :Text; - # Name of the service in the workerd config's services list. + union { + service :group { + # This points to an entrypoint exported by a service, with no associated storage. - entrypoint @2 :Text; - # Name of the entrypoint the channel points at. For subrequest channels this must be a - # WorkerEntrypoint derivative (or plain object implementing `ExportedHandlers`). For actor class - # channels this must be a `DurableObject` implementation. + name @1 :Text; + # Name of the service in the workerd config's services list. - props @3 :Frankenvalue; + entrypoint @2 :Text; + # Name of the entrypoint the channel points at. For subrequest channels this must be a + # WorkerEntrypoint derivative (or plain object implementing `ExportedHandlers`). For actor + # class channels this must be a `DurableObject` implementation. + + props @3 :Frankenvalue; + } + + actor :group { + # This points to a specific actor instance. + # + # Note that `type` must be `subrequest` in this case. 
+ + namespaceKey @4 :Text; + # The `uniqueKey` for the namespace, as defined in workerd.capnp. + # + # This identifies the specific namespace that the token points at. + + id @5 :Data; + # Raw DO ID bytes (not hex). + + name @6 :Text; + # Name, if known, otherwise null. + } + } struct FrankenvalueCapTable { # CapTable representation for `ChannelToken.props`. diff --git a/src/workerd/server/channel-token.h b/src/workerd/server/channel-token.h index 43ee0b18271..169c1e042ba 100644 --- a/src/workerd/server/channel-token.h +++ b/src/workerd/server/channel-token.h @@ -32,6 +32,9 @@ class ChannelTokenHandler { virtual kj::Own resolveActorClass( kj::StringPtr serviceName, kj::Maybe entrypoint, Frankenvalue props) = 0; + + virtual kj::Own resolveActor( + kj::StringPtr namespaceKey, kj::ArrayPtr id, kj::Maybe name) = 0; }; explicit ChannelTokenHandler(Resolver& resolver); @@ -47,6 +50,10 @@ class ChannelTokenHandler { kj::StringPtr serviceName, kj::Maybe entrypoint, Frankenvalue& props); + kj::Array encodeActorChannelToken(IoChannelFactory::ChannelTokenUsage usage, + kj::StringPtr namespaceKey, + kj::ArrayPtr id, + kj::Maybe name); // Helpers to implement `IoChannelFactory::{subrequestChannel,actorClass}FromToken()`. 
kj::Own decodeSubrequestChannelToken( diff --git a/src/workerd/server/container-client.c++ b/src/workerd/server/container-client.c++ index 69f2f0c6201..4351960d33a 100644 --- a/src/workerd/server/container-client.c++ +++ b/src/workerd/server/container-client.c++ @@ -138,7 +138,7 @@ class BufferedAsyncIoStream final: public kj::AsyncIoStream { auto bufferedRemaining = buffered.size() - bufferedOffset; if (bufferedRemaining > 0) { auto toCopy = kj::min(maxBytes, bufferedRemaining); - out.first(toCopy).copyFrom(buffered.asPtr().slice(bufferedOffset, bufferedOffset + toCopy)); + out.write(buffered.asPtr().slice(bufferedOffset, bufferedOffset + toCopy)); bufferedOffset += toCopy; copied = toCopy; @@ -147,7 +147,7 @@ class BufferedAsyncIoStream final: public kj::AsyncIoStream { } } - auto read = co_await inner->tryRead(out.begin() + copied, minBytes - copied, maxBytes - copied); + auto read = co_await inner->tryRead(out.begin(), minBytes - copied, maxBytes - copied); co_return copied + read; } @@ -472,7 +472,7 @@ kj::StringPtr signalToString(uint32_t signal) { void writeTarField(kj::ArrayPtr field, kj::StringPtr value) { auto len = kj::min(value.size(), field.size()); - field.first(len).copyFrom(value.asBytes().first(len)); + field.write(value.asBytes().first(len)); } // createTarWithFile creates simple tar files without importing a full blown TAR library. 
@@ -489,7 +489,7 @@ kj::Array createTarWithFile( tar.asPtr().fill(0); auto header = tar.first(512); - writeTarField(header.slice(0, 100), filename); + writeTarField(header.first(100), filename); writeTarField(header.slice(100, 108), "0000644"_kj); writeTarField(header.slice(108, 116), "0000000"_kj); writeTarField(header.slice(116, 124), "0000000"_kj); diff --git a/src/workerd/server/pyodide.c++ b/src/workerd/server/pyodide.c++ index aafbe9e7d16..6b538452dcf 100644 --- a/src/workerd/server/pyodide.c++ +++ b/src/workerd/server/pyodide.c++ @@ -47,6 +47,7 @@ void writePyodideBundleFileToDisk(const kj::Maybe>& kj::Promise> fetchPyodideBundle( const api::pyodide::PythonConfig& pyConfig, kj::String version, + kj::StringPtr integrity, kj::Network& network, kj::Timer& timer) { if (pyConfig.pyodideBundleManager.getPyodideBundle(version) != kj::none) { @@ -56,6 +57,7 @@ kj::Promise> fetchPyodideBundle( auto maybePyodideBundleFile = getPyodideBundleFile(pyConfig.pyodideDiskCacheRoot, version); KJ_IF_SOME(pyodideBundleFile, maybePyodideBundleFile) { auto body = pyodideBundleFile->readAllBytes(); + api::pyodide::verifyPyodideBundleIntegrity(version, integrity, body); pyConfig.pyodideBundleManager.setPyodideBundleData(kj::str(version), kj::mv(body)); co_return pyConfig.pyodideBundleManager.getPyodideBundle(version); } @@ -86,6 +88,8 @@ kj::Promise> fetchPyodideBundle( KJ_ASSERT(res.statusCode == 200, "Request for Pyodide bundle failed", url); auto body = co_await res.body->readAllBytes(); + api::pyodide::verifyPyodideBundleIntegrity(version, integrity, body); + writePyodideBundleFileToDisk(pyConfig.pyodideDiskCacheRoot, version, body); pyConfig.pyodideBundleManager.setPyodideBundleData(kj::str(version), kj::mv(body)); @@ -93,149 +97,4 @@ kj::Promise> fetchPyodideBundle( co_return pyConfig.pyodideBundleManager.getPyodideBundle(version); } -// Downloads a package with retry logic (up to 3 attempts with 5-second delays) -kj::Promise>> downloadPackageWithRetry(kj::HttpClient& client, 
- kj::Timer& timer, - kj::HttpHeaderTable& headerTable, - kj::StringPtr url, - kj::StringPtr path) { - constexpr uint retryLimit = 3; - kj::HttpHeaders headers(headerTable); - - for (uint retryCount = 0; retryCount < retryLimit; ++retryCount) { - if (retryCount > 0) { - // Sleep for 5 seconds before retrying - co_await timer.afterDelay(5 * kj::SECONDS); - KJ_LOG(INFO, "Retrying package download", path, "attempt", retryCount + 1, "of", retryLimit); - } - - try { - auto req = client.request(kj::HttpMethod::GET, url, headers); - auto res = co_await req.response; - - if (res.statusCode != 200) { - KJ_LOG(WARNING, "Failed to download package", path, res.statusCode, "attempt", - retryCount + 1, "of", retryLimit); - continue; // Try again in the next iteration - } - - // Request succeeded, read the body - co_return co_await res.body->readAllBytes(); - } catch (kj::Exception& e) { - if (retryCount + 1 >= retryLimit) { - // This was our last attempt - KJ_LOG(WARNING, "Failed to download package after all retry attempts", path, e, "attempts", - retryLimit); - } else { - KJ_LOG(WARNING, "Failed to download package", path, e, "attempt", retryCount + 1, "of", - retryLimit, "will retry"); - } - } - } - - co_return kj::none; // All retry attempts failed -} - -// Loads a single Python package, either from disk cache or by downloading it -kj::Promise loadPyodidePackage(const api::pyodide::PythonConfig& pyConfig, - const api::pyodide::PyodidePackageManager& pyodidePackageManager, - kj::StringPtr packagesVersion, - kj::StringPtr filename, - kj::Network& network, - kj::Timer& timer) { - - auto path = kj::str("python-package-bucket/", packagesVersion, "/", filename); - // First check if we already have this package in memory - if (pyodidePackageManager.getPyodidePackage(path) != kj::none) { - co_return; - } - - // Then check disk cache - KJ_IF_SOME(diskCachePath, pyConfig.packageDiskCacheRoot) { - auto parsedPath = kj::Path::parse(filename); - if (diskCachePath->exists(parsedPath)) { - 
try { - auto file = diskCachePath->openFile(parsedPath); - auto blob = file->readAllBytes(); - - // Decompress the package - kj::ArrayInputStream ais(blob); - kj::GzipInputStream gzip(ais); - auto decompressed = gzip.readAllBytes(); - - // Store in memory - pyodidePackageManager.setPyodidePackageData(kj::str(path), kj::mv(decompressed)); - co_return; - } catch (kj::Exception& e) { - // Something went wrong while reading or processing the file - KJ_LOG(WARNING, "Failed to read or process package from disk cache", path, e); - } - } - } - - // Need to fetch from network - kj::HttpHeaderTable table; - kj::TlsContext::Options tlsOptions; - tlsOptions.useSystemTrustStore = true; - kj::Own tlsContext = kj::heap(kj::mv(tlsOptions)); - - auto tlsNetwork = tlsContext->wrapNetwork(network); - auto client = kj::newHttpClient(timer, table, network, *tlsNetwork); - - kj::String url = kj::str(api::pyodide::PYTHON_PACKAGES_URL, path); - - auto maybeBody = co_await downloadPackageWithRetry(*client, timer, table, url, path); - KJ_IF_SOME(body, maybeBody) { - // Successfully downloaded the package - // Save the compressed data to disk cache (if enabled) - KJ_IF_SOME(diskCachePath, pyConfig.packageDiskCacheRoot) { - try { - auto parsedPath = kj::Path::parse(path); - auto file = diskCachePath->openFile(parsedPath, - kj::WriteMode::CREATE | kj::WriteMode::MODIFY | kj::WriteMode::CREATE_PARENT); - file->writeAll(body); - } catch (kj::Exception& e) { - KJ_LOG(WARNING, "Failed to write package to disk cache", e); - } - } - - // Now decompress and store in memory - kj::ArrayInputStream ais(body); - kj::GzipInputStream gzip(ais); - auto decompressed = gzip.readAllBytes(); - - pyodidePackageManager.setPyodidePackageData(kj::str(path), kj::mv(decompressed)); - } else { - KJ_FAIL_ASSERT("Failed to download package after all retry attempts", path); - } - - co_return; -} - -kj::Promise fetchPyodidePackages(const api::pyodide::PythonConfig& pyConfig, - const api::pyodide::PyodidePackageManager& 
pyodidePackageManager, - kj::ArrayPtr pythonRequirements, - workerd::PythonSnapshotRelease::Reader pythonSnapshotRelease, - kj::Network& network, - kj::Timer& timer) { - auto packagesVersion = pythonSnapshotRelease.getPackages(); - - auto pyodideLock = api::pyodide::getPyodideLock(pythonSnapshotRelease); - if (pyodideLock == kj::none) { - KJ_LOG(WARNING, "No lock file found for Python packages version", packagesVersion); - co_return; - } - - auto filenames = api::pyodide::getPythonPackageFiles( - KJ_ASSERT_NONNULL(pyodideLock), pythonRequirements, packagesVersion); - - kj::Vector> promises(filenames.size()); - for (const auto& filename: filenames) { - promises.add(loadPyodidePackage( - pyConfig, pyodidePackageManager, packagesVersion, filename, network, timer)); - } - - co_await kj::joinPromisesFailFast(promises.releaseAsArray()); -} - } // namespace workerd::server diff --git a/src/workerd/server/pyodide.h b/src/workerd/server/pyodide.h index 2f4c5ee6aad..ab66407faca 100644 --- a/src/workerd/server/pyodide.h +++ b/src/workerd/server/pyodide.h @@ -15,18 +15,15 @@ namespace workerd::server { -// Used to preload the Pyodide bundle during workerd startup +// Used to preload the Pyodide bundle during workerd startup. +// +// `integrity` is a subresource-integrity-style checksum ("sha256-") used to verify the +// integrity of the bundle when downloaded from the network. It may be empty (e.g. for the "dev" +// version), in which case no verification is performed. 
kj::Promise> fetchPyodideBundle( const api::pyodide::PythonConfig& pyConfig, kj::String version, - kj::Network& network, - kj::Timer& timer); - -// Preloads all required Python packages for a worker -kj::Promise fetchPyodidePackages(const api::pyodide::PythonConfig& pyConfig, - const api::pyodide::PyodidePackageManager& pyodidePackageManager, - kj::ArrayPtr pythonRequirements, - workerd::PythonSnapshotRelease::Reader pythonSnapshotRelease, + kj::StringPtr integrity, kj::Network& network, kj::Timer& timer); diff --git a/src/workerd/server/server-test.c++ b/src/workerd/server/server-test.c++ index 33a4f889a1c..248748b4826 100644 --- a/src/workerd/server/server-test.c++ +++ b/src/workerd/server/server-test.c++ @@ -32,7 +32,8 @@ namespace { else \ KJ_FAIL_EXPECT_AT(location, "failed: expected " #cond, _kjCondition, ##__VA_ARGS__) -jsg::V8System v8System; +jsg::V8System v8System({"--expose-gc"_kj}); + // This can only be created once per process, so we have to put it at the top level. const bool verboseLog = ([]() { @@ -5076,6 +5077,9 @@ KJ_TEST("Server: Durable Object facets") { ` ` // Delete bar, which recursively deletes its children. ` this.ctx.facets.delete("bar"); + ` + ` // Delete a facet name that never existed, to make sure this doesn't throw. 
+ ` this.ctx.facets.delete("no-such-facet-ever"); ` } else if (request.url.endsWith("/props")) { ` results.push(JSON.stringify(this.ctx.props)); ` @@ -5266,6 +5270,254 @@ KJ_TEST("Server: Durable Object facets") { } } +KJ_TEST("Server: Durable Object facet cloning") { + kj::StringPtr config = R"(( + services = [ + ( name = "hello", + worker = ( + compatibilityDate = "2026-04-01", + modules = [ + ( name = "main.js", + esModule = + `import { DurableObject } from "cloudflare:workers"; + `export default { + ` async fetch(request, env, ctx) { + ` let id = ctx.exports.MyActorClass.idFromName("name"); + ` let actor = ctx.exports.MyActorClass.get(id); + ` return await actor.fetch(request); + ` } + `} + `export class MyActorClass extends DurableObject { + ` async fetch(request) { + ` let url = new URL(request.url); + ` switch (url.pathname) { + ` case "/setup": { + ` // Create "src" with a value, and "src" has children "a" and "b". + ` let src = this.ctx.facets.get("src", () => ({class: this.env.NESTED})); + ` await src.setValue(10); + ` await src.setChildValue("a", 100); + ` await src.setChildValue("b", 200); + ` return new Response("ok"); + ` } + ` case "/clone-basic": { + ` // Clone src to dst. Verify dst has matching data. + ` this.ctx.facets.clone("src", "dst"); + ` let dst = this.ctx.facets.get("dst", () => ({class: this.env.NESTED})); + ` let dstVal = await dst.getValue(); + ` let dstA = await dst.getChildValue("a"); + ` let dstB = await dst.getChildValue("b"); + ` return new Response(`dst=${dstVal} dst.a=${dstA} dst.b=${dstB}`); + ` } + ` case "/verify-src-unchanged": { + ` // The original src should still have its data, untouched. 
+ ` let src = this.ctx.facets.get("src", () => ({class: this.env.NESTED})); + ` let srcVal = await src.getValue(); + ` let srcA = await src.getChildValue("a"); + ` let srcB = await src.getChildValue("b"); + ` return new Response(`src=${srcVal} src.a=${srcA} src.b=${srcB}`); + ` } + ` case "/mutate-dst-then-check-src": { + ` // Mutating dst should not affect src. + ` let dst = this.ctx.facets.get("dst", () => ({class: this.env.NESTED})); + ` await dst.setValue(999); + ` await dst.setChildValue("a", 888); + ` let src = this.ctx.facets.get("src", () => ({class: this.env.NESTED})); + ` let srcVal = await src.getValue(); + ` let srcA = await src.getChildValue("a"); + ` return new Response(`src=${srcVal} src.a=${srcA}`); + ` } + ` case "/clone-replaces-existing": { + ` // Create a populated `target`, then clone src over it. The previous + ` // data of `target` should be gone. + ` let target = this.ctx.facets.get("target", + ` () => ({class: this.env.NESTED})); + ` await target.setValue(42); + ` await target.setChildValue("oldChild", 77); + ` this.ctx.facets.clone("src", "target"); + ` // Look up "target" again -- the previous handle was aborted. + ` let target2 = this.ctx.facets.get("target", + ` () => ({class: this.env.NESTED})); + ` let val = await target2.getValue(); + ` let a = await target2.getChildValue("a"); + ` // The old child "oldChild" should NOT have data (it was deleted). + ` let oldChild = await target2.getChildValue("oldChild"); + ` return new Response(`target=${val} target.a=${a} oldChild=${oldChild}`); + ` } + ` case "/clone-from-nonexistent-deletes-dst": { + ` // Populate dst, then clone from a never-existed src. dst should be + ` // empty afterwards (matching DO semantics: no-data is indistinguishable + ` // from never-ran). 
+ ` let dst = this.ctx.facets.get("toBeWiped", + ` () => ({class: this.env.NESTED})); + ` await dst.setValue(123); + ` await dst.setChildValue("c", 456); + ` this.ctx.facets.clone("never-existed", "toBeWiped"); + ` let dst2 = this.ctx.facets.get("toBeWiped", + ` () => ({class: this.env.NESTED})); + ` let val = await dst2.getValue(); + ` let c = await dst2.getChildValue("c"); + ` return new Response(`val=${val} c=${c}`); + ` } + ` case "/clone-self-aborts-but-preserves": { + ` // Set up "self": create it, store some data, and warm up its in-memory + ` // state by setting a non-persisted property. + ` let self = this.ctx.facets.get("self", + ` () => ({class: this.env.NESTED})); + ` await self.setValue(7); + ` await self.setMemoryOnly("alive"); + ` // Confirm the in-memory property is set. + ` let beforeMem = await self.getMemoryOnly(); + ` // Clone "self" onto itself. This should abort the running facet but + ` // leave its storage untouched. + ` this.ctx.facets.clone("self", "self"); + ` // Old handle should now throw on use. + ` let oldThrew = false; + ` try { await self.getValue(); } catch (e) { oldThrew = true; } + ` // Get a fresh handle. The persisted data should still be intact, but + ` // the in-memory property should be cleared (fresh instance). + ` let self2 = this.ctx.facets.get("self", + ` () => ({class: this.env.NESTED})); + ` let val = await self2.getValue(); + ` let mem = await self2.getMemoryOnly(); + ` return new Response( + ` `beforeMem=${beforeMem} oldThrew=${oldThrew} val=${val} mem=${mem}`); + ` } + ` case "/read-restored": { + ` // Used after a server restart to verify dst's data persisted. 
+ ` let dst = this.ctx.facets.get("dst", () => ({class: this.env.NESTED})); + ` let v = await dst.getValue(); + ` let a = await dst.getChildValue("a"); + ` let b = await dst.getChildValue("b"); + ` return new Response(`dst=${v} dst.a=${a} dst.b=${b}`); + ` } + ` } + ` return new Response("bad url", {status: 404}); + ` } + `} + `export class NestedFacet extends DurableObject { + ` async setValue(v) { + ` await this.ctx.storage.put("value", v); + ` } + ` async getValue() { + ` return (await this.ctx.storage.get("value")) ?? null; + ` } + ` async setChildValue(name, v) { + ` let child = this.ctx.facets.get(name, () => ({class: this.env.LEAF})); + ` await child.setValue(v); + ` } + ` async getChildValue(name) { + ` let child = this.ctx.facets.get(name, () => ({class: this.env.LEAF})); + ` return await child.getValue(); + ` } + ` setMemoryOnly(v) { this.memoryOnly = v; } + ` getMemoryOnly() { return this.memoryOnly ?? null; } + `} + `export class LeafFacet extends DurableObject { + ` async setValue(v) { + ` await this.ctx.storage.put("value", v); + ` } + ` async getValue() { + ` return (await this.ctx.storage.get("value")) ?? null; + ` } + `} + ) + ], + bindings = [ + (name = "NESTED", + durableObjectClass = (name = "hello", entrypoint = "NestedFacet")), + (name = "LEAF", + durableObjectClass = (name = "hello", entrypoint = "LeafFacet")) + ], + durableObjectNamespaces = [ + ( className = "MyActorClass", + uniqueKey = "mykey", + ) + ], + durableObjectStorage = (localDisk = "my-disk") + ) + ), + ( name = "my-disk", + disk = ( + path = "../../do-storage", + writable = true, + ) + ), + ], + sockets = [ + ( name = "main", + address = "test-addr", + service = "hello" + ) + ] + ))"_kj; + + // A directory outside of the test scope that can be reused across multiple TestServers. 
+ auto dir = kj::newInMemoryDirectory(kj::nullClock()); + + { + TestServer test(config); + + test.root->transfer( + kj::Path({"do-storage"_kj}), kj::WriteMode::CREATE, *dir, nullptr, kj::TransferMode::LINK); + + test.server.allowExperimental(); + test.start(); + auto conn = test.connect("test-addr"); + + // Setup: create src with children a and b. + conn.httpGet200("/setup", "ok"); + + // Basic clone with descendant subtree. + conn.httpGet200("/clone-basic", "dst=10 dst.a=100 dst.b=200"); + + // Source is untouched after the clone. + conn.httpGet200("/verify-src-unchanged", "src=10 src.a=100 src.b=200"); + + // Mutating dst should not bleed back into src. + conn.httpGet200("/mutate-dst-then-check-src", "src=10 src.a=100"); + + // dst itself was mutated ("/read-restored" just reads dst; child "b" keeps its cloned value). + conn.httpGet200("/read-restored", "dst=999 dst.a=888 dst.b=200"); + + // Clone over an existing facet: previous data is gone. + conn.httpGet200("/clone-replaces-existing", "target=10 target.a=100 oldChild=null"); + + // Clone from a never-existed src acts as a delete on dst. + conn.httpGet200("/clone-from-nonexistent-deletes-dst", "val=null c=null"); + + // Cloning a facet onto itself aborts it (clearing in-memory state) but does not touch its + // stored data. + conn.httpGet200( + "/clone-self-aborts-but-preserves", "beforeMem=alive oldThrew=true val=7 mem=null"); + } + + // Verify a few key on-disk properties. + auto nsDir = dir->openSubdir(kj::Path({"mykey"})); + // The root, src (id 1), src/a, src/b, dst (some id), dst/a, dst/b, target, target/a, target/b + // should all have files. We don't assume specific IDs beyond src=1, but we do verify that + // the index and at least the first few facet files exist.
+ KJ_EXPECT(nsDir->exists( + kj::Path({"3652ef6221834806dc8df802d1d216e27b7d07e0a6b7adf6cfdaeec90f06459a.sqlite"}))); + KJ_EXPECT(nsDir->exists( + kj::Path({"3652ef6221834806dc8df802d1d216e27b7d07e0a6b7adf6cfdaeec90f06459a.facets"}))); + KJ_EXPECT(nsDir->exists( + kj::Path({"3652ef6221834806dc8df802d1d216e27b7d07e0a6b7adf6cfdaeec90f06459a.1.sqlite"}))); + + // After a server restart, clone destinations should still be readable. + { + TestServer test(config); + + test.root->transfer( + kj::Path({"do-storage"_kj}), kj::WriteMode::CREATE, *dir, nullptr, kj::TransferMode::LINK); + + test.server.allowExperimental(); + test.start(); + auto conn = test.connect("test-addr"); + // We previously mutated dst to value=999 and dst.a=888, dst.b unchanged at 200. + conn.httpGet200("/read-restored", "dst=999 dst.a=888 dst.b=200"); + } +} + KJ_TEST("Server: Durable Object facet limits") { kj::StringPtr config = R"(( services = [ @@ -5318,6 +5570,22 @@ KJ_TEST("Server: Durable Object facet limits") { ` return new Response(e.constructor.name + ": " + e.message); ` } ` } + ` case "/clone-src-name-too-long": { + ` try { + ` this.ctx.facets.clone("x".repeat(257), "ok"); + ` return new Response("no error"); + ` } catch (e) { + ` return new Response(e.constructor.name + ": " + e.message); + ` } + ` } + ` case "/clone-dst-name-too-long": { + ` try { + ` this.ctx.facets.clone("ok", "x".repeat(257)); + ` return new Response("no error"); + ` } catch (e) { + ` return new Response(e.constructor.name + ": " + e.message); + ` } + ` } ` case "/depth-ok": { ` // Create 3 levels of facets below root = 4 total (the max). 
` let facet = this.ctx.facets.get("a", @@ -5398,6 +5666,10 @@ KJ_TEST("Server: Durable Object facet limits") { "/abort-name-too-long", "TypeError: Facet name is too long (max 256 characters)."); conn.httpGet200( "/delete-name-too-long", "TypeError: Facet name is too long (max 256 characters)."); + conn.httpGet200( + "/clone-src-name-too-long", "TypeError: Facet name is too long (max 256 characters)."); + conn.httpGet200( + "/clone-dst-name-too-long", "TypeError: Facet name is too long (max 256 characters)."); // Depth limit. conn.httpGet200("/depth-ok", "ok"); @@ -6398,44 +6670,105 @@ KJ_TEST("Server: workerdDebugPort WebSocket passthrough via WorkerEntrypoint") { wsConn.send(kj::str("\x81\x05", testMessage2)); wsConn.recvWebSocket("echo:world"); } -// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-9: a wrapped binding whose moduleName -// does not resolve to any internal module must produce a config error, not a fatal assertion. -// Before the fix, this config would hit KJ_ASSERT(!value.IsEmpty()) in compileGlobals() -// and abort. After the fix, the unresolved module is rejected with KJ_FAIL_REQUIRE which -// produces a recoverable config error containing the module name. -KJ_TEST("Server: wrapped binding with unresolvable module produces config error") { - // Enable predictable mode so the internal error reference ID is deterministic. - setPredictableModeForTest(); - TestServer test(singleWorker(R"(( - compatibilityDate = "2024-01-01", - modules = [ - ( name = "main.js", - esModule = - `export default { - ` async fetch(request) { - ` return new Response("should not reach here"); - ` } - `} - ) - ], - bindings = [ - ( name = "brokenBinding", - wrapped = ( - moduleName = "nonexistent:missing-module", - innerBindings = [ (name = "inner", text = "value") ] +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-100: heap use-after-free in ActorContainer +// when a facet is aborted while a request is pending on the async startup callback. 
The +// constructor's .then([this]) continuation and the getActor() coroutine both hold references +// to the ForkHub independently of the ActorContainer refcount. Without kj::addRef(*this) in +// getActor()/startRequest(), aborting the facet + dropping JS references can free the +// ActorContainer while the coroutine is still suspended, leading to a UAF when the startup +// promise resolves. With the fix, the pending request rejects cleanly with the abort error. +KJ_TEST("Server: DO facet abort during pending startup") { + kj::StringPtr config = R"(( + services = [ + ( name = "hello", + worker = ( + compatibilityDate = "2026-04-01", + modules = [ + ( name = "main.js", + esModule = + `import { DurableObject } from "cloudflare:workers"; + ` + `let startupResolve; + ` + `export default { + ` async fetch(request, env, ctx) { + ` let id = ctx.exports.Parent.idFromName("test"); + ` let actor = ctx.exports.Parent.get(id); + ` return await actor.fetch(request); + ` } + `} + ` + `export class Parent extends DurableObject { + ` async fetch(request) { + ` // Create a facet with a startup callback that we control. + ` // The callback returns a promise that won't resolve until we say so. + ` let startupPromise = new Promise(resolve => { startupResolve = resolve; }); + ` + ` let facet = this.ctx.facets.get("child", async () => { + ` await startupPromise; + ` return { class: this.ctx.exports.Child }; + ` }); + ` + ` // Send an RPC to the facet. This will suspend in getActor() waiting + ` // for the startup callback to resolve. + ` let rpcPromise = facet.ping().catch(err => "caught: " + err.message); + ` + ` // Abort the facet while the RPC is pending. This drops one ref on the + ` // ActorContainer (from the parent's facets map). + ` this.ctx.facets.abort("child", new Error("aborted during startup")); + ` facet = null; + ` gc(); + ` // Now resolve the startup callback. Without the fix, the .then([this]) + ` // continuation would run on freed memory (UAF). 
With the fix, the + ` // coroutine holds a self-ref so the container stays alive, and + ` // requireNotBroken() after co_await rejects the request cleanly. + ` startupResolve(); + ` + ` let result = await rpcPromise; + ` return new Response(result); + ` } + `} + ` + `export class Child extends DurableObject { + ` ping() { return "pong"; } + `} + ) + ], + durableObjectNamespaces = [ + ( className = "Parent", + uniqueKey = "parentkey", + ) + ], + durableObjectStorage = (localDisk = "my-disk") + ) + ), + ( name = "my-disk", + disk = ( + path = "../../do-storage", + writable = true, ) + ), + ], + sockets = [ + ( name = "main", + address = "test-addr", + service = "hello" ) ] - ))"_kj)); + ))"_kj; + + TestServer test(config); + test.root->openSubdir(kj::Path({"do-storage"_kj}), kj::WriteMode::CREATE); + test.server.allowExperimental(); + test.start(); + + auto conn = test.connect("test-addr"); + conn.sendHttpGet("/"); - // The KJ_FAIL_REQUIRE exception propagates through compileGlobals, gets caught by the - // worker constructor, converted to a JS "internal error" with a predictable reference ID, - // and reported as a config error. The jsg layer logs the original exception at ERROR level - // (a single log line containing both the exception description and "jsgInternalError"). - KJ_EXPECT_LOG(ERROR, "jsgInternalError"); - test.expectErrors("service hello: Uncaught Error: internal error;" - " reference = 0123456789abcdefghijklmn\n"_kj); + // The response should contain the caught abort error message, proving the request + // was rejected cleanly rather than crashing with a UAF. 
+ conn.recvHttp200("caught: aborted during startup"); } } // namespace diff --git a/src/workerd/server/server.c++ b/src/workerd/server/server.c++ index a298e80cad4..8f7b3cd4442 100644 --- a/src/workerd/server/server.c++ +++ b/src/workerd/server/server.c++ @@ -306,2918 +306,3085 @@ Server::~Server() noexcept { // ======================================================================================= -kj::Own Server::makeTlsContext(config::TlsOptions::Reader conf) { - kj::TlsContext::Options options; - - struct Attachments { - kj::Maybe keypair; - kj::Array trustedCerts; - }; - auto attachments = kj::heap(); +class Server::ActorNamespace final { + public: + ActorNamespace(kj::Own actorClass, + const ActorConfig& config, + const kj::Clock& clock, + kj::Timer& timer, + capnp::ByteStreamFactory& byteStreamFactory, + ChannelTokenHandler& channelTokenHandler, + kj::Network& dockerNetwork, + kj::Maybe dockerPath, + kj::Maybe containerEgressInterceptorImage, + kj::TaskSet& waitUntilTasks) + : actorClass(kj::mv(actorClass)), + config(config), + clock(clock), + timer(timer), + byteStreamFactory(byteStreamFactory), + channelTokenHandler(channelTokenHandler), + dockerNetwork(dockerNetwork), + dockerPath(dockerPath), + containerEgressInterceptorImage(containerEgressInterceptorImage), + waitUntilTasks(waitUntilTasks) {} + + void link(kj::Maybe serviceActorStorage) { + KJ_IF_SOME(dir, serviceActorStorage) { + KJ_IF_SOME(d, config.tryGet()) { + this->actorStorage.emplace( + dir.openSubdir(kj::Path({d.uniqueKey}), kj::WriteMode::CREATE | kj::WriteMode::MODIFY)); + } + } - if (conf.hasKeypair()) { - auto pairConf = conf.getKeypair(); - options.defaultKeypair = attachments->keypair.emplace( - kj::TlsKeypair{.privateKey = kj::TlsPrivateKey(pairConf.getPrivateKey()), - .certificate = kj::TlsCertificate(pairConf.getCertificateChain())}); - } + KJ_IF_SOME(d, config.tryGet()) { + auto idFactory = kj::heap(d.uniqueKey); + AlarmScheduler::GetActorFn getActor = + [this, idFactory = 
kj::mv(idFactory)]( + kj::String idStr) mutable -> kj::Own { + Worker::Actor::Id id = idFactory->idFromString(kj::mv(idStr)); + auto actorContainer = this->getActorContainer(kj::mv(id)); + return newPromisedWorkerInterface( + actorContainer->startRequest({}).attach(actorContainer->addRef())); + }; - options.verifyClients = conf.getRequireClientCerts(); - options.useSystemTrustStore = conf.getTrustBrowserCas(); + KJ_IF_SOME(as, this->actorStorage) { + // Create per-namespace alarm scheduler backed by on-disk storage in the + // namespace directory, alongside the per-actor .sqlite files. + this->ownAlarmScheduler = kj::heap( + clock, timer, as.vfs, kj::Path({"metadata.sqlite"}), kj::mv(getActor)); + } else { + // No on-disk storage -- create an in-memory alarm scheduler. + auto memDir = kj::newInMemoryDirectory(clock); + auto vfs = kj::heap(*memDir); + this->ownAlarmScheduler = kj::heap( + clock, timer, *vfs, kj::Path({"metadata.sqlite"}), kj::mv(getActor)) + .attach(kj::mv(vfs), kj::mv(memDir)); + } - auto trustList = conf.getTrustedCertificates(); - if (trustList.size() > 0) { - attachments->trustedCerts = KJ_MAP(cert, trustList) { return kj::TlsCertificate(cert); }; - options.trustedCertificates = attachments->trustedCerts; + this->alarmScheduler = *KJ_ASSERT_NONNULL(ownAlarmScheduler); + } } - switch (conf.getMinVersion()) { - case config::TlsOptions::Version::GOOD_DEFAULT: - // Don't change. 
- goto validVersion; - case config::TlsOptions::Version::SSL3: - options.minVersion = kj::TlsVersion::SSL_3; - goto validVersion; - case config::TlsOptions::Version::TLS1_DOT0: - options.minVersion = kj::TlsVersion::TLS_1_0; - goto validVersion; - case config::TlsOptions::Version::TLS1_DOT1: - options.minVersion = kj::TlsVersion::TLS_1_1; - goto validVersion; - case config::TlsOptions::Version::TLS1_DOT2: - options.minVersion = kj::TlsVersion::TLS_1_2; - goto validVersion; - case config::TlsOptions::Version::TLS1_DOT3: - options.minVersion = kj::TlsVersion::TLS_1_3; - goto validVersion; + const ActorConfig& getConfig() { + return config; } - reportConfigError(kj::str("Encountered unknown TlsOptions::minVersion setting. Was the " - "config compiled with a newer version of the schema?")); -validVersion: - if (conf.hasCipherList()) { - options.cipherList = conf.getCipherList(); + kj::Own getActorChannel(Worker::Actor::Id id) { + KJ_IF_SOME(doId, id.tryGet>()) { + KJ_IF_SOME(name, doId->getName()) { + // To emulate production, we preserve the name on the id, but only if it's <= 1024 bytes. + if (name.size() > 1024) { + auto* idImpl = dynamic_cast(doId.get()); + KJ_ASSERT(idImpl != nullptr, "Unexpected ActorId type?"); + idImpl->clearName(); + } + } + } + + return kj::refcounted(getActorContainer(kj::mv(id))); } - return kj::heap(kj::mv(options)); -} + class ActorContainer; + using ActorMap = kj::HashMap>; -kj::Promise> Server::makeTlsNetworkAddress( - config::TlsOptions::Reader conf, - kj::StringPtr addrStr, - kj::Maybe certificateHost, - uint defaultPort) { - auto context = makeTlsContext(conf); + // ActorContainer mostly serves as a wrapper around Worker::Actor. + // We use it to associate a HibernationManager with the Worker::Actor, since the + // Worker::Actor can be destroyed during periods of prolonged inactivity. + // + // We use a RequestTracker to track strong references to this ActorContainer's Worker::Actor. 
+ // Once there are no Worker::Actor's left (excluding our own), `inactive()` is triggered and we + // initiate the eviction of the Durable Object. If no requests arrive in the next 10 seconds, + // the DO is evicted, otherwise we cancel the eviction task. + class ActorContainer final: public RequestTracker::Hooks, + public kj::Refcounted, + public Worker::Actor::FacetManager { + public: + // Information which is needed before start() can be called, but may not be available yet + // when the ActorContainer is constructed (especially in the case of facets). + struct ClassAndId { + kj::Own actorClass; + Worker::Actor::Id id; + + ClassAndId(kj::Own actorClass, Worker::Actor::Id id) + : actorClass(kj::mv(actorClass)), + id(kj::mv(id)) {} + }; - KJ_IF_SOME(h, certificateHost) { - auto parsed = co_await network.parseAddress(addrStr, defaultPort); - co_return context->wrapAddress(kj::mv(parsed), h).attach(kj::mv(context)); - } + ActorContainer(kj::String key, + ActorNamespace& ns, + kj::Maybe parent, + kj::OneOf> classAndIdParam, + kj::Timer& timer) + : key(kj::mv(key)), + tracker(kj::refcounted(*this)), + ns(ns), + root(parent.map([](ActorContainer& p) -> ActorContainer& { return p.root; }) + .orDefault(*this)), + parent(parent), + timer(timer), + lastAccess(timer.now()) { + KJ_SWITCH_ONEOF(classAndIdParam) { + KJ_CASE_ONEOF(value, ClassAndId) { + // `classAndId` is immediately available. + classAndId = kj::mv(value); + } + KJ_CASE_ONEOF(promise, kj::Promise) { + // We are receiving a promise for a `ClassAndId` to come later. Arrange to initialize + // `classAndId` from the promise. Create a `ForkedPromise` that resolves when + // initialization is complete. + classAndId = promise + .then([this](ClassAndId value) { + auto& forked = KJ_ASSERT_NONNULL(classAndId.tryGet>()); + if (!forked.hasBranches()) { + // HACK: We're about to replace the ForkedPromise but it has no one waiting on it, + // so we'd end up cancelling ourselves. 
Add a branch and detach it so this doesn't + // happen. + forked.addBranch().detach([](auto&&) {}); + } - // Wrap the `Network` itself so we can use the TLS implementation's `parseAddress()` to extract - // the authority from the address. - auto tlsNetwork = context->wrapNetwork(network); - auto parsed = co_await network.parseAddress(addrStr, defaultPort); - co_return parsed.attach(kj::mv(context)); -} + classAndId = kj::mv(value); + }).fork(); + } + } + } -// ======================================================================================= + ~ActorContainer() noexcept(false) { + // Shutdown the tracker so we don't use active/inactive hooks anymore. + tracker->shutdown(); -// Helper to apply config::HttpOptions. -class Server::HttpRewriter { - // TODO(beta): Do we want to automatically add `Date`, `Server` (to outgoing responses), - // `User-Agent` (to outgoing requests), etc.? + for (auto& facet: facets) { + facet.value->abort(kj::none); + } - public: - HttpRewriter( - config::HttpOptions::Reader httpOptions, kj::HttpHeaderTable::Builder& headerTableBuilder) - : style(httpOptions.getStyle()), - requestInjector(httpOptions.getInjectRequestHeaders(), headerTableBuilder), - responseInjector(httpOptions.getInjectResponseHeaders(), headerTableBuilder) { - if (httpOptions.hasForwardedProtoHeader()) { - forwardedProtoHeader = headerTableBuilder.add(httpOptions.getForwardedProtoHeader()); + KJ_IF_SOME(a, actor) { + // Unknown broken reason. + auto reason = 0; + a->shutdown(reason); + } + + // Drop the container client reference + // If setInactivityTimeout() was called, there's still a timer holding a reference + // If not, this may be the last reference and the ContainerClient destructor will run + containerClient = kj::none; } - if (httpOptions.hasCfBlobHeader()) { - cfBlobHeader = headerTableBuilder.add(httpOptions.getCfBlobHeader()); + + void active() override { + // We're handling a new request, cancel the eviction promise. 
+ shutdownTask = kj::none; } - if (httpOptions.hasCapnpConnectHost()) { - capnpConnectHost = httpOptions.getCapnpConnectHost(); + + void inactive() override { + // Durable objects are evictable by default. + bool isEvictable = true; + KJ_SWITCH_ONEOF(ns.config) { + KJ_CASE_ONEOF(c, Durable) { + isEvictable = c.isEvictable; + } + KJ_CASE_ONEOF(c, Ephemeral) { + isEvictable = c.isEvictable; + } + } + if (isEvictable) { + KJ_IF_SOME(a, actor) { + KJ_IF_SOME(m, a->getHibernationManager()) { + // The hibernation manager needs to survive actor eviction and be passed to the actor + // constructor next time we create it. + manager = m.addRef(); + } + } + shutdownTask = + handleShutdown().eagerlyEvaluate([](kj::Exception&& e) { KJ_LOG(ERROR, e); }); + } } - } - bool hasCfBlobHeader() { - return cfBlobHeader != kj::none; - } + kj::StringPtr getKey() { + return key; + } + RequestTracker& getTracker() { + return *tracker; + } + kj::Maybe> tryGetManagerRef() { + return manager.map( + [&](kj::Own& m) { return kj::addRef(*m); }); + } + void updateAccessTime() { + lastAccess = timer.now(); + KJ_IF_SOME(p, parent) { + p.updateAccessTime(); + } + } + kj::TimePoint getLastAccess() { + return lastAccess; + } - bool needsRewriteRequest() { - return style == config::HttpOptions::Style::HOST || hasCfBlobHeader() || - !requestInjector.empty(); - } + bool hasClients() { + // If anyone holds a reference to the container other than the actor map, then it must be + // a client. + if (isShared()) return true; + for (auto& facet: facets) { + if (facet.value->hasClients()) return true; + } + return false; + } + kj::Own addRef() { + return kj::addRef(*this); + } - // Attach this to the promise returned by request(). - struct Rewritten { - kj::Own headers; - kj::String ownUrl; - }; + // Get the actor, starting it if it's not already running. 
+ kj::Promise> getActor() { + requireNotBroken(); - Rewritten rewriteOutgoingRequest( - kj::StringPtr& url, const kj::HttpHeaders& headers, kj::Maybe cfBlobJson) { - Rewritten result{kj::heap(headers.cloneShallow()), nullptr}; + if (actor == kj::none) { + KJ_IF_SOME(promise, classAndId.tryGet>()) { + co_await promise; + requireNotBroken(); + } - if (style == config::HttpOptions::Style::HOST) { - auto parsed = kj::Url::parse(url, kj::Url::HTTP_PROXY_REQUEST, - kj::Url::Options{.percentDecode = false, .allowEmpty = true}); - result.headers->set(kj::HttpHeaderId::HOST, kj::mv(parsed.host)); - KJ_IF_SOME(h, forwardedProtoHeader) { - result.headers->set(h, kj::mv(parsed.scheme)); - } - url = result.ownUrl = parsed.toString(kj::Url::HTTP_REQUEST); - } + auto& [actorClass, id] = KJ_ASSERT_NONNULL(classAndId.tryGet()); - KJ_IF_SOME(h, cfBlobHeader) { - KJ_IF_SOME(b, cfBlobJson) { - result.headers->setPtr(h, b); - } else { - result.headers->unset(h); + KJ_IF_SOME(promise, actorClass->whenReady()) { + co_await promise; + requireNotBroken(); + } + + // A concurrent request could have started the actor, so check again. + if (actor == kj::none) { + start(actorClass, id); + } } + + co_return KJ_ASSERT_NONNULL(actor)->addRef(); } - requestInjector.apply(*result.headers); + // Callers should `attach` a self-ref to this promise as it can outlive `ActorContainer` + // The ForkBranch created by `co_await classAndId.tryGet>()` keeps + // the `.then([this])` continuation set up in the constructor alive independently of the + // `ActorContainer` refcount. Without this self-ref, the `ActorContainer` can be freed + // (via ctx.facets.abort() + Fetcher GC) while the `getActor()` coroutine is suspended + // and the continuation would later run on a dangling `this`. + kj::Promise> startRequest( + IoChannelFactory::SubrequestMetadata metadata) { + auto actor = co_await getActor(); - return result; - } + if (ns.cleanupTask == kj::none) { + // Need to start the cleanup loop. 
+ ns.cleanupTask = ns.cleanupLoop(); + } - kj::Maybe rewriteIncomingRequest(kj::StringPtr& url, - kj::StringPtr physicalProtocol, - const kj::HttpHeaders& headers, - kj::Maybe& cfBlobJson) { - Rewritten result{kj::heap(headers.cloneShallow()), nullptr}; + // Since `getActor()` completed, `classAndId` must be resolved. + auto& actorClass = KJ_ASSERT_NONNULL(classAndId.tryGet()).actorClass; - if (style == config::HttpOptions::Style::HOST) { - auto parsed = kj::Url::parse( - url, kj::Url::HTTP_REQUEST, kj::Url::Options{.percentDecode = false, .allowEmpty = true}); - parsed.host = kj::str(KJ_UNWRAP_OR_RETURN(headers.get(kj::HttpHeaderId::HOST), kj::none)); + co_return actorClass->startRequest(kj::mv(metadata), kj::mv(actor)) + .attach(kj::defer([self = kj::addRef(*this)]() mutable { self->updateAccessTime(); })); + } - KJ_IF_SOME(h, forwardedProtoHeader) { - KJ_IF_SOME(s, headers.get(h)) { - parsed.scheme = kj::str(s); - result.headers->unset(h); + // Abort this actor, shutting it down. + // + // It is the caller's responsibility to ensure that the aborted ActorContainer has been + // removed from any maps that would cause it to receive further traffic, since any further + // requests will be expected to fail. abort() does NOT attempt to remove the ActorContainer + // from the parent facet map since at most call sites it makes more sense to handle this + // directly. + void abort(kj::Maybe reason) { + if (brokenReason != kj::none) return; + + KJ_IF_SOME(a, actor) { + KJ_IF_SOME(r, reason) { + a->abort(r); + } else { + // Unknown broken reason. 
+ a->shutdown(0, kj::none); } } - if (parsed.scheme == nullptr) parsed.scheme = kj::str(physicalProtocol); + for (auto& facet: facets) { + facet.value->abort(reason); + } - url = result.ownUrl = parsed.toString(kj::Url::HTTP_PROXY_REQUEST); - } + onBrokenTask = kj::none; + shutdownTask = kj::none; + manager = kj::none; + tracker->shutdown(); + actor = kj::none; + containerClient = kj::none; - KJ_IF_SOME(h, cfBlobHeader) { - KJ_IF_SOME(b, headers.get(h)) { - cfBlobJson = kj::str(b); - result.headers->unset(h); + KJ_IF_SOME(r, reason) { + brokenReason = r.clone(); + } else { + brokenReason = JSG_KJ_EXCEPTION(FAILED, Error, "Actor aborted for unknown reason."); } } - requestInjector.apply(*result.headers); + // Resets the actor's SQLite database while the connection is still open, + // avoiding file-locking issues on Windows. + void resetStorage() { + KJ_IF_SOME(a, actor) { + KJ_IF_SOME(cache, a->getPersistent()) { + KJ_IF_SOME(db, cache.getSqliteDatabase()) { + kj::runCatchingExceptions([&]() { db.reset(); }); + } + } + } + } - return result; - } + kj::Own getFacetContainer( + kj::String childKey, kj::Function()> getStartInfo) { + auto makeContainer = [&]() { + auto promise = callFacetStartCallback(kj::mv(getStartInfo)); + return kj::refcounted(kj::mv(childKey), ns, *this, kj::mv(promise), timer); + }; - bool needsRewriteResponse() { - return !responseInjector.empty(); - } + bool isNew = false; - void rewriteResponse(kj::HttpHeaders& headers) { - responseInjector.apply(headers); - } + auto& entry = facets.findOrCreateEntry(childKey, [&]() mutable { + isNew = true; + auto container = makeContainer(); + return ActorMap::Entry{container->getKey(), kj::mv(container)}; + }); - kj::Maybe getCapnpConnectHost() { - return capnpConnectHost; - } + return entry.value->addRef(); + } - private: - config::HttpOptions::Style style; - kj::Maybe forwardedProtoHeader; - kj::Maybe cfBlobHeader; - kj::Maybe capnpConnectHost; + uint getDepth() const override { + KJ_IF_SOME(p, parent) { + 
return 1 + p.getDepth(); + } + return 0; + } - class HeaderInjector { - public: - HeaderInjector(capnp::List::Reader headers, - kj::HttpHeaderTable::Builder& headerTableBuilder) - : injectedHeaders(KJ_MAP(header, headers) { - InjectedHeader result; - result.id = headerTableBuilder.add(header.getName()); - if (header.hasValue()) { - result.value = kj::str(header.getValue()); - } - return result; - }) {} + kj::Own getFacet( + kj::StringPtr name, kj::Function()> getStartInfo) override { + auto facet = getFacetContainer(kj::str(name), kj::mv(getStartInfo)); + return kj::refcounted(kj::mv(facet)); + } - bool empty() { - return injectedHeaders.size() == 0; + void abortFacet(kj::StringPtr name, kj::Exception reason) override { + KJ_IF_SOME(entry, facets.findEntry(name)) { + entry.value->abort(reason); + facets.erase(entry); + } } - void apply(kj::HttpHeaders& headers) { - for (auto& header: injectedHeaders) { - KJ_IF_SOME(v, header.value) { - headers.setPtr(header.id, v); - } else { - headers.unset(header.id); + void deleteFacet(kj::StringPtr name) override { + // First, abort any running facets. + abortFacet(name, JSG_KJ_EXCEPTION(FAILED, Error, "Facet was deleted.")); + + // Then delete the underlying storage. + KJ_IF_SOME(as, ns.actorStorage) { + // Note that if there's no facet index then there couldn't possibly be any child storage. + KJ_IF_SOME(index, getFacetTreeIndexIfNotEmpty()) { + uint childId = index.getId(getFacetId(), name); + deleteFacetImpl(*as.directory, index, childId); } } } - private: - struct InjectedHeader { - kj::HttpHeaderId id; - kj::Maybe value; - }; - kj::Array injectedHeaders; - }; + void cloneFacet(kj::StringPtr src, kj::StringPtr dst) override { + // Replacing a facet implies aborting it. 
+ abortFacet(dst, JSG_KJ_EXCEPTION(FAILED, Error, "Facet was cloned-over.")); - HeaderInjector requestInjector; - HeaderInjector responseInjector; -}; - -// ======================================================================================= - -// Service used when the service's config is invalid. -class Server::InvalidConfigService final: public Service { - public: - kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { - JSG_FAIL_REQUIRE(Error, "Service cannot handle requests because its config is invalid."); - } + if (src == dst) { + // Cloning a facet to itself is equivalent to replacing it with an exact copy of its own + // data. Aborting matches the observable semantics of delete(dst), but we leave the + // storage untouched (since src == dst, copying it onto itself would be a no-op anyway). + return; + } - bool hasHandler(kj::StringPtr handlerName) override { - return false; - } + auto& as = KJ_UNWRAP_OR(ns.actorStorage, return); - kj::OneOf, kj::Promise>> getTokenMaybeSync( - IoChannelFactory::ChannelTokenUsage usage) override { - // Can't get here because workerd would have failed to start. - KJ_UNREACHABLE; - } -}; + // If no index exists on disk, there can be no storage to delete or copy. + KJ_IF_SOME(index, getFacetTreeIndexIfNotEmpty()) { + uint parentId = getFacetId(); -class Server::InvalidConfigActorClass final: public ActorClass { - public: - void requireAllowsTransfer() override { - // Can't get here because workerd would have failed to start. - KJ_UNREACHABLE; - } - kj::OneOf, kj::Promise>> getTokenMaybeSync( - IoChannelFactory::ChannelTokenUsage usage) override { - // Can't get here because workerd would have failed to start. - KJ_UNREACHABLE; - } + // Delete dst's existing storage first, mirroring the storage-side behavior of + // deleteFacet() (the abort was already handled above). 
+ uint dstId = index.getId(parentId, dst); + deleteFacetImpl(*as.directory, index, dstId); - kj::Own newActor(kj::Maybe tracker, - Worker::Actor::Id actorId, - Worker::Actor::MakeActorCacheFunc makeActorCache, - Worker::Actor::MakeStorageFunc makeStorage, - kj::Own loopback, - kj::Maybe> manager, - kj::Maybe container, - kj::Maybe facetManager) override { - JSG_FAIL_REQUIRE( - Error, "Cannot instantiate Durable Object class because its config is invalid."); - } + // Now copy src to dst. If src's DB file does not exist, then src has no data, which + // in the Durable Objects model is indistinguishable from src never having run. In + // that case dst should also have no data, which it already does (we just deleted it). + uint srcId = index.getId(parentId, src); + cloneFacetImpl(*as.directory, index, srcId, dstId); + } + } - kj::Own startRequest( - IoChannelFactory::SubrequestMetadata metadata, kj::Own actor) override { - // Can't get here because creating the actor would have required calling the other method. - KJ_UNREACHABLE; - } -}; + void requireTransferrableStub() { + JSG_REQUIRE(parent == kj::none, DOMDataCloneError, + "Stubs pointing to Durable Object facets are not serializable."); + JSG_REQUIRE(ns.getConfig().is(), DOMDataCloneError, + "Stubs pointing to ephemeral objects are not serializable."); + } -// Return a fake Own pointing to the singleton. -kj::Own Server::makeInvalidConfigService() { - return {invalidConfigServiceSingleton.get(), kj::NullDisposer::instance}; -} + kj::OneOf, kj::Promise>> getChannelToken( + IoChannelFactory::ChannelTokenUsage usage) { + requireTransferrableStub(); -// A NetworkAddress whose connect() method waits for a Promise and then forwards -// to it. Used by ExternalHttpService so that we don't have to wait for DNS lookup before the -// server can start. 
-class PromisedNetworkAddress final: public kj::NetworkAddress { - // TODO(cleanup): kj::Network should be extended with a new version of parseAddress() which does - // not do DNS lookup immediately, and therefore can return a NetworkAddress synchronously. - // In fact, this version should be designed to redo the DNS lookup periodically to see if it - // changed, which would be nice for workerd when the remote address may change over time. - public: - PromisedNetworkAddress(kj::Promise> promise) - : promise(promise.then([this](kj::Own result) { addr = kj::mv(result); }) - .fork()) {} + kj::StringPtr uniqueKey = ns.getConfig().get().uniqueKey; - kj::Promise> connect() override { - KJ_IF_SOME(a, addr) { - co_return co_await a.get()->connect(); - } else { - co_await promise; - co_return co_await KJ_ASSERT_NONNULL(addr)->connect(); - } - } + KJ_SWITCH_ONEOF(classAndId) { + KJ_CASE_ONEOF(c, ClassAndId) { + return getChannelTokenImpl(usage, c.id); + } + KJ_CASE_ONEOF(promise, kj::ForkedPromise) { + return promise.addBranch().then([this, usage]() { + return getChannelTokenImpl( + usage, KJ_ASSERT_NONNULL(classAndId.tryGet()).id); + }); + } + } - kj::Promise connectAuthenticated() override { - KJ_IF_SOME(a, addr) { - co_return co_await a.get()->connectAuthenticated(); - } else { - co_await promise; - co_return co_await KJ_ASSERT_NONNULL(addr)->connectAuthenticated(); + KJ_UNREACHABLE; } - } - // We don't use any other methods, and they seem kinda annoying to implement. - kj::Own listen() override { - KJ_UNIMPLEMENTED("PromisedNetworkAddress::listen() not implemented"); - } - kj::Own clone() override { - KJ_UNIMPLEMENTED("PromisedNetworkAddress::clone() not implemented"); - } - kj::String toString() override { - KJ_UNIMPLEMENTED("PromisedNetworkAddress::toString() not implemented"); - } + private: + // The actor is constructed after the ActorContainer so it starts off empty. 
+ kj::Maybe> actor; + + kj::String key; + kj::Own tracker; + ActorNamespace& ns; + ActorContainer& root; + kj::Maybe parent; + kj::Timer& timer; + kj::TimePoint lastAccess; + kj::Maybe> manager; + kj::Maybe> shutdownTask; + kj::Maybe> onBrokenTask; + kj::Maybe brokenReason; - private: - kj::ForkedPromise promise; - kj::Maybe> addr; -}; + // Reference to the ContainerClient (if container is enabled for this actor) + kj::Maybe> containerClient; -class Server::ExternalTcpService final: public Service, private WorkerInterface { - public: - ExternalTcpService(kj::Own addrParam): addr(kj::mv(addrParam)) {} + // If this is a `ForkedPromise`, await the promise. When it has resolved, then + // `classAndId` will have been replaced with the resolved `ClassAndId` value. + kj::OneOf> classAndId; - kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { - return {this, kj::NullDisposer::instance}; - } + // FacetTreeIndex for this actor. Only initialized on the root. + kj::Maybe> facetTreeIndex; - bool hasHandler(kj::StringPtr handlerName) override { - return handlerName == "fetch"_kj || handlerName == "connect"_kj; - } + // ID of this facet. Initialized when getFacetId() is first called. + kj::Maybe facetId; - kj::OneOf, kj::Promise>> getTokenMaybeSync( - IoChannelFactory::ChannelTokenUsage usage) override { - JSG_FAIL_REQUIRE(DOMDataCloneError, "ExternalService can't be passed over RPC."); - } + ActorMap facets; - private: - kj::Own addr; + // Get the facet ID for this facet. The root facet always has ID zero, but all other facets + // need to be looked up in the index to make sure they are assigned consistent IDs. 
+ uint getFacetId() { + KJ_IF_SOME(f, facetId) { + return f; + } - kj::Promise request(kj::HttpMethod method, - kj::StringPtr url, - const kj::HttpHeaders& headers, - kj::AsyncInputStream& requestBody, - kj::HttpService::Response& response) override { - throwUnsupported(); - } + ActorContainer& parent = KJ_UNWRAP_OR(this->parent, return 0); - kj::Promise connect(kj::StringPtr host, - const kj::HttpHeaders& headers, - kj::AsyncIoStream& connection, - ConnectResponse& tunnel, - kj::HttpConnectSettings settings) override { - TRACE_EVENT("workerd", "ExternalTcpService::connect()", "host", host.cStr()); - auto io_stream = co_await addr->connect(); + FacetTreeIndex& index = root.ensureFacetTreeIndex(); + return index.getId(parent.getFacetId(), key); + } - auto promises = kj::heapArrayBuilder>(2); + // Get the facet tree index, opening the file if it hasn't been opened yet, and creating it + // if it hasn't been created yet. + FacetTreeIndex& ensureFacetTreeIndex() { + KJ_REQUIRE(parent == kj::none, "only 'root' may ensureFacetTreeIndex()"); - promises.add(connection.pumpTo(*io_stream).then([&io_stream = *io_stream](uint64_t size) { - io_stream.shutdownWrite(); - })); + KJ_IF_SOME(i, facetTreeIndex) { + return *i; + } else { + // Facet tree index hasn't been initialized yet. Do that now (opening the existing file, + // or creating it if it doesn't exist). + auto& as = KJ_REQUIRE_NONNULL( + ns.actorStorage, "can't call getFacetId() when there's no backing storage"); + auto indexFile = as.directory->openFile( + kj::Path({kj::str(key, ".facets")}), kj::WriteMode::CREATE | kj::WriteMode::MODIFY); + return *facetTreeIndex.emplace(kj::heap(kj::mv(indexFile))); + } + } - promises.add(io_stream->pumpTo(connection).then([&connection](uint64_t size) { - connection.shutdownWrite(); - })); + // Like ensureFacetTreeIndex() but if the index doesn't exist on disk, return kj::none. 
+ kj::Maybe getFacetTreeIndexIfNotEmpty() { + KJ_REQUIRE(parent == kj::none); - tunnel.accept(200, "OK", kj::HttpHeaders(kj::HttpHeaderTable{})); + KJ_IF_SOME(i, facetTreeIndex) { + return *i; + } else { + // Facet tree index hasn't been initialized yet. If the file exists, open it. Otherwise, + // assume empty and return none. + auto& as = KJ_UNWRAP_OR(ns.actorStorage, return kj::none); + auto indexFile = KJ_UNWRAP_OR( + as.directory->tryOpenFile(kj::Path({kj::str(key, ".facets")}), kj::WriteMode::MODIFY), + return kj::none); + return *facetTreeIndex.emplace(kj::heap(kj::mv(indexFile))); + } + } - co_await kj::joinPromisesFailFast(promises.finish()).attach(kj::mv(io_stream)); - } + // Get the path to the facet's sqlite database, within the actor namespace directory. + // + // `suffix` can be e.g. "-wal" or "-shm". + kj::Path getSqlitePathForId(uint id, kj::StringPtr suffix = ""_kj) { + if (id == 0) { + return kj::Path({kj::str(root.key, ".sqlite", suffix)}); + } else { + return kj::Path({kj::str(root.key, '.', id, ".sqlite", suffix)}); + } + } - kj::Promise prewarm(kj::StringPtr url) override { - return kj::READY_NOW; - } - kj::Promise runScheduled(kj::Date scheduledTime, kj::StringPtr cron) override { - throwUnsupported(); - } - kj::Promise runAlarm(kj::Date scheduledTime, uint32_t retryCount) override { - throwUnsupported(); - } - kj::Promise customEvent(kj::Own event) override { - return event->notSupported(); - } + void deleteFacetImpl(const kj::Directory& dir, FacetTreeIndex& index, uint facetId) { + deleteDescendantStorage(dir, index, facetId); - [[noreturn]] void throwUnsupported() { - JSG_FAIL_REQUIRE(Error, "External TCP servers don't support this event type."); - } -}; + // Remove the database, WAL, and SHM files, if present. Note that the database may not + // exist at all if this facet didn't exist before delete() was called on it. 
+ dir.tryRemove(getSqlitePathForId(facetId)); + dir.tryRemove(getSqlitePathForId(facetId, "-wal")); + dir.tryRemove(getSqlitePathForId(facetId, "-shm")); + } -// Service used when the service is configured as external HTTP service. -class Server::ExternalHttpService final: public Service { - public: - ExternalHttpService(kj::Own addrParam, - kj::Own rewriter, - kj::HttpHeaderTable& headerTable, - kj::Timer& timer, - kj::EntropySource& entropySource, - capnp::ByteStreamFactory& byteStreamFactory, - capnp::HttpOverCapnpFactory& httpOverCapnpFactory) - : addr(kj::mv(addrParam)), - webSocketErrorHandler(kj::heap()), - inner(kj::newHttpClient(timer, - headerTable, - *addr, - {.entropySource = entropySource, - .webSocketCompressionMode = kj::HttpClientSettings::MANUAL_COMPRESSION, - .webSocketErrorHandler = *webSocketErrorHandler})), - serviceAdapter(kj::newHttpService(*inner)), - rewriter(kj::mv(rewriter)), - headerTable(headerTable), - byteStreamFactory(byteStreamFactory), - httpOverCapnpFactory(httpOverCapnpFactory) {} + void deleteDescendantStorage(const kj::Directory& dir, uint parentId) { + KJ_IF_SOME(index, getFacetTreeIndexIfNotEmpty()) { + deleteDescendantStorage(dir, index, parentId); + } else { + // There's no index, so there must be no facets (other than the root). + KJ_ASSERT(parentId == 0); + } + } - kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { - return kj::heap(*this, kj::mv(metadata)); - } + void deleteDescendantStorage(const kj::Directory& dir, FacetTreeIndex& index, uint parentId) { + index.forEachChild(parentId, + [&](uint childId, kj::StringPtr childName) { deleteFacetImpl(dir, index, childId); }); + } - bool hasHandler(kj::StringPtr handlerName) override { - return handlerName == "fetch"_kj || handlerName == "connect"_kj; - } + // Recursively copy the subtree rooted at the facet with ID `srcId` to a new subtree + // rooted at the facet with ID `dstId`.
+ void cloneFacetImpl(const kj::Directory& dir, FacetTreeIndex& index, uint srcId, uint dstId) { + // Snapshot src's children before recursing, because the recursion mutates the index by + // allocating new IDs for the destination subtree, which would interfere with a live + // forEachChild iteration. + struct Child { + uint id; + kj::String name; + }; + kj::Vector children; + index.forEachChild(srcId, [&](uint childId, kj::StringPtr childName) { + children.add(Child{childId, kj::str(childName)}); + }); - kj::OneOf, kj::Promise>> getTokenMaybeSync( - IoChannelFactory::ChannelTokenUsage usage) override { - JSG_FAIL_REQUIRE(DOMDataCloneError, "ExternalService can't be passed over RPC."); - } + for (auto& child: children) { + uint newChildId = index.getId(dstId, child.name); + cloneFacetImpl(dir, index, child.id, newChildId); + } - private: - kj::Own addr; + // Now that the children are copied, copy the main facet. + auto srcDb = getSqlitePathForId(srcId); - kj::Own webSocketErrorHandler; - kj::Own inner; - kj::Own serviceAdapter; + // It's possible there's no backing file on disk, if the facet existed previously but was + // deleted. If the source facet has no data, then leaving the destination with no data + // is correct. + if (!dir.exists(srcDb)) return; - kj::Own rewriter; + // Copy the database. Use KJ's Directory::transfer() which will use copy-on-write where + // available (e.g. FICLONE on Linux, if the FS supports it). + auto dstDb = getSqlitePathForId(dstId); + dir.transfer(dstDb, kj::WriteMode::CREATE, srcDb, kj::TransferMode::COPY); - kj::HttpHeaderTable& headerTable; - capnp::ByteStreamFactory& byteStreamFactory; - capnp::HttpOverCapnpFactory& httpOverCapnpFactory; + // Copy the WAL if it exists. We can't rely on the source's WAL having been checkpointed + // and truncated at close time -- e.g., a previous process may have crashed leaving a + // valid WAL. Copying the WAL alongside the DB preserves any unmerged data. 
+ auto srcWal = getSqlitePathForId(srcId, "-wal"); + if (!dir.exists(srcWal)) return; + auto dstWal = getSqlitePathForId(dstId, "-wal"); + dir.transfer(dstWal, kj::WriteMode::CREATE, srcWal, kj::TransferMode::COPY); - struct CapnpClient { - kj::Own connection; - capnp::TwoPartyClient rpcSystem; + // Finally copy the SHM file if present. This is not strictly necessary but if the WAL is + // large this helps SQLite start up faster. + auto srcShm = getSqlitePathForId(srcId, "-shm"); + if (!dir.exists(srcShm)) return; + auto dstShm = getSqlitePathForId(dstId, "-shm"); + dir.transfer(dstShm, kj::WriteMode::CREATE, srcShm, kj::TransferMode::COPY); + } - CapnpClient(kj::Own connectionParam) - : connection(kj::mv(connectionParam)), - rpcSystem(*connection) {} - }; + void requireNotBroken() { + KJ_IF_SOME(e, brokenReason) { + kj::throwFatalException(e.clone()); + } + } - // capnpClient is created on-demand when RPC is needed. - kj::Maybe capnpClient; + kj::Promise monitorOnBroken(Worker::Actor& actor) { + try { + // It's possible for this to never resolve if the actor never breaks, + // in which case the returned promise will just be canceled. + co_await actor.onBroken(); + KJ_FAIL_ASSERT("actor.onBroken() resolved normally?"); + } catch (...) { + brokenReason = kj::getCaughtExceptionAsKj(); + } + + for (auto& facet: facets) { + facet.value->abort(brokenReason); + } + facets.clear(); + + // HACK: Dropping the ActorContainer will delete onBrokenTask, cancelling ourselves. This + // would crash. To avoid the problem, detach ourselves. This is safe because we know that + // once we return there's nothing left for this promise to do anyway. + KJ_ASSERT_NONNULL(onBrokenTask).detach([](kj::Exception&& e) {}); + + // Hollow out the object, so that if it still has references, they won't keep these parts + // alive. Since any further calls to `getActor()` will throw, we don't have to worry about + // the actor being recreated. 
+ auto actorToDrop = kj::mv(this->actor); + tracker->shutdown(); + auto managerToDrop = kj::mv(manager); + + // Note that we remove the entire ActorContainer from the map -- this drops the + // HibernationManager so any connected hibernatable websockets will be disconnected. + KJ_IF_SOME(p, parent) { + p.facets.erase(key); + } else { + ns.actors.erase(key); + } + + // WARNING: `this` MAY HAVE BEEN DELETED as a result of the above `erase()`. Do not access + // it again here. + } + + // Processes the eviction of the Durable Object and hibernates active websockets. + kj::Promise handleShutdown() { + // After 10 seconds of inactivity, we destroy the Worker::Actor and hibernate any active + // JS WebSockets. + // TODO(someday): We could make this timeout configurable to make testing less burdensome. + co_await timer.afterDelay(10 * kj::SECONDS); + // Cancel the onBroken promise, since we're about to destroy the actor anyways and don't + // want to trigger it. + onBrokenTask = kj::none; + KJ_IF_SOME(a, actor) { + if (a->isShared()) { + // Our ActiveRequest refcounting has broken somewhere. This is likely because we're + // `addRef`-ing an actor that has had an ActiveRequest attached to its kj::Own (in other + // words, the ActiveRequest count is less than it should be). + // + // Rather than dropping our actor and possibly ending up with split-brain, + // we should opt out of the deferred proxy optimization and log the error to Sentry. + KJ_LOG(ERROR, + "Detected internal bug in hibernation: Durable Object has strong references " + "when hibernation timeout expired."); - // This task nulls out `capnpClient` when the connection is lost. - kj::Promise clearCapnpClientTask = nullptr; + co_return; + } + KJ_IF_SOME(m, manager) { + auto& worker = a->getWorker(); + auto workerStrongRef = kj::atomicAddRef(worker); + // Take an async lock, we can't use `takeAsyncLock(RequestObserver&)` since we don't + // have an `IncomingRequest` at this point. 
+ // + // Note that we do not have a race here because this is part of the `shutdownTask` + // promise. If a new request comes in while we're waiting to get the lock then we will + // cancel this promise. + Worker::AsyncLock asyncLock = co_await worker.takeAsyncLockWithoutRequest(nullptr); + workerStrongRef->runInLockScope( + asyncLock, [&](Worker::Lock& lock) { m->hibernateWebSockets(lock); }); + } + a->shutdown(0, KJ_EXCEPTION(DISCONNECTED, "broken.dropped; Actor freed due to inactivity")); + } + // Destroy the last strong Worker::Actor reference. + actor = kj::none; - // Get an WorkerdBootstrap representing the service on the other end of an HTTP connection. May - // reuse an existing connection, or form a new one over `client`. - rpc::WorkerdBootstrap::Client getOutgoingCapnp(kj::HttpClient& client) { - KJ_IF_SOME(c, capnpClient) { - return c.rpcSystem.bootstrap().castAs(); + // Drop our reference to the ContainerClient + // If setInactivityTimeout() was called, the timer still holds a reference + // so the container stays alive until the timeout expires + containerClient = kj::none; } - // No existing client, need to create a new one. 
- kj::StringPtr host = KJ_UNWRAP_OR(rewriter->getCapnpConnectHost(), - { return JSG_KJ_EXCEPTION(FAILED, Error, "This ExternalServer not configured for RPC."); }); + void start(kj::Own& actorClass, Worker::Actor::Id& id) { + KJ_REQUIRE(actor == nullptr); - auto req = client.connect(host, kj::HttpHeaders(headerTable), {}); - auto& c = capnpClient.emplace(kj::mv(req.connection)); + auto makeActorCache = [this](const ActorCache::SharedLru& sharedLru, OutputGate& outputGate, + ActorCache::Hooks& hooks, SqliteObserver& sqliteObserver) mutable { + return ns.config.tryGet().map( + [&](const Durable& d) -> kj::Own { + KJ_IF_SOME(as, ns.actorStorage) { + kj::Own sqliteHooks; + if (parent == kj::none) { + KJ_IF_SOME(a, ns.alarmScheduler) { + sqliteHooks = kj::heap(a, ActorKey{.actorId = key}); + } else { + // No alarm scheduler available, use default hooks instance. + sqliteHooks = fakeOwn(ActorSqlite::Hooks::getDefaultHooks()); + } + } else { + // TODO(someday): Support alarms in facets, somehow. + sqliteHooks = fakeOwn(ActorSqlite::Hooks::getDefaultHooks()); + } - // Arrange that when the connection is lost, we'll null out `capnpClient`. This ensures that - // on the next event, we'll attempt to reconnect. - // - // TODO(perf): Time out idle connections? - clearCapnpClientTask = - c.rpcSystem.onDisconnect().attach(kj::defer([this]() { - capnpClient = kj::none; - })).eagerlyEvaluate(nullptr); + uint selfId = getFacetId(); + auto path = getSqlitePathForId(selfId); + auto db = kj::heap( + as.vfs, kj::mv(path), kj::WriteMode::CREATE | kj::WriteMode::MODIFY); + + // Before we do anything, make sure the database is in WAL mode. We also need to + // do this after reset() is used, so register a callback for that. + db->run("PRAGMA journal_mode=WAL;"); + + db->afterReset([this, &dir = *as.directory, selfId](SqliteDatabase& db) { + db.run("PRAGMA journal_mode=WAL;"); + + // reset() is used when the app called deleteAll(), in which case we also want to + // delete all child facets. 
+ // TODO(someday): Arguably this should be transactional somehow so if we fail here + // we don't leave the facets still there after the parent has already been reset. + // But most filesystems do not support transactions, so we'd have to do something + // like store a flag in the parent DB saying "reset pending" so that on a restart + // we retry the deletions. Note that in production on SRS, this is actually + // transactional -- there's only a problem when running locally with workerd. + deleteDescendantStorage(dir, selfId); + }); + + return kj::heap(kj::mv(db), outputGate, + [](SpanParent) -> kj::Promise { return kj::READY_NOW; }, *sqliteHooks) + .attach(kj::mv(sqliteHooks)); + } else { + // Create an ActorCache backed by a fake, empty storage. Elsewhere, we configure + // ActorCache never to flush, so this effectively creates in-memory storage. + return kj::heap( + newEmptyReadOnlyActorStorage(), sharedLru, outputGate, hooks); + } + }); + }; - return c.rpcSystem.bootstrap().castAs(); - } + bool enableSql = true; + kj::Maybe containerOptions = + kj::none; + kj::Maybe uniqueKey; + KJ_SWITCH_ONEOF(ns.config) { + KJ_CASE_ONEOF(c, Durable) { + enableSql = c.enableSql; + containerOptions = c.containerOptions; + uniqueKey = c.uniqueKey; + } + KJ_CASE_ONEOF(c, Ephemeral) { + enableSql = c.enableSql; + } + } - class WorkerInterfaceImpl final: public WorkerInterface, private kj::HttpService::Response { - public: - WorkerInterfaceImpl(ExternalHttpService& parent, IoChannelFactory::SubrequestMetadata metadata) - : parent(kj::addRef(parent)), - metadata(kj::mv(metadata)) {} + auto makeStorage = + [enableSql = enableSql](jsg::Lock& js, const Worker::Api& api, + ActorCacheInterface& actorCache) -> jsg::Ref { + return js.alloc( + js, IoContext::current().addObject(actorCache), enableSql); + }; - kj::Promise request(kj::HttpMethod method, - kj::StringPtr url, - const kj::HttpHeaders& headers, - kj::AsyncInputStream& requestBody, - kj::HttpService::Response& response) override 
{ - TRACE_EVENT("workerd", "ExternalHttpServer::request()"); - KJ_REQUIRE(wrappedResponse == kj::none, "object should only receive one request"); - wrappedResponse = response; - if (parent->rewriter->needsRewriteRequest()) { - auto rewrite = parent->rewriter->rewriteOutgoingRequest(url, headers, metadata.cfBlobJson); - return parent->serviceAdapter->request(method, url, *rewrite.headers, requestBody, *this) - .attach(kj::mv(rewrite)); - } else { - return parent->serviceAdapter->request(method, url, headers, requestBody, *this); + auto loopback = kj::refcounted(*this); + + kj::Maybe container = kj::none; + KJ_IF_SOME(config, containerOptions) { + KJ_ASSERT(config.hasImageName(), "Image name is required"); + auto imageName = config.getImageName(); + kj::String containerId; + KJ_SWITCH_ONEOF(id) { + KJ_CASE_ONEOF(globalId, kj::Own) { + containerId = globalId->toString(); + } + KJ_CASE_ONEOF(existingId, kj::String) { + containerId = kj::str(existingId); + } + } + + container = ns.getContainerClient( + kj::str("workerd-", KJ_ASSERT_NONNULL(uniqueKey), "-", containerId), imageName); } - } - kj::Promise connect(kj::StringPtr host, - const kj::HttpHeaders& headers, - kj::AsyncIoStream& connection, - ConnectResponse& tunnel, - kj::HttpConnectSettings settings) override { - TRACE_EVENT("workerd", "ExternalHttpServer::connect()"); - return parent->serviceAdapter->connect(host, headers, connection, tunnel, kj::mv(settings)); + auto actor = + actorClass->newActor(getTracker(), Worker::Actor::cloneId(id), kj::mv(makeActorCache), + kj::mv(makeStorage), kj::mv(loopback), tryGetManagerRef(), kj::mv(container), *this); + onBrokenTask = monitorOnBroken(*actor); + this->actor = kj::mv(actor); } - kj::Promise prewarm(kj::StringPtr url) override { - return kj::READY_NOW; - } - kj::Promise runScheduled(kj::Date scheduledTime, kj::StringPtr cron) override { - throwUnsupported(); - } - kj::Promise runAlarm(kj::Date scheduledTime, uint32_t retryCount) override { - throwUnsupported(); + // 
Helper coroutine to call `getStartInfo()`, the start callback for a facet, while making + // sure the function stays alive until the returned promise resolves. + static kj::Promise callFacetStartCallback( + kj::Function()> getStartInfo) { + auto info = co_await getStartInfo(); + co_await info.ensureAllResolved(); + co_return ClassAndId(info.actorClass.downcast(), kj::mv(info.id)); } - kj::Promise customEvent(kj::Own event) override { - // We'll use capnp RPC for custom events. - auto bootstrap = parent->getOutgoingCapnp(*parent->inner); - auto dispatcher = - bootstrap.startEventRequest(capnp::MessageSize{4, 0}).send().getDispatcher(); - return event - ->sendRpc(parent->httpOverCapnpFactory, parent->byteStreamFactory, kj::mv(dispatcher)) - .attach(kj::mv(event)); + kj::Array getChannelTokenImpl( + IoChannelFactory::ChannelTokenUsage usage, const Worker::Actor::Id& id) { + kj::StringPtr uniqueKey = KJ_ASSERT_NONNULL(ns.getConfig().tryGet()).uniqueKey; + auto& abstractId = *KJ_ASSERT_NONNULL(id.tryGet>()); + auto& idImpl = + KJ_ASSERT_NONNULL(kj::tryDowncast(abstractId)); + return ns.channelTokenHandler.encodeActorChannelToken( + usage, uniqueKey, idImpl.getRaw(), idImpl.getName()); } + }; - private: - kj::Own parent; - IoChannelFactory::SubrequestMetadata metadata; - kj::Maybe wrappedResponse; - - [[noreturn]] void throwUnsupported() { - JSG_FAIL_REQUIRE(Error, "External HTTP servers don't support this event type."); - } + kj::Own getActorContainer(Worker::Actor::Id id) { + kj::String key; - kj::Own send(uint statusCode, - kj::StringPtr statusText, - const kj::HttpHeaders& headers, - kj::Maybe expectedBodySize) override { - TRACE_EVENT("workerd", "ExternalHttpService::send()", "status", statusCode); - auto& response = KJ_ASSERT_NONNULL(wrappedResponse); - if (parent->rewriter->needsRewriteResponse()) { - auto rewrite = headers.cloneShallow(); - parent->rewriter->rewriteResponse(rewrite); - return response.send(statusCode, statusText, rewrite, expectedBodySize); - } 
else { - return response.send(statusCode, statusText, headers, expectedBodySize); + KJ_SWITCH_ONEOF(id) { + KJ_CASE_ONEOF(obj, kj::Own) { + KJ_REQUIRE(config.is()); + key = obj->toString(); } - } - - kj::Own acceptWebSocket(const kj::HttpHeaders& headers) override { - TRACE_EVENT("workerd", "ExternalHttpService::acceptWebSocket()"); - auto& response = KJ_ASSERT_NONNULL(wrappedResponse); - if (parent->rewriter->needsRewriteResponse()) { - auto rewrite = headers.cloneShallow(); - parent->rewriter->rewriteResponse(rewrite); - return response.acceptWebSocket(rewrite); - } else { - return response.acceptWebSocket(headers); + KJ_CASE_ONEOF(str, kj::String) { + KJ_REQUIRE(config.is()); + key = kj::str(str); } } - }; -}; -kj::Own Server::makeExternalService(kj::StringPtr name, - config::ExternalServer::Reader conf, - kj::HttpHeaderTable::Builder& headerTableBuilder) { - TRACE_EVENT("workerd", "Server::makeExternalService()", "name", name.cStr()); - kj::StringPtr addrStr = nullptr; - kj::String ownAddrStr = nullptr; + return actors + .findOrCreate(key, [&]() mutable { + auto container = kj::refcounted(kj::mv(key), *this, kj::none, + ActorContainer::ClassAndId(kj::addRef(*actorClass), kj::mv(id)), timer); - KJ_IF_SOME(override, externalOverrides.findEntry(name)) { - addrStr = ownAddrStr = kj::mv(override.value); - externalOverrides.erase(override); - } else if (conf.hasAddress()) { - addrStr = conf.getAddress(); - } else { - reportConfigError(kj::str("External service \"", name, - "\" has no address in the config, so must be specified " - "on the command line with `--external-addr`.")); - return makeInvalidConfigService(); + return kj::HashMap>::Entry{ + container->getKey(), kj::mv(container)}; + })->addRef(); } - switch (conf.which()) { - case config::ExternalServer::HTTP: { - // We have to construct the rewriter upfront before waiting on any promises, since the - // HeaderTable::Builder is only available synchronously. 
- auto rewriter = kj::heap(conf.getHttp(), headerTableBuilder); - auto addr = kj::heap(network.parseAddress(addrStr, 80)); - return kj::refcounted(kj::mv(addr), kj::mv(rewriter), - headerTableBuilder.getFutureTable(), timer, entropySource, - globalContext->byteStreamFactory, globalContext->httpOverCapnpFactory); + kj::Own getContainerClient(kj::StringPtr containerId, kj::StringPtr imageName) { + KJ_IF_SOME(existingClient, containerClients.find(containerId)) { + return existingClient->addRef(); } - case config::ExternalServer::HTTPS: { - auto httpsConf = conf.getHttps(); - kj::Maybe certificateHost; - if (httpsConf.hasCertificateHost()) { - certificateHost = httpsConf.getCertificateHost(); - } - auto rewriter = kj::heap(httpsConf.getOptions(), headerTableBuilder); - auto addr = kj::heap( - makeTlsNetworkAddress(httpsConf.getTlsOptions(), addrStr, certificateHost, 443)); - return kj::refcounted(kj::mv(addr), kj::mv(rewriter), - headerTableBuilder.getFutureTable(), timer, entropySource, - globalContext->byteStreamFactory, globalContext->httpOverCapnpFactory); + + // No existing container in the map, create a new one + auto& dockerPathRef = KJ_ASSERT_NONNULL( + dockerPath, "dockerPath must be defined to enable containers on this Durable Object."); + + // Grab a branch of any pending cleanup from a previous ContainerClient for this + // container. If it exists, pass it to the container client so it knows that it has to sync. + kj::Promise previousCleanup = kj::READY_NOW; + KJ_IF_SOME(state, containerCleanupState.find(containerId)) { + previousCleanup = state.promise.addBranch(); } - case config::ExternalServer::TCP: { - auto tcpConf = conf.getTcp(); - auto addr = kj::heap(network.parseAddress(addrStr, 80)); - if (tcpConf.hasTlsOptions()) { - kj::Maybe certificateHost; - if (tcpConf.hasCertificateHost()) { - certificateHost = tcpConf.getCertificateHost(); + + // Upsert the cleanup state for this container ID. 
Replacing the + // canceler auto-cancels any in-flight cleanup tasks from the previous + // client's destructor. The generation counter is bumped on replacement + // so the cleanup callback can detect stale ownership without relying + // on raw pointer identity (which is vulnerable to address reuse). + auto canceler = kj::heap(); + uint64_t capturedGeneration = 0; + containerCleanupState.upsert(kj::str(containerId), + ContainerCleanupState{.canceler = kj::mv(canceler)}, + [&capturedGeneration](ContainerCleanupState& existing, ContainerCleanupState&& incoming) { + existing.canceler = kj::mv(incoming.canceler); + capturedGeneration = ++existing.generation; + }); + + // Cleanup callback: invoked from the ContainerClient destructor, which passes in its + // joined cleanup promise. + kj::Function)> cleanupCallback = + [this, containerId = kj::str(containerId), capturedGeneration]( + kj::Promise cleanupPromise) mutable { + KJ_IF_SOME(state, containerCleanupState.find(containerId)) { + if (state.generation != capturedGeneration) { + // A newer ContainerClient has already replaced us, so this cleanup is stale; + // drop the promise. + return; } - addr = kj::heap( - makeTlsNetworkAddress(tcpConf.getTlsOptions(), addrStr, certificateHost, 0)); + + containerClients.erase(containerId); + // Wrap with the canceler so a future client creation can cancel these + // tasks + auto cancellable = + state.canceler->wrap(kj::mv(cleanupPromise)).catch_([](kj::Exception&&) {}); + + auto forked = kj::mv(cancellable).fork(); + waitUntilTasks.add(forked.addBranch()); + state.promise = kj::mv(forked); } - return kj::refcounted(kj::mv(addr)); - } - } - reportConfigError(kj::str("External service named \"", name, - "\" has unrecognized protocol. Was the config " - "compiled with a newer version of the schema?")); - return makeInvalidConfigService(); -} + }; -// Service used when the service is configured as network service.
-class Server::NetworkService final: public Service, private WorkerInterface { - public: - NetworkService(kj::HttpHeaderTable& headerTable, - kj::Timer& timer, - kj::EntropySource& entropySource, - kj::Own networkParam, - kj::Maybe> tlsNetworkParam, - kj::Maybe tlsContext) - : network(kj::mv(networkParam)), - tlsNetwork(kj::mv(tlsNetworkParam)), - webSocketErrorHandler(kj::heap()), - inner(kj::newHttpClient(timer, - headerTable, - *network, - tlsNetwork, - {.entropySource = entropySource, - .webSocketCompressionMode = kj::HttpClientSettings::MANUAL_COMPRESSION, - .webSocketErrorHandler = *webSocketErrorHandler, - .tlsContext = tlsContext})), - serviceAdapter(kj::newHttpService(*inner)) {} + auto client = kj::refcounted(byteStreamFactory, timer, dockerNetwork, + kj::str(dockerPathRef), kj::str(containerId), kj::str(imageName), + kj::str(KJ_ASSERT_NONNULL(containerEgressInterceptorImage, + "containerEgressInterceptorImage must be configured for containers.")), + waitUntilTasks, kj::mv(previousCleanup), kj::mv(cleanupCallback), channelTokenHandler); - kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { - return {this, kj::NullDisposer::instance}; - } + // Store raw pointer in map (does not own) + containerClients.insert(kj::str(containerId), client.get()); - bool hasHandler(kj::StringPtr handlerName) override { - return handlerName == "fetch"_kj || handlerName == "connect"_kj; + return kj::mv(client); } - kj::OneOf, kj::Promise>> getTokenMaybeSync( - IoChannelFactory::ChannelTokenUsage usage) override { - JSG_FAIL_REQUIRE(DOMDataCloneError, "NetworkService can't be passed over RPC."); + void abortAll(kj::Maybe reason) { + for (auto& actor: actors) { + actor.value->abort(reason); + } + actors.clear(); } - private: - kj::Own network; - kj::Maybe> tlsNetwork; - kj::Own webSocketErrorHandler; - kj::Own inner; - kj::Own serviceAdapter; + // Resets all actor databases, aborts all actors, and cancels all alarms so DOs + // can be recreated with 
clean state. + void deleteAll(kj::Maybe reason) { + // Reset databases before aborting so connections are still open (avoids + // Windows file-locking issues with deferred handle release). + for (auto& actor: actors) { + actor.value->resetStorage(); + } - kj::Promise request(kj::HttpMethod method, - kj::StringPtr url, - const kj::HttpHeaders& headers, - kj::AsyncInputStream& requestBody, - kj::HttpService::Response& response) override { - TRACE_EVENT("workerd", "NetworkService::request()"); - return serviceAdapter->request(method, url, headers, requestBody, response); - } + abortAll(reason); - kj::Promise connect(kj::StringPtr host, - const kj::HttpHeaders& headers, - kj::AsyncIoStream& connection, - ConnectResponse& tunnel, - kj::HttpConnectSettings settings) override { - TRACE_EVENT("workerd", "NetworkService::connect()"); - // This code is hit when the global `connect` function is called in a JS worker script. - // It represents a proxy-less TCP connection, which means we can simply defer the handling of - // the connection to the service adapter (likely NetworkHttpClient). Its behavior will be to - // connect directly to the host over TCP. 
- return serviceAdapter->connect(host, headers, connection, tunnel, kj::mv(settings)); + KJ_IF_SOME(scheduler, ownAlarmScheduler) { + scheduler->deleteAll(); + } } - kj::Promise prewarm(kj::StringPtr url) override { - return kj::READY_NOW; - } - kj::Promise runScheduled(kj::Date scheduledTime, kj::StringPtr cron) override { - throwUnsupported(); - } - kj::Promise runAlarm(kj::Date scheduledTime, uint32_t retryCount) override { - throwUnsupported(); - } - kj::Promise customEvent(kj::Own event) override { - return event->notSupported(); - } + private: + kj::Own actorClass; + const ActorConfig& config; + const kj::Clock& clock; - [[noreturn]] void throwUnsupported() { - JSG_FAIL_REQUIRE(Error, "External HTTP servers don't support this event type."); - } -}; + struct ActorStorage { + kj::Own directory; + SqliteDatabase::Vfs vfs; -kj::Own Server::makeNetworkService(config::Network::Reader conf) { - TRACE_EVENT("workerd", "Server::makeNetworkService()"); - auto restrictedNetwork = network.restrictPeers( KJ_MAP(a, conf.getAllow()) -> kj::StringPtr { - return a; - }, KJ_MAP(a, conf.getDeny()) -> kj::StringPtr { return a; }); + ActorStorage(kj::Own directoryParam) + : directory(kj::mv(directoryParam)), + vfs(*directory) {} + }; - kj::Maybe> tlsNetwork; - kj::Maybe tlsContext; - if (conf.hasTlsOptions()) { - auto ownedTlsContext = makeTlsContext(conf.getTlsOptions()); - tlsContext = ownedTlsContext; - tlsNetwork = ownedTlsContext->wrapNetwork(*restrictedNetwork).attach(kj::mv(ownedTlsContext)); - } + // Note: The Vfs, actorStorage, and ownAlarmScheduler must not be torn down until all actors + // have been torn down, so we declare them before `actors`. + kj::Maybe actorStorage; + kj::Maybe> ownAlarmScheduler; + + // Tracks the canceler and cleanup promise for a Docker container's lifecycle cleanup. + // Useful to await on async calls of a ContainerClient destructor when the new + // one appears before they've been resolved. 
+ struct ContainerCleanupState { + // Canceler that wraps the promise fired in ~ContainerClient. Replacing + // it cancels any pending cleanup, which resolves the promise immediately. + kj::Own canceler; + + // Forked cleanup promise. A branch is added to waitUntilTasks to keep the I/O alive, + // and another branch is passed to the next ContainerClient so its status() can await. + kj::ForkedPromise promise = kj::Promise(kj::READY_NOW).fork(); + + // Monotonically increasing counter, bumped each time the canceler is replaced + // via upsert. The cleanup callback captures the generation at creation time and + // compares it to detect whether a newer ContainerClient has taken ownership, + // avoiding a raw-pointer identity check that is vulnerable to address reuse. + uint64_t generation = 0; + }; - return kj::refcounted(globalContext->headerTable, timer, entropySource, - kj::mv(restrictedNetwork), kj::mv(tlsNetwork), tlsContext); -} + // Per-container cleanup state: canceler + forked cleanup promise. + kj::HashMap containerCleanupState; -// Service used when the service is configured as disk directory service. 
-class Server::DiskDirectoryService final: public Service, private WorkerInterface { - public: - DiskDirectoryService(config::DiskDirectory::Reader conf, - kj::Own dir, - kj::HttpHeaderTable::Builder& headerTableBuilder) - : writable(*dir), - readable(kj::mv(dir)), - headerTable(headerTableBuilder.getFutureTable()), - hLastModified(headerTableBuilder.add("Last-Modified")), - allowDotfiles(conf.getAllowDotfiles()) {} - DiskDirectoryService(config::DiskDirectory::Reader conf, - kj::Own dir, - kj::HttpHeaderTable::Builder& headerTableBuilder) - : readable(kj::mv(dir)), - headerTable(headerTableBuilder.getFutureTable()), - hLastModified(headerTableBuilder.add("Last-Modified")), - allowDotfiles(conf.getAllowDotfiles()) {} - - kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { - return {this, kj::NullDisposer::instance}; - } - - kj::Maybe getWritable() { - return writable; - } - - bool hasHandler(kj::StringPtr handlerName) override { - return handlerName == "fetch"_kj; - } - - kj::OneOf, kj::Promise>> getTokenMaybeSync( - IoChannelFactory::ChannelTokenUsage usage) override { - JSG_FAIL_REQUIRE(DOMDataCloneError, "DiskDirectoryService can't be passed over RPC."); - } + // Map of container IDs to ContainerClients (for reconnection support with inactivity timeouts). + // The map holds raw pointers (not ownership) - ContainerClients are owned by actors and timers. + // When the last reference is dropped, the destructor removes the entry from this map. + kj::HashMap containerClients; - private: - kj::Maybe writable; - kj::Own readable; - kj::HttpHeaderTable& headerTable; - kj::HttpHeaderId hLastModified; - bool allowDotfiles; + // If the actor is broken, we remove it from the map. However, if it's just evicted due to + // inactivity, we keep the ActorContainer in the map but drop the Own. When a new + // request comes in, we recreate the Own. 
+ ActorMap actors; - kj::Promise request(kj::HttpMethod method, - kj::StringPtr urlStr, - const kj::HttpHeaders& requestHeaders, - kj::AsyncInputStream& requestBody, - kj::HttpService::Response& response) override { - TRACE_EVENT("workerd", "DiskDirectoryService::request()", "url", urlStr.cStr()); - auto url = kj::Url::parse(urlStr); + kj::Maybe> cleanupTask; + kj::Timer& timer; + capnp::ByteStreamFactory& byteStreamFactory; + ChannelTokenHandler& channelTokenHandler; + kj::Network& dockerNetwork; + kj::Maybe dockerPath; + kj::Maybe containerEgressInterceptorImage; + kj::TaskSet& waitUntilTasks; + kj::Maybe alarmScheduler; - bool blockedPath = false; - kj::Path path = nullptr; - KJ_IF_SOME(exception, - kj::runCatchingExceptions([&]() { path = kj::Path(url.path.releaseAsArray()); })) { - (void)exception; // squash compiler warning about unused var - // If the Path constructor throws, this path is not valid (e.g. it contains ".."). - blockedPath = true; - } + // Removes actors from `actors` after 70 seconds of last access. + kj::Promise cleanupLoop() { + constexpr auto EXPIRATION = 70 * kj::SECONDS; - if (!blockedPath && !allowDotfiles) { - for (auto& part: path) { - if (part.startsWith(".")) { - blockedPath = true; - break; - } + // Don't bother running the loop if the config doesn't allow eviction. 
+ KJ_SWITCH_ONEOF(config) { + KJ_CASE_ONEOF(c, Durable) { + if (!c.isEvictable) co_return; } - } - - if (method == kj::HttpMethod::GET || method == kj::HttpMethod::HEAD) { - if (blockedPath) { - co_return co_await response.sendError(404, "Not Found", headerTable); + KJ_CASE_ONEOF(c, Ephemeral) { + if (!c.isEvictable) co_return; } + } - auto file = KJ_UNWRAP_OR(readable->tryOpenFile(path), - { co_return co_await response.sendError(404, "Not Found", headerTable); }); - - auto meta = file->stat(); - - switch (meta.type) { - case kj::FsNode::Type::FILE: { - // If this is a GET request with a Range header, return partial content if a single - // satisfiable range is specified. - // TODO(someday): consider supporting multiple ranges with multipart/byteranges - kj::Maybe range; - if (method == kj::HttpMethod::GET) { - KJ_IF_SOME(header, requestHeaders.get(kj::HttpHeaderId::RANGE)) { - KJ_SWITCH_ONEOF(kj::tryParseHttpRangeHeader(header.asArray(), meta.size)) { - KJ_CASE_ONEOF(ranges, kj::Array) { - KJ_ASSERT(ranges.size() > 0); - if (ranges.size() == 1) range = ranges[0]; - } - KJ_CASE_ONEOF(_, kj::HttpEverythingRange) {} - KJ_CASE_ONEOF(_, kj::HttpUnsatisfiableRange) { - kj::HttpHeaders headers(headerTable); - headers.set(kj::HttpHeaderId::CONTENT_RANGE, kj::str("bytes */", meta.size)); - co_return co_await response.sendError(416, "Range Not Satisfiable", headers); - } - } - } - } + while (true) { + auto now = timer.now(); + actors.eraseAll([&](auto&, kj::Own& entry) { + // Check getLastAccess() before hasClients() since it's faster. + if ((now - entry->getLastAccess()) <= EXPIRATION) { + // Used recently; don't evict. + return false; + } - kj::HttpHeaders headers(headerTable); - headers.set(kj::HttpHeaderId::CONTENT_TYPE, MimeType::OCTET_STREAM.toString()); - headers.set(hLastModified, httpTime(meta.lastModified)); + if (entry->hasClients()) { + // There's still an active client; don't evict. 
+ return false; + } - // We explicitly set the Content-Length header because if we don't, and we were called - // by a local Worker (without an actual HTTP connection in between), then the Worker - // will not see a Content-Length header, but being able to query the content length - // (especially with HEAD requests) is quite useful. - // TODO(cleanup): Arguably the implementation of `fetch()` should be adjusted so that - // if no `Content-Length` header is returned, but the body size is known via the KJ - // HTTP API, then the header should be filled in automatically. Unclear if this is safe - // to change without a compat flag. + // No clients and not used in a while, evict this actor. + return true; + }); - if (method == kj::HttpMethod::HEAD) { - headers.set(kj::HttpHeaderId::CONTENT_LENGTH, kj::str(meta.size)); - response.send(200, "OK", headers, meta.size); - co_return; - } else KJ_IF_SOME(r, range) { - KJ_ASSERT(r.start <= r.end); - auto rangeSize = r.end - r.start + 1; - headers.set(kj::HttpHeaderId::CONTENT_LENGTH, kj::str(rangeSize)); - headers.set(kj::HttpHeaderId::CONTENT_RANGE, - kj::str("bytes ", r.start, "-", r.end, "/", meta.size)); - auto out = response.send(206, "Partial Content", headers, rangeSize); + co_await timer.atTime(now + EXPIRATION); + } + } - auto in = kj::heap(*file, r.start); - co_return co_await in->pumpTo(*out, rangeSize).ignoreResult(); - } else { - headers.set(kj::HttpHeaderId::CONTENT_LENGTH, kj::str(meta.size)); - auto out = response.send(200, "OK", headers, meta.size); + class ActorChannelImpl final: public IoChannelFactory::ActorChannel { + public: + ActorChannelImpl(kj::Own actorContainer) + : actorContainer(kj::mv(actorContainer)) {} + ~ActorChannelImpl() noexcept(false) { + actorContainer->updateAccessTime(); + } - auto in = kj::heap(*file); - co_return co_await in->pumpTo(*out, meta.size).ignoreResult(); - } - } - case kj::FsNode::Type::DIRECTORY: { - // Whoooops, we opened a directory. Back up and start over. 
+ kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { + return newPromisedWorkerInterface( + actorContainer->startRequest(kj::mv(metadata)).attach(actorContainer->addRef())); + } - auto dir = readable->openSubdir(path); + void requireAllowsTransfer() override { + actorContainer->requireTransferrableStub(); + } - kj::HttpHeaders headers(headerTable); - headers.set(kj::HttpHeaderId::CONTENT_TYPE, MimeType::JSON.toString()); - headers.set(hLastModified, httpTime(meta.lastModified)); + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + return actorContainer->getChannelToken(usage); + } - // We intentionally don't provide the expected size here in order to reserve the right - // to switch to streaming directory listing in the future. - auto out = response.send(200, "OK", headers); + private: + kj::Own actorContainer; + }; - if (method == kj::HttpMethod::HEAD) { - co_return; - } else { - auto entries = dir->listEntries(); - kj::Vector jsonEntries(entries.size()); - for (auto& entry: entries) { - if (!allowDotfiles && entry.name.startsWith(".")) { - continue; - } + // Implements actor loopback, which is used by websocket hibernation to deliver events to the + // actor from the websocket's read loop. 
+ class Loopback: public Worker::Actor::Loopback, public kj::Refcounted { + public: + Loopback(ActorContainer& actorContainer): actorContainer(actorContainer) {} - kj::StringPtr type = "other"; - switch (entry.type) { - case kj::FsNode::Type::FILE: - type = "file"; - break; - case kj::FsNode::Type::DIRECTORY: - type = "directory"; - break; - case kj::FsNode::Type::SYMLINK: - type = "symlink"; - break; - case kj::FsNode::Type::BLOCK_DEVICE: - type = "blockDevice"; - break; - case kj::FsNode::Type::CHARACTER_DEVICE: - type = "characterDevice"; - break; - case kj::FsNode::Type::NAMED_PIPE: - type = "namedPipe"; - break; - case kj::FsNode::Type::SOCKET: - type = "socket"; - break; - case kj::FsNode::Type::OTHER: - type = "other"; - break; - } + kj::Own getWorker(IoChannelFactory::SubrequestMetadata metadata) override { + return newPromisedWorkerInterface( + actorContainer.startRequest(kj::mv(metadata)).attach(actorContainer.addRef())); + } - jsonEntries.add( - kj::str("{\"name\":", escapeJsonString(entry.name), ",\"type\":\"", type, "\"}")); - }; + kj::Own addRef() override { + return kj::addRef(*this); + } - auto content = kj::str('[', kj::strArray(jsonEntries, ","), ']'); + private: + ActorContainer& actorContainer; + }; - co_return co_await out->write(content.asBytes()); - } - } - default: - co_return co_await response.sendError(406, "Not Acceptable", headerTable); + class ActorSqliteHooks final: public ActorSqlite::Hooks { + public: + ActorSqliteHooks(AlarmScheduler& alarmScheduler, ActorKey actor) + : alarmScheduler(alarmScheduler), + actor(actor) {} + + // We ignore the priorTask in workerd because everything should run synchronously. 
+ kj::Promise scheduleRun( + kj::Maybe newAlarmTime, kj::Promise priorTask) override { + KJ_IF_SOME(scheduledTime, newAlarmTime) { + alarmScheduler.setAlarm(actor, scheduledTime); + } else { + alarmScheduler.deleteAlarm(actor); } - } else if (method == kj::HttpMethod::PUT) { - auto& w = KJ_UNWRAP_OR(writable, - { co_return co_await response.sendError(405, "Method Not Allowed", headerTable); }); + return kj::READY_NOW; + } - if (blockedPath || path.size() == 0) { - co_return co_await response.sendError(403, "Unauthorized", headerTable); - } + private: + AlarmScheduler& alarmScheduler; + ActorKey actor; + }; +}; - auto replacer = w.replaceFile( - path, kj::WriteMode::CREATE | kj::WriteMode::MODIFY | kj::WriteMode::CREATE_PARENT); - auto stream = kj::heap(replacer->get()); +// ======================================================================================= - co_await requestBody.pumpTo(*stream); +kj::Own Server::makeTlsContext(config::TlsOptions::Reader conf) { + kj::TlsContext::Options options; - replacer->commit(); - kj::HttpHeaders headers(headerTable); - response.send(204, "No Content", headers); - co_return; - } else if (method == kj::HttpMethod::DELETE) { - auto& w = KJ_UNWRAP_OR(writable, - { co_return co_await response.sendError(405, "Method Not Allowed", headerTable); }); + struct Attachments { + kj::Maybe keypair; + kj::Array trustedCerts; + }; + auto attachments = kj::heap(); - if (blockedPath || path.size() == 0) { - co_return co_await response.sendError(403, "Unauthorized", headerTable); - } + if (conf.hasKeypair()) { + auto pairConf = conf.getKeypair(); + options.defaultKeypair = attachments->keypair.emplace( + kj::TlsKeypair{.privateKey = kj::TlsPrivateKey(pairConf.getPrivateKey()), + .certificate = kj::TlsCertificate(pairConf.getCertificateChain())}); + } - auto found = w.tryRemove(path); + options.verifyClients = conf.getRequireClientCerts(); + options.useSystemTrustStore = conf.getTrustBrowserCas(); - kj::HttpHeaders headers(headerTable); - if 
(found) { - response.send(204, "No Content", headers); - co_return; - } else { - co_return co_await response.sendError(404, "Not Found", headers); - } - } else { - co_return co_await response.sendError(501, "Not Implemented", headerTable); - } + auto trustList = conf.getTrustedCertificates(); + if (trustList.size() > 0) { + attachments->trustedCerts = KJ_MAP(cert, trustList) { return kj::TlsCertificate(cert); }; + options.trustedCertificates = attachments->trustedCerts; } - kj::Promise connect(kj::StringPtr host, - const kj::HttpHeaders& headers, - kj::AsyncIoStream& connection, - kj::HttpService::ConnectResponse& response, - kj::HttpConnectSettings settings) override { - throwUnsupported(); + switch (conf.getMinVersion()) { + case config::TlsOptions::Version::GOOD_DEFAULT: + // Don't change. + goto validVersion; + case config::TlsOptions::Version::SSL3: + options.minVersion = kj::TlsVersion::SSL_3; + goto validVersion; + case config::TlsOptions::Version::TLS1_DOT0: + options.minVersion = kj::TlsVersion::TLS_1_0; + goto validVersion; + case config::TlsOptions::Version::TLS1_DOT1: + options.minVersion = kj::TlsVersion::TLS_1_1; + goto validVersion; + case config::TlsOptions::Version::TLS1_DOT2: + options.minVersion = kj::TlsVersion::TLS_1_2; + goto validVersion; + case config::TlsOptions::Version::TLS1_DOT3: + options.minVersion = kj::TlsVersion::TLS_1_3; + goto validVersion; } - kj::Promise prewarm(kj::StringPtr url) override { - return kj::READY_NOW; + reportConfigError(kj::str("Encountered unknown TlsOptions::minVersion setting. 
Was the " + "config compiled with a newer version of the schema?")); + +validVersion: + if (conf.hasCipherList()) { + options.cipherList = conf.getCipherList(); } - kj::Promise runScheduled(kj::Date scheduledTime, kj::StringPtr cron) override { - throwUnsupported(); + + return kj::heap(kj::mv(options)); +} + +kj::Promise> Server::makeTlsNetworkAddress( + config::TlsOptions::Reader conf, + kj::StringPtr addrStr, + kj::Maybe certificateHost, + uint defaultPort) { + auto context = makeTlsContext(conf); + + KJ_IF_SOME(h, certificateHost) { + auto parsed = co_await network.parseAddress(addrStr, defaultPort); + co_return context->wrapAddress(kj::mv(parsed), h).attach(kj::mv(context)); } - kj::Promise runAlarm(kj::Date scheduledTime, uint32_t retryCount) override { - throwUnsupported(); + + // Wrap the `Network` itself so we can use the TLS implementation's `parseAddress()` to extract + // the authority from the address. + auto tlsNetwork = context->wrapNetwork(network); + auto parsed = co_await network.parseAddress(addrStr, defaultPort); + co_return parsed.attach(kj::mv(context)); +} + +// ======================================================================================= + +// Helper to apply config::HttpOptions. +class Server::HttpRewriter { + // TODO(beta): Do we want to automatically add `Date`, `Server` (to outgoing responses), + // `User-Agent` (to outgoing requests), etc.? 
+ + public: + HttpRewriter( + config::HttpOptions::Reader httpOptions, kj::HttpHeaderTable::Builder& headerTableBuilder) + : style(httpOptions.getStyle()), + requestInjector(httpOptions.getInjectRequestHeaders(), headerTableBuilder), + responseInjector(httpOptions.getInjectResponseHeaders(), headerTableBuilder) { + if (httpOptions.hasForwardedProtoHeader()) { + forwardedProtoHeader = headerTableBuilder.add(httpOptions.getForwardedProtoHeader()); + } + if (httpOptions.hasCfBlobHeader()) { + cfBlobHeader = headerTableBuilder.add(httpOptions.getCfBlobHeader()); + } + if (httpOptions.hasCapnpConnectHost()) { + capnpConnectHost = httpOptions.getCapnpConnectHost(); + } } - kj::Promise customEvent(kj::Own event) override { - return event->notSupported(); + + bool hasCfBlobHeader() { + return cfBlobHeader != kj::none; } - [[noreturn]] void throwUnsupported() { - JSG_FAIL_REQUIRE(Error, "Disk directory services don't support this event type."); + bool needsRewriteRequest() { + return style == config::HttpOptions::Style::HOST || hasCfBlobHeader() || + !requestInjector.empty(); } -}; -kj::Own Server::makeDiskDirectoryService(kj::StringPtr name, - config::DiskDirectory::Reader conf, - kj::HttpHeaderTable::Builder& headerTableBuilder) { - TRACE_EVENT("workerd", "Server::makeDiskDirectoryService()"); - kj::StringPtr pathStr = nullptr; - kj::String ownPathStr; + // Attach this to the promise returned by request(). 
+ struct Rewritten { + kj::Own headers; + kj::String ownUrl; + }; - KJ_IF_SOME(override, directoryOverrides.findEntry(name)) { - pathStr = ownPathStr = kj::mv(override.value); - directoryOverrides.erase(override); - } else if (conf.hasPath()) { - pathStr = conf.getPath(); - } else { - reportConfigError(kj::str("Directory \"", name, - "\" has no path in the config, so must be specified on the " - "command line with `--directory-path`.")); - return makeInvalidConfigService(); - } + Rewritten rewriteOutgoingRequest( + kj::StringPtr& url, const kj::HttpHeaders& headers, kj::Maybe cfBlobJson) { + Rewritten result{kj::heap(headers.cloneShallow()), nullptr}; - auto path = fs.getCurrentPath().evalNative(pathStr); + if (style == config::HttpOptions::Style::HOST) { + auto parsed = kj::Url::parse(url, kj::Url::HTTP_PROXY_REQUEST, + kj::Url::Options{.percentDecode = false, .allowEmpty = true}); + result.headers->set(kj::HttpHeaderId::HOST, kj::mv(parsed.host)); + KJ_IF_SOME(h, forwardedProtoHeader) { + result.headers->set(h, kj::mv(parsed.scheme)); + } + url = result.ownUrl = parsed.toString(kj::Url::HTTP_REQUEST); + } - if (conf.getWritable()) { - auto openDir = KJ_UNWRAP_OR(fs.getRoot().tryOpenSubdir(kj::mv(path), kj::WriteMode::MODIFY), { - reportConfigError(kj::str("Directory named \"", name, "\" not found: ", pathStr)); - return makeInvalidConfigService(); - }); + KJ_IF_SOME(h, cfBlobHeader) { + KJ_IF_SOME(b, cfBlobJson) { + result.headers->setPtr(h, b); + } else { + result.headers->unset(h); + } + } - return kj::refcounted(conf, kj::mv(openDir), headerTableBuilder); - } else { - auto openDir = KJ_UNWRAP_OR(fs.getRoot().tryOpenSubdir(kj::mv(path)), { - reportConfigError(kj::str("Directory named \"", name, "\" not found: ", pathStr)); - return makeInvalidConfigService(); - }); + requestInjector.apply(*result.headers); - return kj::refcounted(conf, kj::mv(openDir), headerTableBuilder); + return result; } -} -// 
======================================================================================= + kj::Maybe rewriteIncomingRequest(kj::StringPtr& url, + kj::StringPtr physicalProtocol, + const kj::HttpHeaders& headers, + kj::Maybe& cfBlobJson) { + Rewritten result{kj::heap(headers.cloneShallow()), nullptr}; -// This class exists to update the InspectorService's table of isolates when a config -// has multiple services. The InspectorService exists on the stack of its own thread and -// initializes state that is bound to the thread, e.g. a http server and an event loop. -// This class provides a small thread-safe interface to the InspectorService so : -// mappings can be added after the InspectorService has started. -// -// The Cloudflare devtools only show the first service in workerd configuration. This service -// is always contains a users code. However, in packaging user code wrangler may add -// additional services that also have code. If using Chrome devtools to inspect a workerd, -// instance all services are visible and can be debugged. 
-class Server::InspectorServiceIsolateRegistrar final { - public: - InspectorServiceIsolateRegistrar() {} - ~InspectorServiceIsolateRegistrar() noexcept(true); + if (style == config::HttpOptions::Style::HOST) { + auto parsed = kj::Url::parse( + url, kj::Url::HTTP_REQUEST, kj::Url::Options{.percentDecode = false, .allowEmpty = true}); + parsed.host = kj::str(KJ_UNWRAP_OR_RETURN(headers.get(kj::HttpHeaderId::HOST), kj::none)); - void registerIsolate(kj::StringPtr name, Worker::Isolate* isolate); + KJ_IF_SOME(h, forwardedProtoHeader) { + KJ_IF_SOME(s, headers.get(h)) { + parsed.scheme = kj::str(s); + result.headers->unset(h); + } + } - KJ_DISALLOW_COPY_AND_MOVE(InspectorServiceIsolateRegistrar); + if (parsed.scheme == nullptr) parsed.scheme = kj::str(physicalProtocol); - private: - void attach(const Server::InspectorService* anInspectorService) { - *inspectorService.lockExclusive() = anInspectorService; - } + url = result.ownUrl = parsed.toString(kj::Url::HTTP_PROXY_REQUEST); + } - void detach() { - *inspectorService.lockExclusive() = nullptr; - } + KJ_IF_SOME(h, cfBlobHeader) { + KJ_IF_SOME(b, headers.get(h)) { + cfBlobJson = kj::str(b); + result.headers->unset(h); + } + } - kj::MutexGuarded inspectorService; - friend class Server::InspectorService; -}; + requestInjector.apply(*result.headers); -// Implements the interface for the devtools inspector protocol. -// -// The InspectorService is created when workerd serve is called using the -i option -// to define the inspector socket. 
-class Server::InspectorService final: public kj::HttpService, public kj::HttpServerErrorHandler { - public: - InspectorService(kj::Own isolateThreadExecutor, - kj::Timer& timer, - kj::HttpHeaderTable::Builder& headerTableBuilder, - InspectorServiceIsolateRegistrar& registrar) - : isolateThreadExecutor(kj::mv(isolateThreadExecutor)), - timer(timer), - headerTable(headerTableBuilder.getFutureTable()), - server(timer, headerTable, *this, kj::HttpServerSettings{.errorHandler = *this}), - registrar(registrar) { - registrar.attach(this); + return result; } - ~InspectorService() { - KJ_IF_SOME(r, registrar) { - r.detach(); - } + bool needsRewriteResponse() { + return !responseInjector.empty(); } - void invalidateRegistrar() { - registrar = kj::none; + void rewriteResponse(kj::HttpHeaders& headers) { + responseInjector.apply(headers); } - kj::Promise handleApplicationError( - kj::Exception exception, kj::Maybe response) override { - if (exception.getType() == kj::Exception::Type::DISCONNECTED) { - // Don't send a response, just close connection. - co_return; - } - KJ_LOG(ERROR, kj::str("Uncaught exception: ", exception)); - KJ_IF_SOME(r, response) { - co_return co_await r.sendError(500, "Internal Server Error", headerTable); - } + kj::Maybe getCapnpConnectHost() { + return capnpConnectHost; } - kj::Promise request(kj::HttpMethod method, - kj::StringPtr url, - const kj::HttpHeaders& headers, - kj::AsyncInputStream& requestBody, - kj::HttpService::Response& response) override { - // The inspector protocol starts with the debug client sending ordinary HTTP GET requests - // to /json/version and then to /json or /json/list. These must respond with valid JSON - // documents that list the details of what isolates are available for inspection. Each - // isolate must be listed separately. In the advertisement for each isolate is a URL - // and a unique ID. The client will use the URL and ID to open a WebSocket request to - // actually connect the debug session. 
- kj::HttpHeaders responseHeaders(headerTable); - if (headers.isWebSocket()) { - KJ_IF_SOME(pos, url.findLast('/')) { - auto id = url.slice(pos + 1); + private: + config::HttpOptions::Style style; + kj::Maybe forwardedProtoHeader; + kj::Maybe cfBlobHeader; + kj::Maybe capnpConnectHost; - KJ_IF_SOME(isolate, isolates.find(id)) { - // If getting the strong ref doesn't work it means that the Worker::Isolate - // has already been cleaned up. We use a weak ref here in order to keep from - // having the Worker::Isolate itself having to know anything at all about the - // IsolateService and the registration process. So instead of having Isolate - // explicitly clean up after itself we lazily evaluate the weak ref and clean - // up when necessary. - KJ_IF_SOME(ref, isolate->tryAddStrongRef()) { - // When using --verbose, we'll output some logging to indicate when the - // inspector client is attached/detached. - KJ_LOG(INFO, kj::str("Inspector client attaching [", id, "]")); - auto webSocket = response.acceptWebSocket(responseHeaders); - kj::Duration timerOffset = 0 * kj::MILLISECONDS; - try { - co_return co_await ref->attachInspector( - isolateThreadExecutor->addRef(), timer, timerOffset, *webSocket); - } catch (...) { - auto exception = kj::getCaughtExceptionAsKj(); - if (exception.getType() == kj::Exception::Type::DISCONNECTED) { - // This likely just means that the inspector client was closed. - // Nothing to do here but move along. - KJ_LOG(INFO, "Inspector client detached"_kj); - co_return; - } else { - // If it's any other kind of error, propagate it! 
- kj::throwFatalException(kj::mv(exception)); - } + class HeaderInjector { + public: + HeaderInjector(capnp::List::Reader headers, + kj::HttpHeaderTable::Builder& headerTableBuilder) + : injectedHeaders(KJ_MAP(header, headers) { + InjectedHeader result; + result.id = headerTableBuilder.add(header.getName()); + if (header.hasValue()) { + result.value = kj::str(header.getValue()); } - } else { - // If we can't get a strong ref to the isolate here, it's been cleaned - // up. The only thing we're going to do is clean up here and act like - // nothing happened. - isolates.erase(id); - } - } - - KJ_LOG(INFO, kj::str("Unknown worker session [", id, "]")); - co_return co_await response.sendError(404, "Unknown worker session", responseHeaders); - } - - // No / in url!? That's weird - co_return co_await response.sendError(400, "Invalid request", responseHeaders); - } + return result; + }) {} - // If the request is not a WebSocket request, it must be a GET to fetch details - // about the implementation. 
- if (method != kj::HttpMethod::GET) { - co_return co_await response.sendError(501, "Unsupported Operation", responseHeaders); + bool empty() { + return injectedHeaders.size() == 0; } - if (url.endsWith("/json/version")) { - responseHeaders.set(kj::HttpHeaderId::CONTENT_TYPE, MimeType::JSON.toString()); - auto content = kj::str("{\"Browser\": \"workerd\", \"Protocol-Version\": \"1.3\" }"); - auto out = response.send(200, "OK", responseHeaders, content.size()); - co_return co_await out->write(content.asBytes()); - } else if (url.endsWith("/json") || url.endsWith("/json/list") || - url.endsWith("/json/list?for_tab")) { - responseHeaders.set(kj::HttpHeaderId::CONTENT_TYPE, MimeType::JSON.toString()); - - auto baseWsUrl = KJ_UNWRAP_OR(headers.get(kj::HttpHeaderId::HOST), - { co_return co_await response.sendError(400, "Bad Request", responseHeaders); }); - - kj::Vector entries(isolates.size()); - kj::Vector toRemove; - for (auto& entry: isolates) { - // While we don't actually use the strong ref here we still attempt to acquire it - // in order to determine if the isolate is actually still around. If the isolate - // has been destroyed the weak ref will be cleared. We do it this way to keep from - // having the Worker::Isolate know anything at all about the InspectorService. - // We'll lazily clean up whenever we detect that the ref has been invalidated. - // - // TODO(cleanup): If we ever enable reloading of isolates for live services, we may - // want to refactor this such that the WorkerService holds a handle to the registration - // as opposed to using this lazy cleanup mechanism. For now, however, this is - // sufficient. 
- KJ_IF_SOME(ref, entry.value->tryAddStrongRef()) { - (void)ref; // squash compiler warning about unused ref - kj::Vector fields(9); - fields.add(kj::str("\"id\":\"", entry.key, "\"")); - fields.add(kj::str("\"title\":\"workerd: worker ", entry.key, "\"")); - fields.add(kj::str("\"type\":\"node\"")); - fields.add(kj::str("\"description\":\"workerd worker\"")); - fields.add(kj::str("\"webSocketDebuggerUrl\":\"ws://", baseWsUrl, "/", entry.key, "\"")); - fields.add(kj::str( - "\"devtoolsFrontendUrl\":\"devtools://devtools/bundled/js_app.html?experiments=true&v8only=true&ws=", - baseWsUrl, "/\"")); - fields.add(kj::str( - "\"devtoolsFrontendUrlCompat\":\"devtools://devtools/bundled/inspector.html?experiments=true&v8only=true&ws=", - baseWsUrl, "/\"")); - fields.add(kj::str("\"faviconUrl\":\"https://workers.cloudflare.com/favicon.ico\"")); - fields.add(kj::str("\"url\":\"https://workers.dev\"")); - entries.add(kj::str('{', kj::strArray(fields, ","), '}')); + void apply(kj::HttpHeaders& headers) { + for (auto& header: injectedHeaders) { + KJ_IF_SOME(v, header.value) { + headers.setPtr(header.id, v); } else { - // If we're not able to get a reference to the isolate here, it's - // been cleaned up and we should remove it from the list. We do this - // after iterating to make sure we don't invalidate the iterator. 
- toRemove.add(kj::str(entry.key)); + headers.unset(header.id); } } - // Clean up if necessary - for (auto& key: toRemove) { - isolates.erase(key); - } - - auto content = kj::str('[', kj::strArray(entries, ","), ']'); - - auto out = response.send(200, "OK", responseHeaders, content.size()); - co_return co_await out->write(content.asBytes()).attach(kj::mv(content), kj::mv(out)); } - co_return co_await response.sendError(500, "Not yet implemented", responseHeaders); - } + private: + struct InjectedHeader { + kj::HttpHeaderId id; + kj::Maybe value; + }; + kj::Array injectedHeaders; + }; - kj::Promise listen(kj::Own listener) { - // Note that we intentionally do not make inspector connections be part of the usual drain() - // procedure. Inspector connections are always long-lived WebSockets, and we do not want the - // existence of such a connection to hold the server open. We do, however, want the connection - // to stay open until all other requests are drained, for debugging purposes. - // - // Thus: - // * We let connection loop tasks live on `HttpServer`'s own `TaskSet`, rather than our - // server's main `TaskSet` which we wait to become empty on drain. - // * We do not add this `HttpServer` to the server's `httpServers` list, so it will not receive - // drain() requests. (However, our caller does cancel listening on the server port as soon - // as we begin draining, since we may want new connections to go to a new instance of the - // server.) 
- co_return co_await server.listenHttp(*listener); - } + HeaderInjector requestInjector; + HeaderInjector responseInjector; +}; - void registerIsolate(kj::StringPtr name, Worker::Isolate* isolate) { - isolates.insert(kj::str(name), isolate->getWeakRef()); - } +// ======================================================================================= - private: - kj::Own isolateThreadExecutor; - kj::Timer& timer; - kj::HttpHeaderTable& headerTable; - kj::HashMap> isolates; - kj::HttpServer server; - kj::Maybe registrar; -}; +// Service used when the service's config is invalid. +class Server::InvalidConfigService final: public Service { + public: + kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { + JSG_FAIL_REQUIRE(Error, "Service cannot handle requests because its config is invalid."); + } -Server::InspectorServiceIsolateRegistrar::~InspectorServiceIsolateRegistrar() noexcept(true) { - auto lockedInspectorService = this->inspectorService.lockExclusive(); - if (lockedInspectorService != nullptr) { - auto is = const_cast(*lockedInspectorService); - is->invalidateRegistrar(); + bool hasHandler(kj::StringPtr handlerName) override { + return false; } -} -void Server::InspectorServiceIsolateRegistrar::registerIsolate( - kj::StringPtr name, Worker::Isolate* isolate) { - auto lockedInspectorService = this->inspectorService.lockExclusive(); - if (lockedInspectorService != nullptr) { - auto is = const_cast(*lockedInspectorService); - is->registerIsolate(name, isolate); + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + // Can't get here because workerd would have failed to start. 
+ KJ_UNREACHABLE; } -} +}; -// ======================================================================================= -namespace { -class RequestObserverWithTracer final: public RequestObserver, public WorkerInterface { +class Server::InvalidConfigActorClass final: public ActorClass { public: - RequestObserverWithTracer(kj::Maybe> tracer, kj::TaskSet& waitUntilTasks) - : tracer(kj::mv(tracer)) {} - - ~RequestObserverWithTracer() noexcept(false) { - KJ_IF_SOME(t, tracer) { - // for a more precise end time, set the end timestamp now, if available - KJ_IF_SOME(ioContext, IoContext::tryCurrent()) { - auto time = ioContext.now(); - t->recordTimestamp(time); - } - t->setOutcome( - outcome, 0 * kj::MILLISECONDS /* cpu time */, 0 * kj::MILLISECONDS /* wall time */); - } + void requireAllowsTransfer() override { + // Can't get here because workerd would have failed to start. + KJ_UNREACHABLE; } - - WorkerInterface& wrapWorkerInterface(WorkerInterface& worker) override { - if (tracer != kj::none) { - inner = worker; - return *this; - } - return worker; + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + // Can't get here because workerd would have failed to start. 
+ KJ_UNREACHABLE; } - void reportFailure( - const kj::Exception& exception, FailureSource source = FailureSource::OTHER) override { - outcome = RequestObserver::outcomeFromException(exception, source); + kj::Own newActor(kj::Maybe tracker, + Worker::Actor::Id actorId, + Worker::Actor::MakeActorCacheFunc makeActorCache, + Worker::Actor::MakeStorageFunc makeStorage, + kj::Own loopback, + kj::Maybe> manager, + kj::Maybe container, + kj::Maybe facetManager) override { + JSG_FAIL_REQUIRE( + Error, "Cannot instantiate Durable Object class because its config is invalid."); } - void setOutcome(EventOutcome newOutcome) override { - outcome = newOutcome; + kj::Own startRequest( + IoChannelFactory::SubrequestMetadata metadata, kj::Own actor) override { + // Can't get here because creating the actor would have required calling the other method. + KJ_UNREACHABLE; } +}; - // WorkerInterface - kj::Promise request(kj::HttpMethod method, - kj::StringPtr url, - const kj::HttpHeaders& headers, - kj::AsyncInputStream& requestBody, - kj::HttpService::Response& response) override { - try { - SimpleResponseObserver responseWrapper(&fetchStatus, response); - co_await KJ_ASSERT_NONNULL(inner).request(method, url, headers, requestBody, responseWrapper); - } catch (...) { - auto exception = kj::getCaughtExceptionAsKj(); - // Overloaded-type exceptions generally represent some resource exhaustion (i.e. not - // necessarily an internal error) and correspond to HTTP error 503. - if (exception.getType() == kj::Exception::Type::OVERLOADED) { - fetchStatus = 503; - } else { - fetchStatus = 500; - } - reportFailure(exception); - kj::throwFatalException(kj::mv(exception)); - } - } - - kj::Promise connect(kj::StringPtr host, - const kj::HttpHeaders& headers, - kj::AsyncIoStream& connection, - ConnectResponse& response, - kj::HttpConnectSettings settings) override { - try { - co_return co_await KJ_ASSERT_NONNULL(inner).connect( - host, headers, connection, response, settings); - } catch (...) 
{ - auto exception = kj::getCaughtExceptionAsKj(); - reportFailure(exception); - kj::throwFatalException(kj::mv(exception)); - } - } +// Return a fake Own pointing to the singleton. +kj::Own Server::makeInvalidConfigService() { + return {invalidConfigServiceSingleton.get(), kj::NullDisposer::instance}; +} - kj::Promise prewarm(kj::StringPtr url) override { - try { - co_return co_await KJ_ASSERT_NONNULL(inner).prewarm(url); - } catch (...) { - auto exception = kj::getCaughtExceptionAsKj(); - reportFailure(exception); - kj::throwFatalException(kj::mv(exception)); - } - } +// A NetworkAddress whose connect() method waits for a Promise and then forwards +// to it. Used by ExternalHttpService so that we don't have to wait for DNS lookup before the +// server can start. +class PromisedNetworkAddress final: public kj::NetworkAddress { + // TODO(cleanup): kj::Network should be extended with a new version of parseAddress() which does + // not do DNS lookup immediately, and therefore can return a NetworkAddress synchronously. + // In fact, this version should be designed to redo the DNS lookup periodically to see if it + // changed, which would be nice for workerd when the remote address may change over time. + public: + PromisedNetworkAddress(kj::Promise> promise) + : promise(promise.then([this](kj::Own result) { addr = kj::mv(result); }) + .fork()) {} - kj::Promise runScheduled(kj::Date scheduledTime, kj::StringPtr cron) override { - try { - co_return co_await KJ_ASSERT_NONNULL(inner).runScheduled(scheduledTime, cron); - } catch (...) 
{ - auto exception = kj::getCaughtExceptionAsKj(); - reportFailure(exception); - kj::throwFatalException(kj::mv(exception)); + kj::Promise> connect() override { + KJ_IF_SOME(a, addr) { + co_return co_await a.get()->connect(); + } else { + co_await promise; + co_return co_await KJ_ASSERT_NONNULL(addr)->connect(); } } - kj::Promise runAlarm(kj::Date scheduledTime, uint32_t retryCount) override { - try { - co_return co_await KJ_ASSERT_NONNULL(inner).runAlarm(scheduledTime, retryCount); - } catch (...) { - auto exception = kj::getCaughtExceptionAsKj(); - reportFailure(exception); - kj::throwFatalException(kj::mv(exception)); + kj::Promise connectAuthenticated() override { + KJ_IF_SOME(a, addr) { + co_return co_await a.get()->connectAuthenticated(); + } else { + co_await promise; + co_return co_await KJ_ASSERT_NONNULL(addr)->connectAuthenticated(); } } - kj::Promise test() override { - try { - co_return co_await KJ_ASSERT_NONNULL(inner).test(); - } catch (...) { - auto exception = kj::getCaughtExceptionAsKj(); - reportFailure(exception); - kj::throwFatalException(kj::mv(exception)); - } + // We don't use any other methods, and they seem kinda annoying to implement. + kj::Own listen() override { + KJ_UNIMPLEMENTED("PromisedNetworkAddress::listen() not implemented"); } - - kj::Promise customEvent(kj::Own event) override { - try { - co_return co_await KJ_ASSERT_NONNULL(inner).customEvent(kj::mv(event)); - } catch (...) 
{ - auto exception = kj::getCaughtExceptionAsKj(); - reportFailure(exception); - kj::throwFatalException(kj::mv(exception)); - } + kj::Own clone() override { + KJ_UNIMPLEMENTED("PromisedNetworkAddress::clone() not implemented"); } - - kj::Promise> abandonAlarm(kj::Date scheduledTime) override { - co_return co_await KJ_ASSERT_NONNULL(inner).abandonAlarm(scheduledTime); + kj::String toString() override { + KJ_UNIMPLEMENTED("PromisedNetworkAddress::toString() not implemented"); } private: - kj::Maybe> tracer; - kj::Maybe inner; - EventOutcome outcome = EventOutcome::OK; - kj::uint fetchStatus = 0; + kj::ForkedPromise promise; + kj::Maybe> addr; }; -class SequentialSpanSubmitter final: public SpanSubmitter { +class Server::ExternalTcpService final: public Service, private WorkerInterface { public: - SequentialSpanSubmitter(kj::Own weakTracer, kj::EntropySource& entropySource) - : weakTracer(kj::mv(weakTracer)), - entropySource(entropySource) {} - void submitSpanClose( - tracing::SpanId spanId, kj::Date startTime, kj::Date endTime, Span::TagMap&& tags) override { - weakTracer->runIfAlive([&](BaseTracer& tracer) { - tracing::SpanEndData spanEnd(spanId, endTime, kj::mv(tags)); - if (isPredictableModeForTest()) { - startTime = spanEnd.endTime = kj::UNIX_EPOCH; - } + ExternalTcpService(kj::Own addrParam): addr(kj::mv(addrParam)) {} - tracer.addSpanClose(kj::mv(spanEnd), startTime); - }); + kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { + return {this, kj::NullDisposer::instance}; } - bool submitSpanOpen(tracing::SpanId spanId, - tracing::SpanId parentSpanId, - kj::ConstString operationName, - kj::Date startTime) override { - bool submitted = false; - weakTracer->runIfAlive([&](BaseTracer& tracer) { - if (isPredictableModeForTest()) { - startTime = kj::UNIX_EPOCH; - } - tracer.addSpanOpen(spanId, parentSpanId, kj::mv(operationName), startTime); - submitted = true; - }); - return submitted; + bool hasHandler(kj::StringPtr handlerName) override 
{ + return handlerName == "fetch"_kj || handlerName == "connect"_kj; } - tracing::SpanId makeSpanId() override { - if (isPredictableModeForTest()) { - return tracing::SpanId(nextSpanId++); - } - return tracing::SpanId::fromEntropy(entropySource); + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + JSG_FAIL_REQUIRE(DOMDataCloneError, "ExternalService can't be passed over RPC."); } - KJ_DISALLOW_COPY_AND_MOVE(SequentialSpanSubmitter); private: - uint64_t nextSpanId = 1; - kj::Own weakTracer; - kj::EntropySource& entropySource; -}; + kj::Own addr; -// IsolateLimitEnforcer that enforces no limits. -class NullIsolateLimitEnforcer final: public IsolateLimitEnforcer { - public: - v8::Isolate::CreateParams getCreateParams() override { - return {}; + kj::Promise request(kj::HttpMethod method, + kj::StringPtr url, + const kj::HttpHeaders& headers, + kj::AsyncInputStream& requestBody, + kj::HttpService::Response& response) override { + throwUnsupported(); } - void customizeIsolate(v8::Isolate* isolate) override {} - - ActorCacheSharedLruOptions getActorCacheLruOptions() override { - // TODO(someday): Make this configurable? - return {.softLimit = 16 * (1ull << 20), // 16 MiB - .hardLimit = 128 * (1ull << 20), // 128 MiB - .staleTimeout = 30 * kj::SECONDS, - .dirtyListByteLimit = 8 * (1ull << 20), // 8 MiB - .maxKeysPerRpc = 128, + kj::Promise connect(kj::StringPtr host, + const kj::HttpHeaders& headers, + kj::AsyncIoStream& connection, + ConnectResponse& tunnel, + kj::HttpConnectSettings settings) override { + TRACE_EVENT("workerd", "ExternalTcpService::connect()", "host", host.cStr()); + auto io_stream = co_await addr->connect(); - // For now, we use `neverFlush` to implement in-memory-only actors. - // See WorkerService::getActor(). 
- .neverFlush = true}; - } + auto promises = kj::heapArrayBuilder>(2); - kj::Own enterStartupJs( - jsg::Lock& lock, kj::OneOf&) const override { - return {}; - } + promises.add(connection.pumpTo(*io_stream).then([&io_stream = *io_stream](uint64_t size) { + io_stream.shutdownWrite(); + })); - kj::Own enterStartupPython( - jsg::Lock& lock, kj::OneOf&) const override { - return {}; - } + promises.add(io_stream->pumpTo(connection).then([&connection](uint64_t size) { + connection.shutdownWrite(); + })); - kj::Own enterDynamicImportJs( - jsg::Lock& lock, kj::OneOf&) const override { - return {}; - } + tunnel.accept(200, "OK", kj::HttpHeaders(kj::HttpHeaderTable{})); - kj::Own enterLoggingJs( - jsg::Lock& lock, kj::OneOf&) const override { - return {}; + co_await kj::joinPromisesFailFast(promises.finish()).attach(kj::mv(io_stream)); } - kj::Own enterInspectorJs( - jsg::Lock& loc, kj::OneOf&) const override { - return {}; + kj::Promise prewarm(kj::StringPtr url) override { + return kj::READY_NOW; } - - void completedRequest(kj::StringPtr id) const override {} - - bool exitJs(jsg::Lock& lock) const override { - return false; + kj::Promise runScheduled(kj::Date scheduledTime, kj::StringPtr cron) override { + throwUnsupported(); } - - void reportMetrics(IsolateObserver& isolateMetrics) const override {} - - kj::Maybe checkPbkdfIterations(jsg::Lock& lock, size_t iterations) const override { - // No limit on the number of iterations in workerd - return kj::none; + kj::Promise runAlarm(kj::Date scheduledTime, uint32_t retryCount) override { + throwUnsupported(); } - - bool hasExcessivelyExceededHeapLimit() const override { - return false; + kj::Promise customEvent(kj::Own event) override { + return event->notSupported(); } - const TrackedWasmInstanceList& getTrackedWasmInstances() const override { - return trackedWasmInstances; + [[noreturn]] void throwUnsupported() { + JSG_FAIL_REQUIRE(Error, "External TCP servers don't support this event type."); } - - private: - 
TrackedWasmInstanceList trackedWasmInstances; }; -} // namespace - -// Shared ErrorReporter base implemnetation. The logic to collect entrypoint information is the -// same regardless of where the code came from. -struct Server::ErrorReporter: public Worker::ValidationErrorReporter { - // The `HashSet`s are the set of exported handlers, like `fetch`, `test`, etc. - kj::HashMap> namedEntrypoints; - kj::Maybe> defaultEntrypoint; - kj::HashSet actorClasses; - kj::HashSet workflowClasses; +// Service used when the service is configured as external HTTP service. +class Server::ExternalHttpService final: public Service { + public: + ExternalHttpService(kj::Own addrParam, + kj::Own rewriter, + kj::HttpHeaderTable& headerTable, + kj::Timer& timer, + kj::EntropySource& entropySource, + capnp::ByteStreamFactory& byteStreamFactory, + capnp::HttpOverCapnpFactory& httpOverCapnpFactory) + : addr(kj::mv(addrParam)), + webSocketErrorHandler(kj::heap()), + inner(kj::newHttpClient(timer, + headerTable, + *addr, + {.entropySource = entropySource, + .webSocketCompressionMode = kj::HttpClientSettings::MANUAL_COMPRESSION, + .webSocketErrorHandler = *webSocketErrorHandler})), + serviceAdapter(kj::newHttpService(*inner)), + rewriter(kj::mv(rewriter)), + headerTable(headerTable), + byteStreamFactory(byteStreamFactory), + httpOverCapnpFactory(httpOverCapnpFactory) {} - void addEntrypoint(kj::Maybe exportName, kj::Array methods) override { - kj::HashSet set; - for (auto& method: methods) { - set.insert(kj::mv(method)); - } - KJ_IF_SOME(e, exportName) { - namedEntrypoints.insert(kj::str(e), kj::mv(set)); - } else { - defaultEntrypoint = kj::mv(set); - } + kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { + return kj::heap(*this, kj::mv(metadata)); } - void addActorClass(kj::StringPtr exportName) override { - actorClasses.insert(kj::str(exportName)); + bool hasHandler(kj::StringPtr handlerName) override { + return handlerName == "fetch"_kj || handlerName == 
"connect"_kj; } - void addWorkflowClass(kj::StringPtr exportName, kj::Array methods) override { - // At runtime, we need to add it into the normal namedEntrypoints for Workflows to appear - // in `WorkerService`. This is a different method compared to `addEntrypoint` because we need to - // check for `WorkflowEntrypoint` inheritance at validation time. - kj::HashSet set; - for (auto& method: methods) { - set.insert(kj::mv(method)); - } - namedEntrypoints.insert(kj::str(exportName), kj::mv(set)); - workflowClasses.insert(kj::str(exportName)); + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + JSG_FAIL_REQUIRE(DOMDataCloneError, "ExternalService can't be passed over RPC."); } -}; - -// Implementation of ErrorReporter specifically for reporting errors in the top-level workerd -// config. -struct Server::ConfigErrorReporter final: public ErrorReporter { - ConfigErrorReporter(Server& server, kj::StringPtr name): server(server), name(name) {} - - Server& server; - kj::StringPtr name; - void addError(kj::String error) override { - server.handleReportConfigError(kj::str("service ", name, ": ", error)); - } -}; + private: + kj::Own addr; -// Implementation of ErrorReporter for dynamically-loaded Workers. We'll collect the errors and -// report them in an exception at the end. 
-struct Server::DynamicErrorReporter final: public ErrorReporter { - kj::Vector errors; + kj::Own webSocketErrorHandler; + kj::Own inner; + kj::Own serviceAdapter; - void addError(kj::String error) override { - errors.add(kj::mv(error)); - } + kj::Own rewriter; - void throwIfErrors() { - if (!errors.empty()) { - JSG_FAIL_REQUIRE(Error, "Failed to start Worker:\n", kj::strArray(errors, "\n")); - } - } -}; + kj::HttpHeaderTable& headerTable; + capnp::ByteStreamFactory& byteStreamFactory; + capnp::HttpOverCapnpFactory& httpOverCapnpFactory; -class Server::WorkerService final: public Service, - private kj::TaskSet::ErrorHandler, - private IoChannelFactory, - private TimerChannel, - private LimitEnforcer { - public: - class ActorNamespace; + struct CapnpClient { + kj::Own connection; + capnp::TwoPartyClient rpcSystem; - // I/O channels, delivered when link() is called. - struct LinkedIoChannels { - kj::Array> subrequest; - kj::Array> actor; // null = configuration error - kj::Array> actorClass; - kj::Maybe> cache; - kj::Maybe actorStorage; - kj::Array> tails; - kj::Array> streamingTails; - kj::Array> workerLoaders; - kj::Maybe workerdDebugPortNetwork; + CapnpClient(kj::Own connectionParam) + : connection(kj::mv(connectionParam)), + rpcSystem(*connection) {} }; - using LinkCallback = - kj::Function; - using AbortActorsCallback = kj::Function reason)>; - using DeleteActorsCallback = kj::Function reason)>; - WorkerService(ChannelTokenHandler& channelTokenHandler, - kj::Maybe serviceName, - ThreadContext& threadContext, - const kj::MonotonicClock& monotonicClock, - kj::Own worker, - kj::Maybe> defaultEntrypointHandlers, - kj::HashMap> namedEntrypoints, - kj::HashSet actorClassEntrypointsParam, - LinkCallback linkCallback, - AbortActorsCallback abortActorsCallback, - DeleteActorsCallback deleteActorsCallback, - kj::Maybe dockerPathParam, - kj::Maybe containerEgressInterceptorImageParam, - bool isDynamic, - kj::Maybe> abortIsolateCallback = kj::none) - : 
channelTokenHandler(channelTokenHandler), - serviceName(serviceName), - threadContext(threadContext), - monotonicClock(monotonicClock), - ioChannels(kj::mv(linkCallback)), - worker(kj::mv(worker)), - defaultEntrypointHandlers(kj::mv(defaultEntrypointHandlers)), - namedEntrypoints(kj::mv(namedEntrypoints)), - actorClassEntrypoints(kj::mv(actorClassEntrypointsParam)), - waitUntilTasks(*this), - abortActorsCallback(kj::mv(abortActorsCallback)), - deleteActorsCallback(kj::mv(deleteActorsCallback)), - dockerPath(kj::mv(dockerPathParam)), - containerEgressInterceptorImage(kj::mv(containerEgressInterceptorImageParam)), - isDynamic(isDynamic), - abortIsolateCallback(kj::mv(abortIsolateCallback)) {} + // capnpClient is created on-demand when RPC is needed. + kj::Maybe capnpClient; - // Call immediately after the constructor to set up `actorNamespaces`. This can't happen during - // the constructor itself since it sets up cyclic references, which will throw an exception if - // done during the constructor. - void initActorNamespaces( - const kj::HashMap& actorClasses, kj::Network& network) { - actorNamespaces.reserve(actorClasses.size()); - for (auto& entry: actorClasses) { - if (!actorClassEntrypoints.contains(entry.key)) { - KJ_LOG(WARNING, - kj::str("A DurableObjectNamespace in the config referenced the class \"", entry.key, - "\", but no such Durable Object class is exported from the worker. Please make " - "sure the class name matches, it is exported, and the class extends " - "'DurableObject'. Attempts to call to this Durable Object class will fail at " - "runtime, but historically this was not a startup-time error. Future versions of " - "workerd may make this a startup-time error.")); - } + // This task nulls out `capnpClient` when the connection is lost. 
+ kj::Promise clearCapnpClientTask = nullptr; - auto actorClass = kj::refcounted(*this, entry.key, Frankenvalue()); - auto ns = kj::heap(kj::mv(actorClass), entry.value, - kj::systemPreciseCalendarClock(), threadContext.getUnsafeTimer(), - threadContext.getByteStreamFactory(), channelTokenHandler, network, dockerPath, - containerEgressInterceptorImage, waitUntilTasks); - actorNamespaces.insert(entry.key, kj::mv(ns)); + // Get an WorkerdBootstrap representing the service on the other end of an HTTP connection. May + // reuse an existing connection, or form a new one over `client`. + rpc::WorkerdBootstrap::Client getOutgoingCapnp(kj::HttpClient& client) { + KJ_IF_SOME(c, capnpClient) { + return c.rpcSystem.bootstrap().castAs(); } - } - void requireAllowsTransfer() override { - if (isDynamic) throwDynamicEntrypointTransferError(); - } + // No existing client, need to create a new one. + kj::StringPtr host = KJ_UNWRAP_OR(rewriter->getCapnpConnectHost(), + { return JSG_KJ_EXCEPTION(FAILED, Error, "This ExternalServer not configured for RPC."); }); - kj::OneOf, kj::Promise>> getTokenMaybeSync( - IoChannelFactory::ChannelTokenUsage usage) override { - requireAllowsTransfer(); + auto req = client.connect(host, kj::HttpHeaders(headerTable), {}); + auto& c = capnpClient.emplace(kj::mv(req.connection)); - // encodeSubrequestChannelToken wants a reference to the props. It needs this reference to - // be non-const because it might refcount things. But if it's an empty object then there's - // nothing to refcount. So we can just declare this statically... - static Frankenvalue EMPTY_PROPS; + // Arrange that when the connection is lost, we'll null out `capnpClient`. This ensures that + // on the next event, we'll attempt to reconnect. + // + // TODO(perf): Time out idle connections? 
+ clearCapnpClientTask = + c.rpcSystem.onDisconnect().attach(kj::defer([this]() { + capnpClient = kj::none; + })).eagerlyEvaluate(nullptr); - // If requireAllowsTransfer() passed, then we are not dynamic so should have a service name. - return channelTokenHandler.encodeSubrequestChannelToken( - usage, KJ_ASSERT_NONNULL(serviceName), kj::none, EMPTY_PROPS); + return c.rpcSystem.bootstrap().castAs(); } - kj::Maybe> getEntrypoint(kj::Maybe name, Frankenvalue props) { - const kj::HashSet* handlers; - KJ_IF_SOME(n, name) { - KJ_IF_SOME(entry, namedEntrypoints.findEntry(n)) { - name = entry.key; // replace with more-permanent string - handlers = &entry.value; - } else KJ_IF_SOME(className, actorClassEntrypoints.find(n)) { - // TODO(soon): Restore this warning once miniflare no longer generates config that causes - // it to log spuriously. - // - // KJ_LOG(WARNING, - // kj::str("A ServiceDesignator in the config referenced the entrypoint \"", n, - // "\", but this class does not extend 'WorkerEntrypoint'. Attempts to call this " - // "entrypoint will fail at runtime, but historically this was not a startup-time " - // "error. 
Future versions of workerd may make this a startup-time error.")); + class WorkerInterfaceImpl final: public WorkerInterface, private kj::HttpService::Response { + public: + WorkerInterfaceImpl(ExternalHttpService& parent, IoChannelFactory::SubrequestMetadata metadata) + : parent(kj::addRef(parent)), + metadata(kj::mv(metadata)) {} - static const kj::HashSet EMPTY_HANDLERS; - name = className; // replace with more-permanent string - handlers = &EMPTY_HANDLERS; - } else { - return kj::none; - } - } else { - KJ_IF_SOME(d, defaultEntrypointHandlers) { - handlers = &d; + kj::Promise request(kj::HttpMethod method, + kj::StringPtr url, + const kj::HttpHeaders& headers, + kj::AsyncInputStream& requestBody, + kj::HttpService::Response& response) override { + TRACE_EVENT("workerd", "ExternalHttpServer::request()"); + KJ_REQUIRE(wrappedResponse == kj::none, "object should only receive one request"); + wrappedResponse = response; + if (parent->rewriter->needsRewriteRequest()) { + auto rewrite = parent->rewriter->rewriteOutgoingRequest(url, headers, metadata.cfBlobJson); + return parent->serviceAdapter->request(method, url, *rewrite.headers, requestBody, *this) + .attach(kj::mv(rewrite)); } else { - // It would appear that there is no default export, therefore this refers to an entrypoint - // that doesn't exist! However, this was historically allowed. For backwards-compatibility, - // we preserve this behavior, by returning a reference to the WorkerService itself, whose - // startRequest() will fail. - // - // What will happen if you invoke this entrypoint? Not what you think. Check out the - // test case in server-test.c++ entitled "referencing non-extant default entrypoint is not - // an error" for the sordid details. 
- return kj::addRef(*this); + return parent->serviceAdapter->request(method, url, headers, requestBody, *this); } } - return kj::refcounted(*this, name, kj::mv(props), *handlers); - } - // Like getEntrypoint() but used specifically to get the entrypoint for use in ctx.exports, - // where it can be used raw (props are empty), or can be specialized with props. - kj::Own getLoopbackEntrypoint(kj::Maybe name) { - const kj::HashSet* handlers; - KJ_IF_SOME(n, name) { - KJ_IF_SOME(entry, namedEntrypoints.findEntry(n)) { - name = entry.key; // replace with more-permanent string - handlers = &entry.value; + kj::Promise connect(kj::StringPtr host, + const kj::HttpHeaders& headers, + kj::AsyncIoStream& connection, + ConnectResponse& tunnel, + kj::HttpConnectSettings settings) override { + TRACE_EVENT("workerd", "ExternalHttpServer::connect()"); + return parent->serviceAdapter->connect(host, headers, connection, tunnel, kj::mv(settings)); + } + + kj::Promise prewarm(kj::StringPtr url) override { + return kj::READY_NOW; + } + kj::Promise runScheduled(kj::Date scheduledTime, kj::StringPtr cron) override { + throwUnsupported(); + } + kj::Promise runAlarm(kj::Date scheduledTime, uint32_t retryCount) override { + throwUnsupported(); + } + + kj::Promise customEvent(kj::Own event) override { + // We'll use capnp RPC for custom events. 
+ auto bootstrap = parent->getOutgoingCapnp(*parent->inner); + auto dispatcher = + bootstrap.startEventRequest(capnp::MessageSize{4, 0}).send().getDispatcher(); + return event + ->sendRpc(parent->httpOverCapnpFactory, parent->byteStreamFactory, kj::mv(dispatcher)) + .attach(kj::mv(event)); + } + + private: + kj::Own parent; + IoChannelFactory::SubrequestMetadata metadata; + kj::Maybe wrappedResponse; + + [[noreturn]] void throwUnsupported() { + JSG_FAIL_REQUIRE(Error, "External HTTP servers don't support this event type."); + } + + kj::Own send(uint statusCode, + kj::StringPtr statusText, + const kj::HttpHeaders& headers, + kj::Maybe expectedBodySize) override { + TRACE_EVENT("workerd", "ExternalHttpService::send()", "status", statusCode); + auto& response = KJ_ASSERT_NONNULL(wrappedResponse); + if (parent->rewriter->needsRewriteResponse()) { + auto rewrite = headers.cloneShallow(); + parent->rewriter->rewriteResponse(rewrite); + return response.send(statusCode, statusText, rewrite, expectedBodySize); } else { - KJ_FAIL_REQUIRE("getLoopbackEntrypoint() called for entrypoint that doesn't exist"); + return response.send(statusCode, statusText, headers, expectedBodySize); } - } else { - KJ_IF_SOME(d, defaultEntrypointHandlers) { - handlers = &d; + } + + kj::Own acceptWebSocket(const kj::HttpHeaders& headers) override { + TRACE_EVENT("workerd", "ExternalHttpService::acceptWebSocket()"); + auto& response = KJ_ASSERT_NONNULL(wrappedResponse); + if (parent->rewriter->needsRewriteResponse()) { + auto rewrite = headers.cloneShallow(); + parent->rewriter->rewriteResponse(rewrite); + return response.acceptWebSocket(rewrite); } else { - KJ_FAIL_REQUIRE("getLoopbackEntrypoint() called for entrypoint that doesn't exist"); + return response.acceptWebSocket(headers); } } - return kj::refcounted(*this, name, kj::none, *handlers); + }; +}; + +kj::Own Server::makeExternalService(kj::StringPtr name, + config::ExternalServer::Reader conf, + kj::HttpHeaderTable::Builder& 
headerTableBuilder) { + TRACE_EVENT("workerd", "Server::makeExternalService()", "name", name.cStr()); + kj::StringPtr addrStr = nullptr; + kj::String ownAddrStr = nullptr; + + KJ_IF_SOME(override, externalOverrides.findEntry(name)) { + addrStr = ownAddrStr = kj::mv(override.value); + externalOverrides.erase(override); + } else if (conf.hasAddress()) { + addrStr = conf.getAddress(); + } else { + reportConfigError(kj::str("External service \"", name, + "\" has no address in the config, so must be specified " + "on the command line with `--external-addr`.")); + return makeInvalidConfigService(); } - kj::Maybe> getActorClass(kj::Maybe name, Frankenvalue props) { - KJ_IF_SOME(className, actorClassEntrypoints.find(KJ_UNWRAP_OR(name, return kj::none))) { - return kj::refcounted(*this, className, kj::mv(props)); - } else { - return kj::none; + switch (conf.which()) { + case config::ExternalServer::HTTP: { + // We have to construct the rewriter upfront before waiting on any promises, since the + // HeaderTable::Builder is only available synchronously. 
+ auto rewriter = kj::heap(conf.getHttp(), headerTableBuilder); + auto addr = kj::heap(network.parseAddress(addrStr, 80)); + return kj::refcounted(kj::mv(addr), kj::mv(rewriter), + headerTableBuilder.getFutureTable(), timer, entropySource, + globalContext->byteStreamFactory, globalContext->httpOverCapnpFactory); + } + case config::ExternalServer::HTTPS: { + auto httpsConf = conf.getHttps(); + kj::Maybe certificateHost; + if (httpsConf.hasCertificateHost()) { + certificateHost = httpsConf.getCertificateHost(); + } + auto rewriter = kj::heap(httpsConf.getOptions(), headerTableBuilder); + auto addr = kj::heap( + makeTlsNetworkAddress(httpsConf.getTlsOptions(), addrStr, certificateHost, 443)); + return kj::refcounted(kj::mv(addr), kj::mv(rewriter), + headerTableBuilder.getFutureTable(), timer, entropySource, + globalContext->byteStreamFactory, globalContext->httpOverCapnpFactory); + } + case config::ExternalServer::TCP: { + auto tcpConf = conf.getTcp(); + auto addr = kj::heap(network.parseAddress(addrStr, 80)); + if (tcpConf.hasTlsOptions()) { + kj::Maybe certificateHost; + if (tcpConf.hasCertificateHost()) { + certificateHost = tcpConf.getCertificateHost(); + } + addr = kj::heap( + makeTlsNetworkAddress(tcpConf.getTlsOptions(), addrStr, certificateHost, 0)); + } + return kj::refcounted(kj::mv(addr)); } } + reportConfigError(kj::str("External service named \"", name, + "\" has unrecognized protocol. Was the config " + "compiled with a newer version of the schema?")); + return makeInvalidConfigService(); +} - kj::Own getLoopbackActorClass(kj::StringPtr name) { - // Look up a more permanent class name string. (Also validates this is actually an export.) - kj::StringPtr className = KJ_REQUIRE_NONNULL(actorClassEntrypoints.find(name), - "getLoopbackActorClass() called for actor class that doesn't exist"); - - return kj::refcounted(*this, className, kj::none); - } +// Service used when the service is configured as network service. 
+class Server::NetworkService final: public Service, private WorkerInterface { + public: + NetworkService(kj::HttpHeaderTable& headerTable, + kj::Timer& timer, + kj::EntropySource& entropySource, + kj::Own networkParam, + kj::Maybe> tlsNetworkParam, + kj::Maybe tlsContext) + : network(kj::mv(networkParam)), + tlsNetwork(kj::mv(tlsNetworkParam)), + webSocketErrorHandler(kj::heap()), + inner(kj::newHttpClient(timer, + headerTable, + *network, + tlsNetwork, + {.entropySource = entropySource, + .webSocketCompressionMode = kj::HttpClientSettings::MANUAL_COMPRESSION, + .webSocketErrorHandler = *webSocketErrorHandler, + .tlsContext = tlsContext})), + serviceAdapter(kj::newHttpService(*inner)) {} - bool hasDefaultEntrypoint() { - return defaultEntrypointHandlers != kj::none; + kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { + return {this, kj::NullDisposer::instance}; } - kj::Array getEntrypointNames() { - return KJ_MAP(e, namedEntrypoints) -> kj::StringPtr { return e.key; }; + bool hasHandler(kj::StringPtr handlerName) override { + return handlerName == "fetch"_kj || handlerName == "connect"_kj; } - kj::Array getActorClassNames() { - return KJ_MAP(name, actorClassEntrypoints) -> kj::StringPtr { return name; }; + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + JSG_FAIL_REQUIRE(DOMDataCloneError, "NetworkService can't be passed over RPC."); } - void link(Worker::ValidationErrorReporter& errorReporter) override { - LinkCallback callback = - kj::mv(KJ_REQUIRE_NONNULL(ioChannels.tryGet(), "already called link()")); - auto linked = callback(*this, errorReporter); - - for (auto& ns: actorNamespaces) { - ns.value->link(linked.actorStorage); - } + private: + kj::Own network; + kj::Maybe> tlsNetwork; + kj::Own webSocketErrorHandler; + kj::Own inner; + kj::Own serviceAdapter; - ioChannels = kj::mv(linked); + kj::Promise request(kj::HttpMethod method, + kj::StringPtr url, + const kj::HttpHeaders& 
headers, + kj::AsyncInputStream& requestBody, + kj::HttpService::Response& response) override { + TRACE_EVENT("workerd", "NetworkService::request()"); + return serviceAdapter->request(method, url, headers, requestBody, response); } - void unlink() override { - // Need to remove all waited until tasks before destroying `ioChannels` - waitUntilTasks.clear(); - - // Need to tear down all actors before tearing down `ioChannels.actorStorage`. - actorNamespaces.clear(); - - // OK, now we can unlink. - ioChannels = {}; + kj::Promise connect(kj::StringPtr host, + const kj::HttpHeaders& headers, + kj::AsyncIoStream& connection, + ConnectResponse& tunnel, + kj::HttpConnectSettings settings) override { + TRACE_EVENT("workerd", "NetworkService::connect()"); + // This code is hit when the global `connect` function is called in a JS worker script. + // It represents a proxy-less TCP connection, which means we can simply defer the handling of + // the connection to the service adapter (likely NetworkHttpClient). Its behavior will be to + // connect directly to the host over TCP. 
+ return serviceAdapter->connect(host, headers, connection, tunnel, kj::mv(settings)); } - kj::Maybe getActorNamespace(kj::StringPtr name) { - KJ_IF_SOME(a, actorNamespaces.find(name)) { - return *a; - } else { - return kj::none; - } + kj::Promise prewarm(kj::StringPtr url) override { + return kj::READY_NOW; } - - kj::HashMap>& getActorNamespaces() { - return actorNamespaces; + kj::Promise runScheduled(kj::Date scheduledTime, kj::StringPtr cron) override { + throwUnsupported(); } - - kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { - return startRequest(kj::mv(metadata), kj::none, {}); + kj::Promise runAlarm(kj::Date scheduledTime, uint32_t retryCount) override { + throwUnsupported(); + } + kj::Promise customEvent(kj::Own event) override { + return event->notSupported(); } - bool hasHandler(kj::StringPtr handlerName) override { - KJ_IF_SOME(h, defaultEntrypointHandlers) { - return h.contains(handlerName); - } else { - return false; - } + [[noreturn]] void throwUnsupported() { + JSG_FAIL_REQUIRE(Error, "External HTTP servers don't support this event type."); } +}; - kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata, - kj::Maybe entrypointName, - Frankenvalue props, - kj::Maybe> actor = kj::none, - bool isTracer = false) { - TRACE_EVENT("workerd", "Server::WorkerService::startRequest()"); +kj::Own Server::makeNetworkService(config::Network::Reader conf) { + TRACE_EVENT("workerd", "Server::makeNetworkService()"); + auto restrictedNetwork = network.restrictPeers( KJ_MAP(a, conf.getAllow()) -> kj::StringPtr { + return a; + }, KJ_MAP(a, conf.getDeny()) -> kj::StringPtr { return a; }); - auto& channels = KJ_ASSERT_NONNULL(ioChannels.tryGet()); + kj::Maybe> tlsNetwork; + kj::Maybe tlsContext; + if (conf.hasTlsOptions()) { + auto ownedTlsContext = makeTlsContext(conf.getTlsOptions()); + tlsContext = ownedTlsContext; + tlsNetwork = ownedTlsContext->wrapNetwork(*restrictedNetwork).attach(kj::mv(ownedTlsContext)); + } - 
kj::Vector> bufferedTailWorkers(channels.tails.size()); - kj::Vector> streamingTailWorkers(channels.streamingTails.size()); - auto addWorkerIfNotRecursiveTracer = [this, isTracer]( - kj::Vector>& workers, - IoChannelFactory::SubrequestChannel& channel) { - // Caution here... if the tail worker ends up having a circular dependency - // on the worker we'll end up with an infinite loop trying to initialize. - // We can test this directly but it's more difficult to test indirect - // loops (dependency of dependency, etc). Here we're just going to keep - // it simple and just check the direct dependency. - // If service refers to an EntrypointService, we need to compare with the underlying - // WorkerService to match this. - auto& service = KJ_UNWRAP_OR(kj::dynamicDowncastIfAvailable(channel), { - // Not a Service, probably not self-referential. - workers.add(channel.startRequest({})); - return; - }); + return kj::refcounted(globalContext->headerTable, timer, entropySource, + kj::mv(restrictedNetwork), kj::mv(tlsNetwork), tlsContext); +} + +// Service used when the service is configured as disk directory service. 
+class Server::DiskDirectoryService final: public Service, private WorkerInterface { + public: + DiskDirectoryService(config::DiskDirectory::Reader conf, + kj::Own dir, + kj::HttpHeaderTable::Builder& headerTableBuilder) + : writable(*dir), + readable(kj::mv(dir)), + headerTable(headerTableBuilder.getFutureTable()), + hLastModified(headerTableBuilder.add("Last-Modified")), + allowDotfiles(conf.getAllowDotfiles()) {} + DiskDirectoryService(config::DiskDirectory::Reader conf, + kj::Own dir, + kj::HttpHeaderTable::Builder& headerTableBuilder) + : readable(kj::mv(dir)), + headerTable(headerTableBuilder.getFutureTable()), + hLastModified(headerTableBuilder.add("Last-Modified")), + allowDotfiles(conf.getAllowDotfiles()) {} + + kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { + return {this, kj::NullDisposer::instance}; + } + + kj::Maybe getWritable() { + return writable; + } + + bool hasHandler(kj::StringPtr handlerName) override { + return handlerName == "fetch"_kj; + } + + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + JSG_FAIL_REQUIRE(DOMDataCloneError, "DiskDirectoryService can't be passed over RPC."); + } + + private: + kj::Maybe writable; + kj::Own readable; + kj::HttpHeaderTable& headerTable; + kj::HttpHeaderId hLastModified; + bool allowDotfiles; + + kj::Promise request(kj::HttpMethod method, + kj::StringPtr urlStr, + const kj::HttpHeaders& requestHeaders, + kj::AsyncInputStream& requestBody, + kj::HttpService::Response& response) override { + TRACE_EVENT("workerd", "DiskDirectoryService::request()", "url", urlStr.cStr()); + auto url = kj::Url::parse(urlStr); + + bool blockedPath = false; + kj::Path path = nullptr; + KJ_IF_SOME(exception, + kj::runCatchingExceptions([&]() { path = kj::Path(url.path.releaseAsArray()); })) { + (void)exception; // squash compiler warning about unused var + // If the Path constructor throws, this path is not valid (e.g. it contains ".."). 
+ blockedPath = true; + } + + if (!blockedPath && !allowDotfiles) { + for (auto& part: path) { + if (part.startsWith(".")) { + blockedPath = true; + break; + } + } + } + + if (method == kj::HttpMethod::GET || method == kj::HttpMethod::HEAD) { + if (blockedPath) { + co_return co_await response.sendError(404, "Not Found", headerTable); + } + + auto file = KJ_UNWRAP_OR(readable->tryOpenFile(path), + { co_return co_await response.sendError(404, "Not Found", headerTable); }); + + auto meta = file->stat(); + + switch (meta.type) { + case kj::FsNode::Type::FILE: { + // If this is a GET request with a Range header, return partial content if a single + // satisfiable range is specified. + // TODO(someday): consider supporting multiple ranges with multipart/byteranges + kj::Maybe range; + if (method == kj::HttpMethod::GET) { + KJ_IF_SOME(header, requestHeaders.get(kj::HttpHeaderId::RANGE)) { + KJ_SWITCH_ONEOF(kj::tryParseHttpRangeHeader(header.asArray(), meta.size)) { + KJ_CASE_ONEOF(ranges, kj::Array) { + KJ_ASSERT(ranges.size() > 0); + if (ranges.size() == 1) range = ranges[0]; + } + KJ_CASE_ONEOF(_, kj::HttpEverythingRange) {} + KJ_CASE_ONEOF(_, kj::HttpUnsatisfiableRange) { + kj::HttpHeaders headers(headerTable); + headers.set(kj::HttpHeaderId::CONTENT_RANGE, kj::str("bytes */", meta.size)); + co_return co_await response.sendError(416, "Range Not Satisfiable", headers); + } + } + } + } + + kj::HttpHeaders headers(headerTable); + headers.set(kj::HttpHeaderId::CONTENT_TYPE, MimeType::OCTET_STREAM.toString()); + headers.set(hLastModified, httpTime(meta.lastModified)); + + // We explicitly set the Content-Length header because if we don't, and we were called + // by a local Worker (without an actual HTTP connection in between), then the Worker + // will not see a Content-Length header, but being able to query the content length + // (especially with HEAD requests) is quite useful. 
+ // TODO(cleanup): Arguably the implementation of `fetch()` should be adjusted so that + // if no `Content-Length` header is returned, but the body size is known via the KJ + // HTTP API, then the header should be filled in automatically. Unclear if this is safe + // to change without a compat flag. + + if (method == kj::HttpMethod::HEAD) { + headers.set(kj::HttpHeaderId::CONTENT_LENGTH, kj::str(meta.size)); + response.send(200, "OK", headers, meta.size); + co_return; + } else KJ_IF_SOME(r, range) { + KJ_ASSERT(r.start <= r.end); + auto rangeSize = r.end - r.start + 1; + headers.set(kj::HttpHeaderId::CONTENT_LENGTH, kj::str(rangeSize)); + headers.set(kj::HttpHeaderId::CONTENT_RANGE, + kj::str("bytes ", r.start, "-", r.end, "/", meta.size)); + auto out = response.send(206, "Partial Content", headers, rangeSize); + + auto in = kj::heap(*file, r.start); + co_return co_await in->pumpTo(*out, rangeSize).ignoreResult(); + } else { + headers.set(kj::HttpHeaderId::CONTENT_LENGTH, kj::str(meta.size)); + auto out = response.send(200, "OK", headers, meta.size); + + auto in = kj::heap(*file); + co_return co_await in->pumpTo(*out, meta.size).ignoreResult(); + } + } + case kj::FsNode::Type::DIRECTORY: { + // Whoooops, we opened a directory. Back up and start over. + + auto dir = readable->openSubdir(path); + + kj::HttpHeaders headers(headerTable); + headers.set(kj::HttpHeaderId::CONTENT_TYPE, MimeType::JSON.toString()); + headers.set(hLastModified, httpTime(meta.lastModified)); + + // We intentionally don't provide the expected size here in order to reserve the right + // to switch to streaming directory listing in the future. 
+ auto out = response.send(200, "OK", headers); + + if (method == kj::HttpMethod::HEAD) { + co_return; + } else { + auto entries = dir->listEntries(); + kj::Vector jsonEntries(entries.size()); + for (auto& entry: entries) { + if (!allowDotfiles && entry.name.startsWith(".")) { + continue; + } + + kj::StringPtr type = "other"; + switch (entry.type) { + case kj::FsNode::Type::FILE: + type = "file"; + break; + case kj::FsNode::Type::DIRECTORY: + type = "directory"; + break; + case kj::FsNode::Type::SYMLINK: + type = "symlink"; + break; + case kj::FsNode::Type::BLOCK_DEVICE: + type = "blockDevice"; + break; + case kj::FsNode::Type::CHARACTER_DEVICE: + type = "characterDevice"; + break; + case kj::FsNode::Type::NAMED_PIPE: + type = "namedPipe"; + break; + case kj::FsNode::Type::SOCKET: + type = "socket"; + break; + case kj::FsNode::Type::OTHER: + type = "other"; + break; + } + + jsonEntries.add( + kj::str("{\"name\":", escapeJsonString(entry.name), ",\"type\":\"", type, "\"}")); + }; + + auto content = kj::str('[', kj::strArray(jsonEntries, ","), ']'); + + co_return co_await out->write(content.asBytes()); + } + } + default: + co_return co_await response.sendError(406, "Not Acceptable", headerTable); + } + } else if (method == kj::HttpMethod::PUT) { + auto& w = KJ_UNWRAP_OR(writable, + { co_return co_await response.sendError(405, "Method Not Allowed", headerTable); }); + + if (blockedPath || path.size() == 0) { + co_return co_await response.sendError(403, "Unauthorized", headerTable); + } + + auto replacer = w.replaceFile( + path, kj::WriteMode::CREATE | kj::WriteMode::MODIFY | kj::WriteMode::CREATE_PARENT); + auto stream = kj::heap(replacer->get()); + + co_await requestBody.pumpTo(*stream); + + replacer->commit(); + kj::HttpHeaders headers(headerTable); + response.send(204, "No Content", headers); + co_return; + } else if (method == kj::HttpMethod::DELETE) { + auto& w = KJ_UNWRAP_OR(writable, + { co_return co_await response.sendError(405, "Method Not Allowed", 
headerTable); }); + + if (blockedPath || path.size() == 0) { + co_return co_await response.sendError(403, "Unauthorized", headerTable); + } + + auto found = w.tryRemove(path); + + kj::HttpHeaders headers(headerTable); + if (found) { + response.send(204, "No Content", headers); + co_return; + } else { + co_return co_await response.sendError(404, "Not Found", headers); + } + } else { + co_return co_await response.sendError(501, "Not Implemented", headerTable); + } + } + + kj::Promise connect(kj::StringPtr host, + const kj::HttpHeaders& headers, + kj::AsyncIoStream& connection, + kj::HttpService::ConnectResponse& response, + kj::HttpConnectSettings settings) override { + throwUnsupported(); + } + kj::Promise prewarm(kj::StringPtr url) override { + return kj::READY_NOW; + } + kj::Promise runScheduled(kj::Date scheduledTime, kj::StringPtr cron) override { + throwUnsupported(); + } + kj::Promise runAlarm(kj::Date scheduledTime, uint32_t retryCount) override { + throwUnsupported(); + } + kj::Promise customEvent(kj::Own event) override { + return event->notSupported(); + } + + [[noreturn]] void throwUnsupported() { + JSG_FAIL_REQUIRE(Error, "Disk directory services don't support this event type."); + } +}; + +kj::Own Server::makeDiskDirectoryService(kj::StringPtr name, + config::DiskDirectory::Reader conf, + kj::HttpHeaderTable::Builder& headerTableBuilder) { + TRACE_EVENT("workerd", "Server::makeDiskDirectoryService()"); + kj::StringPtr pathStr = nullptr; + kj::String ownPathStr; + + KJ_IF_SOME(override, directoryOverrides.findEntry(name)) { + pathStr = ownPathStr = kj::mv(override.value); + directoryOverrides.erase(override); + } else if (conf.hasPath()) { + pathStr = conf.getPath(); + } else { + reportConfigError(kj::str("Directory \"", name, + "\" has no path in the config, so must be specified on the " + "command line with `--directory-path`.")); + return makeInvalidConfigService(); + } + + auto path = fs.getCurrentPath().evalNative(pathStr); + + if 
(conf.getWritable()) { + auto openDir = KJ_UNWRAP_OR(fs.getRoot().tryOpenSubdir(kj::mv(path), kj::WriteMode::MODIFY), { + reportConfigError(kj::str("Directory named \"", name, "\" not found: ", pathStr)); + return makeInvalidConfigService(); + }); + + return kj::refcounted(conf, kj::mv(openDir), headerTableBuilder); + } else { + auto openDir = KJ_UNWRAP_OR(fs.getRoot().tryOpenSubdir(kj::mv(path)), { + reportConfigError(kj::str("Directory named \"", name, "\" not found: ", pathStr)); + return makeInvalidConfigService(); + }); + + return kj::refcounted(conf, kj::mv(openDir), headerTableBuilder); + } +} + +// ======================================================================================= + +// This class exists to update the InspectorService's table of isolates when a config +// has multiple services. The InspectorService exists on the stack of its own thread and +// initializes state that is bound to the thread, e.g. a http server and an event loop. +// This class provides a small thread-safe interface to the InspectorService so : +// mappings can be added after the InspectorService has started. +// +// The Cloudflare devtools only show the first service in workerd configuration. This service +// is always contains a users code. However, in packaging user code wrangler may add +// additional services that also have code. If using Chrome devtools to inspect a workerd, +// instance all services are visible and can be debugged. 
+class Server::InspectorServiceIsolateRegistrar final { + public: + InspectorServiceIsolateRegistrar() {} + ~InspectorServiceIsolateRegistrar() noexcept(true); + + void registerIsolate(kj::StringPtr name, Worker::Isolate* isolate); + + KJ_DISALLOW_COPY_AND_MOVE(InspectorServiceIsolateRegistrar); + + private: + void attach(const Server::InspectorService* anInspectorService) { + *inspectorService.lockExclusive() = anInspectorService; + } + + void detach() { + *inspectorService.lockExclusive() = nullptr; + } + + kj::MutexGuarded inspectorService; + friend class Server::InspectorService; +}; + +// Implements the interface for the devtools inspector protocol. +// +// The InspectorService is created when workerd serve is called using the -i option +// to define the inspector socket. +class Server::InspectorService final: public kj::HttpService, public kj::HttpServerErrorHandler { + public: + InspectorService(kj::Own isolateThreadExecutor, + kj::Timer& timer, + kj::HttpHeaderTable::Builder& headerTableBuilder, + InspectorServiceIsolateRegistrar& registrar) + : isolateThreadExecutor(kj::mv(isolateThreadExecutor)), + timer(timer), + headerTable(headerTableBuilder.getFutureTable()), + server(timer, headerTable, *this, kj::HttpServerSettings{.errorHandler = *this}), + registrar(registrar) { + registrar.attach(this); + } + + ~InspectorService() { + KJ_IF_SOME(r, registrar) { + r.detach(); + } + } + + void invalidateRegistrar() { + registrar = kj::none; + } + + kj::Promise handleApplicationError( + kj::Exception exception, kj::Maybe response) override { + if (exception.getType() == kj::Exception::Type::DISCONNECTED) { + // Don't send a response, just close connection. + co_return; + } + KJ_LOG(ERROR, kj::str("Uncaught exception: ", exception)); + KJ_IF_SOME(r, response) { + co_return co_await r.sendError(500, "Internal Server Error", headerTable); + } + } - if (service.service() == this) { - if (!isTracer) { - // This is a self-reference. Create a request with isTracer=true. 
- KJ_IF_SOME(s, kj::dynamicDowncastIfAvailable(service)) { - workers.add(s.startRequest({}, kj::none, {}, kj::none, true)); - } else KJ_IF_SOME(s, kj::dynamicDowncastIfAvailable(service)) { - workers.add(s.startRequest({}, true)); + kj::Promise request(kj::HttpMethod method, + kj::StringPtr url, + const kj::HttpHeaders& headers, + kj::AsyncInputStream& requestBody, + kj::HttpService::Response& response) override { + // The inspector protocol starts with the debug client sending ordinary HTTP GET requests + // to /json/version and then to /json or /json/list. These must respond with valid JSON + // documents that list the details of what isolates are available for inspection. Each + // isolate must be listed separately. In the advertisement for each isolate is a URL + // and a unique ID. The client will use the URL and ID to open a WebSocket request to + // actually connect the debug session. + kj::HttpHeaders responseHeaders(headerTable); + if (headers.isWebSocket()) { + KJ_IF_SOME(pos, url.findLast('/')) { + auto id = url.slice(pos + 1); + + KJ_IF_SOME(isolate, isolates.find(id)) { + // If getting the strong ref doesn't work it means that the Worker::Isolate + // has already been cleaned up. We use a weak ref here in order to keep from + // having the Worker::Isolate itself having to know anything at all about the + // IsolateService and the registration process. So instead of having Isolate + // explicitly clean up after itself we lazily evaluate the weak ref and clean + // up when necessary. + KJ_IF_SOME(ref, isolate->tryAddStrongRef()) { + // When using --verbose, we'll output some logging to indicate when the + // inspector client is attached/detached. + KJ_LOG(INFO, kj::str("Inspector client attaching [", id, "]")); + auto webSocket = response.acceptWebSocket(responseHeaders); + kj::Duration timerOffset = 0 * kj::MILLISECONDS; + try { + co_return co_await ref->attachInspector( + isolateThreadExecutor->addRef(), timer, timerOffset, *webSocket); + } catch (...) 
{ + auto exception = kj::getCaughtExceptionAsKj(); + if (exception.getType() == kj::Exception::Type::DISCONNECTED) { + // This likely just means that the inspector client was closed. + // Nothing to do here but move along. + KJ_LOG(INFO, "Inspector client detached"_kj); + co_return; + } else { + // If it's any other kind of error, propagate it! + kj::throwFatalException(kj::mv(exception)); + } + } } else { - KJ_FAIL_ASSERT("Unexpected service type in recursive tail worker declaration"); + // If we can't get a strong ref to the isolate here, it's been cleaned + // up. The only thing we're going to do is clean up here and act like + // nothing happened. + isolates.erase(id); } - } else { - // Intentionally left empty to prevent infinite recursion with tail workers tailing - // themselves } - } else { - workers.add(service.startRequest({})); - } - }; - // Do not add tracers for worker interfaces with the "test" entrypoint – we generally do not - // need to trace the test event, although this is useful to test that span tracing works, so - // we are not implementing a (more complex) mechanism to disable tracing for all test() events - // here. - if (entrypointName.orDefault("") != "test"_kj) { - for (auto& service: channels.tails) { - addWorkerIfNotRecursiveTracer(bufferedTailWorkers, *service); - } - for (auto& service: channels.streamingTails) { - addWorkerIfNotRecursiveTracer(streamingTailWorkers, *service); + KJ_LOG(INFO, kj::str("Unknown worker session [", id, "]")); + co_return co_await response.sendError(404, "Unknown worker session", responseHeaders); } + + // No / in url!? That's weird + co_return co_await response.sendError(400, "Invalid request", responseHeaders); } - kj::Maybe> workerTracer = kj::none; + // If the request is not a WebSocket request, it must be a GET to fetch details + // about the implementation. 
+ if (method != kj::HttpMethod::GET) { + co_return co_await response.sendError(501, "Unsupported Operation", responseHeaders); + } - if (!bufferedTailWorkers.empty() || !streamingTailWorkers.empty()) { - // Setting up buffered tail workers support, but only if we actually have tail workers - // configured. - auto executionModel = - actor == kj::none ? ExecutionModel::STATELESS : ExecutionModel::DURABLE_OBJECT; - auto tailStreamWriter = tracing::initializeTailStreamWriter( - streamingTailWorkers.releaseAsArray(), waitUntilTasks); - auto trace = kj::refcounted(kj::none /* stableId */, kj::none /* scriptName */, - kj::none /* scriptVersion */, kj::none /* dispatchNamespace */, kj::none /* scriptId */, - nullptr /* scriptTags */, mapCopyString(entrypointName), executionModel, - kj::none /* durableObjectId */); - kj::Own tracer = kj::refcounted( - kj::none, kj::mv(trace), PipelineLogLevel::FULL, kj::none, kj::mv(tailStreamWriter)); + if (url.endsWith("/json/version")) { + responseHeaders.set(kj::HttpHeaderId::CONTENT_TYPE, MimeType::JSON.toString()); + auto content = kj::str("{\"Browser\": \"workerd\", \"Protocol-Version\": \"1.3\" }"); + auto out = response.send(200, "OK", responseHeaders, content.size()); + co_return co_await out->write(content.asBytes()); + } else if (url.endsWith("/json") || url.endsWith("/json/list") || + url.endsWith("/json/list?for_tab")) { + responseHeaders.set(kj::HttpHeaderId::CONTENT_TYPE, MimeType::JSON.toString()); - // When the tracer is complete, deliver traces to any buffered tail workers. We end up - // creating two references to the WorkerTracer, one held by the observer and one that will be - // passed to the IoContext. This ensures that the tracer lives long enough to receive all - // events. 
- if (!bufferedTailWorkers.empty()) { - waitUntilTasks.add(tracer->onComplete().then( - kj::coCapture([tailWorkers = bufferedTailWorkers.releaseAsArray()]( - kj::Own trace) mutable -> kj::Promise { - for (auto& worker: tailWorkers) { - auto event = kj::heap( - workerd::api::TraceCustomEvent::TYPE, kj::arr(kj::addRef(*trace))); - co_await worker->customEvent(kj::mv(event)).ignoreResult(); - } - co_return; - }))); + auto baseWsUrl = KJ_UNWRAP_OR(headers.get(kj::HttpHeaderId::HOST), + { co_return co_await response.sendError(400, "Bad Request", responseHeaders); }); + + kj::Vector entries(isolates.size()); + kj::Vector toRemove; + for (auto& entry: isolates) { + // While we don't actually use the strong ref here we still attempt to acquire it + // in order to determine if the isolate is actually still around. If the isolate + // has been destroyed the weak ref will be cleared. We do it this way to keep from + // having the Worker::Isolate know anything at all about the InspectorService. + // We'll lazily clean up whenever we detect that the ref has been invalidated. + // + // TODO(cleanup): If we ever enable reloading of isolates for live services, we may + // want to refactor this such that the WorkerService holds a handle to the registration + // as opposed to using this lazy cleanup mechanism. For now, however, this is + // sufficient. 
+ KJ_IF_SOME(ref, entry.value->tryAddStrongRef()) { + (void)ref; // squash compiler warning about unused ref + kj::Vector fields(9); + fields.add(kj::str("\"id\":\"", entry.key, "\"")); + fields.add(kj::str("\"title\":\"workerd: worker ", entry.key, "\"")); + fields.add(kj::str("\"type\":\"node\"")); + fields.add(kj::str("\"description\":\"workerd worker\"")); + fields.add(kj::str("\"webSocketDebuggerUrl\":\"ws://", baseWsUrl, "/", entry.key, "\"")); + fields.add(kj::str( + "\"devtoolsFrontendUrl\":\"devtools://devtools/bundled/js_app.html?experiments=true&v8only=true&ws=", + baseWsUrl, "/\"")); + fields.add(kj::str( + "\"devtoolsFrontendUrlCompat\":\"devtools://devtools/bundled/inspector.html?experiments=true&v8only=true&ws=", + baseWsUrl, "/\"")); + fields.add(kj::str("\"faviconUrl\":\"https://workers.cloudflare.com/favicon.ico\"")); + fields.add(kj::str("\"url\":\"https://workers.dev\"")); + entries.add(kj::str('{', kj::strArray(fields, ","), '}')); + } else { + // If we're not able to get a reference to the isolate here, it's + // been cleaned up and we should remove it from the list. We do this + // after iterating to make sure we don't invalidate the iterator. 
+ toRemove.add(kj::str(entry.key)); + } + } + // Clean up if necessary + for (auto& key: toRemove) { + isolates.erase(key); } - workerTracer = kj::mv(tracer); - } - KJ_IF_SOME(w, workerTracer) { - w->setMakeUserRequestSpanFunc( - [&w = *w, &entropySource = threadContext.getEntropySource()]( - tracing::TraceId traceId, kj::Maybe traceFlags) { - return SpanParent(kj::refcounted( - kj::refcounted(w.getWeakRef(), entropySource), kj::mv(traceId), - traceFlags)); - }); + auto content = kj::str('[', kj::strArray(entries, ","), ']'); + + auto out = response.send(200, "OK", responseHeaders, content.size()); + co_return co_await out->write(content.asBytes()).attach(kj::mv(content), kj::mv(out)); } - kj::Own observer = - kj::refcounted(mapAddRef(workerTracer), waitUntilTasks); - kj::Maybe triggerContext; - KJ_IF_SOME(ctx, metadata.userSpanParent.toSpanContext()) { - KJ_IF_SOME(spanId, ctx.getSpanId()) { - triggerContext = tracing::InvocationSpanContext( - ctx.getTraceId(), tracing::TraceId::nullId, spanId, ctx.getTraceFlags()); + co_return co_await response.sendError(500, "Not yet implemented", responseHeaders); + } + + kj::Promise listen(kj::Own listener) { + // Note that we intentionally do not make inspector connections be part of the usual drain() + // procedure. Inspector connections are always long-lived WebSockets, and we do not want the + // existence of such a connection to hold the server open. We do, however, want the connection + // to stay open until all other requests are drained, for debugging purposes. + // + // Thus: + // * We let connection loop tasks live on `HttpServer`'s own `TaskSet`, rather than our + // server's main `TaskSet` which we wait to become empty on drain. + // * We do not add this `HttpServer` to the server's `httpServers` list, so it will not receive + // drain() requests. 
(However, our caller does cancel listening on the server port as soon + // as we begin draining, since we may want new connections to go to a new instance of the + // server.) + co_return co_await server.listenHttp(*listener); + } + + void registerIsolate(kj::StringPtr name, Worker::Isolate* isolate) { + isolates.insert(kj::str(name), isolate->getWeakRef()); + } + + private: + kj::Own isolateThreadExecutor; + kj::Timer& timer; + kj::HttpHeaderTable& headerTable; + kj::HashMap> isolates; + kj::HttpServer server; + kj::Maybe registrar; +}; + +Server::InspectorServiceIsolateRegistrar::~InspectorServiceIsolateRegistrar() noexcept(true) { + auto lockedInspectorService = this->inspectorService.lockExclusive(); + if (lockedInspectorService != nullptr) { + auto is = const_cast(*lockedInspectorService); + is->invalidateRegistrar(); + } +} + +void Server::InspectorServiceIsolateRegistrar::registerIsolate( + kj::StringPtr name, Worker::Isolate* isolate) { + auto lockedInspectorService = this->inspectorService.lockExclusive(); + if (lockedInspectorService != nullptr) { + auto is = const_cast(*lockedInspectorService); + is->registerIsolate(name, isolate); + } +} + +// ======================================================================================= +namespace { +class RequestObserverWithTracer final: public RequestObserver, public WorkerInterface { + public: + RequestObserverWithTracer(kj::Maybe> tracer, kj::TaskSet& waitUntilTasks) + : tracer(kj::mv(tracer)) {} + + ~RequestObserverWithTracer() noexcept(false) { + KJ_IF_SOME(t, tracer) { + // for a more precise end time, set the end timestamp now, if available + KJ_IF_SOME(ioContext, IoContext::tryCurrent()) { + auto time = ioContext.now(); + t->recordTimestamp(time); } + t->setOutcome( + outcome, 0 * kj::MILLISECONDS /* cpu time */, 0 * kj::MILLISECONDS /* wall time */); } - - return newWorkerEntrypoint(threadContext, kj::atomicAddRef(*worker), entrypointName, - kj::mv(props), kj::mv(actor), kj::Own(this, 
kj::NullDisposer::instance), - {}, // ioContextDependency - kj::Own(this, kj::NullDisposer::instance), kj::mv(observer), - waitUntilTasks, - true, // tunnelExceptions - kj::mv(workerTracer), // workerTracer - kj::mv(metadata.cfBlobJson), - kj::none, // versionInfo - kj::mv(triggerContext)); } - class ActorNamespace final { - public: - ActorNamespace(kj::Own actorClass, - const ActorConfig& config, - const kj::Clock& clock, - kj::Timer& timer, - capnp::ByteStreamFactory& byteStreamFactory, - ChannelTokenHandler& channelTokenHandler, - kj::Network& dockerNetwork, - kj::Maybe dockerPath, - kj::Maybe containerEgressInterceptorImage, - kj::TaskSet& waitUntilTasks) - : actorClass(kj::mv(actorClass)), - config(config), - clock(clock), - timer(timer), - byteStreamFactory(byteStreamFactory), - channelTokenHandler(channelTokenHandler), - dockerNetwork(dockerNetwork), - dockerPath(dockerPath), - containerEgressInterceptorImage(containerEgressInterceptorImage), - waitUntilTasks(waitUntilTasks) {} - - void link(kj::Maybe serviceActorStorage) { - KJ_IF_SOME(dir, serviceActorStorage) { - KJ_IF_SOME(d, config.tryGet()) { - this->actorStorage.emplace(dir.openSubdir( - kj::Path({d.uniqueKey}), kj::WriteMode::CREATE | kj::WriteMode::MODIFY)); - } - } + WorkerInterface& wrapWorkerInterface(WorkerInterface& worker) override { + if (tracer != kj::none) { + inner = worker; + return *this; + } + return worker; + } + + void reportFailure( + const kj::Exception& exception, FailureSource source = FailureSource::OTHER) override { + outcome = RequestObserver::outcomeFromException(exception, source); + } - KJ_IF_SOME(d, config.tryGet()) { - auto idFactory = kj::heap(d.uniqueKey); - AlarmScheduler::GetActorFn getActor = - [this, idFactory = kj::mv(idFactory)]( - kj::String idStr) mutable -> kj::Own { - Worker::Actor::Id id = idFactory->idFromString(kj::mv(idStr)); - auto actorContainer = this->getActorContainer(kj::mv(id)); - return newPromisedWorkerInterface(actorContainer->startRequest({})); - 
}; - - KJ_IF_SOME(as, this->actorStorage) { - // Create per-namespace alarm scheduler backed by on-disk storage in the - // namespace directory, alongside the per-actor .sqlite files. - this->ownAlarmScheduler = kj::heap( - clock, timer, as.vfs, kj::Path({"metadata.sqlite"}), kj::mv(getActor)); - } else { - // No on-disk storage -- create an in-memory alarm scheduler. - auto memDir = kj::newInMemoryDirectory(clock); - auto vfs = kj::heap(*memDir); - this->ownAlarmScheduler = kj::heap( - clock, timer, *vfs, kj::Path({"metadata.sqlite"}), kj::mv(getActor)) - .attach(kj::mv(vfs), kj::mv(memDir)); - } + void setOutcome(EventOutcome newOutcome) override { + outcome = newOutcome; + } - this->alarmScheduler = *KJ_ASSERT_NONNULL(ownAlarmScheduler); + // WorkerInterface + kj::Promise request(kj::HttpMethod method, + kj::StringPtr url, + const kj::HttpHeaders& headers, + kj::AsyncInputStream& requestBody, + kj::HttpService::Response& response) override { + try { + SimpleResponseObserver responseWrapper(&fetchStatus, response); + co_await KJ_ASSERT_NONNULL(inner).request(method, url, headers, requestBody, responseWrapper); + } catch (...) { + auto exception = kj::getCaughtExceptionAsKj(); + // Overloaded-type exceptions generally represent some resource exhaustion (i.e. not + // necessarily an internal error) and correspond to HTTP error 503. + if (exception.getType() == kj::Exception::Type::OVERLOADED) { + fetchStatus = 503; + } else { + fetchStatus = 500; } + reportFailure(exception); + kj::throwFatalException(kj::mv(exception)); } + } - const ActorConfig& getConfig() { - return config; + kj::Promise connect(kj::StringPtr host, + const kj::HttpHeaders& headers, + kj::AsyncIoStream& connection, + ConnectResponse& response, + kj::HttpConnectSettings settings) override { + try { + co_return co_await KJ_ASSERT_NONNULL(inner).connect( + host, headers, connection, response, settings); + } catch (...) 
{ + auto exception = kj::getCaughtExceptionAsKj(); + reportFailure(exception); + kj::throwFatalException(kj::mv(exception)); } + } - kj::Own getActorChannel(Worker::Actor::Id id) { - KJ_IF_SOME(doId, id.tryGet>()) { - KJ_IF_SOME(name, doId->getName()) { - // To emulate production, we preserve the name on the id, but only if it's <= 1024 bytes. - if (name.size() > 1024) { - auto* idImpl = dynamic_cast(doId.get()); - KJ_ASSERT(idImpl != nullptr, "Unexpected ActorId type?"); - idImpl->clearName(); - } - } - } - - return kj::refcounted(getActorContainer(kj::mv(id))); + kj::Promise prewarm(kj::StringPtr url) override { + try { + co_return co_await KJ_ASSERT_NONNULL(inner).prewarm(url); + } catch (...) { + auto exception = kj::getCaughtExceptionAsKj(); + reportFailure(exception); + kj::throwFatalException(kj::mv(exception)); } + } - class ActorContainer; - using ActorMap = kj::HashMap>; - - // ActorContainer mostly serves as a wrapper around Worker::Actor. - // We use it to associate a HibernationManager with the Worker::Actor, since the - // Worker::Actor can be destroyed during periods of prolonged inactivity. - // - // We use a RequestTracker to track strong references to this ActorContainer's Worker::Actor. - // Once there are no Worker::Actor's left (excluding our own), `inactive()` is triggered and we - // initiate the eviction of the Durable Object. If no requests arrive in the next 10 seconds, - // the DO is evicted, otherwise we cancel the eviction task. - class ActorContainer final: public RequestTracker::Hooks, - public kj::Refcounted, - public Worker::Actor::FacetManager { - public: - // Information which is needed before start() can be called, but may not be available yet - // when the ActorContainer is constructed (especially in the case of facets). 
- struct ClassAndId { - kj::Own actorClass; - Worker::Actor::Id id; - - ClassAndId(kj::Own actorClass, Worker::Actor::Id id) - : actorClass(kj::mv(actorClass)), - id(kj::mv(id)) {} - }; - - ActorContainer(kj::String key, - ActorNamespace& ns, - kj::Maybe parent, - kj::OneOf> classAndIdParam, - kj::Timer& timer) - : key(kj::mv(key)), - tracker(kj::refcounted(*this)), - ns(ns), - root(parent.map([](ActorContainer& p) -> ActorContainer& { return p.root; }) - .orDefault(*this)), - parent(parent), - timer(timer), - lastAccess(timer.now()) { - KJ_SWITCH_ONEOF(classAndIdParam) { - KJ_CASE_ONEOF(value, ClassAndId) { - // `classAndId` is immediately available. - classAndId = kj::mv(value); - } - KJ_CASE_ONEOF(promise, kj::Promise) { - // We are receiving a promise for a `ClassAndId` to come later. Arrange to initialize - // `classAndId` from the promise. Create a `ForkedPromise` that resolves when - // initialization is complete. - classAndId = promise - .then([this](ClassAndId value) { - auto& forked = KJ_ASSERT_NONNULL(classAndId.tryGet>()); - if (!forked.hasBranches()) { - // HACK: We're about to replace the ForkedPromise but it has no one waiting on it, - // so we'd end up cancelling ourselves. Add a branch and detach it so this doesn't - // happen. - forked.addBranch().detach([](auto&&) {}); - } - - classAndId = kj::mv(value); - }).fork(); - } - } - } + kj::Promise runScheduled(kj::Date scheduledTime, kj::StringPtr cron) override { + try { + co_return co_await KJ_ASSERT_NONNULL(inner).runScheduled(scheduledTime, cron); + } catch (...) { + auto exception = kj::getCaughtExceptionAsKj(); + reportFailure(exception); + kj::throwFatalException(kj::mv(exception)); + } + } - ~ActorContainer() noexcept(false) { - // Shutdown the tracker so we don't use active/inactive hooks anymore. 
- tracker->shutdown(); + kj::Promise runAlarm(kj::Date scheduledTime, uint32_t retryCount) override { + try { + co_return co_await KJ_ASSERT_NONNULL(inner).runAlarm(scheduledTime, retryCount); + } catch (...) { + auto exception = kj::getCaughtExceptionAsKj(); + reportFailure(exception); + kj::throwFatalException(kj::mv(exception)); + } + } - for (auto& facet: facets) { - facet.value->abort(kj::none); - } + kj::Promise test() override { + try { + co_return co_await KJ_ASSERT_NONNULL(inner).test(); + } catch (...) { + auto exception = kj::getCaughtExceptionAsKj(); + reportFailure(exception); + kj::throwFatalException(kj::mv(exception)); + } + } - KJ_IF_SOME(a, actor) { - // Unknown broken reason. - auto reason = 0; - a->shutdown(reason); - } + kj::Promise customEvent(kj::Own event) override { + try { + co_return co_await KJ_ASSERT_NONNULL(inner).customEvent(kj::mv(event)); + } catch (...) { + auto exception = kj::getCaughtExceptionAsKj(); + reportFailure(exception); + kj::throwFatalException(kj::mv(exception)); + } + } - // Drop the container client reference - // If setInactivityTimeout() was called, there's still a timer holding a reference - // If not, this may be the last reference and the ContainerClient destructor will run - containerClient = kj::none; - } + kj::Promise> abandonAlarm(kj::Date scheduledTime) override { + co_return co_await KJ_ASSERT_NONNULL(inner).abandonAlarm(scheduledTime); + } - void active() override { - // We're handling a new request, cancel the eviction promise. - shutdownTask = kj::none; - } + private: + kj::Maybe> tracer; + kj::Maybe inner; + EventOutcome outcome = EventOutcome::OK; + kj::uint fetchStatus = 0; +}; - void inactive() override { - // Durable objects are evictable by default. 
- bool isEvictable = true; - KJ_SWITCH_ONEOF(ns.config) { - KJ_CASE_ONEOF(c, Durable) { - isEvictable = c.isEvictable; - } - KJ_CASE_ONEOF(c, Ephemeral) { - isEvictable = c.isEvictable; - } - } - if (isEvictable) { - KJ_IF_SOME(a, actor) { - KJ_IF_SOME(m, a->getHibernationManager()) { - // The hibernation manager needs to survive actor eviction and be passed to the actor - // constructor next time we create it. - manager = m.addRef(); - } - } - shutdownTask = - handleShutdown().eagerlyEvaluate([](kj::Exception&& e) { KJ_LOG(ERROR, e); }); - } +class SequentialSpanSubmitter final: public SpanSubmitter { + public: + SequentialSpanSubmitter(kj::Own weakTracer, kj::EntropySource& entropySource) + : weakTracer(kj::mv(weakTracer)), + entropySource(entropySource) {} + void submitSpanClose( + tracing::SpanId spanId, kj::Date startTime, kj::Date endTime, Span::TagMap&& tags) override { + weakTracer->runIfAlive([&](BaseTracer& tracer) { + tracing::SpanEndData spanEnd(spanId, endTime, kj::mv(tags)); + if (isPredictableModeForTest()) { + startTime = spanEnd.endTime = kj::UNIX_EPOCH; } - kj::StringPtr getKey() { - return key; - } - RequestTracker& getTracker() { - return *tracker; - } - kj::Maybe> tryGetManagerRef() { - return manager.map( - [&](kj::Own& m) { return kj::addRef(*m); }); - } - void updateAccessTime() { - lastAccess = timer.now(); - KJ_IF_SOME(p, parent) { - p.updateAccessTime(); - } - } - kj::TimePoint getLastAccess() { - return lastAccess; - } + tracer.addSpanClose(kj::mv(spanEnd), startTime); + }); + } - bool hasClients() { - // If anyone holds a reference to the container other than the actor map, then it must be - // a client. 
- if (isShared()) return true; - for (auto& facet: facets) { - if (facet.value->hasClients()) return true; - } - return false; - } - kj::Own addRef() { - return kj::addRef(*this); + bool submitSpanOpen(tracing::SpanId spanId, + tracing::SpanId parentSpanId, + kj::ConstString operationName, + kj::Date startTime) override { + bool submitted = false; + weakTracer->runIfAlive([&](BaseTracer& tracer) { + if (isPredictableModeForTest()) { + startTime = kj::UNIX_EPOCH; } + tracer.addSpanOpen(spanId, parentSpanId, kj::mv(operationName), startTime); + submitted = true; + }); + return submitted; + } - // Get the actor, starting it if it's not already running. - kj::Promise> getActor() { - requireNotBroken(); - - if (actor == kj::none) { - KJ_IF_SOME(promise, classAndId.tryGet>()) { - co_await promise; - } - - auto& [actorClass, id] = KJ_ASSERT_NONNULL(classAndId.tryGet()); - - KJ_IF_SOME(promise, actorClass->whenReady()) { - co_await promise; - } - - // A concurrent request could have started the actor, so check again. - if (actor == kj::none) { - start(actorClass, id); - } - } + tracing::SpanId makeSpanId() override { + if (isPredictableModeForTest()) { + return tracing::SpanId(nextSpanId++); + } + return tracing::SpanId::fromEntropy(entropySource); + } + KJ_DISALLOW_COPY_AND_MOVE(SequentialSpanSubmitter); - co_return KJ_ASSERT_NONNULL(actor)->addRef(); - } + private: + uint64_t nextSpanId = 1; + kj::Own weakTracer; + kj::EntropySource& entropySource; +}; - kj::Promise> startRequest( - IoChannelFactory::SubrequestMetadata metadata) { - auto actor = co_await getActor(); +// IsolateLimitEnforcer that enforces no limits. +class NullIsolateLimitEnforcer final: public IsolateLimitEnforcer { + public: + v8::Isolate::CreateParams getCreateParams() override { + return {}; + } - if (ns.cleanupTask == kj::none) { - // Need to start the cleanup loop. 
- ns.cleanupTask = ns.cleanupLoop(); - } + void customizeIsolate(v8::Isolate* isolate) override {} - // Since `getActor()` completed, `classAndId` must be resolved. - auto& actorClass = KJ_ASSERT_NONNULL(classAndId.tryGet()).actorClass; + ActorCacheSharedLruOptions getActorCacheLruOptions() override { + // TODO(someday): Make this configurable? + return {.softLimit = 16 * (1ull << 20), // 16 MiB + .hardLimit = 128 * (1ull << 20), // 128 MiB + .staleTimeout = 30 * kj::SECONDS, + .dirtyListByteLimit = 8 * (1ull << 20), // 8 MiB + .maxKeysPerRpc = 128, - co_return actorClass->startRequest(kj::mv(metadata), kj::mv(actor)) - .attach(kj::defer([self = kj::addRef(*this)]() mutable { self->updateAccessTime(); })); - } + // For now, we use `neverFlush` to implement in-memory-only actors. + // See WorkerService::getActor(). + .neverFlush = true}; + } - // Abort this actor, shutting it down. - // - // It is the caller's responsibility to ensure that the aborted ActorContainer has been - // removed from any maps that would cause it to receive further traffic, since any further - // requests will be expected to fail. abort() does NOT attempt to remove the ActorContainer - // from the parent facet map since at most call sites it makes more sense to handle this - // directly. - void abort(kj::Maybe reason) { - if (brokenReason != kj::none) return; + kj::Own enterStartupJs( + jsg::Lock& lock, kj::OneOf&) const override { + return {}; + } - KJ_IF_SOME(a, actor) { - // Unknown broken reason. 
- a->shutdown(0, reason); - } + kj::Own enterStartupPython( + jsg::Lock& lock, kj::OneOf&) const override { + return {}; + } - for (auto& facet: facets) { - facet.value->abort(reason); - } + kj::Own enterDynamicImportJs( + jsg::Lock& lock, kj::OneOf&) const override { + return {}; + } - onBrokenTask = kj::none; - shutdownTask = kj::none; - manager = kj::none; - tracker->shutdown(); - actor = kj::none; - containerClient = kj::none; + kj::Own enterLoggingJs( + jsg::Lock& lock, kj::OneOf&) const override { + return {}; + } - KJ_IF_SOME(r, reason) { - brokenReason = r.clone(); - } else { - brokenReason = JSG_KJ_EXCEPTION(FAILED, Error, "Actor aborted for uknown reason."); - } - } + kj::Own enterInspectorJs( + jsg::Lock& loc, kj::OneOf&) const override { + return {}; + } - // Resets the actor's SQLite database while the connection is still open, - // avoiding file-locking issues on Windows. - void resetStorage() { - KJ_IF_SOME(a, actor) { - KJ_IF_SOME(cache, a->getPersistent()) { - KJ_IF_SOME(db, cache.getSqliteDatabase()) { - kj::runCatchingExceptions([&]() { db.reset(); }); - } - } - } - } + void completedRequest(kj::StringPtr id) const override {} - kj::Own getFacetContainer( - kj::String childKey, kj::Function()> getStartInfo) { - auto makeContainer = [&]() { - auto promise = callFacetStartCallback(kj::mv(getStartInfo)); - return kj::refcounted( - kj::mv(childKey), ns, *this, kj::mv(promise), timer); - }; + bool exitJs(jsg::Lock& lock) const override { + return false; + } - bool isNew = false; + void reportMetrics(IsolateObserver& isolateMetrics) const override {} - auto& entry = facets.findOrCreateEntry(childKey, [&]() mutable { - isNew = true; - auto container = makeContainer(); - return ActorMap::Entry{container->getKey(), kj::mv(container)}; - }); + kj::Maybe checkPbkdfIterations(jsg::Lock& lock, size_t iterations) const override { + // No limit on the number of iterations in workerd + return kj::none; + } - return entry.value->addRef(); - } + bool 
hasExcessivelyExceededHeapLimit() const override { + return false; + } - uint getDepth() const override { - KJ_IF_SOME(p, parent) { - return 1 + p.getDepth(); - } - return 0; - } + const TrackedWasmInstanceList& getTrackedWasmInstances() const override { + return trackedWasmInstances; + } - kj::Own getFacet( - kj::StringPtr name, kj::Function()> getStartInfo) override { - auto facet = getFacetContainer(kj::str(name), kj::mv(getStartInfo)); - return kj::refcounted(kj::mv(facet)); - } + private: + TrackedWasmInstanceList trackedWasmInstances; +}; - void abortFacet(kj::StringPtr name, kj::Exception reason) override { - KJ_IF_SOME(entry, facets.findEntry(name)) { - entry.value->abort(reason); - facets.erase(entry); - } - } +} // namespace - void deleteFacet(kj::StringPtr name) override { - // First, abort any running facets. - abortFacet(name, JSG_KJ_EXCEPTION(FAILED, Error, "Facet was deleted.")); +// Shared ErrorReporter base implemnetation. The logic to collect entrypoint information is the +// same regardless of where the code came from. +struct Server::ErrorReporter: public Worker::ValidationErrorReporter { + // The `HashSet`s are the set of exported handlers, like `fetch`, `test`, etc. + kj::HashMap> namedEntrypoints; + kj::Maybe> defaultEntrypoint; + kj::HashSet actorClasses; + kj::HashSet workflowClasses; - // Then delete the underlying storage. - KJ_IF_SOME(as, ns.actorStorage) { - // Note that if there's no facet index then there couldn't possibly be any child storage. 
- KJ_IF_SOME(index, getFacetTreeIndexIfNotEmpty()) { - uint childId = index.getId(getFacetId(), name); - deleteDescendantStorage(*as.directory, childId); - as.directory->remove(getSqlitePathForId(childId)); - } - } - } + void addEntrypoint(kj::Maybe exportName, kj::Array methods) override { + kj::HashSet set; + for (auto& method: methods) { + set.insert(kj::mv(method)); + } + KJ_IF_SOME(e, exportName) { + namedEntrypoints.insert(kj::str(e), kj::mv(set)); + } else { + defaultEntrypoint = kj::mv(set); + } + } - private: - // The actor is constructed after the ActorContainer so it starts off empty. - kj::Maybe> actor; - - kj::String key; - kj::Own tracker; - ActorNamespace& ns; - ActorContainer& root; - kj::Maybe parent; - kj::Timer& timer; - kj::TimePoint lastAccess; - kj::Maybe> manager; - kj::Maybe> shutdownTask; - kj::Maybe> onBrokenTask; - kj::Maybe brokenReason; - - // Reference to the ContainerClient (if container is enabled for this actor) - kj::Maybe> containerClient; - - // If this is a `ForkedPromise`, await the promise. When it has resolved, then - // `classAndId` will have been replaced with the resolved `ClassAndId` value. - kj::OneOf> classAndId; - - // FacetTreeIndex for this actor. Only initialized on the root. - kj::Maybe> facetTreeIndex; - - // ID of this facet. Initialized when getFacetId() is first called. - kj::Maybe facetId; - - ActorMap facets; - - // Get the facet ID for this facet. The root facet always has ID zero, but all other facets - // need to be looked up in the index to make sure they are assigned consistent IDs. 
- uint getFacetId() { - KJ_IF_SOME(f, facetId) { - return f; - } + void addActorClass(kj::StringPtr exportName) override { + actorClasses.insert(kj::str(exportName)); + } - ActorContainer& parent = KJ_UNWRAP_OR(this->parent, return 0); + void addWorkflowClass(kj::StringPtr exportName, kj::Array methods) override { + // At runtime, we need to add it into the normal namedEntrypoints for Workflows to appear + // in `WorkerService`. This is a different method compared to `addEntrypoint` because we need to + // check for `WorkflowEntrypoint` inheritance at validation time. + kj::HashSet set; + for (auto& method: methods) { + set.insert(kj::mv(method)); + } + namedEntrypoints.insert(kj::str(exportName), kj::mv(set)); + workflowClasses.insert(kj::str(exportName)); + } +}; - FacetTreeIndex& index = root.ensureFacetTreeIndex(); - return index.getId(parent.getFacetId(), key); - } +// Implementation of ErrorReporter specifically for reporting errors in the top-level workerd +// config. +struct Server::ConfigErrorReporter final: public ErrorReporter { + ConfigErrorReporter(Server& server, kj::StringPtr name): server(server), name(name) {} - // Get the facet tree index, opening the file if it hasn't been opened yet, and creating it - // if it hasn't been created yet. - FacetTreeIndex& ensureFacetTreeIndex() { - KJ_REQUIRE(parent == kj::none, "only 'root' may ensureFacetTreeIndex()"); + Server& server; + kj::StringPtr name; - KJ_IF_SOME(i, facetTreeIndex) { - return *i; - } else { - // Facet tree index hasn't been initialized yet. Do that now (opening the existing file, - // or creating it if it doesn't exist). 
- auto& as = KJ_REQUIRE_NONNULL( - ns.actorStorage, "can't call getFacetId() when there's no backing storage"); - auto indexFile = as.directory->openFile( - kj::Path({kj::str(key, ".facets")}), kj::WriteMode::CREATE | kj::WriteMode::MODIFY); - return *facetTreeIndex.emplace(kj::heap(kj::mv(indexFile))); - } - } + void addError(kj::String error) override { + server.handleReportConfigError(kj::str("service ", name, ": ", error)); + } +}; - // Like ensureFacetTreeIndex() but if the index doesn't exist on disk, return kj::none. - kj::Maybe getFacetTreeIndexIfNotEmpty() { - KJ_REQUIRE(parent == kj::none); +// Implementation of ErrorReporter for dynamically-loaded Workers. We'll collect the errors and +// report them in an exception at the end. +struct Server::DynamicErrorReporter final: public ErrorReporter { + kj::Vector errors; - KJ_IF_SOME(i, facetTreeIndex) { - return *i; - } else { - // Facet tree index hasn't been initialized yet. If the file exists, open it. Otherwise, - // assume empty and return none. - auto& as = KJ_UNWRAP_OR(ns.actorStorage, return kj::none); - auto indexFile = KJ_UNWRAP_OR( - as.directory->tryOpenFile(kj::Path({kj::str(key, ".facets")}), kj::WriteMode::MODIFY), - return kj::none); - return *facetTreeIndex.emplace(kj::heap(kj::mv(indexFile))); - } - } + void addError(kj::String error) override { + errors.add(kj::mv(error)); + } - // Get the path to the facet's sqlite database, within the actor namespace directory. 
- kj::Path getSqlitePathForId(uint id) { - if (id == 0) { - return kj::Path({kj::str(root.key, ".sqlite")}); - } else { - return kj::Path({kj::str(root.key, '.', id, ".sqlite")}); - } - } + void throwIfErrors() { + if (!errors.empty()) { + JSG_FAIL_REQUIRE(Error, "Failed to start Worker:\n", kj::strArray(errors, "\n")); + } + } +}; - void deleteDescendantStorage(const kj::Directory& dir, uint parentId) { - KJ_IF_SOME(index, getFacetTreeIndexIfNotEmpty()) { - deleteDescendantStorage(dir, index, parentId); - } else { - // There's no index, so there must be no facets (other than the root). - KJ_ASSERT(parentId == 0); - } - } +class Server::WorkerService final: public Service, + private kj::TaskSet::ErrorHandler, + private IoChannelFactory, + private TimerChannel, + private LimitEnforcer { + public: + // I/O channels, delivered when link() is called. + struct LinkedIoChannels { + kj::Array> subrequest; + kj::Array> actor; // null = configuration error + kj::Array> actorClass; + kj::Maybe> cache; + kj::Maybe actorStorage; + kj::Array> tails; + kj::Array> streamingTails; + kj::Array> workerLoaders; + kj::Maybe workerdDebugPortNetwork; + }; + using LinkCallback = + kj::Function; + using AbortActorsCallback = kj::Function reason)>; + using DeleteActorsCallback = kj::Function reason)>; - void deleteDescendantStorage(const kj::Directory& dir, FacetTreeIndex& index, uint parentId) { - index.forEachChild(parentId, [&](uint childId, kj::StringPtr childName) { - deleteDescendantStorage(dir, index, childId); - dir.remove(getSqlitePathForId(childId)); - }); - } + WorkerService(ChannelTokenHandler& channelTokenHandler, + kj::Maybe serviceName, + ThreadContext& threadContext, + const kj::MonotonicClock& monotonicClock, + kj::Own worker, + kj::Maybe> defaultEntrypointHandlers, + kj::HashMap> namedEntrypoints, + kj::HashSet actorClassEntrypointsParam, + LinkCallback linkCallback, + AbortActorsCallback abortActorsCallback, + DeleteActorsCallback deleteActorsCallback, + kj::Maybe 
dockerPathParam, + kj::Maybe containerEgressInterceptorImageParam, + bool isDynamic, + kj::Maybe> abortIsolateCallback = kj::none) + : channelTokenHandler(channelTokenHandler), + serviceName(serviceName), + threadContext(threadContext), + monotonicClock(monotonicClock), + ioChannels(kj::mv(linkCallback)), + worker(kj::mv(worker)), + defaultEntrypointHandlers(kj::mv(defaultEntrypointHandlers)), + namedEntrypoints(kj::mv(namedEntrypoints)), + actorClassEntrypoints(kj::mv(actorClassEntrypointsParam)), + waitUntilTasks(*this), + abortActorsCallback(kj::mv(abortActorsCallback)), + deleteActorsCallback(kj::mv(deleteActorsCallback)), + dockerPath(kj::mv(dockerPathParam)), + containerEgressInterceptorImage(kj::mv(containerEgressInterceptorImageParam)), + isDynamic(isDynamic), + abortIsolateCallback(kj::mv(abortIsolateCallback)) {} - void requireNotBroken() { - KJ_IF_SOME(e, brokenReason) { - kj::throwFatalException(e.clone()); - } + // Call immediately after the constructor to set up `actorNamespaces`. This can't happen during + // the constructor itself since it sets up cyclic references, which will throw an exception if + // done during the constructor. + void initActorNamespaces(const kj::HashMap& actorClasses, + kj::HashMap& actorNamespacesByUniqueKey, + kj::Network& network) { + actorNamespaces.reserve(actorClasses.size()); + for (auto& entry: actorClasses) { + if (!actorClassEntrypoints.contains(entry.key)) { + KJ_LOG(WARNING, + kj::str("A DurableObjectNamespace in the config referenced the class \"", entry.key, + "\", but no such Durable Object class is exported from the worker. Please make " + "sure the class name matches, it is exported, and the class extends " + "'DurableObject'. Attempts to call to this Durable Object class will fail at " + "runtime, but historically this was not a startup-time error. 
Future versions of " + "workerd may make this a startup-time error.")); } - kj::Promise monitorOnBroken(Worker::Actor& actor) { - try { - // It's possible for this to never resolve if the actor never breaks, - // in which case the returned promise will just be canceled. - co_await actor.onBroken(); - KJ_FAIL_ASSERT("actor.onBroken() resolved normally?"); - } catch (...) { - brokenReason = kj::getCaughtExceptionAsKj(); - } - - for (auto& facet: facets) { - facet.value->abort(brokenReason); - } - facets.clear(); - - // HACK: Dropping the ActorContainer will delete onBrokenTask, cancelling ourselves. This - // would crash. To avoid the problem, detach ourselves. This is safe because we know that - // once we return there's nothing left for this promise to do anyway. - KJ_ASSERT_NONNULL(onBrokenTask).detach([](kj::Exception&& e) {}); - - // Hollow out the object, so that if it still has references, they won't keep these parts - // alive. Since any further calls to `getActor()` will throw, we don't have to worry about - // the actor being recreated. - auto actorToDrop = kj::mv(this->actor); - tracker->shutdown(); - auto managerToDrop = kj::mv(manager); - - // Note that we remove the entire ActorContainer from the map -- this drops the - // HibernationManager so any connected hibernatable websockets will be disconnected. - KJ_IF_SOME(p, parent) { - p.facets.erase(key); - } else { - ns.actors.erase(key); - } - - // WARNING: `this` MAY HAVE BEEN DELETED as a result of the above `erase()`. Do not access - // it again here. 
+ auto actorClass = kj::refcounted(*this, entry.key, Frankenvalue()); + auto ns = kj::heap(kj::mv(actorClass), entry.value, + kj::systemPreciseCalendarClock(), threadContext.getUnsafeTimer(), + threadContext.getByteStreamFactory(), channelTokenHandler, network, dockerPath, + containerEgressInterceptorImage, waitUntilTasks); + KJ_IF_SOME(d, entry.value.tryGet()) { + actorNamespacesByUniqueKey.insert(d.uniqueKey, ns.get()); } + actorNamespaces.insert(entry.key, kj::mv(ns)); + } + } - // Processes the eviction of the Durable Object and hibernates active websockets. - kj::Promise handleShutdown() { - // After 10 seconds of inactivity, we destroy the Worker::Actor and hibernate any active - // JS WebSockets. - // TODO(someday): We could make this timeout configurable to make testing less burdensome. - co_await timer.afterDelay(10 * kj::SECONDS); - // Cancel the onBroken promise, since we're about to destroy the actor anyways and don't - // want to trigger it. - onBrokenTask = kj::none; - KJ_IF_SOME(a, actor) { - if (a->isShared()) { - // Our ActiveRequest refcounting has broken somewhere. This is likely because we're - // `addRef`-ing an actor that has had an ActiveRequest attached to its kj::Own (in other - // words, the ActiveRequest count is less than it should be). - // - // Rather than dropping our actor and possibly ending up with split-brain, - // we should opt out of the deferred proxy optimization and log the error to Sentry. - KJ_LOG(ERROR, - "Detected internal bug in hibernation: Durable Object has strong references " - "when hibernation timeout expired."); - - co_return; - } - KJ_IF_SOME(m, manager) { - auto& worker = a->getWorker(); - auto workerStrongRef = kj::atomicAddRef(worker); - // Take an async lock, we can't use `takeAsyncLock(RequestObserver&)` since we don't - // have an `IncomingRequest` at this point. - // - // Note that we do not have a race here because this is part of the `shutdownTask` - // promise. 
If a new request comes in while we're waiting to get the lock then we will - // cancel this promise. - Worker::AsyncLock asyncLock = co_await worker.takeAsyncLockWithoutRequest(nullptr); - workerStrongRef->runInLockScope( - asyncLock, [&](Worker::Lock& lock) { m->hibernateWebSockets(lock); }); - } - a->shutdown( - 0, KJ_EXCEPTION(DISCONNECTED, "broken.dropped; Actor freed due to inactivity")); - } - // Destroy the last strong Worker::Actor reference. - actor = kj::none; - - // Drop our reference to the ContainerClient - // If setInactivityTimeout() was called, the timer still holds a reference - // so the container stays alive until the timeout expires - containerClient = kj::none; - } - - void start(kj::Own& actorClass, Worker::Actor::Id& id) { - KJ_REQUIRE(actor == nullptr); - - auto makeActorCache = [this](const ActorCache::SharedLru& sharedLru, OutputGate& outputGate, - ActorCache::Hooks& hooks, - SqliteObserver& sqliteObserver) mutable { - return ns.config.tryGet().map( - [&](const Durable& d) -> kj::Own { - KJ_IF_SOME(as, ns.actorStorage) { - kj::Own sqliteHooks; - if (parent == kj::none) { - KJ_IF_SOME(a, ns.alarmScheduler) { - sqliteHooks = kj::heap(a, ActorKey{.actorId = key}); - } else { - // No alarm scheduler available, use default hooks instance. - sqliteHooks = fakeOwn(ActorSqlite::Hooks::getDefaultHooks()); - } - } else { - // TODO(someday): Support alarms in facets, somehow. - sqliteHooks = fakeOwn(ActorSqlite::Hooks::getDefaultHooks()); - } - - uint selfId = getFacetId(); - auto path = getSqlitePathForId(selfId); - auto db = kj::heap( - as.vfs, kj::mv(path), kj::WriteMode::CREATE | kj::WriteMode::MODIFY); - - // Before we do anything, make sure the database is in WAL mode. We also need to - // do this after reset() is used, so register a callback for that. 
- db->run("PRAGMA journal_mode=WAL;"); - - db->afterReset([this, &dir = *as.directory, selfId](SqliteDatabase& db) { - db.run("PRAGMA journal_mode=WAL;"); - - // reset() is used when the app called deleteAll(), in which case we also want to - // delete all child facets. - // TODO(someday): Arguably this should be transactional somehow so if we fail here - // we don't leave the facets still there after the parent has already been reset. - // But most filesystems do not support transactions, so we'd have to do something - // like store a flag in the parent DB saying "reset pending" so that on a restart - // we retry the deletions. Note that in production on SRS, this is actually - // transactional -- there's only a problem when running locally with workerd. - deleteDescendantStorage(dir, selfId); - }); - - return kj::heap(kj::mv(db), outputGate, - [](SpanParent) -> kj::Promise { return kj::READY_NOW; }, *sqliteHooks) - .attach(kj::mv(sqliteHooks)); - } else { - // Create an ActorCache backed by a fake, empty storage. Elsewhere, we configure - // ActorCache never to flush, so this effectively creates in-memory storage. 
- return kj::heap( - newEmptyReadOnlyActorStorage(), sharedLru, outputGate, hooks); - } - }); - }; + void requireAllowsTransfer() override { + if (isDynamic) throwDynamicEntrypointTransferError(); + } - bool enableSql = true; - kj::Maybe - containerOptions = kj::none; - kj::Maybe uniqueKey; - KJ_SWITCH_ONEOF(ns.config) { - KJ_CASE_ONEOF(c, Durable) { - enableSql = c.enableSql; - containerOptions = c.containerOptions; - uniqueKey = c.uniqueKey; - } - KJ_CASE_ONEOF(c, Ephemeral) { - enableSql = c.enableSql; - } - } + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + requireAllowsTransfer(); - auto makeStorage = - [enableSql = enableSql](jsg::Lock& js, const Worker::Api& api, - ActorCacheInterface& actorCache) -> jsg::Ref { - return js.alloc( - js, IoContext::current().addObject(actorCache), enableSql); - }; - - auto loopback = kj::refcounted(*this); - - kj::Maybe container = kj::none; - KJ_IF_SOME(config, containerOptions) { - KJ_ASSERT(config.hasImageName(), "Image name is required"); - auto imageName = config.getImageName(); - kj::String containerId; - KJ_SWITCH_ONEOF(id) { - KJ_CASE_ONEOF(globalId, kj::Own) { - containerId = globalId->toString(); - } - KJ_CASE_ONEOF(existingId, kj::String) { - containerId = kj::str(existingId); - } - } + // encodeSubrequestChannelToken wants a reference to the props. It needs this reference to + // be non-const because it might refcount things. But if it's an empty object then there's + // nothing to refcount. So we can just declare this statically... + static Frankenvalue EMPTY_PROPS; - container = ns.getContainerClient( - kj::str("workerd-", KJ_ASSERT_NONNULL(uniqueKey), "-", containerId), imageName); - } + // If requireAllowsTransfer() passed, then we are not dynamic so should have a service name. 
+ return channelTokenHandler.encodeSubrequestChannelToken( + usage, KJ_ASSERT_NONNULL(serviceName), kj::none, EMPTY_PROPS); + } - auto actor = actorClass->newActor(getTracker(), Worker::Actor::cloneId(id), - kj::mv(makeActorCache), kj::mv(makeStorage), kj::mv(loopback), tryGetManagerRef(), - kj::mv(container), *this); - onBrokenTask = monitorOnBroken(*actor); - this->actor = kj::mv(actor); - } + kj::Maybe> getEntrypoint(kj::Maybe name, Frankenvalue props) { + const kj::HashSet* handlers; + KJ_IF_SOME(n, name) { + KJ_IF_SOME(entry, namedEntrypoints.findEntry(n)) { + name = entry.key; // replace with more-permanent string + handlers = &entry.value; + } else KJ_IF_SOME(className, actorClassEntrypoints.find(n)) { + // TODO(soon): Restore this warning once miniflare no longer generates config that causes + // it to log spuriously. + // + // KJ_LOG(WARNING, + // kj::str("A ServiceDesignator in the config referenced the entrypoint \"", n, + // "\", but this class does not extend 'WorkerEntrypoint'. Attempts to call this " + // "entrypoint will fail at runtime, but historically this was not a startup-time " + // "error. Future versions of workerd may make this a startup-time error.")); - // Helper coroutine to call `getStartInfo()`, the start callback for a facet, while making - // sure the function stays alive until the returned promise resolves. 
- static kj::Promise callFacetStartCallback( - kj::Function()> getStartInfo) { - auto info = co_await getStartInfo(); - co_await info.ensureAllResolved(); - co_return ClassAndId(info.actorClass.downcast(), kj::mv(info.id)); + static const kj::HashSet EMPTY_HANDLERS; + name = className; // replace with more-permanent string + handlers = &EMPTY_HANDLERS; + } else { + return kj::none; } - }; - - kj::Own getActorContainer(Worker::Actor::Id id) { - kj::String key; - - KJ_SWITCH_ONEOF(id) { - KJ_CASE_ONEOF(obj, kj::Own) { - KJ_REQUIRE(config.is()); - key = obj->toString(); - } - KJ_CASE_ONEOF(str, kj::String) { - KJ_REQUIRE(config.is()); - key = kj::str(str); - } + } else { + KJ_IF_SOME(d, defaultEntrypointHandlers) { + handlers = &d; + } else { + // It would appear that there is no default export, therefore this refers to an entrypoint + // that doesn't exist! However, this was historically allowed. For backwards-compatibility, + // we preserve this behavior, by returning a reference to the WorkerService itself, whose + // startRequest() will fail. + // + // What will happen if you invoke this entrypoint? Not what you think. Check out the + // test case in server-test.c++ entitled "referencing non-extant default entrypoint is not + // an error" for the sordid details. 
+ return kj::addRef(*this); } - - return actors - .findOrCreate(key, [&]() mutable { - auto container = kj::refcounted(kj::mv(key), *this, kj::none, - ActorContainer::ClassAndId(kj::addRef(*actorClass), kj::mv(id)), timer); - - return kj::HashMap>::Entry{ - container->getKey(), kj::mv(container)}; - })->addRef(); } + return kj::refcounted(*this, name, kj::mv(props), *handlers); + } - kj::Own getContainerClient( - kj::StringPtr containerId, kj::StringPtr imageName) { - KJ_IF_SOME(existingClient, containerClients.find(containerId)) { - return existingClient->addRef(); + // Like getEntrypoint() but used specifically to get the entrypoint for use in ctx.exports, + // where it can be used raw (props are empty), or can be specialized with props. + kj::Own getLoopbackEntrypoint(kj::Maybe name) { + const kj::HashSet* handlers; + KJ_IF_SOME(n, name) { + KJ_IF_SOME(entry, namedEntrypoints.findEntry(n)) { + name = entry.key; // replace with more-permanent string + handlers = &entry.value; + } else { + KJ_FAIL_REQUIRE("getLoopbackEntrypoint() called for entrypoint that doesn't exist"); + } + } else { + KJ_IF_SOME(d, defaultEntrypointHandlers) { + handlers = &d; + } else { + KJ_FAIL_REQUIRE("getLoopbackEntrypoint() called for entrypoint that doesn't exist"); } + } + return kj::refcounted(*this, name, kj::none, *handlers); + } - // No existing container in the map, create a new one - auto& dockerPathRef = KJ_ASSERT_NONNULL( - dockerPath, "dockerPath must be defined to enable containers on this Durable Object."); + kj::Maybe> getActorClass(kj::Maybe name, Frankenvalue props) { + KJ_IF_SOME(className, actorClassEntrypoints.find(KJ_UNWRAP_OR(name, return kj::none))) { + return kj::refcounted(*this, className, kj::mv(props)); + } else { + return kj::none; + } + } - // Grab a branch of any pending cleanup from a previous ContainerClient for this - // container. If it exists, pass it to the container client so it knows that it has to sync. 
- kj::Promise previousCleanup = kj::READY_NOW; - KJ_IF_SOME(state, containerCleanupState.find(containerId)) { - previousCleanup = state.promise.addBranch(); - } - - // Upsert the cleanup state for this container ID. Replacing the - // canceler auto-cancels any in-flight cleanup tasks from the previous - // client's destructor. The generation counter is bumped on replacement - // so the cleanup callback can detect stale ownership without relying - // on raw pointer identity (which is vulnerable to address reuse). - auto canceler = kj::heap(); - uint64_t capturedGeneration = 0; - containerCleanupState.upsert(kj::str(containerId), - ContainerCleanupState{.canceler = kj::mv(canceler)}, - [&capturedGeneration](ContainerCleanupState& existing, ContainerCleanupState&& incoming) { - existing.canceler = kj::mv(incoming.canceler); - capturedGeneration = ++existing.generation; - }); + kj::Own getLoopbackActorClass(kj::StringPtr name) { + // Look up a more permanent class name string. (Also validates this is actually an export.) + kj::StringPtr className = KJ_REQUIRE_NONNULL(actorClassEntrypoints.find(name), + "getLoopbackActorClass() called for actor class that doesn't exist"); - // Cleanup callback: invoked from the ContainerClient destructor with the joined - // with a cleanup promise - kj::Function)> cleanupCallback = - [this, containerId = kj::str(containerId), capturedGeneration]( - kj::Promise cleanupPromise) mutable { - KJ_IF_SOME(state, containerCleanupState.find(containerId)) { - if (state.generation != capturedGeneration) { - // A newer ContainerClient has replaced us already with another destructor. - // drop the promise. 
- return; - } + return kj::refcounted(*this, className, kj::none); + } - containerClients.erase(containerId); - // Wrap with the canceler so a future client creation can cancel these - // tasks - auto cancellable = - state.canceler->wrap(kj::mv(cleanupPromise)).catch_([](kj::Exception&&) {}); + bool hasDefaultEntrypoint() { + return defaultEntrypointHandlers != kj::none; + } - auto forked = kj::mv(cancellable).fork(); - waitUntilTasks.add(forked.addBranch()); - state.promise = kj::mv(forked); - } - }; + kj::Array getEntrypointNames() { + return KJ_MAP(e, namedEntrypoints) -> kj::StringPtr { return e.key; }; + } - auto client = kj::refcounted(byteStreamFactory, timer, dockerNetwork, - kj::str(dockerPathRef), kj::str(containerId), kj::str(imageName), - kj::str(KJ_ASSERT_NONNULL(containerEgressInterceptorImage, - "containerEgressInterceptorImage must be configured for containers.")), - waitUntilTasks, kj::mv(previousCleanup), kj::mv(cleanupCallback), channelTokenHandler); + kj::Array getActorClassNames() { + return KJ_MAP(name, actorClassEntrypoints) -> kj::StringPtr { return name; }; + } - // Store raw pointer in map (does not own) - containerClients.insert(kj::str(containerId), client.get()); + void link(Worker::ValidationErrorReporter& errorReporter) override { + LinkCallback callback = + kj::mv(KJ_REQUIRE_NONNULL(ioChannels.tryGet(), "already called link()")); + auto linked = callback(*this, errorReporter); - return kj::mv(client); + for (auto& ns: actorNamespaces) { + ns.value->link(linked.actorStorage); } - void abortAll(kj::Maybe reason) { - for (auto& actor: actors) { - actor.value->abort(reason); - } - actors.clear(); - } + ioChannels = kj::mv(linked); + } - // Resets all actor databases, aborts all actors, and cancels all alarms so DOs - // can be recreated with clean state. - void deleteAll(kj::Maybe reason) { - // Reset databases before aborting so connections are still open (avoids - // Windows file-locking issues with deferred handle release). 
- for (auto& actor: actors) { - actor.value->resetStorage(); - } + void unlink() override { + // Need to remove all waited until tasks before destroying `ioChannels` + waitUntilTasks.clear(); - abortAll(reason); + // Need to tear down all actors before tearing down `ioChannels.actorStorage`. + actorNamespaces.clear(); - KJ_IF_SOME(scheduler, ownAlarmScheduler) { - scheduler->deleteAll(); - } - } + // OK, now we can unlink. + ioChannels = {}; + } - private: - kj::Own actorClass; - const ActorConfig& config; - const kj::Clock& clock; + kj::Maybe getActorNamespace(kj::StringPtr name) { + KJ_IF_SOME(a, actorNamespaces.find(name)) { + return *a; + } else { + return kj::none; + } + } - struct ActorStorage { - kj::Own directory; - SqliteDatabase::Vfs vfs; + kj::HashMap>& getActorNamespaces() { + return actorNamespaces; + } - ActorStorage(kj::Own directoryParam) - : directory(kj::mv(directoryParam)), - vfs(*directory) {} - }; + kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { + return startRequest(kj::mv(metadata), kj::none, {}); + } - // Note: The Vfs, actorStorage, and ownAlarmScheduler must not be torn down until all actors - // have been torn down, so we declare them before `actors`. - kj::Maybe actorStorage; - kj::Maybe> ownAlarmScheduler; - - // Tracks the canceler and cleanup promise for a Docker container's lifecycle cleanup. - // Useful to await on async calls of a ContainerClient destructor when the new - // one appears before they've been resolved. - struct ContainerCleanupState { - // Canceler that wraps the promise fired in ~ContainerClient. Replacing - // it cancels any pending cleanup, which resolves the promise immediately. - kj::Own canceler; - - // Forked cleanup promise. A branch is added to waitUntilTasks to keep the I/O alive, - // and another branch is passed to the next ContainerClient so its status() can await. 
- kj::ForkedPromise promise = kj::Promise(kj::READY_NOW).fork(); - - // Monotonically increasing counter, bumped each time the canceler is replaced - // via upsert. The cleanup callback captures the generation at creation time and - // compares it to detect whether a newer ContainerClient has taken ownership, - // avoiding a raw-pointer identity check that is vulnerable to address reuse. - uint64_t generation = 0; - }; + bool hasHandler(kj::StringPtr handlerName) override { + KJ_IF_SOME(h, defaultEntrypointHandlers) { + return h.contains(handlerName); + } else { + return false; + } + } - // Per-container cleanup state: canceler + forked cleanup promise. - kj::HashMap containerCleanupState; + kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata, + kj::Maybe entrypointName, + Frankenvalue props, + kj::Maybe> actor = kj::none, + bool isTracer = false) { + TRACE_EVENT("workerd", "Server::WorkerService::startRequest()"); - // Map of container IDs to ContainerClients (for reconnection support with inactivity timeouts). - // The map holds raw pointers (not ownership) - ContainerClients are owned by actors and timers. - // When the last reference is dropped, the destructor removes the entry from this map. - kj::HashMap containerClients; + auto& channels = KJ_ASSERT_NONNULL(ioChannels.tryGet()); - // If the actor is broken, we remove it from the map. However, if it's just evicted due to - // inactivity, we keep the ActorContainer in the map but drop the Own. When a new - // request comes in, we recreate the Own. - ActorMap actors; + kj::Vector> bufferedTailWorkers(channels.tails.size()); + kj::Vector> streamingTailWorkers(channels.streamingTails.size()); + auto addWorkerIfNotRecursiveTracer = [this, isTracer]( + kj::Vector>& workers, + IoChannelFactory::SubrequestChannel& channel) { + // Caution here... if the tail worker ends up having a circular dependency + // on the worker we'll end up with an infinite loop trying to initialize. 
+ // We can test this directly but it's more difficult to test indirect + // loops (dependency of dependency, etc). Here we're just going to keep + // it simple and just check the direct dependency. + // If service refers to an EntrypointService, we need to compare with the underlying + // WorkerService to match this. + auto& service = KJ_UNWRAP_OR(kj::tryDowncast(channel), { + // Not a Service, probably not self-referential. + workers.add(channel.startRequest({})); + return; + }); - kj::Maybe> cleanupTask; - kj::Timer& timer; - capnp::ByteStreamFactory& byteStreamFactory; - ChannelTokenHandler& channelTokenHandler; - kj::Network& dockerNetwork; - kj::Maybe dockerPath; - kj::Maybe containerEgressInterceptorImage; - kj::TaskSet& waitUntilTasks; - kj::Maybe alarmScheduler; - - // Removes actors from `actors` after 70 seconds of last access. - kj::Promise cleanupLoop() { - constexpr auto EXPIRATION = 70 * kj::SECONDS; - - // Don't bother running the loop if the config doesn't allow eviction. - KJ_SWITCH_ONEOF(config) { - KJ_CASE_ONEOF(c, Durable) { - if (!c.isEvictable) co_return; - } - KJ_CASE_ONEOF(c, Ephemeral) { - if (!c.isEvictable) co_return; + if (service.service() == this) { + if (!isTracer) { + // This is a self-reference. Create a request with isTracer=true. + KJ_IF_SOME(s, kj::tryDowncast(service)) { + workers.add(s.startRequest({}, kj::none, {}, kj::none, true)); + } else KJ_IF_SOME(s, kj::tryDowncast(service)) { + workers.add(s.startRequest({}, true)); + } else { + KJ_FAIL_ASSERT("Unexpected service type in recursive tail worker declaration"); + } + } else { + // Intentionally left empty to prevent infinite recursion with tail workers tailing + // themselves } + } else { + workers.add(service.startRequest({})); } + }; - while (true) { - auto now = timer.now(); - actors.eraseAll([&](auto&, kj::Own& entry) { - // Check getLastAccess() before hasClients() since it's faster. 
- if ((now - entry->getLastAccess()) <= EXPIRATION) { - // Used recently; don't evict. - return false; - } - - if (entry->hasClients()) { - // There's still an active client; don't evict. - return false; - } - - // No clients and not used in a while, evict this actor. - return true; - }); - - co_await timer.atTime(now + EXPIRATION); + // Do not add tracers for worker interfaces with the "test" entrypoint – we generally do not + // need to trace the test event, although this is useful to test that span tracing works, so + // we are not implementing a (more complex) mechanism to disable tracing for all test() events + // here. + if (entrypointName.orDefault("") != "test"_kj) { + for (auto& service: channels.tails) { + addWorkerIfNotRecursiveTracer(bufferedTailWorkers, *service); + } + for (auto& service: channels.streamingTails) { + addWorkerIfNotRecursiveTracer(streamingTailWorkers, *service); } } - // Implements actor loopback, which is used by websocket hibernation to deliver events to the - // actor from the websocket's read loop. - class Loopback: public Worker::Actor::Loopback, public kj::Refcounted { - public: - Loopback(ActorContainer& actorContainer): actorContainer(actorContainer) {} + kj::Maybe> workerTracer = kj::none; - kj::Own getWorker(IoChannelFactory::SubrequestMetadata metadata) override { - return newPromisedWorkerInterface(actorContainer.startRequest(kj::mv(metadata))); - } + if (!bufferedTailWorkers.empty() || !streamingTailWorkers.empty()) { + // Setting up buffered tail workers support, but only if we actually have tail workers + // configured. + auto executionModel = + actor == kj::none ? 
ExecutionModel::STATELESS : ExecutionModel::DURABLE_OBJECT; + auto tailStreamWriter = tracing::initializeTailStreamWriter( + streamingTailWorkers.releaseAsArray(), waitUntilTasks); + auto trace = kj::refcounted(kj::none /* stableId */, kj::none /* scriptName */, + kj::none /* scriptVersion */, kj::none /* dispatchNamespace */, kj::none /* scriptId */, + nullptr /* scriptTags */, mapCopyString(entrypointName), executionModel, + kj::none /* durableObjectId */); + kj::Own tracer = kj::refcounted( + kj::none, kj::mv(trace), PipelineLogLevel::FULL, kj::none, kj::mv(tailStreamWriter)); - kj::Own addRef() override { - return kj::addRef(*this); + // When the tracer is complete, deliver traces to any buffered tail workers. We end up + // creating two references to the WorkerTracer, one held by the observer and one that will be + // passed to the IoContext. This ensures that the tracer lives long enough to receive all + // events. + if (!bufferedTailWorkers.empty()) { + waitUntilTasks.add(tracer->onComplete().then( + kj::coCapture([tailWorkers = bufferedTailWorkers.releaseAsArray()]( + kj::Own trace) mutable -> kj::Promise { + for (auto& worker: tailWorkers) { + auto event = kj::heap( + workerd::api::TraceCustomEvent::TYPE, kj::arr(kj::addRef(*trace))); + co_await worker->customEvent(kj::mv(event)).ignoreResult(); + } + co_return; + }))); } + workerTracer = kj::mv(tracer); + } - private: - ActorContainer& actorContainer; - }; + KJ_IF_SOME(w, workerTracer) { + w->setMakeUserRequestSpanFunc( + [&w = *w, &entropySource = threadContext.getEntropySource()]( + tracing::TraceId traceId, kj::Maybe traceFlags) { + return SpanParent(kj::refcounted( + kj::refcounted(w.getWeakRef(), entropySource), kj::mv(traceId), + traceFlags)); + }); + } + kj::Own observer = + kj::refcounted(mapAddRef(workerTracer), waitUntilTasks); - class ActorSqliteHooks final: public ActorSqlite::Hooks { - public: - ActorSqliteHooks(AlarmScheduler& alarmScheduler, ActorKey actor) - : 
alarmScheduler(alarmScheduler), - actor(actor) {} - - // We ignore the priorTask in workerd because everything should run synchronously. - kj::Promise scheduleRun( - kj::Maybe newAlarmTime, kj::Promise priorTask) override { - KJ_IF_SOME(scheduledTime, newAlarmTime) { - alarmScheduler.setAlarm(actor, scheduledTime); - } else { - alarmScheduler.deleteAlarm(actor); - } - return kj::READY_NOW; + kj::Maybe triggerContext; + KJ_IF_SOME(ctx, metadata.userSpanParent.toSpanContext()) { + KJ_IF_SOME(spanId, ctx.getSpanId()) { + triggerContext = tracing::InvocationSpanContext( + ctx.getTraceId(), tracing::TraceId::nullId, spanId, ctx.getTraceFlags()); } + } - private: - AlarmScheduler& alarmScheduler; - ActorKey actor; - }; - }; + return newWorkerEntrypoint(threadContext, kj::atomicAddRef(*worker), entrypointName, + kj::mv(props), kj::mv(actor), + kj::attachRef(static_cast(*this), kj::addRef(*this)), + {}, // ioContextDependency + kj::attachRef(static_cast(*this), kj::addRef(*this)), kj::mv(observer), + waitUntilTasks, + true, // tunnelExceptions + kj::mv(workerTracer), // workerTracer + kj::mv(metadata.cfBlobJson), + kj::none, // versionInfo + kj::mv(triggerContext)); + } private: class EntrypointService final: public Service { @@ -3384,22 +3551,6 @@ class Server::WorkerService final: public Service, bool isDynamic; kj::Maybe> abortIsolateCallback; - class ActorChannelImpl final: public IoChannelFactory::ActorChannel { - public: - ActorChannelImpl(kj::Own actorContainer) - : actorContainer(kj::mv(actorContainer)) {} - ~ActorChannelImpl() noexcept(false) { - actorContainer->updateAccessTime(); - } - - kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { - return newPromisedWorkerInterface(actorContainer->startRequest(kj::mv(metadata))); - } - - private: - kj::Own actorContainer; - }; - // --------------------------------------------------------------------------- // implements kj::TaskSet::ErrorHandler @@ -3534,8 +3685,8 @@ class 
Server::WorkerService final: public Service, KJ_IF_SOME(p, props) { // Requesting specialization of loopback (ctx.exports) entrypoint with props. - auto& service = KJ_REQUIRE_NONNULL(kj::dynamicDowncastIfAvailable(channelRef), - "referenced channel is not a loopback channel"); + auto& service = KJ_REQUIRE_NONNULL( + kj::tryDowncast(channelRef), "referenced channel is not a loopback channel"); return service.forProps(kj::mv(p)); } @@ -3593,10 +3744,13 @@ class Server::WorkerService final: public Service, KJ_REQUIRE(channel < channels.actorClass.size(), "invalid actor class channel number"); - ActorClass& cls = *channels.actorClass[channel]; + ActorClassChannel& cls = *channels.actorClass[channel]; KJ_IF_SOME(p, props) { - return cls.forProps(kj::mv(p)); + // Requesting specialization of loopback (ctx.exports) actor class with props. + auto& typed = KJ_REQUIRE_NONNULL( + kj::tryDowncast(cls), "referenced channel is not a loopback channel"); + return typed.forProps(kj::mv(p)); } return kj::addRef(cls); @@ -3746,15 +3900,16 @@ struct FutureActorChannel { }; struct FutureActorClassChannel { - kj::OneOf> designator; + kj::OneOf> + designator; kj::String errorContext; - kj::Own lookup(Server& server) && { + kj::Own lookup(Server& server) && { KJ_SWITCH_ONEOF(designator) { KJ_CASE_ONEOF(conf, config::ServiceDesignator::Reader) { return server.lookupActorClass(conf, kj::mv(errorContext)); } - KJ_CASE_ONEOF(channel, kj::Own) { + KJ_CASE_ONEOF(channel, kj::Own) { return kj::mv(channel); } } @@ -4149,8 +4304,8 @@ uint startInspector( void Server::abortAllActors(kj::Maybe reason) { for (auto& service: services) { - if (WorkerService* worker = dynamic_cast(&*service.value)) { - for (auto& [className, ns]: worker->getActorNamespaces()) { + KJ_IF_SOME(worker, kj::tryDowncast(*service.value)) { + for (auto& [className, ns]: worker.getActorNamespaces()) { bool isEvictable = true; KJ_SWITCH_ONEOF(ns->getConfig()) { KJ_CASE_ONEOF(c, Durable) { @@ -4168,8 +4323,8 @@ void 
Server::abortAllActors(kj::Maybe reason) { void Server::deleteAllActors(kj::Maybe reason) { for (auto& service: services) { - if (WorkerService* worker = dynamic_cast(&*service.value)) { - for (auto& [className, ns]: worker->getActorNamespaces()) { + KJ_IF_SOME(worker, kj::tryDowncast(*service.value)) { + for (auto& [className, ns]: worker.getActorNamespaces()) { bool isEvictable = true; KJ_SWITCH_ONEOF(ns->getConfig()) { KJ_CASE_ONEOF(c, Durable) { @@ -4347,6 +4502,17 @@ class Server::WorkerLoaderNamespace: public kj::Refcounted, private kj::TaskSet: ~WorkerStubImpl() { unlink(); + // Defer destruction of `WorkerService` to the next turn of the event loop. This is needed + // for ephemeral dynamic workers as they are torn down synchronously under GC cycles of the + // parent isolate, and this nested isolate teardown breaks a few invariants: + // - Failed `KJ_ASSERT(!inCppgcShimDestructor)` in `HeapTracer::clearWrappers()`, because + // `inCppgcShimDestructor` is set to `true` by the parent isolate + // - If we bypass the previous failure by shifting the flag to be per-isolate, we trigger + // a V8 assertion `AllowGarbageCollection::IsAllowed()` during isolate teardown, as the + // `no_gc_during_gc` was constructed as part of the parent isolate's GC cycle + KJ_IF_SOME(ioContext, IoContext::tryCurrent()) { + ioContext.addTask(kj::evalLater([service = kj::mv(service)]() {})); + } } void unlink() { @@ -4392,19 +4558,19 @@ class Server::WorkerLoaderNamespace: public kj::Refcounted, private kj::TaskSet: kj::Vector subrequestChannels; kj::Vector actorClassChannels; source.env.rewriteCaps([&](kj::Own entry) { - if (auto channel = dynamic_cast(entry.get())) { + KJ_IF_SOME(channel, kj::tryDowncast(*entry)) { uint channelNumber = subrequestChannels.size() + IoContext::SPECIAL_SUBREQUEST_CHANNEL_COUNT; subrequestChannels.add(FutureSubrequestChannel{ - .designator = kj::addRef(*channel), + .designator = kj::addRef(channel), .errorContext = kj::str("Worker's env"), }); return 
kj::heap( IoChannelCapTableEntry::SUBREQUEST, channelNumber); - } else if (auto channel = dynamic_cast(entry.get())) { + } else KJ_IF_SOME(channel, kj::tryDowncast(*entry)) { uint channelNumber = actorClassChannels.size(); actorClassChannels.add(FutureActorClassChannel{ - .designator = kj::addRef(*channel), + .designator = kj::addRef(channel), .errorContext = kj::str("Worker's env"), }); return kj::heap( @@ -4653,10 +4819,11 @@ kj::Promise> Server::makeWorker(kj::StringPtr name, // Use FUTURE_FOR_TEST to allow any valid date (including far future like 2999-12-31) // without validation against CODE_VERSION or current date. compileCompatibilityFlags(overrideDate, conf.getCompatibilityFlags(), featureFlags, - errorReporter, experimental, CompatibilityDateValidation::FUTURE_FOR_TEST); + errorReporter, experimental, CompatibilityDateValidation::FUTURE_FOR_TEST, nullptr); } else if (conf.hasCompatibilityDate()) { compileCompatibilityFlags(conf.getCompatibilityDate(), conf.getCompatibilityFlags(), - featureFlags, errorReporter, experimental, CompatibilityDateValidation::CODE_VERSION); + featureFlags, errorReporter, experimental, CompatibilityDateValidation::CODE_VERSION, + nullptr); } else { errorReporter.addError(kj::str("Worker must specify compatibilityDate.")); } @@ -4782,10 +4949,7 @@ kj::Promise> Server::makeWorkerImpl(kj::StringPtr } using ArtifactBundler = workerd::api::pyodide::ArtifactBundler; - auto isPythonWorker = def.featureFlags.getPythonWorkers(); - auto artifactBundler = isPythonWorker - ? 
ArtifactBundler::makePackagesOnlyBundler(pythonConfig.pyodidePackageManager) - : ArtifactBundler::makeDisabledBundler(); + auto artifactBundler = ArtifactBundler::makeDisabledBundler(); newModuleRegistry = WorkerdApi::newWorkerdModuleRegistry(*jsgobserver, def.source.variant.tryGet(), def.featureFlags, pythonConfig, @@ -4863,10 +5027,7 @@ kj::Promise> Server::makeWorkerImpl(kj::StringPtr } using ArtifactBundler = workerd::api::pyodide::ArtifactBundler; - auto isPythonWorker = def.featureFlags.getPythonWorkers(); - auto artifactBundler = isPythonWorker - ? ArtifactBundler::makePackagesOnlyBundler(pythonConfig.pyodidePackageManager) - : ArtifactBundler::makeDisabledBundler(); + auto artifactBundler = ArtifactBundler::makeDisabledBundler(); auto script = isolate->newScript(name, def.source, IsolateObserver::StartType::COLD, SpanParent(nullptr), workerFs.attach(kj::mv(def.maybeOwnedSourceCode)), false, errorReporter, @@ -5002,15 +5163,14 @@ kj::Promise> Server::makeWorkerImpl(kj::StringPtr result.subrequest = services.finish(); // Set up actor class channels - auto actorClasses = kj::heapArrayBuilder>( + auto actorClasses = kj::heapArrayBuilder>( def.actorClassChannels.size() + actorClassNames.size()); for (auto& channel: def.actorClassChannels) { actorClasses.add(kj::mv(channel).lookup(*this)); } - auto linkedActorChannels = - kj::heapArrayBuilder>(totalActorChannels); + auto linkedActorChannels = kj::heapArrayBuilder>(totalActorChannels); for (auto& channel: def.actorChannels) { WorkerService* targetService = &workerService; @@ -5020,12 +5180,11 @@ kj::Promise> Server::makeWorkerImpl(kj::StringPtr linkedActorChannels.add(kj::none); continue; }); - targetService = dynamic_cast(svc.get()); - if (targetService == nullptr) { + targetService = &KJ_UNWRAP_OR(kj::tryDowncast(*svc), { // error was reported earlier linkedActorChannels.add(kj::none); continue; - } + }); } // (If getActorNamespace() returns null, an error was reported earlier.) 
@@ -5056,16 +5215,17 @@ kj::Promise> Server::makeWorkerImpl(kj::StringPtr if (def.actorStorageConf.isLocalDisk()) { kj::StringPtr diskName = def.actorStorageConf.getLocalDisk(); KJ_IF_SOME(svc, this->services.find(def.actorStorageConf.getLocalDisk())) { - auto diskSvc = dynamic_cast(svc.get()); - if (diskSvc == nullptr) { + KJ_IF_SOME(diskSvc, kj::tryDowncast(*svc)) { + KJ_IF_SOME(dir, diskSvc.getWritable()) { + result.actorStorage = dir; + } else { + errorReporter.addError( + kj::str("durableObjectStorage config refers to the disk service \"", diskName, + "\", but that service is defined read-only.")); + } + } else { errorReporter.addError(kj::str("durableObjectStorage config refers to the service \"", diskName, "\", but that service is not a local disk service.")); - } else KJ_IF_SOME(dir, diskSvc->getWritable()) { - result.actorStorage = dir; - } else { - errorReporter.addError( - kj::str("durableObjectStorage config refers to the disk service \"", diskName, - "\", but that service is defined read-only.")); } } else { errorReporter.addError(kj::str("durableObjectStorage config refers to a service \"", @@ -5126,7 +5286,7 @@ kj::Promise> Server::makeWorkerImpl(kj::StringPtr kj::mv(linkCallback), KJ_BIND_METHOD(*this, abortAllActors), KJ_BIND_METHOD(*this, deleteAllActors), kj::mv(dockerPath), kj::mv(containerEgressInterceptorImage), def.isDynamic, kj::mv(abortIsolateCallback)); - result->initActorNamespaces(def.localActorConfigs, network); + result->initActorNamespaces(def.localActorConfigs, actorNamespacesByUniqueKey, network); co_return result; } @@ -5193,8 +5353,8 @@ kj::Own Server::lookupService( return {}; }(); - if (WorkerService* worker = dynamic_cast(service)) { - KJ_IF_SOME(ep, worker->getEntrypoint(entrypointName, kj::mv(props))) { + KJ_IF_SOME(worker, kj::tryDowncast(*service)) { + KJ_IF_SOME(ep, worker.getEntrypoint(entrypointName, kj::mv(props))) { return kj::mv(ep); } else KJ_IF_SOME(ep, entrypointName) { reportConfigError(kj::str(errorContext, " 
refers to service \"", targetName, @@ -5252,8 +5412,8 @@ kj::Own Server::lookupActorClass( return {}; }(); - if (WorkerService* worker = dynamic_cast(service)) { - KJ_IF_SOME(ep, worker->getActorClass(entrypointName, kj::mv(props))) { + KJ_IF_SOME(worker, kj::tryDowncast(*service)) { + KJ_IF_SOME(ep, worker.getActorClass(entrypointName, kj::mv(props))) { return kj::mv(ep); } else KJ_IF_SOME(ep, entrypointName) { reportConfigError(kj::str(errorContext, " refers to service \"", targetName, @@ -5307,6 +5467,17 @@ kj::Own Server::resolveActorClass( entrypoint.orDefault("default")); } +kj::Own Server::resolveActor( + kj::StringPtr namespaceKey, kj::ArrayPtr id, kj::Maybe name) { + auto& ns = *KJ_REQUIRE_NONNULL(actorNamespacesByUniqueKey.find(namespaceKey), + "couldn't deserialize actor stub pointing at unknown namespace", namespaceKey); + + auto idFactory = kj::heap(namespaceKey); + auto idObj = idFactory->idFromRaw(id, name.clone()); + + return ns.getActorChannel(kj::mv(idObj)); +} + // ======================================================================================= class Server::WorkerdBootstrapImpl final: public rpc::WorkerdBootstrap::Server { @@ -5455,8 +5626,7 @@ class Server::HttpListener final: public kj::Refcounted { kj::PeerIdentity* peerId; - KJ_IF_SOME(tlsId, - kj::dynamicDowncastIfAvailable(*stream.peerIdentity)) { + KJ_IF_SOME(tlsId, kj::tryDowncast(*stream.peerIdentity)) { peerId = &tlsId.getNetworkIdentity(); // TODO(someday): Add client certificate info to the cf blob? 
At present, KJ only @@ -5466,9 +5636,9 @@ class Server::HttpListener final: public kj::Refcounted { peerId = stream.peerIdentity; } - KJ_IF_SOME(remote, kj::dynamicDowncastIfAvailable(*peerId)) { + KJ_IF_SOME(remote, kj::tryDowncast(*peerId)) { cfBlobJson = kj::str("{\"clientIp\": ", escapeJsonString(remote.toString()), "}"); - } else KJ_IF_SOME(local, kj::dynamicDowncastIfAvailable(*peerId)) { + } else KJ_IF_SOME(local, kj::tryDowncast(*peerId)) { auto creds = local.getCredentials(); kj::Vector parts; @@ -5755,8 +5925,7 @@ class Server::DebugPortListener { kj::Own targetService; // Try to cast to WorkerService to support entrypoints and props - auto* workerService = dynamic_cast(service); - if (workerService != nullptr) { + KJ_IF_SOME(workerService, kj::tryDowncast(*service)) { // This is a WorkerService, use getEntrypoint which supports both entrypoints and props kj::Maybe maybeEntrypoint; if (params.hasEntrypoint()) { @@ -5764,7 +5933,7 @@ class Server::DebugPortListener { } targetService = - KJ_ASSERT_NONNULL(workerService->getEntrypoint(maybeEntrypoint, kj::mv(props)), + KJ_ASSERT_NONNULL(workerService.getEntrypoint(maybeEntrypoint, kj::mv(props)), kj::str("jsg.Error: Worker does not export an entrypoint named \"", maybeEntrypoint.orDefault("(default)"), "\"")); } else { @@ -5799,11 +5968,11 @@ class Server::DebugPortListener { auto service = serviceEntry->service(); // Try to cast to WorkerService - auto* workerService = dynamic_cast(service); - KJ_REQUIRE(workerService != nullptr, "jsg.Error: Worker does not support Durable Objects"); + auto& workerService = KJ_REQUIRE_NONNULL(kj::tryDowncast(*service), + "jsg.Error: Worker does not support Durable Objects"); // Look up the actor namespace - auto& actorNamespace = KJ_ASSERT_NONNULL(workerService->getActorNamespace(entrypointName), + auto& actorNamespace = KJ_ASSERT_NONNULL(workerService.getActorNamespace(entrypointName), kj::str("jsg.Error: Worker does not export a Durable Object class named \"", 
entrypointName, "\"")); @@ -5969,19 +6138,10 @@ kj::Promise Server::preloadPython( KJ_IF_SOME(release, pythonRelease) { auto version = getPythonBundleName(release); - // Fetch the Pyodide bundle. - co_await server::fetchPyodideBundle(pythonConfig, kj::mv(version), network, timer); - - // Preload Python packages. - KJ_IF_SOME(modulesSource, workerDef.source.variant.tryGet()) { - if (modulesSource.isPython) { - auto pythonRequirements = getPythonRequirements(modulesSource); - - // Store the packages in the package manager that is stored in the pythonConfig - co_await server::fetchPyodidePackages(pythonConfig, pythonConfig.pyodidePackageManager, - pythonRequirements, release, network, timer); - } - } + // Fetch the Pyodide bundle, verifying its integrity against the expected checksum. The + // bundle embeds the CPython stdlib packages directly, so there is nothing else to preload. + co_await server::fetchPyodideBundle( + pythonConfig, kj::mv(version), release.getIntegrity(), network, timer); } } } @@ -6377,10 +6537,10 @@ kj::Promise Server::test(jsg::V8System& v8System, co_await doTest(*service.value, service.key); } - if (WorkerService* worker = dynamic_cast(service.value.get())) { - for (auto& name: worker->getEntrypointNames()) { + KJ_IF_SOME(worker, kj::tryDowncast(*service.value)) { + for (auto& name: worker.getEntrypointNames()) { if (entrypointGlob.matches(name)) { - kj::Own ep = KJ_ASSERT_NONNULL(worker->getEntrypoint(name, /*props=*/{})); + kj::Own ep = KJ_ASSERT_NONNULL(worker.getEntrypoint(name, /*props=*/{})); if (ep->hasHandler("test"_kj)) { co_await doTest(*ep, kj::str(service.key, ':', name)); } diff --git a/src/workerd/server/server.h b/src/workerd/server/server.h index 4ddfd5447a0..ac6a8c418ed 100644 --- a/src/workerd/server/server.h +++ b/src/workerd/server/server.h @@ -189,6 +189,9 @@ class Server final: private kj::TaskSet::ErrorHandler, private ChannelTokenHandl kj::HashMap> services; + class ActorNamespace; + kj::HashMap 
actorNamespacesByUniqueKey; + class WorkerLoaderNamespace; kj::HashMap> workerLoaderNamespaces; kj::Vector> anonymousWorkerLoaderNamespaces; @@ -273,6 +276,9 @@ class Server final: private kj::TaskSet::ErrorHandler, private ChannelTokenHandl kj::StringPtr serviceName, kj::Maybe entrypoint, Frankenvalue props) override; kj::Own resolveActorClass( kj::StringPtr serviceName, kj::Maybe entrypoint, Frankenvalue props) override; + kj::Own resolveActor(kj::StringPtr namespaceKey, + kj::ArrayPtr id, + kj::Maybe name) override; kj::Array encodeChannelToken(IoChannelFactory::ChannelTokenUsage usage, kj::StringPtr serviceName, diff --git a/src/workerd/server/tests/inspector/driver.mjs b/src/workerd/server/tests/inspector/driver.mjs index 0c035b28f23..e4ed8d50c07 100644 --- a/src/workerd/server/tests/inspector/driver.mjs +++ b/src/workerd/server/tests/inspector/driver.mjs @@ -3,6 +3,7 @@ // https://opensource.org/licenses/Apache-2.0 import { env } from 'node:process'; import { beforeEach, afterEach, test } from 'node:test'; +import { scheduler } from 'node:timers/promises'; import assert from 'node:assert'; import CDP from 'chrome-remote-interface'; import { WorkerdServerHarness } from '../server-harness.mjs'; @@ -64,7 +65,7 @@ async function profileAndExpectDeriveBitsFrames(inspectorClient) { // Drive the worker with a test request. A single one is sufficient. let httpPort = await workerd.getListenPort('http'); - const response = await fetch(`http://localhost:${httpPort}`); + const response = await fetch(`http://localhost:${httpPort}/pbkdf2Derive`); await response.arrayBuffer(); // Stop and disable profiling. @@ -118,3 +119,50 @@ test('Profiler mostly sees deriveBits() frames, and can safely reconnect', async await inspectorClient.close(); } }); + +// Regression test for use-after-free when sending Unicode exception messages to inspector. 
+// Before the fix, this would cause memory corruption or crashes due to the scratch buffer +// being freed before the inspector finished reading from it. +test('Inspector correctly receives exceptions with Unicode characters', async () => { + const inspectorClient = await connectInspector( + await workerd.getListenInspectorPort() + ); + + // Collect exceptions reported to the inspector + const exceptions = []; + inspectorClient.on('Runtime.exceptionThrown', (params) => { + exceptions.push(params); + }); + await inspectorClient.Runtime.enable(); + + // Make the worker throw an exception with non-ascii. + const message = '💥 错误 오류 エラー Ошибка'; + const httpPort = await workerd.getListenPort('http'); + const url = new URL(`http://localhost:${httpPort}/throwException`); + url.searchParams.set('message', message); + const response = await fetch(url); + assert.strictEqual(response.status, 500); + + // Wait to receive the exception events + let iters = 0; + while (exceptions.length < 2) { + await scheduler.wait(50); + iters += 1; + if (iters > 50) { + assert.fail('timed out waiting for exceptions'); + } + } + + // We actually receive two records for the exception, one "uncaught in promise" and one + // "uncaught in response". 
+ assert.strictEqual(exceptions.length, 2); + + const lastException = exceptions[exceptions.length - 1]; + assert.strictEqual( + lastException.exceptionDetails.text, + `Uncaught Error: ${message}` + ); + + await inspectorClient.Runtime.disable(); + await inspectorClient.close(); +}); diff --git a/src/workerd/server/tests/inspector/index.mjs b/src/workerd/server/tests/inspector/index.mjs index 4e500d6c408..dac8c4f751a 100644 --- a/src/workerd/server/tests/inspector/index.mjs +++ b/src/workerd/server/tests/inspector/index.mjs @@ -26,6 +26,13 @@ async function pbkdf2Derive(password) { export default { async fetch(request, env, ctx) { - return new Response(await pbkdf2Derive('hello!')); + if (request.url.includes('/pbkdf2Derive')) { + return new Response(await pbkdf2Derive('hello!')); + } + if (request.url.includes('/throwException')) { + const url = new URL(request.url); + throw new Error(url.searchParams.get('message')); + } + return new Response('Not found', { status: 404 }); }, }; diff --git a/src/workerd/server/tests/python/BUILD.bazel b/src/workerd/server/tests/python/BUILD.bazel index 9f0798c973a..21bc3755a7b 100644 --- a/src/workerd/server/tests/python/BUILD.bazel +++ b/src/workerd/server/tests/python/BUILD.bazel @@ -1,5 +1,4 @@ load("@rules_shell//shell:sh_test.bzl", "sh_test") -load("//src/workerd/server/tests/python:import_tests.bzl", "gen_rust_import_tests") load("//src/workerd/server/tests/python:py_wd_test.bzl", "py_wd_test", "python_test_setup") load("//src/workerd/server/tests/python/vendor_pkg_tests:vendor_test.bzl", "vendored_py_wd_test") @@ -37,16 +36,10 @@ py_wd_test( compat_date = "2026-01-01", ) -gen_rust_import_tests() - py_wd_test("undefined-handler") py_wd_test("vendor_dir") -py_wd_test("dont-snapshot-pyodide") - -py_wd_test("filter-non-py-files") - py_wd_test("durable-object") py_wd_test( @@ -71,25 +64,13 @@ py_wd_test("vendor_dir_compat_flag") py_wd_test("default-class-with-legacy-global-handlers") -py_wd_test( - "fastapi", - make_snapshot = 
False, - use_snapshot = "fastapi", -) - -py_wd_test( - "numpy", - make_snapshot = False, - use_snapshot = "numpy", -) - py_wd_test("python-compat-flag") py_wd_test("pth-file") -# Shell-driven test for the python-abort-isolate-on-fatal-error autogate. The Python worker -# triggers a fatal error which (with the autogate enabled) calls abortIsolate(), terminating the -# workerd process. The shell script verifies the expected fatal output and non-zero exit. +# Shell-driven test for aborting the isolate on a Python fatal error. The Python worker +# triggers a fatal error which calls abortIsolate(), terminating the workerd process. The shell +# script verifies the expected fatal output and non-zero exit. sh_test( name = "python-abort-isolate-on-fatal-test", size = "enormous", diff --git a/src/workerd/server/tests/python/dont-snapshot-pyodide/dont-snapshot-pyodide.wd-test b/src/workerd/server/tests/python/dont-snapshot-pyodide/dont-snapshot-pyodide.wd-test deleted file mode 100644 index e1d4e1161de..00000000000 --- a/src/workerd/server/tests/python/dont-snapshot-pyodide/dont-snapshot-pyodide.wd-test +++ /dev/null @@ -1,15 +0,0 @@ -using Workerd = import "/workerd/workerd.capnp"; - -const unitTests :Workerd.Config = ( - services = [ - ( name = "dont-snapshot-pyodide", - worker = ( - modules = [ - (name = "worker.py", pythonModule = embed "worker.py"), - (name = "numpy", pythonRequirement = "") - ], - compatibilityFlags = [%PYTHON_FEATURE_FLAGS, "disable_python_no_global_handlers"], - ) - ), - ], -); diff --git a/src/workerd/server/tests/python/dont-snapshot-pyodide/worker.py b/src/workerd/server/tests/python/dont-snapshot-pyodide/worker.py deleted file mode 100644 index 4792572fd29..00000000000 --- a/src/workerd/server/tests/python/dont-snapshot-pyodide/worker.py +++ /dev/null @@ -1,22 +0,0 @@ -""" -To trigger the bug we need to do two things: -1. import `pyodide` at top level -2. ensure that there is some package requirement in wd-test -3. 
make test() async - -Importing numpy isn't really necessary but we need to include it as a requirement in the wd-test -file so that we consider making a package snapshot. In the buggy code, importing pyodide at top -level then makes the package snapshot import pyodide while making the snapshot. Importing pyodide -before calling finalizeBootstrap messes up the runtime state and causes various weird and malign -symptoms. -""" - -import numpy - -import pyodide - - -async def test(): - # Mention imports so that ruff won't remove them - pyodide # noqa: B018 - numpy # noqa: B018 diff --git a/src/workerd/server/tests/python/fastapi/fastapi.wd-test b/src/workerd/server/tests/python/fastapi/fastapi.wd-test deleted file mode 100644 index 4d557ee21ec..00000000000 --- a/src/workerd/server/tests/python/fastapi/fastapi.wd-test +++ /dev/null @@ -1,15 +0,0 @@ -using Workerd = import "/workerd/workerd.capnp"; - -const unitTests :Workerd.Config = ( - services = [ - ( name = "fastapi", - worker = ( - modules = [ - (name = "worker.py", pythonModule = embed "worker.py"), - (name = "fastapi", pythonRequirement = "fastapi") - ], - compatibilityFlags = [%PYTHON_FEATURE_FLAGS, "disable_python_dedicated_snapshot"], - ) - ), - ], -); diff --git a/src/workerd/server/tests/python/fastapi/worker.py b/src/workerd/server/tests/python/fastapi/worker.py deleted file mode 100644 index 5254b7ac9cf..00000000000 --- a/src/workerd/server/tests/python/fastapi/worker.py +++ /dev/null @@ -1,7 +0,0 @@ -import fastapi -from workers import WorkerEntrypoint - - -class Default(WorkerEntrypoint): - def test(self): - assert fastapi.__version__ in {"0.110.0", "0.116.1"} diff --git a/src/workerd/server/tests/python/filter-non-py-files/filter-files.wd-test b/src/workerd/server/tests/python/filter-non-py-files/filter-files.wd-test deleted file mode 100644 index 8a1d1c4ba10..00000000000 --- a/src/workerd/server/tests/python/filter-non-py-files/filter-files.wd-test +++ /dev/null @@ -1,21 +0,0 @@ -using Workerd = import 
"/workerd/workerd.capnp"; - -# This is a really slow way to test that PyodideMetadataReader::getWorkerFiles works. -# TODO: replace with a unit test? -const unitTests :Workerd.Config = ( - services = [ - ( name = "dont-snapshot-pyodide", - worker = ( - modules = [ - (name = "worker.py", pythonModule = embed "worker.py"), - # a file with no `.py` extension to get filtered out - (name = "fake_shared_library.so", data = "This isn't really a shared library..."), - # We need a package dependency to trigger the package snapshot logic which we're trying to - # test. - (name = "numpy", pythonRequirement = "") - ], - compatibilityFlags = [%PYTHON_FEATURE_FLAGS, "disable_python_no_global_handlers"], - ) - ), - ], -); diff --git a/src/workerd/server/tests/python/filter-non-py-files/worker.py b/src/workerd/server/tests/python/filter-non-py-files/worker.py deleted file mode 100644 index f174823854e..00000000000 --- a/src/workerd/server/tests/python/filter-non-py-files/worker.py +++ /dev/null @@ -1,2 +0,0 @@ -def test(): - pass diff --git a/src/workerd/server/tests/python/import_tests.bzl b/src/workerd/server/tests/python/import_tests.bzl deleted file mode 100644 index 29e2d72fa6f..00000000000 --- a/src/workerd/server/tests/python/import_tests.bzl +++ /dev/null @@ -1,136 +0,0 @@ -load("@bazel_skylib//rules:write_file.bzl", "write_file") -load("//:build/python_metadata.bzl", "BUNDLE_VERSION_INFO", "PYTHON_IMPORTS_TO_TEST") -load("//src/workerd/server/tests/python:py_wd_test.bzl", "py_wd_test") - -def _generate_import_py_file(imports): - res = "" - for imp in imports: - res += "import " + imp + "\n" - - res += "from workers import WorkerEntrypoint\n" - res += "class Default(WorkerEntrypoint):\n" - res += " def test(self):\n" - res += " pass" - return res - -WD_FILE_TEMPLATE = """ -using Workerd = import "/workerd/workerd.capnp"; - -const unitTests :Workerd.Config = ( - services = [ - ( name = "python-import-{name}", - worker = ( - modules = [ - (name = "worker.py", pythonModule = 
embed "./worker.py"), - {requirements} - ], - compatibilityFlags = [%PYTHON_FEATURE_FLAGS], - ) - ), - ] -);""" - -def _generate_wd_test_file(name, requirements): - l = [] - for req in requirements: - l.append('(name = "{}", pythonRequirement = ""),\n'.format(req)) - requirements = "".join(l) - return WD_FILE_TEMPLATE.format(name = name, requirements = requirements) - -def _test(name, directory, wd_test, py_file, python_version, **kwds): - py_wd_test( - name = name, - directory = directory, - src = wd_test, - python_flags = [python_version], - use_snapshot = None, - make_snapshot = False, - skip_default_data = True, - data = [py_file], - **kwds - ) - -# to_test is a dictionary from library name to list of imports -def _gen_import_tests(to_test, python_version, pkg_skip_versions): - for lib in to_test.keys(): - skip_python_flags = [version for version, packages in pkg_skip_versions.items() if lib in packages] - if BUNDLE_VERSION_INFO["development"]["real_pyodide_version"] in skip_python_flags: - skip_python_flags.append("development") - if lib.endswith("-tests"): - # TODO: The pyodide-build-scripts should be updated to not emit these packages. Once - # that's done we can remove this check. 
- continue - - prefix = "import/" + lib - worker_py_fname = python_version + "/" + prefix + "/worker.py" - wd_test_fname = python_version + "/" + prefix + "/import.wd-test" - write_file( - name = worker_py_fname + "@rule", - out = worker_py_fname, - content = [_generate_import_py_file(to_test[lib])], - ) - write_file( - name = wd_test_fname + "@rule", - out = wd_test_fname, - content = [_generate_wd_test_file(lib, [lib])], - ) - - _test( - name = prefix, - directory = lib, - wd_test = wd_test_fname, - py_file = worker_py_fname, - python_version = python_version, - skip_python_flags = skip_python_flags, - ) - -def gen_import_tests(*, pkg_skip_versions = {}): - for python_version, info in BUNDLE_VERSION_INFO.items(): - to_test = PYTHON_IMPORTS_TO_TEST[info["packages"]] - _gen_import_tests(to_test, python_version, pkg_skip_versions = pkg_skip_versions) - -def _rotations(lst): - result = [] - cur = lst - for i in range(len(lst)): - result.append(cur) - cur = cur[1:] + [cur[0]] - return result - -def _pkg_permutations(lst): - return _rotations(lst) + _rotations(reversed(lst)) - -def _gen_rust_import_tests(python_version): - pyodide_version = BUNDLE_VERSION_INFO[python_version]["real_pyodide_version"] - if pyodide_version == "0.26.0a2": - pkgs = _rotations(["tiktoken", "pydantic"]) - else: - pkgs = _pkg_permutations(["cryptography", "jiter", "tiktoken", "pydantic"]) - - for res in pkgs: - name = "-".join(res) - prefix = "import2/" + name - worker_py_fname = python_version + "/" + prefix + "/worker.py" - wd_test_fname = python_version + "/" + prefix + "/import.wd-test" - write_file( - name = worker_py_fname + "@rule", - out = worker_py_fname, - content = [_generate_import_py_file(res)], - ) - write_file( - name = wd_test_fname + "@rule", - out = wd_test_fname, - content = [_generate_wd_test_file(name, res)], - ) - - _test( - name = prefix, - directory = name, - wd_test = wd_test_fname, - py_file = worker_py_fname, - python_version = python_version, - ) - -def 
gen_rust_import_tests(): - for python_version in BUNDLE_VERSION_INFO.keys(): - _gen_rust_import_tests(python_version) diff --git a/src/workerd/server/tests/python/numpy/numpy.wd-test b/src/workerd/server/tests/python/numpy/numpy.wd-test deleted file mode 100644 index 8aae55f16d0..00000000000 --- a/src/workerd/server/tests/python/numpy/numpy.wd-test +++ /dev/null @@ -1,15 +0,0 @@ -using Workerd = import "/workerd/workerd.capnp"; - -const unitTests :Workerd.Config = ( - services = [ - ( name = "numpy", - worker = ( - modules = [ - (name = "worker.py", pythonModule = embed "worker.py"), - (name = "numpy", pythonRequirement = "numpy") - ], - compatibilityFlags = [%PYTHON_FEATURE_FLAGS, "disable_python_dedicated_snapshot"], - ) - ), - ], -); diff --git a/src/workerd/server/tests/python/py_wd_test.bzl b/src/workerd/server/tests/python/py_wd_test.bzl index 651f8392cca..7c60ab4d607 100644 --- a/src/workerd/server/tests/python/py_wd_test.bzl +++ b/src/workerd/server/tests/python/py_wd_test.bzl @@ -30,15 +30,12 @@ def _py_wd_test_helper( templated_src = name_flag.replace("/", "-") + "@template" templated_src = "/".join(src.split("/")[:-1] + [templated_src]) - pkg_tag = BUNDLE_VERSION_INFO[python_flag]["packages"] - data = data + ["@all_pyodide_wheels_%s//:whls" % pkg_tag] - args = args + ["--pyodide-package-disk-cache-dir"] + pyodide_version = BUNDLE_VERSION_INFO[python_flag]["real_pyodide_version"] - # +pyodide+ is a bzlmod canonical repository name - args.append("../+pyodide+all_pyodide_wheels_%s" % pkg_tag) + # The CPython stdlib packages are now extracted and embedded directly in the Pyodide bundle, so + # there are no wheels to download or cache on disk at runtime. 
load_snapshot = None - pyodide_version = BUNDLE_VERSION_INFO[python_flag]["real_pyodide_version"] if use_snapshot == "stacked": if pyodide_version == "0.26.0a2": use_snapshot = None @@ -103,16 +100,12 @@ def _snapshot_file(snapshot): def _snapshot_files( name, baseline_snapshot = None, - numpy_snapshot = None, - fastapi_snapshot = None, dedicated_fastapi_snapshot = None, **_kwds): if name == "development": return [] result = [] result += _snapshot_file(baseline_snapshot) - result += _snapshot_file(numpy_snapshot) - result += _snapshot_file(fastapi_snapshot) result += _snapshot_file(dedicated_fastapi_snapshot) return result diff --git a/src/workerd/server/tests/python/python-abort-isolate-on-fatal/python-abort-isolate-on-fatal.sh b/src/workerd/server/tests/python/python-abort-isolate-on-fatal/python-abort-isolate-on-fatal.sh index b6c0412945e..d422cf0871b 100755 --- a/src/workerd/server/tests/python/python-abort-isolate-on-fatal/python-abort-isolate-on-fatal.sh +++ b/src/workerd/server/tests/python/python-abort-isolate-on-fatal/python-abort-isolate-on-fatal.sh @@ -1,6 +1,5 @@ #!/bin/bash -# Test that when the python-abort-isolate-on-fatal-error autogate is enabled, -# triggering a Python fatal error causes the workerd process to abort. +# Test that triggering a Python fatal error causes the workerd process to abort. 
set -uo pipefail diff --git a/src/workerd/server/tests/python/python-abort-isolate-on-fatal/python-abort-isolate-on-fatal.wd-test b/src/workerd/server/tests/python/python-abort-isolate-on-fatal/python-abort-isolate-on-fatal.wd-test index 03e7ce3637e..0a74a9697da 100644 --- a/src/workerd/server/tests/python/python-abort-isolate-on-fatal/python-abort-isolate-on-fatal.wd-test +++ b/src/workerd/server/tests/python/python-abort-isolate-on-fatal/python-abort-isolate-on-fatal.wd-test @@ -1,7 +1,6 @@ using Workerd = import "/workerd/workerd.capnp"; const config :Workerd.Config = ( - autogates = ["workerd-autogate-python-abort-isolate-on-fatal-error"], services = [ ( name = "python-abort-isolate-on-fatal", worker = ( diff --git a/src/workerd/server/tests/python/python-abort-isolate-on-fatal/worker.py b/src/workerd/server/tests/python/python-abort-isolate-on-fatal/worker.py index 1cd717d44b8..325d5b4af11 100644 --- a/src/workerd/server/tests/python/python-abort-isolate-on-fatal/worker.py +++ b/src/workerd/server/tests/python/python-abort-isolate-on-fatal/worker.py @@ -4,9 +4,8 @@ async def test(ctrl, env, ctx): - # _pyodide_core.trigger_fatal_error() invokes Pyodide's on_fatal handler. With the - # python-abort-isolate-on-fatal-error autogate enabled, the on_fatal handler calls - # abortIsolate() which terminates the workerd process. + # _pyodide_core.trigger_fatal_error() invokes Pyodide's on_fatal handler. The on_fatal + # handler calls abortIsolate() which terminates the workerd process. 
from _pyodide_core import trigger_fatal_error trigger_fatal_error() diff --git a/src/workerd/server/tests/python/vendor_pkg_tests/BUILD b/src/workerd/server/tests/python/vendor_pkg_tests/BUILD index 2a2f82aaa71..e6f94d84823 100644 --- a/src/workerd/server/tests/python/vendor_pkg_tests/BUILD +++ b/src/workerd/server/tests/python/vendor_pkg_tests/BUILD @@ -3,7 +3,12 @@ load(":vendor_test.bzl", "vendored_py_wd_test") python_test_setup() -vendored_py_wd_test("fastapi") +vendored_py_wd_test( + "fastapi", + make_snapshot = True, + python_flags = ["0.28.2"], + use_snapshot = "baseline", +) vendored_py_wd_test("beautifulsoup4") @@ -20,6 +25,13 @@ vendored_py_wd_test( vendored_package_name = "python-workers-runtime-sdk", ) +vendored_py_wd_test( + "numpy", + make_snapshot = True, + python_flags = ["0.28.2"], + use_snapshot = "baseline", +) + # vendored_py_wd_test("scipy") vendored_py_wd_test( diff --git a/src/workerd/server/tests/python/numpy/worker.py b/src/workerd/server/tests/python/vendor_pkg_tests/numpy.py similarity index 88% rename from src/workerd/server/tests/python/numpy/worker.py rename to src/workerd/server/tests/python/vendor_pkg_tests/numpy.py index fef9cfed8bd..59420d25942 100644 --- a/src/workerd/server/tests/python/numpy/worker.py +++ b/src/workerd/server/tests/python/vendor_pkg_tests/numpy.py @@ -3,6 +3,6 @@ class Default(WorkerEntrypoint): - def test(self): + async def test(self): res = np.arange(12).reshape((3, -1))[::-2, ::-2] assert str(res) == "[[11 9]\n [ 3 1]]" diff --git a/src/workerd/server/tests/python/vendor_pkg_tests/numpy_vendor.wd-test b/src/workerd/server/tests/python/vendor_pkg_tests/numpy_vendor.wd-test new file mode 100644 index 00000000000..d902f37c125 --- /dev/null +++ b/src/workerd/server/tests/python/vendor_pkg_tests/numpy_vendor.wd-test @@ -0,0 +1,15 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "numpy-vendor-test", + worker = ( + modules = [ + (name = "main.py", 
pythonModule = embed "numpy.py"), + %PYTHON_VENDORED_MODULES% + ], + compatibilityFlags = [%PYTHON_FEATURE_FLAGS], + ) + ), + ], +); diff --git a/src/workerd/server/v8-platform-impl.h b/src/workerd/server/v8-platform-impl.h index 6665c02f053..d4259b107be 100644 --- a/src/workerd/server/v8-platform-impl.h +++ b/src/workerd/server/v8-platform-impl.h @@ -75,6 +75,15 @@ class WorkerdPlatform final: public v8::Platform { return inner.GetTracingController(); } + v8::ThreadIsolatedAllocator* GetThreadIsolatedAllocator() noexcept override { + // Forward to the inner platform so that V8's ThreadIsolation can use PKU + // (Memory Protection Keys) to enforce W^X on JIT code pages and + // write-protect the code pointer tables. Without this, the + // DefaultPlatform's allocator (which calls pkey_alloc) was silently + // dropped and ThreadIsolation was disabled. + return inner.GetThreadIsolatedAllocator(); + } + private: v8::Platform& inner; }; diff --git a/src/workerd/server/workerd-api.c++ b/src/workerd/server/workerd-api.c++ index c48b01819a9..6ba06c22718 100644 --- a/src/workerd/server/workerd-api.c++ +++ b/src/workerd/server/workerd-api.c++ @@ -213,27 +213,6 @@ class EmptyReadOnlyActorStorageImpl final: public rpc::ActorStorage::Stage::Serv } // namespace -/** - * This function matches the implementation of `getPythonRequirements` in the internal repo. But it - * works on the workerd ModulesSource definition rather than the WorkerBundle. 
- */ -kj::Array getPythonRequirements(const Worker::Script::ModulesSource& source) { - kj::Vector requirements; - - for (auto& def: source.modules) { - KJ_SWITCH_ONEOF(def.content) { - KJ_CASE_ONEOF(content, Worker::Script::PythonRequirement) { - requirements.add(api::pyodide::canonicalizePythonPackageName(def.name)); - } - KJ_CASE_ONEOF_DEFAULT { - break; - } - } - } - - return requirements.releaseAsArray(); -} - struct WorkerdApi::Impl final { kj::Own features; capnp::List::Reader extensions; @@ -509,8 +488,9 @@ Worker::Script::Module WorkerdApi::readModuleConf(config::Worker::Module::Reader } case config::Worker::Module::PYTHON_MODULE: return Worker::Script::PythonModule{conf.getPythonModule()}; - case config::Worker::Module::PYTHON_REQUIREMENT: - return Worker::Script::PythonRequirement{}; + case config::Worker::Module::OBSOLETE_PYTHON_REQUIREMENT: + KJ_FAIL_REQUIRE( + "NOSENTRY Worker bundle specified Python requirement which is no longer supported"); case config::Worker::Module::OBSOLETE: { // A non-supported or obsolete module type was configured KJ_FAIL_REQUIRE("Worker bundle specified an unsupported module type"); @@ -628,7 +608,8 @@ static v8::Local createBindingValue(JsgWorkerdIsolate::Lock& lock, api::SubtleCrypto::ImportKeyData keyData; KJ_SWITCH_ONEOF(key.keyData) { KJ_CASE_ONEOF(data, kj::Array) { - keyData = kj::heapArray(data.asPtr()); + auto u8 = jsg::JsBufferSource(jsg::JsUint8Array::create(lock, data)); + keyData = u8.addRef(lock); } KJ_CASE_ONEOF(json, Global::Json) { v8::Local str = lock.wrap(context, kj::mv(json.text)); @@ -873,14 +854,11 @@ const WorkerdApi& WorkerdApi::from(const Worker::Api& api) { // TODO(soon): These are required for python workers but we don't support those yet // with the new module registry. Uncomment these when we do. 
// namespace { -// static constexpr auto PYTHON_TAR_READER = "export default { }"_kj; - // static const auto metadataSpecifier = "pyodide-internal:runtime-generated/metadata"_url; // static const auto artifactsSpecifier = "pyodide-internal:artifacts"_url; // static const auto internalJaegerSpecifier = "pyodide-internal:internalJaeger"_url; // static const auto diskCacheSpecifier = "pyodide-internal:disk_cache"_url; // static const auto limiterSpecifier = "pyodide-internal:limiter"_url; -// static const auto tarReaderSpecifier = "pyodide-internal:packages_tar_reader"_url; // } // namespace kj::Arc WorkerdApi::newWorkerdModuleRegistry( @@ -937,8 +915,6 @@ kj::Arc WorkerdApi::newWorkerdModuleRegistry( // jsg::modules::ModuleBundle::getBuiltInBundleFromCapnp(pyodideBundleBuilder, PYODIDE_BUNDLE); // jsg::modules::ModuleBundle::getBuiltInBundleFromCapnp(pyodideBundleBuilder, bundle); - // pyodideBundleBuilder.addEsm(tarReaderSpecifier, PYTHON_TAR_READER); - // api::pyodide::CreateBaselineSnapshot createBaselineSnapshot( // pythonConfig.createBaselineSnapshot); // api::pyodide::SnapshotToDisk snapshotToDisk( @@ -1125,7 +1101,7 @@ kj::Arc WorkerdApi::newWorkerdModuleRegistry( KJ_LOG(WARNING, "Fallback service returned a Python module"); return kj::none; } - KJ_CASE_ONEOF(content, Worker::Script::PythonRequirement) { + KJ_CASE_ONEOF(content, Worker::Script::ObsoletePythonRequirement) { // Python requirement modules are not supported.in fallback KJ_LOG(WARNING, "Fallback service returned a Python requirement"); return kj::none; diff --git a/src/workerd/server/workerd-api.h b/src/workerd/server/workerd-api.h index 40e3abb3a6e..edc04cc5755 100644 --- a/src/workerd/server/workerd-api.h +++ b/src/workerd/server/workerd-api.h @@ -347,8 +347,6 @@ class WorkerdApi final: public Worker::Api { kj::Own impl; }; -kj::Array getPythonRequirements(const Worker::Script::ModulesSource& source); - // An ActorStorage implementation which will always respond to reads as if the state is empty, 
// and will fail any writes. Defined here to be used by test-fixture and server. kj::Own newEmptyReadOnlyActorStorage(); diff --git a/src/workerd/server/workerd.capnp b/src/workerd/server/workerd.capnp index d433362ff99..de1c488c1d3 100644 --- a/src/workerd/server/workerd.capnp +++ b/src/workerd/server/workerd.capnp @@ -303,13 +303,8 @@ struct Worker { # A Python module. All bundles containing this value type are converted into a JS/WASM Worker # Bundle prior to execution. - pythonRequirement @9 :Text; - # A Python package that is required by this bundle. The package must be supported by - # Pyodide (https://pyodide.org/en/stable/usage/packages-in-pyodide.html). All packages listed - # will be installed prior to the execution of the worker. - # - # The value of this field is ignored and should always be an empty string. Only the module - # name matters. The field should have been declared `Void`, but it's difficult to change now. + obsoletePythonRequirement @9 :Text; + # This position used to be the pythonRequirement type that has now been deprecated. } namedExports @10 :List(Text); diff --git a/src/workerd/tests/BUILD.bazel b/src/workerd/tests/BUILD.bazel index 593d98b31e9..298440ae427 100644 --- a/src/workerd/tests/BUILD.bazel +++ b/src/workerd/tests/BUILD.bazel @@ -140,8 +140,7 @@ wd_cc_benchmark( ], ) -# Benchmark for PumpToReader (ReadableStream::pumpTo path in standard.c++). -# Run before and after DrainingReader adoption to measure improvement. +# Benchmark for ReadableStream::pumpTo path in standard.c++. 
# bazel run --config=opt //src/workerd/tests:bench-pumpto wd_cc_benchmark( name = "bench-pumpto", diff --git a/src/workerd/tests/bench-pumpto.c++ b/src/workerd/tests/bench-pumpto.c++ index b15669be930..5753edff023 100644 --- a/src/workerd/tests/bench-pumpto.c++ +++ b/src/workerd/tests/bench-pumpto.c++ @@ -2,31 +2,28 @@ // Licensed under the Apache 2.0 license found in the LICENSE file or at: // https://opensource.org/licenses/Apache-2.0 -// Benchmark for PumpToReader in standard.c++. +// Benchmark for ReadableStream::pumpTo() in standard.c++. // // Measures the performance of ReadableStream::pumpTo() which routes through -// ReadableStreamJsController::pumpTo() → PumpToReader::pumpLoop(). +// ReadableStreamJsController::pumpTo() and DrainingReader. // -// This benchmark establishes a baseline before the DrainingReader adoption, -// then the same benchmarks are re-run after the change to quantify improvement. -// This test was originally written to measure improvement from DrainingReader -// adoption (deployed by an autogate), but remains broadly useful as a benchmark -// even after we remove the autogate. +// This benchmark was originally written to measure improvement from DrainingReader +// adoption, but remains broadly useful for tracking pumpTo throughput and batching. // // Usage: -// # Capture baseline (before changes): +// # Capture baseline: // bazel run --config=opt //src/workerd/tests:bench-pumpto \ // -- --benchmark_format=json --benchmark_out=baseline.json // -// # Capture comparison (after changes): +// # Capture comparison: // bazel run --config=opt //src/workerd/tests:bench-pumpto \ // -- --benchmark_format=json --benchmark_out=after.json // // Key metrics: // - bytes_per_second: Primary throughput metric. // - WriteOps: Average sink write calls per iteration. Directly measures batching. -// Before DrainingReader adoption: WriteOps ≈ numChunks (one write per chunk). -// After: WriteOps ≪ numChunks (one vectored write per drain cycle). 
+// With synchronous streams, WriteOps should be much lower than numChunks +// because pumpTo writes one vectored batch per drain cycle. #include #include @@ -208,7 +205,7 @@ jsg::Ref createConfiguredStream( // Core benchmark function // ============================================================================= -// Exercises: ReadableStream::pumpTo() → ReadableStreamJsController::pumpTo() → PumpToReader +// Exercises: ReadableStream::pumpTo() → ReadableStreamJsController::pumpTo(). static void benchPumpTo( benchmark::State& state, size_t chunkSize, size_t numChunks, const StreamConfig& config) { capnp::MallocMessageBuilder message; @@ -226,7 +223,7 @@ static void benchPumpTo( auto stream = createConfiguredStream(env.js, chunkSize, numChunks, config); // Wrap DiscardingSink as a WritableStreamSink via newSystemStream. - // This is the production path: PumpToReader receives a WritableStreamSink. + // This is the production path: pumpTo receives a WritableStreamSink. kj::Own fakeOwn(&sink, kj::NullDisposer::instance); auto writableSink = newSystemStream(kj::mv(fakeOwn), StreamEncoding::IDENTITY, env.context); @@ -302,7 +299,7 @@ static void PumpTo_64KB_Byte(benchmark::State& state) { // ============================================================================= // Each chunk requires a KJ event loop yield, simulating real network I/O. // DrainingReader cannot batch these (at most 1 chunk per drain cycle). -// These verify no regression from the PumpToReader change. +// These verify no regression when the stream cannot be batched. // Smaller total payload because each chunk incurs real event loop overhead. 
static void PumpTo_256B_IoLatency(benchmark::State& state) { diff --git a/src/workerd/tests/bench-stream-piping.c++ b/src/workerd/tests/bench-stream-piping.c++ index 59207c82e58..1f3173d6913 100644 --- a/src/workerd/tests/bench-stream-piping.c++ +++ b/src/workerd/tests/bench-stream-piping.c++ @@ -2,10 +2,9 @@ // Licensed under the Apache 2.0 license found in the LICENSE file or at: // https://opensource.org/licenses/Apache-2.0 -// Benchmark to compare stream piping implementations: -// 1. Existing approach (ReadableStream::pumpTo via PumpToReader) - uses JS promise-based loop -// 2. New approach (ReadableSourceKjAdapter::pumpTo) - uses DrainingReader to pull all -// synchronously available data at once, then writes with vectored I/O +// Benchmark to compare stream piping entry points: +// 1. ReadableStream::pumpTo() - standard stream controller path. +// 2. ReadableSourceKjAdapter::pumpTo() - kj adapter path. // // Run with: bazel run --config=opt //src/workerd/tests:bench-stream-piping @@ -393,7 +392,7 @@ jsg::Ref createConfiguredStream( } // ============================================================================= -// Benchmark: New approach using ReadableSourceKjAdapter::pumpTo +// Benchmark: ReadableSourceKjAdapter::pumpTo // ============================================================================= static void benchNewApproachPumpTo( @@ -426,7 +425,7 @@ static void benchNewApproachPumpTo( }); // Verify all expected bytes were written - KJ_ASSERT(sink.bytesWritten == expectedBytes, "New approach: expected", expectedBytes, + KJ_ASSERT(sink.bytesWritten == expectedBytes, "Adapter path: expected", expectedBytes, "bytes but got", sink.bytesWritten); } @@ -437,7 +436,7 @@ static void benchNewApproachPumpTo( } // ============================================================================= -// Benchmark: Existing approach using ReadableStream::pumpTo (PumpToReader) +// Benchmark: ReadableStream::pumpTo // 
============================================================================= static void benchExistingApproachPumpTo( @@ -469,7 +468,7 @@ static void benchExistingApproachPumpTo( }); // Verify all expected bytes were written - KJ_ASSERT(sink.bytesWritten == expectedBytes, "Existing approach: expected", expectedBytes, + KJ_ASSERT(sink.bytesWritten == expectedBytes, "ReadableStream path: expected", expectedBytes, "bytes but got", sink.bytesWritten); } diff --git a/src/workerd/tests/libreprl/libreprl.c b/src/workerd/tests/libreprl/libreprl.c index 4ad127de83b..bdee6206ae0 100644 --- a/src/workerd/tests/libreprl/libreprl.c +++ b/src/workerd/tests/libreprl/libreprl.c @@ -182,7 +182,9 @@ static int reprl_error(struct reprl_context* ctx, const char *format, ...) va_list args; va_start(args, format); free(ctx->last_error); - vasprintf(&ctx->last_error, format, args); + if (vasprintf(&ctx->last_error, format, args) < 0) { + ctx->last_error = nullptr; + } return -1; } @@ -239,10 +241,12 @@ static void reprl_terminate_child(struct reprl_context* ctx) static int reprl_spawn_child(struct reprl_context* ctx) { // This is also a good time to ensure the data channel backing files don't grow too large. 
- ftruncate(ctx->data_in->fd, REPRL_MAX_DATA_SIZE); - ftruncate(ctx->data_out->fd, REPRL_MAX_DATA_SIZE); - if (ctx->child_stdout) ftruncate(ctx->child_stdout->fd, REPRL_MAX_DATA_SIZE); - if (ctx->child_stderr) ftruncate(ctx->child_stderr->fd, REPRL_MAX_DATA_SIZE); + if (ftruncate(ctx->data_in->fd, REPRL_MAX_DATA_SIZE) != 0 || + ftruncate(ctx->data_out->fd, REPRL_MAX_DATA_SIZE) != 0 || + (ctx->child_stdout && ftruncate(ctx->child_stdout->fd, REPRL_MAX_DATA_SIZE) != 0) || + (ctx->child_stderr && ftruncate(ctx->child_stderr->fd, REPRL_MAX_DATA_SIZE) != 0)) { + return reprl_error(ctx, "Failed to truncate data channel file: %s", strerror(errno)); + } int crpipe[2] = { 0, 0 }; // control pipe child -> reprl int cwpipe[2] = { 0, 0 }; // control pipe reprl -> child diff --git a/src/workerd/tests/test-fixture.c++ b/src/workerd/tests/test-fixture.c++ index 1cea7bae98e..675162deb4c 100644 --- a/src/workerd/tests/test-fixture.c++ +++ b/src/workerd/tests/test-fixture.c++ @@ -285,7 +285,8 @@ struct MockResponse final: public kj::HttpService::Response { class MockActorLoopback: public Worker::Actor::Loopback, public kj::Refcounted { public: kj::Own getWorker(IoChannelFactory::SubrequestMetadata metadata) override { - return kj::Own(); + return WorkerInterface::fromException( + KJ_EXCEPTION(FAILED, "MockActorLoopback::getWorker() not available in test fixture")); }; kj::Own addRef() override { @@ -367,26 +368,51 @@ TestFixture::TestFixture(SetupParams&& params) errorHandler(kj::heap()), waitUntilTasks(*errorHandler), headerTable(headerTableBuilder.build()), - ioChannelFactory(kj::mv(params.ioChannelFactory)) { + ioChannelFactory(kj::mv(params.ioChannelFactory)), + requestObserverFactory(kj::mv(params.requestObserverFactory)) { KJ_IF_SOME(id, params.actorId) { - auto makeActorCache = [](const ActorCache::SharedLru& sharedLru, OutputGate& outputGate, - ActorCache::Hooks& hooks, SqliteObserver& sqliteObserver) { - return kj::heap( - server::newEmptyReadOnlyActorStorage(), sharedLru, 
outputGate, hooks); - }; - auto makeStorage = [](jsg::Lock& js, const Worker::Api& api, - ActorCacheInterface& actorCache) -> jsg::Ref { - return js.alloc( - js, IoContext::current().addObject(actorCache), /*enableSql=*/false); - }; - actor = kj::refcounted(*worker, /*tracker=*/kj::none, kj::mv(id), - /*hasTransient=*/false, makeActorCache, - /*classname=*/kj::none, /*props=*/Frankenvalue(), makeStorage, - kj::refcounted(), *timerChannel, kj::refcounted(), - kj::none, kj::none); + KJ_IF_SOME(provided, params.actorLoopback) { + savedActorLoopback = kj::mv(provided); + } else { + savedActorLoopback = kj::refcounted(); + } + actor = makeActor(kj::mv(id)); } } +namespace { + +// Factory functions used by TestFixture's actor construction; passed by name (decay to +// function pointers) into kj::Function-typed parameters. +kj::Maybe> actorCacheFactory(const ActorCache::SharedLru& sharedLru, + OutputGate& outputGate, + ActorCache::Hooks& hooks, + SqliteObserver& sqliteObserver) { + return kj::heap(server::newEmptyReadOnlyActorStorage(), sharedLru, outputGate, hooks); +} + +jsg::Ref storageFactory( + jsg::Lock& js, const Worker::Api& api, ActorCacheInterface& actorCache) { + return js.alloc( + js, IoContext::current().addObject(actorCache), /*enableSql=*/false); +} + +} // namespace + +kj::Own TestFixture::makeActor(Worker::Actor::Id id) { + auto& loopback = KJ_ASSERT_NONNULL(savedActorLoopback); + return kj::refcounted(*worker, /*tracker=*/kj::none, kj::mv(id), + /*hasTransient=*/false, actorCacheFactory, /*classname=*/kj::none, + /*props=*/Frankenvalue(), storageFactory, loopback->addRef(), *timerChannel, + kj::refcounted(), kj::none, kj::none); +} + +void TestFixture::resetActor() { + auto id = KJ_ASSERT_NONNULL(actor)->cloneId(); + actor = kj::none; // Drop the old Actor (and its OutputGate / InputGate / ActorCache). 
+ actor = makeActor(kj::mv(id)); +} + void TestFixture::runInIoContext(kj::Function(const Environment&)>&& callback, const kj::ArrayPtr errorsToIgnore) { auto ignoreDescription = [&errorsToIgnore](kj::StringPtr description) { @@ -418,17 +444,31 @@ void TestFixture::runInIoContext(kj::Function(const Environmen } } -kj::Own TestFixture::createIncomingRequest() { - auto context = kj::refcounted( +kj::Own TestFixture::newIoContext() { + return kj::refcounted( threadContext, kj::atomicAddRef(*worker), actor, kj::heap()); +} + +kj::Own TestFixture::newIncomingRequest() { + auto context = newIoContext(); + return newIncomingRequest(*context); +} + +kj::Own TestFixture::newIncomingRequest(IoContext& context) { kj::Own channelFactory; KJ_IF_SOME(factory, ioChannelFactory) { channelFactory = factory(*timerChannel); } else { channelFactory = kj::heap(*timerChannel); } - auto incomingRequest = kj::heap(kj::addRef(*context), - kj::mv(channelFactory), kj::refcounted(), kj::none, kj::none); + kj::Own observer; + KJ_IF_SOME(factory, requestObserverFactory) { + observer = factory(); + } else { + observer = kj::refcounted(); + } + auto incomingRequest = kj::heap( + kj::addRef(context), kj::mv(channelFactory), kj::mv(observer), kj::none, kj::none); incomingRequest->delivered(); return incomingRequest; } diff --git a/src/workerd/tests/test-fixture.h b/src/workerd/tests/test-fixture.h index 6086ed40e13..565e4db3a60 100644 --- a/src/workerd/tests/test-fixture.h +++ b/src/workerd/tests/test-fixture.h @@ -32,6 +32,16 @@ struct TestFixture { // If set, used instead of the default DummyIoChannelFactory when creating incoming requests. // The factory receives the TimerChannel reference. kj::Maybe(TimerChannel&)>> ioChannelFactory; + // If set, used as the actor's Loopback (only meaningful when actorId is set). Defaults to a + // MockActorLoopback that throws on getWorker(). 
Tests that need to intercept hibernation + // event dispatch can supply a custom Loopback here, then later retrieve it (or a fresh ref + // to it) via actor.getLoopback(). This way the actor and the HibernationManager share a + // single Loopback, mirroring production. + kj::Maybe> actorLoopback; + // If set, called to create the RequestObserver for each IncomingRequest instead of the default + // no-op base RequestObserver. Lets tests observe metrics hooks (e.g. recording the values + // passed to setNextSubrequestBodyRewindable()). + kj::Maybe()>> requestObserverFactory; }; TestFixture(SetupParams&& params = {.useRealTimers = false}); @@ -60,10 +70,10 @@ struct TestFixture { // callback should accept const Environment& parameter and return Promise|void. // For void callbacks run waits for their completion, for promises waits for their resolution // and returns the result. - template - auto runInIoContext(CallBack&& callback) + template + auto runInIoContext(Callback&& callback) -> RunReturnType()))>::Type { - auto request = createIncomingRequest(); + auto request = newIncomingRequest(); kj::WaitScope* waitScope; KJ_IF_SOME(ws, this->waitScope) { waitScope = &ws; @@ -94,6 +104,107 @@ struct TestFixture { // Performs HTTP request on the default module handler, and waits for full response. Response runRequest(kj::HttpMethod method, kj::StringPtr url, kj::StringPtr body); + // Create a new IoContext, owned by the caller. Use this when you need an IoContext that + // outlives a single IncomingRequest, e.g. to model an actor receiving multiple requests. + kj::Own newIoContext(); + + // Create a new IncomingRequest bound to a fresh IoContext. The returned IncomingRequest is the + // sole owner of that IoContext (via kj refcounting): destroying the IR destroys the IoContext. + // If you need the IoContext to outlive the IR (e.g. to model multiple sequential or overlapping + // IRs against one actor), call newIoContext() first and use the two-arg overload below. 
Use + // enterContext() to run code within this context. + kj::Own newIncomingRequest(); + + // Create a new IncomingRequest bound to an existing IoContext. Use this to model multiple + // IncomingRequests against the same actor (and hence the same IoContext). The IoContext must + // outlive the returned IncomingRequest. + kj::Own newIncomingRequest(IoContext& context); + + // Enter an IoContext. Callback receives Environment& and must return void (NOT a + // Promise — the Worker::Lock is only valid for the synchronous duration of the + // callback). The context is NOT destroyed afterwards — the caller still owns the + // IncomingRequest. + template + void enterContext(IoContext::IncomingRequest& request, Callback&& callback) { + auto& context = request.getContext(); + context + .run([&](Worker::Lock& lock) { + auto& js = jsg::Lock::from(lock.getIsolate()); + Environment env = {{.isolate = lock.getIsolate()}, context, lock, js}; + callback(env); + }).wait(getWaitScope()); + } + + // Acquire a Worker::Lock without an IoContext. Useful for operations that need + // the V8 isolate lock but not a request context (e.g., hibernateWebSockets). + template + void enterWorkerLock(Callback&& callback) { + auto asyncLock = worker->takeAsyncLockWithoutRequest(nullptr).wait(getWaitScope()); + worker->runInLockScope(asyncLock, [&](Worker::Lock& lock) { callback(lock); }); + } + + kj::WaitScope& getWaitScope() { + KJ_IF_SOME(ws, waitScope) { + return ws; + } else { + return KJ_REQUIRE_NONNULL(io).waitScope; + } + } + + // Drive the event loop for a duration. Useful when test progress depends on a real timer + // firing. For tests that just need pending work to run, prefer pollEventLoop() — it's + // deterministic and faster. + // + // Requires SetupParams::useRealTimers = true; will fail at runtime otherwise because the + // provider's timer is only set up when real timers are enabled. 
+ void pumpEventLoop(kj::Duration duration) { + KJ_REQUIRE_NONNULL(io).provider->getTimer().afterDelay(duration).wait(getWaitScope()); + } + + // Run any work pending on the event loop until idle (no blocking). Returns the number of + // events processed. Use this to deterministically drive background tasks (e.g. HM's + // readLoop) to a stable point. + uint pollEventLoop() { + return getWaitScope().poll(); + } + + // Drain an IncomingRequest (waiting on its waitUntil tasks) and then destroy it. Tests should + // use this rather than letting the IncomingRequest's Own go out of scope, otherwise the + // IncomingRequest destructor logs a warning about un-drained waitUntil tasks. Production code + // paths always drain. + // + // For actor IncomingRequests, drain() returns when all waitUntil tasks are empty, the actor is + // shut down, or a new IncomingRequest takes over. In tests the second is unlikely so we mostly + // rely on the first. + void drainAndDestroy(kj::Own request) { + request->drain(waitUntilTasks, kj::mv(request)); + waitUntilTasks.onEmpty().wait(getWaitScope()); + } + + // Accessors for tests that want to construct objects (e.g. HibernationManagerImpl) outside any + // IoContext, to keep their construction paths free of ambient state. Production usually + // constructs such objects lazily inside an IoContext just because the trigger (a JS handler) + // happens to run there, but the constructors themselves don't need one. + Worker::Actor& getActor() { + return *KJ_ASSERT_NONNULL(actor); + } + TimerChannel& getTimerChannel() { + return *timerChannel; + } + + // Destroy the current Worker::Actor and construct a fresh one with the same id and Loopback. + // Useful for simulating actor eviction: after this call, getActor() returns a different Actor + // with a fresh InputGate / OutputGate, so a new IoContext can be constructed against it. 
The + // previous IoContext (and any IncomingRequests still tied to it) MUST be torn down via + // drainAndDestroy before calling this; otherwise the old IoContext's non-owning Actor reference + // becomes dangling. + // + // Production has the actor's owning namespace pull the HibernationManager off the dying actor + // and pass it to the new actor's constructor (see Server's actor namespace handling). Tests + // here typically hold the HM directly and don't need to plumb it through the actor — the HM + // outlives the actor by virtue of the test holding it. + void resetActor(); + private: kj::Maybe waitScope; capnp::MallocMessageBuilder configArena; @@ -104,6 +215,10 @@ struct TestFixture { kj::Own timerChannel; kj::Own entropySource; kj::Maybe> actor; + // Saved so resetActor() can construct a new actor with the same Loopback (mirroring production, + // where the namespace's Loopback outlives any single actor instance). Held via addRef so we + // can hand fresh refs to actors as we reconstruct them. + kj::Maybe> savedActorLoopback; capnp::ByteStreamFactory byteStreamFactory; kj::HttpHeaderTable::Builder headerTableBuilder; ThreadContext::HeaderIdBundle threadContextHeaderBundle; @@ -120,8 +235,10 @@ struct TestFixture { kj::TaskSet waitUntilTasks; kj::Own headerTable; kj::Maybe(TimerChannel&)>> ioChannelFactory; + kj::Maybe()>> requestObserverFactory; - kj::Own createIncomingRequest(); + // Construct a fresh Worker::Actor with the given id, using the saved Loopback. + kj::Own makeActor(Worker::Actor::Id id); public: // Default IoChannelFactory used by tests. 
Exposed so tests can subclass it diff --git a/src/workerd/util/BUILD.bazel b/src/workerd/util/BUILD.bazel index 9f8ded578bc..63c9bd040a9 100644 --- a/src/workerd/util/BUILD.bazel +++ b/src/workerd/util/BUILD.bazel @@ -250,7 +250,12 @@ wd_cc_library( wd_cc_library( name = "sentry", + srcs = ["sentry.c++"], hdrs = ["sentry.h"], + implementation_deps = [ + ":entropy", + ":thread-scopes", + ], visibility = ["//visibility:public"], deps = [ "@capnp-cpp//src/kj", diff --git a/src/workerd/util/autogate.c++ b/src/workerd/util/autogate.c++ index a0749b93b0d..38f31a64be5 100644 --- a/src/workerd/util/autogate.c++ +++ b/src/workerd/util/autogate.c++ @@ -19,8 +19,6 @@ kj::StringPtr KJ_STRINGIFY(AutogateKey key) { switch (key) { case AutogateKey::TEST_WORKERD: return "test-workerd"_kj; - case AutogateKey::TCP_SOCKET_CONNECT_OUTPUT_GATE: - return "tcp-socket-connect-output-gate"_kj; case AutogateKey::V8_FAST_API: return "v8-fast-api"_kj; case AutogateKey::STREAMING_TAIL_WORKER: @@ -29,22 +27,14 @@ kj::StringPtr KJ_STRINGIFY(AutogateKey key) { return "tail-stream-refactor"_kj; case AutogateKey::RUST_BACKED_NODE_DNS: return "rust-backed-node-dns"_kj; - case AutogateKey::WASM_SHUTDOWN_SIGNAL_SHIM: - return "wasm-shutdown-signal-shim"_kj; case AutogateKey::ENABLE_FAST_TEXTENCODER: return "enable-fast-textencoder"_kj; - case AutogateKey::ENABLE_DRAINING_READ_ON_STANDARD_STREAMS: - return "enable-draining-read-on-standard-streams"_kj; - case AutogateKey::SQL_RESTRICT_RESERVED_NAMES: - return "sql-restrict-reserved-names"_kj; case AutogateKey::INCREASE_SQLITE_HARD_HEAP_LIMIT: return "increase-sqlite-hard-heap-limit"_kj; - case AutogateKey::USER_SPAN_CONTEXT_PROPAGATION: - return "user-span-context-propagation"_kj; case AutogateKey::UPDATED_AUTO_ALLOCATE_CHUNK_SIZE: return "updated-auto-allocate-chunk-size"_kj; - case AutogateKey::PYTHON_ABORT_ISOLATE_ON_FATAL_ERROR: - return "python-abort-isolate-on-fatal-error"_kj; + case AutogateKey::STARTTLS_REJECT_EXPECTED_SERVER_HOSTNAME: + return 
"starttls-reject-expected-server-hostname"_kj; case AutogateKey::NumOfKeys: KJ_FAIL_ASSERT("NumOfKeys should not be used in getName"); } diff --git a/src/workerd/util/autogate.h b/src/workerd/util/autogate.h index 9494877938a..d51ba456fcf 100644 --- a/src/workerd/util/autogate.h +++ b/src/workerd/util/autogate.h @@ -22,9 +22,6 @@ WD_STRONG_BOOL(IgnoreAllAutogatesEnv); // Workerd-specific list of autogate keys (can also be used in internal repo). enum class AutogateKey { TEST_WORKERD, - // Defers TCP socket connect() to wait for DO output gate, preventing - // network outputs while storage writes are pending. - TCP_SOCKET_CONNECT_OUTPUT_GATE, V8_FAST_API, // Enables support for the streaming tail worker. Note that this is currently also guarded behind // an experimental compat flag. @@ -33,23 +30,15 @@ enum class AutogateKey { TAIL_STREAM_REFACTOR, // Enable Rust-backed Node.js DNS implementation RUST_BACKED_NODE_DNS, - // Enable the WebAssembly.instantiate shim that detects modules exporting __instance_signal / - // __instance_terminated and registers them for receiving the CPU-limit shutdown signal. - WASM_SHUTDOWN_SIGNAL_SHIM, // Enable fast TextEncoder implementation using simdutf ENABLE_FAST_TEXTENCODER, - // Enable draining read on standard streams - ENABLE_DRAINING_READ_ON_STANDARD_STREAMS, - // Make SqlStorage::isAllowedName case-insensitive and enforce it on virtual tables (FTS5). - SQL_RESTRICT_RESERVED_NAMES, // Increase the SQLite hard heap limit from 512 MiB to 8 GiB. INCREASE_SQLITE_HARD_HEAP_LIMIT, - // Enable user span context propagation across worker-to-worker subrequests. - USER_SPAN_CONTEXT_PROPAGATION, // Apply an updated default autoAllocateChunkSize for ReadableStreams UPDATED_AUTO_ALLOCATE_CHUNK_SIZE, - // Call abortIsolate() when a Python worker encounters a fatal error. - PYTHON_ABORT_ISOLATE_ON_FATAL_ERROR, + // When enabled, reject startTls calls that pass the expectedServerHostname option, + // which is not currently supported. 
When disabled, log the usage instead. + STARTTLS_REJECT_EXPECTED_SERVER_HOSTNAME, NumOfKeys // Reserved for iteration. }; diff --git a/src/workerd/util/entropy.c++ b/src/workerd/util/entropy.c++ index 59b92e8cb16..54faa3404c2 100644 --- a/src/workerd/util/entropy.c++ +++ b/src/workerd/util/entropy.c++ @@ -57,11 +57,10 @@ void getEntropy(kj::ArrayPtr output) { } size_t toCopy = kj::min(state.data.size(), output.size()); - output.first(toCopy).copyFrom(state.data.first(toCopy)); + output.write(state.data.first(toCopy)); // Zero out the source buffer after copying to prevent sensitive data from remaining in memory OPENSSL_cleanse(state.data.first(toCopy).begin(), toCopy); state.data = state.data.slice(toCopy); - output = output.slice(toCopy); } } diff --git a/src/workerd/util/exception.h b/src/workerd/util/exception.h index 1919b46a42b..3c264201f8d 100644 --- a/src/workerd/util/exception.h +++ b/src/workerd/util/exception.h @@ -12,6 +12,8 @@ namespace workerd { // If an exception is thrown for exceeding CPU time limits, it will contain this detail. constexpr kj::Exception::DetailTypeId CPU_LIMIT_DETAIL_ID = 0xfdcb787ba4240576ull; +// If an exception is thrown for exceeding wall time limits, it will contain this detail. +constexpr kj::Exception::DetailTypeId WALL_TIME_LIMIT_DETAIL_ID = 0x6e8f2b4a1c9d3e5bull; // If an exception is thrown for exceeding memory limits, it will contain this detail. constexpr kj::Exception::DetailTypeId MEMORY_LIMIT_DETAIL_ID = 0xbaf76dd7ce5bd8cfull; // If an exception is thrown for worker killed before start, it will contain this detail. diff --git a/src/workerd/util/sentry.c++ b/src/workerd/util/sentry.c++ new file mode 100644 index 00000000000..f36a76ae6f1 --- /dev/null +++ b/src/workerd/util/sentry.c++ @@ -0,0 +1,28 @@ +// Copyright (c) 2017-2022 Cloudflare, Inc. 
+// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +#include "sentry.h" + +#include +#include + +namespace workerd { + +InternalErrorId makeInternalErrorId() { + InternalErrorId id; + if (isPredictableModeForTest()) { + // In testing mode, use content that generates a "0123456789abcdefghijklm" ID: + for (auto i: kj::indices(id)) { + id[i] = i; + } + } else { + getEntropy(kj::asBytes(id)); + } + for (auto i: kj::indices(id)) { + id[i] = "0123456789abcdefghijklmnopqrstuv"[static_cast(id[i]) % 32]; + } + return id; +} + +} // namespace workerd diff --git a/src/workerd/util/sentry.h b/src/workerd/util/sentry.h index d78be7fd564..2ff914fd637 100644 --- a/src/workerd/util/sentry.h +++ b/src/workerd/util/sentry.h @@ -16,6 +16,20 @@ namespace workerd { +// For internal errors, we generate an ID to include when rendering user-facing "internal error" +// exceptions and writing internal exception logs, to make it easier to search for logs +// corresponding to "internal error" exceptions reported by users. +// +// We'll use an ID of 24 base-32 encoded characters, just because its relatively simple to +// generate from random bytes. This should give us a value with 120 bits of uniqueness, which is +// about as good as a UUID. +// +// (We're not using base-64 encoding to avoid issues with case insensitive search, as well as +// ensuring that the id is easy to select and copy via double-clicking.) +using InternalErrorId = kj::FixedArray; + +InternalErrorId makeInternalErrorId(); + // Log out an exception with context but without frills. This macro excludes any variadic arguments // from the macro so that we do not accidentally make a more granular fingerprint. It also will only // take a `context` argument that is known at compile time (via constexpr assignment). 
@@ -25,6 +39,12 @@ namespace workerd { KJ_LOG(ERROR, e, sentryErrorContext); \ }(exception) +#define LOG_EXCEPTION_WITH_ID(context, exception, id) \ + [&](const kj::Exception& e) { \ + constexpr auto sentryErrorContext = context; \ + KJ_LOG(ERROR, e, sentryErrorContext, id); \ + }(exception) + #define ACTOR_STORAGE_OP_PREFIX "; actorStorageOp = " inline bool isInterestingException(const kj::Exception& e) { diff --git a/src/workerd/util/sqlite-kv.c++ b/src/workerd/util/sqlite-kv.c++ index 031967f9f03..db640bb7b79 100644 --- a/src/workerd/util/sqlite-kv.c++ +++ b/src/workerd/util/sqlite-kv.c++ @@ -40,6 +40,14 @@ SqliteKv::SqliteKv(SqliteDatabase& db): ResetListener(db) { tableCreated = true; state.init(db); } + + // Independently check whether the _cf_EXTERNALS table has been created in a past session, so + // that we can prepare its statements without first re-creating the table. + if (!db.run("SELECT name FROM sqlite_master WHERE type='table' AND name='_cf_EXTERNALS'") + .isDone()) { + externalsTableCreated = true; + externalsState.init(db); + } } SqliteKv::~SqliteKv() noexcept(false) { @@ -145,6 +153,7 @@ void SqliteKv::put(KeyPtr key, ValuePtr value) { void SqliteKv::put(KeyPtr key, ValuePtr value, WriteOptions options) { ensureInitialized(options.allowUnconfirmed) .stmtPut.run({.allowUnconfirmed = options.allowUnconfirmed}, key, value); + clearExternalsIfPresent(key); } bool SqliteKv::delete_(KeyPtr key) { @@ -154,9 +163,25 @@ bool SqliteKv::delete_(KeyPtr key) { bool SqliteKv::delete_(KeyPtr key, WriteOptions options) { auto query = ensureInitialized(options.allowUnconfirmed) .stmtDelete.run({.allowUnconfirmed = options.allowUnconfirmed}, key); + clearExternalsIfPresent(key); return query.changeCount() > 0; } +void SqliteKv::clearExternalsIfPresent(KeyPtr key) { + // If the externals table hasn't been created yet, there's nothing to clear. We deliberately + // avoid creating it here -- it should only be created when externals are actually being + // written. 
+ if (!externalsTableCreated) return; + + // The table exists, so the statements struct must already be initialized (either by the + // constructor or by a prior ensureExternalsInitialized()). + auto& stmts = KJ_ASSERT_NONNULL(externalsState.tryGet()); + + // Externals writes are always paired with a regular KV write, and that paired write decides + // whether the operation is allowed to be unconfirmed. So we always pass allowUnconfirmed here. + stmts.stmtDeleteExternals.run({.allowUnconfirmed = true}, key); +} + uint SqliteKv::deleteAll() { // TODO(perf): Consider introducing a compatibility flag that causes deleteAll() to always return // 1. Apps almost certainly don't care about the return value but historically we returned the @@ -166,9 +191,72 @@ uint SqliteKv::deleteAll() { return count; } +SqliteKv::ExternalsInitialized& SqliteKv::ensureExternalsInitialized() { + if (!externalsTableCreated) { + // Externals writes are always paired with a regular KV write, and that paired write decides + // whether the operation is allowed to be unconfirmed. So we always pass allowUnconfirmed here. + db.run(SqliteDatabase::QueryOptions{.regulator = SqliteDatabase::TRUSTED, + .allowUnconfirmed = true}, + R"( + CREATE TABLE IF NOT EXISTS _cf_EXTERNALS ( + key TEXT, + idx INTEGER, + token BLOB, + PRIMARY KEY (key, idx) + ) WITHOUT ROWID; + )"); + + externalsTableCreated = true; + + // If we're in a transaction and it gets rolled back, we better mark that the table is actually + // not created anymore. 
+ db.onRollback([this]() { externalsTableCreated = false; }); + } + + KJ_SWITCH_ONEOF(externalsState) { + KJ_CASE_ONEOF(uninitialized, Uninitialized) { + return externalsState.init(db); + } + KJ_CASE_ONEOF(initialized, ExternalsInitialized) { + return initialized; + } + } + KJ_UNREACHABLE; +} + +kj::Array> SqliteKv::getExternals(kj::StringPtr key) { + if (!externalsTableCreated) return nullptr; + auto& stmts = KJ_UNWRAP_OR(externalsState.tryGet(), return nullptr); + + auto query = stmts.stmtGetExternals.run(key); + + kj::Vector> result; + while (!query.isDone()) { + result.add(kj::heapArray(query.getBlob(0))); + query.nextRow(); + } + return result.releaseAsArray(); +} + +void SqliteKv::putExternals(kj::StringPtr key, kj::Array> tokens) { + auto& stmts = ensureExternalsInitialized(); + + // Externals writes are always paired with a regular KV write, and that paired write decides + // whether the operation is allowed to be unconfirmed. So we always pass allowUnconfirmed here. + + // Replace any existing tokens for this key with the new set. + stmts.stmtDeleteExternals.run({.allowUnconfirmed = true}, key); + + for (auto i: kj::indices(tokens)) { + stmts.stmtPutExternal.run( + {.allowUnconfirmed = true}, key, static_cast(i), tokens[i].asPtr()); + } +} + void SqliteKv::beforeSqliteReset() { - // We'll need to recreate the table on the next operation. + // We'll need to recreate the tables on the next operation. tableCreated = false; + externalsTableCreated = false; } void SqliteKv::rollbackMultiPut(Initialized& stmts, WriteOptions options) { diff --git a/src/workerd/util/sqlite-kv.h b/src/workerd/util/sqlite-kv.h index b3e7f64f5a2..5292c990c77 100644 --- a/src/workerd/util/sqlite-kv.h +++ b/src/workerd/util/sqlite-kv.h @@ -85,6 +85,16 @@ class SqliteKv: private SqliteDatabase::ResetListener { // extension might help here, though it can only support arrays of NUL-terminated strings, not // byte blobs or strings containing NUL bytes. 
+ // Get/put "externals", which are lists of tokens associated with keys. These are stored in a + // separate table (_cf_EXTERNALS) which is lazily created. + // + // Note that `put()` and `delete_()` automatically clear the externals for the key. If the data + // written by `put()` references externals, a call to `putExternals()` must be made later on, + // but must NOT be made if another `put()` to the same key happens first. This is all managed + // by `StoredExternalHandler`, which should be the only caller of these methods. + kj::Array> getExternals(kj::StringPtr key); + void putExternals(kj::StringPtr key, kj::Array> tokens); + private: struct Uninitialized {}; @@ -162,12 +172,39 @@ class SqliteKv: private SqliteDatabase::ResetListener { Initialized(SqliteDatabase& db): db(db) {} }; + // Prepared statements for the _cf_EXTERNALS table. Created lazily on first use, since the + // table must exist before its statements can be prepared. + struct ExternalsInitialized { + SqliteDatabase& db; + + static constexpr SqliteKvRegulator regulator; + + SqliteDatabase::Statement stmtGetExternals = db.prepare(regulator, R"( + SELECT token FROM _cf_EXTERNALS WHERE key = ? ORDER BY idx + )"); + SqliteDatabase::Statement stmtPutExternal = db.prepare(regulator, R"( + INSERT INTO _cf_EXTERNALS VALUES(?, ?, ?) + )"); + SqliteDatabase::Statement stmtDeleteExternals = db.prepare(regulator, R"( + DELETE FROM _cf_EXTERNALS WHERE key = ? + )"); + + ExternalsInitialized(SqliteDatabase& db): db(db) {} + }; + kj::OneOf state; + // Lazily-initialized state for the _cf_EXTERNALS table. Distinct from `state` so that the + // externals table is only created if/when externals are actually used. + kj::OneOf externalsState = Uninitialized{}; + // Has the _cf_KV table been created? This is separate from Uninitialized/Initialized since it // has to be repeated after a reset, whereas the statements do not need to be recreated. 
bool tableCreated = false; + // Has the _cf_EXTERNALS table been created? Same caveat as `tableCreated`. + bool externalsTableCreated = false; + kj::Maybe currentCursor; void cancelCurrentCursor(); @@ -176,6 +213,15 @@ class SqliteKv: private SqliteDatabase::ResetListener { // Make sure the KV table is created and prepared statements are ready. Not called until the // first write. + // Make sure the _cf_EXTERNALS table is created and its prepared statements are ready. Not + // called until the first call to putExternals(). + ExternalsInitialized& ensureExternalsInitialized(); + + // Delete all externals associated with `key`, if the _cf_EXTERNALS table exists. No-op if the + // table has never been created. Called automatically from put() and delete_(). Always runs + // with `allowUnconfirmed = true` since the paired KV write decides confirmation semantics. + void clearExternalsIfPresent(KeyPtr key); + void beforeSqliteReset() override; // Helper function that rolls back a multi-put statement and swallows any exceptions that may diff --git a/src/workerd/util/sqlite-test.c++ b/src/workerd/util/sqlite-test.c++ index 9fb039cb998..130fa570611 100644 --- a/src/workerd/util/sqlite-test.c++ +++ b/src/workerd/util/sqlite-test.c++ @@ -1770,6 +1770,43 @@ KJ_TEST("SQLite critical error handling for SQLITE_FULL") { }); } +KJ_TEST("SQLite Regulator blocks RENAME TO reserved name") { + // Regression test: ALTER TABLE ... RENAME TO must be checked against the regulator's + // isAllowedName for the DESTINATION name, not just the source. Without the SQLite + // patch (0005-authorizer-rename-to-destination-name.patch), the authorizer only sees + // the source table name, allowing renames into the _cf_ reserved namespace. + + TempDirOnDisk dir; + SqliteDatabase::Vfs vfs(*dir); + SqliteDatabase db(vfs, kj::Path({"foo"}), kj::WriteMode::CREATE | kj::WriteMode::MODIFY); + + // Regulator that blocks names starting with _cf_ (mirrors SqlStorageRegulator). 
+ class CfRegulator: public SqliteDatabase::Regulator { + public: + bool isAllowedName(kj::StringPtr name) const override { + return !name.startsWith("_cf_"); + } + }; + static CfRegulator reg; + + // Create a user table and populate it. + db.run("CREATE TABLE user_data (key TEXT PRIMARY KEY, value BLOB)"); + db.run("INSERT INTO user_data VALUES ('k', x'deadbeef')"); + + // Renaming to a non-reserved name should succeed. + db.run({.regulator = reg}, "ALTER TABLE user_data RENAME TO other_data"); + KJ_EXPECT(db.prepare(reg, "SELECT value FROM other_data").run().getBlob(0).size() == 4); + + // Renaming into the _cf_ namespace must be blocked by the authorizer. + KJ_EXPECT_THROW_MESSAGE( + "not authorized", db.run({.regulator = reg}, "ALTER TABLE other_data RENAME TO _cf_KV")); + + // Verify the table was NOT renamed — it should still be other_data. + KJ_EXPECT(db.prepare(reg, "SELECT value FROM other_data").run().getBlob(0).size() == 4); +} + +// NOTE: This test sets a process-global SQLite hard_heap_limit that is never reset. +// It must remain the LAST test in this file. 
KJ_TEST("SQLite critical error handling for SQLITE_NOMEM") { testCriticalError("out of memory", [](SqliteDatabase& db, SqliteDatabase::Vfs& vfs) { db.run("CREATE TABLE test_nomem (id INTEGER PRIMARY KEY, data BLOB)"); diff --git a/src/workerd/util/sqlite.c++ b/src/workerd/util/sqlite.c++ index 1a090cd0f48..0b2ae719115 100644 --- a/src/workerd/util/sqlite.c++ +++ b/src/workerd/util/sqlite.c++ @@ -1056,7 +1056,7 @@ bool SqliteDatabase::isAuthorized(int actionCode, if (actionCode == SQLITE_ALTER_TABLE || actionCode == SQLITE_DETACH) { auto swap = param1; // contains dbName param1 = param2; // contains table name (for SQLITE_ALTER_TABLE, null otherwise) - param2 = dbName; // should always be null + param2 = dbName; // RENAME TO destination name (patched), null for other ALTER ops dbName = swap; } @@ -1118,8 +1118,15 @@ bool SqliteDatabase::isAuthorized(int actionCode, // See https://www.sqlite.org/fileformat2.html#stat1tab for more details. return true; - case SQLITE_ALTER_TABLE: /* Table Name NULL (modified) */ - return regulator->isAllowedName(KJ_ASSERT_NONNULL(param1)); + case SQLITE_ALTER_TABLE: /* Table Name New Name (for RENAME, patched) */ + if (!regulator->isAllowedName(KJ_ASSERT_NONNULL(param1))) return false; + // For RENAME TO, our patched SQLite passes the destination name as the + // 5th authorizer arg (mapped to param2 after the swap above). Block + // renames into reserved namespaces (e.g. _cf_KV). 
+ KJ_IF_SOME(newName, param2) { + return regulator->isAllowedName(newName); + } + return true; case SQLITE_READ: /* Table Name Column Name */ case SQLITE_UPDATE: /* Table Name Column Name */ @@ -1287,14 +1294,7 @@ bool SqliteDatabase::isAuthorized(int actionCode, KJ_IF_SOME(moduleName, param2) { if (strcasecmp(moduleName.begin(), "fts5") == 0 || strcasecmp(moduleName.begin(), "fts5vocab") == 0) { - if (util::Autogate::isEnabled(util::AutogateKey::SQL_RESTRICT_RESERVED_NAMES)) { - return regulator->isAllowedName(KJ_ASSERT_NONNULL(param1)); - } - auto& tableName = KJ_ASSERT_NONNULL(param1); - if (tableName.size() >= 4 && strncasecmp(tableName.begin(), "_cf_", 4) == 0) { - LOG_WARNING_PERIODICALLY("FTS5 virtual table uses reserved _cf_ prefix"); - } - return true; + return regulator->isAllowedName(KJ_ASSERT_NONNULL(param1)); } } return false; diff --git a/src/workerd/util/stream-utils.c++ b/src/workerd/util/stream-utils.c++ index afb32614884..83a7d607980 100644 --- a/src/workerd/util/stream-utils.c++ +++ b/src/workerd/util/stream-utils.c++ @@ -38,7 +38,7 @@ class MemoryInputStream final: public kj::AsyncInputStream { auto ptr = kj::arrayPtr(static_cast(buffer), maxBytes); size_t toRead = kj::min(data.size(), ptr.size()); if (toRead == 0) return toRead; - ptr.first(toRead).copyFrom(data.first(toRead)); + ptr.write(data.first(toRead)); data = data.slice(toRead); return toRead; } diff --git a/src/wpt/fetch/api-test.ts b/src/wpt/fetch/api-test.ts index 65d8efe86e9..183981fe7f2 100644 --- a/src/wpt/fetch/api-test.ts +++ b/src/wpt/fetch/api-test.ts @@ -845,14 +845,5 @@ export default { 'response/response-stream-disturbed-6.any.js': {}, 'response/response-stream-disturbed-by-pipe.any.js': {}, 'response/response-stream-disturbed-util.js': {}, - 'response/response-stream-with-broken-then.any.js': { - comment: - 'Triggers an internal error: promise.h:103: failed: expected Wrappable::tryUnwrapOpaque(isolate, handle) != nullptr', - expectedFailures: [ - 'Attempt to inject {done: 
false, value: bye} via Object.prototype.then.', - 'Attempt to inject value: undefined via Object.prototype.then.', - 'Attempt to inject undefined via Object.prototype.then.', - 'Attempt to inject 8.2 via Object.prototype.then.', - ], - }, + 'response/response-stream-with-broken-then.any.js': {}, } satisfies TestRunnerConfig; diff --git a/src/wpt/streams-test.ts b/src/wpt/streams-test.ts index c0db76fef0c..7f19c8d6f22 100644 --- a/src/wpt/streams-test.ts +++ b/src/wpt/streams-test.ts @@ -112,14 +112,7 @@ export default { ? ['pipeThrough() should throw if readable/writable getters throw'] : [], }, - 'piping/then-interception.any.js': { - comment: - 'failed: expected Wrappable::tryUnwrapOpaque(isolate, handle) != nullptr', - expectedFailures: [ - 'piping should not be observable', - 'tee should not be observable', - ], - }, + 'piping/then-interception.any.js': {}, 'piping/throwing-options.any.js': {}, 'piping/transform-streams.any.js': {}, diff --git a/src/wpt/urlpattern-test.ts b/src/wpt/urlpattern-test.ts index d872cb0e132..4fb81fde53a 100644 --- a/src/wpt/urlpattern-test.ts +++ b/src/wpt/urlpattern-test.ts @@ -25,7 +25,6 @@ export default { expectedFailures: [ // Each of these *ought* to pass. They are included here because we // know they currently do not. Each needs to be investigated. 
- 'Pattern: ["((?R)):"] Inputs: undefined', 'Pattern: [{"pathname":"/foo/bar","baseURL":"https://example.com?query#hash"}] Inputs: [{"pathname":"/foo/bar"}]', 'Pattern: [{"pathname":"/foo/bar","baseURL":"https://example.com?query#hash"}] Inputs: [{"hostname":"example.com","pathname":"/foo/bar"}]', 'Pattern: [{"pathname":"/foo/bar","baseURL":"https://example.com?query#hash"}] Inputs: [{"protocol":"https","hostname":"example.com","pathname":"/foo/bar"}]', diff --git a/tools/base.tsconfig.json b/tools/base.tsconfig.json index 143c60e78f5..236bb7f297d 100644 --- a/tools/base.tsconfig.json +++ b/tools/base.tsconfig.json @@ -9,6 +9,7 @@ "allowJs": true, "allowUnreachableCode": false, "allowUnusedLabels": false, + "composite": true, "exactOptionalPropertyTypes": true, "noFallthroughCasesInSwitch": true, "noImplicitOverride": true, diff --git a/tools/cross/format.py b/tools/cross/format.py index 62d34ed5580..6ef2291b368 100755 --- a/tools/cross/format.py +++ b/tools/cross/format.py @@ -88,7 +88,9 @@ def matches_any_glob(globs: tuple[str, ...], file: Path) -> bool: return any(file.match(glob) for glob in globs) -def _ensure_bazel_tool(tool_name: str, build_target: str | None = None) -> Path: +def _ensure_bazel_tool( + tool_name: str, build_target: str | None = None, verify_version: bool = False +) -> Path: """Ensure a bazel-built formatter tool exists and return its path.""" tool_suffix = Path("build") / "deps" / "formatters" / tool_name internal_tool_path = ( @@ -96,12 +98,14 @@ def _ensure_bazel_tool(tool_name: str, build_target: str | None = None) -> Path: ) workerd_tool_path = BAZEL_BIN / tool_suffix - if internal_tool_path.exists(): - return internal_tool_path - if workerd_tool_path.exists(): - return workerd_tool_path + if not verify_version: + if internal_tool_path.exists(): + return internal_tool_path + if workerd_tool_path.exists(): + return workerd_tool_path - # Tool not cached; build it once. + # Build the tool. 
When verify_version is set this ensures we pick up tool + # version changes instead of silently reusing a stale cached binary. if build_target is None: build_target = f"@workerd//build/deps/formatters:{tool_name}@rule" download_result = subprocess.run(["bazel", "build", build_target]) @@ -280,9 +284,28 @@ def main() -> None: ) if matched: needed_formatters.add(config.formatter) - for name in needed_formatters: - if name in ("clang-format", "buildifier", "ruff", "rustfmt"): - _ensure_bazel_tool(name) + # When formatting the full repo (no git subcommand), always rebuild tools + # via bazel to pick up version changes. For the git/--staged path (used by + # the pre-commit hook) skip the rebuild to avoid bazel startup latency on + # every commit. + verify_version = options.subcommand != "git" + if verify_version: + # Batch all targets into a single bazel build to avoid repeated JVM + # startup overhead. + targets = [] + for name in needed_formatters: + if name in ("clang-format", "buildifier", "ruff", "rustfmt"): + targets.append(f"@workerd//build/deps/formatters:{name}@rule") + elif name == "prettier": + targets.append("//:node_modules/prettier") + if targets: + result = subprocess.run(["bazel", "build", *targets]) + if result.returncode != 0: + raise RuntimeError("Failed to download formatter tools") + else: + for name in needed_formatters: + if name in ("clang-format", "buildifier", "ruff", "rustfmt"): + _ensure_bazel_tool(name) all_ok = True diff --git a/tools/unix/create-external.sh b/tools/unix/create-external.sh index 7c94a22f19f..a3f63fc9b9c 100755 --- a/tools/unix/create-external.sh +++ b/tools/unix/create-external.sh @@ -6,8 +6,12 @@ output_path=$(bazel info output_path) workspace=$(bazel info workspace) +# Note: -n (--no-dereference) is required so that an existing "external" +# symlink pointing at a directory is replaced rather than dereferenced (which +# would create the new link *inside* the target directory). Both GNU and BSD +# ln support -n. 
The previously used -F flag is a no-op for this case on Linux. external="${workspace}/external" -ln -sfF "${output_path}/../../../external" "${external}" +ln -sfn "${output_path}/../../../external" "${external}" # Temporary warning that compile_commands.json exists and will # interfere with the intended clangd setup. diff --git a/types/defines/access.d.ts b/types/defines/access.d.ts new file mode 100644 index 00000000000..db2b1a5307e --- /dev/null +++ b/types/defines/access.d.ts @@ -0,0 +1,36 @@ +/** + * Represents the identity of a user authenticated via Cloudflare Access. + * This matches the result of calling /cdn-cgi/access/get-identity. + * + * The exact structure of the returned object depends on the identity provider + * configuration for the Access application. The fields below represent commonly + * available properties, but additional provider-specific fields may be present. + */ +interface CloudflareAccessIdentity extends Record { + /** The user's email address, if available from the identity provider. */ + email?: string; + /** The user's display name. */ + name?: string; + /** The user's unique identifier. */ + user_uuid?: string; + /** The Cloudflare account ID. */ + account_id?: string; + /** Login timestamp (Unix epoch seconds). */ + iat?: number; + /** The user's IP address at authentication time. */ + ip?: string; + /** Authentication methods used (e.g., "pwd"). */ + amr?: string[]; + /** Identity provider information. */ + idp?: { id: string; type: string }; + /** Geographic information about where the user authenticated. */ + geo?: { country: string }; + /** Group memberships from the identity provider. */ + groups?: Array<{ id: string; name: string; email?: string }>; + /** Device posture check results, keyed by check ID. */ + devicePosture?: Record; + /** True if the user connected via Cloudflare WARP. */ + is_warp?: boolean; + /** True if the user is authenticated via Cloudflare Gateway. 
*/ + is_gateway?: boolean; +} diff --git a/types/defines/cf.d.ts b/types/defines/cf.d.ts index ff4d6626f37..d8170940aee 100644 --- a/types/defines/cf.d.ts +++ b/types/defines/cf.d.ts @@ -156,6 +156,17 @@ interface RequestInitCfProperties extends Record { cacheReserveMinimumFileSize?: number; scrapeShield?: boolean; apps?: boolean; + /** + * Controls whether an outbound gRPC-web subrequest from this Worker is + * converted to gRPC at the Cloudflare edge. + * + * - `"passthrough"`: forward the subrequest unchanged as gRPC-web (default). + * - `"convert"`: convert the gRPC-web subrequest to gRPC at the edge. + * + * Provides per-request control over the same edge conversion behavior + * gated by the `auto_grpc_convert` compatibility flag. + */ + grpcWeb?: "passthrough" | "convert"; image?: RequestInitCfPropertiesImage; minify?: RequestInitCfPropertiesImageMinify; mirage?: boolean; diff --git a/types/defines/trace.d.ts b/types/defines/trace.d.ts index c57de695b32..ebdda871503 100644 --- a/types/defines/trace.d.ts +++ b/types/defines/trace.d.ts @@ -80,7 +80,8 @@ interface ConnectEventInfo { type EventOutcome = "ok" | "canceled" | "exception" | "unknown" | "killSwitch" | "daemonDown" | "exceededCpu" | "exceededMemory" | "loadShed" | - "responseStreamDisconnected" | "scriptNotFound" | "internalError"; + "responseStreamDisconnected" | "scriptNotFound" | "internalError" | + "exceededWallTime"; interface ScriptVersion { readonly id: string; diff --git a/types/generated-snapshot/experimental/index.d.ts b/types/generated-snapshot/experimental/index.d.ts index de4579a7804..19fdbc03bb5 100755 --- a/types/generated-snapshot/experimental/index.d.ts +++ b/types/generated-snapshot/experimental/index.d.ts @@ -501,7 +501,8 @@ interface ExecutionContext { readonly key?: string; readonly override?: string; }; - tracing?: Tracing; + readonly access?: CloudflareAccessContext; + tracing: Tracing; abort(reason?: any): void; } type ExportedHandlerFetchHandler< @@ -604,6 +605,10 @@ interface 
CachePurgeOptions { interface CacheContext { purge(options: CachePurgeOptions): Promise; } +interface CloudflareAccessContext { + readonly aud: string; + getIdentity(): Promise; +} declare abstract class ColoLocalActorNamespace { get(actorId: string): Fetcher; } @@ -836,6 +841,7 @@ interface DurableObjectFacets { ): Fetcher; abort(name: string, reason: any): void; delete(name: string): void; + clone(src: string, dst: string): void; } interface FacetStartupOptions< T extends Rpc.DurableObjectBranded | undefined = undefined, @@ -4728,11 +4734,62 @@ interface Tracing { callback: (span: Span, ...args: A) => T, ...args: A ): T; + startActiveSpan( + name: string, + callback: (span: Span, ...args: A) => T, + ...args: A + ): T; Span: typeof Span; } declare abstract class Span { get isTraced(): boolean; setAttribute(key: string, value?: boolean | number | string): void; + end(): void; +} +/** + * Represents the identity of a user authenticated via Cloudflare Access. + * This matches the result of calling /cdn-cgi/access/get-identity. + * + * The exact structure of the returned object depends on the identity provider + * configuration for the Access application. The fields below represent commonly + * available properties, but additional provider-specific fields may be present. + */ +interface CloudflareAccessIdentity extends Record { + /** The user's email address, if available from the identity provider. */ + email?: string; + /** The user's display name. */ + name?: string; + /** The user's unique identifier. */ + user_uuid?: string; + /** The Cloudflare account ID. */ + account_id?: string; + /** Login timestamp (Unix epoch seconds). */ + iat?: number; + /** The user's IP address at authentication time. */ + ip?: string; + /** Authentication methods used (e.g., "pwd"). */ + amr?: string[]; + /** Identity provider information. */ + idp?: { + id: string; + type: string; + }; + /** Geographic information about where the user authenticated. 
*/ + geo?: { + country: string; + }; + /** Group memberships from the identity provider. */ + groups?: Array<{ + id: string; + name: string; + email?: string; + }>; + /** Device posture check results, keyed by check ID. */ + devicePosture?: Record; + /** True if the user connected via Cloudflare WARP. */ + is_warp?: boolean; + /** True if the user is authenticated via Cloudflare Gateway. */ + is_gateway?: boolean; } // ============ AI Search Error Interfaces ============ interface AiSearchInternalError extends Error {} @@ -12236,6 +12293,17 @@ interface RequestInitCfProperties extends Record { cacheReserveMinimumFileSize?: number; scrapeShield?: boolean; apps?: boolean; + /** + * Controls whether an outbound gRPC-web subrequest from this Worker is + * converted to gRPC at the Cloudflare edge. + * + * - `"passthrough"`: forward the subrequest unchanged as gRPC-web (default). + * - `"convert"`: convert the gRPC-web subrequest to gRPC at the edge. + * + * Provides per-request control over the same edge conversion behavior + * gated by the `auto_grpc_convert` compatibility flag. 
+ */ + grpcWeb?: "passthrough" | "convert"; image?: RequestInitCfPropertiesImage; minify?: RequestInitCfPropertiesImageMinify; mirage?: boolean; @@ -15354,7 +15422,8 @@ declare namespace TailStream { | "loadShed" | "responseStreamDisconnected" | "scriptNotFound" - | "internalError"; + | "internalError" + | "exceededWallTime"; interface ScriptVersion { readonly id: string; readonly tag?: string; diff --git a/types/generated-snapshot/experimental/index.ts b/types/generated-snapshot/experimental/index.ts index 4ffed877479..3b99f046bf2 100755 --- a/types/generated-snapshot/experimental/index.ts +++ b/types/generated-snapshot/experimental/index.ts @@ -503,7 +503,8 @@ export interface ExecutionContext { readonly key?: string; readonly override?: string; }; - tracing?: Tracing; + readonly access?: CloudflareAccessContext; + tracing: Tracing; abort(reason?: any): void; } export type ExportedHandlerFetchHandler< @@ -606,6 +607,10 @@ export interface CachePurgeOptions { export interface CacheContext { purge(options: CachePurgeOptions): Promise; } +export interface CloudflareAccessContext { + readonly aud: string; + getIdentity(): Promise; +} export declare abstract class ColoLocalActorNamespace { get(actorId: string): Fetcher; } @@ -838,6 +843,7 @@ export interface DurableObjectFacets { ): Fetcher; abort(name: string, reason: any): void; delete(name: string): void; + clone(src: string, dst: string): void; } export interface FacetStartupOptions< T extends Rpc.DurableObjectBranded | undefined = undefined, @@ -4734,11 +4740,62 @@ export interface Tracing { callback: (span: Span, ...args: A) => T, ...args: A ): T; + startActiveSpan( + name: string, + callback: (span: Span, ...args: A) => T, + ...args: A + ): T; Span: typeof Span; } export declare abstract class Span { get isTraced(): boolean; setAttribute(key: string, value?: boolean | number | string): void; + end(): void; +} +/** + * Represents the identity of a user authenticated via Cloudflare Access. 
+ * This matches the result of calling /cdn-cgi/access/get-identity. + * + * The exact structure of the returned object depends on the identity provider + * configuration for the Access application. The fields below represent commonly + * available properties, but additional provider-specific fields may be present. + */ +export interface CloudflareAccessIdentity extends Record { + /** The user's email address, if available from the identity provider. */ + email?: string; + /** The user's display name. */ + name?: string; + /** The user's unique identifier. */ + user_uuid?: string; + /** The Cloudflare account ID. */ + account_id?: string; + /** Login timestamp (Unix epoch seconds). */ + iat?: number; + /** The user's IP address at authentication time. */ + ip?: string; + /** Authentication methods used (e.g., "pwd"). */ + amr?: string[]; + /** Identity provider information. */ + idp?: { + id: string; + type: string; + }; + /** Geographic information about where the user authenticated. */ + geo?: { + country: string; + }; + /** Group memberships from the identity provider. */ + groups?: Array<{ + id: string; + name: string; + email?: string; + }>; + /** Device posture check results, keyed by check ID. */ + devicePosture?: Record; + /** True if the user connected via Cloudflare WARP. */ + is_warp?: boolean; + /** True if the user is authenticated via Cloudflare Gateway. */ + is_gateway?: boolean; } // ============ AI Search Error Interfaces ============ export interface AiSearchInternalError extends Error {} @@ -12248,6 +12305,17 @@ export interface RequestInitCfProperties extends Record { cacheReserveMinimumFileSize?: number; scrapeShield?: boolean; apps?: boolean; + /** + * Controls whether an outbound gRPC-web subrequest from this Worker is + * converted to gRPC at the Cloudflare edge. + * + * - `"passthrough"`: forward the subrequest unchanged as gRPC-web (default). + * - `"convert"`: convert the gRPC-web subrequest to gRPC at the edge. 
+ * + * Provides per-request control over the same edge conversion behavior + * gated by the `auto_grpc_convert` compatibility flag. + */ + grpcWeb?: "passthrough" | "convert"; image?: RequestInitCfPropertiesImage; minify?: RequestInitCfPropertiesImageMinify; mirage?: boolean; @@ -15315,7 +15383,8 @@ export declare namespace TailStream { | "loadShed" | "responseStreamDisconnected" | "scriptNotFound" - | "internalError"; + | "internalError" + | "exceededWallTime"; interface ScriptVersion { readonly id: string; readonly tag?: string; diff --git a/types/generated-snapshot/latest/index.d.ts b/types/generated-snapshot/latest/index.d.ts index bd2bc8f354e..9450123248b 100755 --- a/types/generated-snapshot/latest/index.d.ts +++ b/types/generated-snapshot/latest/index.d.ts @@ -480,7 +480,8 @@ interface ExecutionContext { readonly exports: Cloudflare.Exports; readonly props: Props; cache?: CacheContext; - tracing?: Tracing; + readonly access?: CloudflareAccessContext; + tracing: Tracing; } type ExportedHandlerFetchHandler< Env = unknown, @@ -581,6 +582,10 @@ interface CachePurgeOptions { interface CacheContext { purge(options: CachePurgeOptions): Promise; } +interface CloudflareAccessContext { + readonly aud: string; + getIdentity(): Promise; +} declare abstract class ColoLocalActorNamespace { get(actorId: string): Fetcher; } @@ -788,6 +793,7 @@ interface DurableObjectFacets { ): Fetcher; abort(name: string, reason: any): void; delete(name: string): void; + clone(src: string, dst: string): void; } interface FacetStartupOptions< T extends Rpc.DurableObjectBranded | undefined = undefined, @@ -3849,6 +3855,28 @@ interface EventSourceEventSourceInit { withCredentials?: boolean; fetcher?: Fetcher; } +interface ExecOutput { + readonly stdout: ArrayBuffer; + readonly stderr: ArrayBuffer; + readonly exitCode: number; +} +interface ContainerExecOptions { + cwd?: string; + env?: Record; + user?: string; + stdin?: ReadableStream | "pipe"; + stdout?: "pipe" | "ignore"; + stderr?: "pipe" 
| "ignore" | "combined"; +} +interface ExecProcess { + readonly stdin: WritableStream | null; + readonly stdout: ReadableStream | null; + readonly stderr: ReadableStream | null; + readonly pid: number; + readonly exitCode: Promise; + output(): Promise; + kill(signal?: number): void; +} interface Container { get running(): boolean; start(options?: ContainerStartupOptions): void; @@ -3866,6 +3894,7 @@ interface Container { options: ContainerSnapshotOptions, ): Promise; interceptOutboundHttps(addr: string, binding: Fetcher): Promise; + exec(cmd: string[], options?: ContainerExecOptions): Promise; } interface ContainerDirectorySnapshot { id: string; @@ -4060,11 +4089,62 @@ interface Tracing { callback: (span: Span, ...args: A) => T, ...args: A ): T; + startActiveSpan( + name: string, + callback: (span: Span, ...args: A) => T, + ...args: A + ): T; Span: typeof Span; } declare abstract class Span { get isTraced(): boolean; setAttribute(key: string, value?: boolean | number | string): void; + end(): void; +} +/** + * Represents the identity of a user authenticated via Cloudflare Access. + * This matches the result of calling /cdn-cgi/access/get-identity. + * + * The exact structure of the returned object depends on the identity provider + * configuration for the Access application. The fields below represent commonly + * available properties, but additional provider-specific fields may be present. + */ +interface CloudflareAccessIdentity extends Record { + /** The user's email address, if available from the identity provider. */ + email?: string; + /** The user's display name. */ + name?: string; + /** The user's unique identifier. */ + user_uuid?: string; + /** The Cloudflare account ID. */ + account_id?: string; + /** Login timestamp (Unix epoch seconds). */ + iat?: number; + /** The user's IP address at authentication time. */ + ip?: string; + /** Authentication methods used (e.g., "pwd"). */ + amr?: string[]; + /** Identity provider information. 
*/ + idp?: { + id: string; + type: string; + }; + /** Geographic information about where the user authenticated. */ + geo?: { + country: string; + }; + /** Group memberships from the identity provider. */ + groups?: Array<{ + id: string; + name: string; + email?: string; + }>; + /** Device posture check results, keyed by check ID. */ + devicePosture?: Record; + /** True if the user connected via Cloudflare WARP. */ + is_warp?: boolean; + /** True if the user is authenticated via Cloudflare Gateway. */ + is_gateway?: boolean; } // ============ AI Search Error Interfaces ============ interface AiSearchInternalError extends Error {} @@ -11568,6 +11648,17 @@ interface RequestInitCfProperties extends Record { cacheReserveMinimumFileSize?: number; scrapeShield?: boolean; apps?: boolean; + /** + * Controls whether an outbound gRPC-web subrequest from this Worker is + * converted to gRPC at the Cloudflare edge. + * + * - `"passthrough"`: forward the subrequest unchanged as gRPC-web (default). + * - `"convert"`: convert the gRPC-web subrequest to gRPC at the edge. + * + * Provides per-request control over the same edge conversion behavior + * gated by the `auto_grpc_convert` compatibility flag. 
+ */ + grpcWeb?: "passthrough" | "convert"; image?: RequestInitCfPropertiesImage; minify?: RequestInitCfPropertiesImageMinify; mirage?: boolean; @@ -14686,7 +14777,8 @@ declare namespace TailStream { | "loadShed" | "responseStreamDisconnected" | "scriptNotFound" - | "internalError"; + | "internalError" + | "exceededWallTime"; interface ScriptVersion { readonly id: string; readonly tag?: string; diff --git a/types/generated-snapshot/latest/index.ts b/types/generated-snapshot/latest/index.ts index 99d1e985b9d..8495f88fddd 100755 --- a/types/generated-snapshot/latest/index.ts +++ b/types/generated-snapshot/latest/index.ts @@ -482,7 +482,8 @@ export interface ExecutionContext { readonly exports: Cloudflare.Exports; readonly props: Props; cache?: CacheContext; - tracing?: Tracing; + readonly access?: CloudflareAccessContext; + tracing: Tracing; } export type ExportedHandlerFetchHandler< Env = unknown, @@ -583,6 +584,10 @@ export interface CachePurgeOptions { export interface CacheContext { purge(options: CachePurgeOptions): Promise; } +export interface CloudflareAccessContext { + readonly aud: string; + getIdentity(): Promise; +} export declare abstract class ColoLocalActorNamespace { get(actorId: string): Fetcher; } @@ -790,6 +795,7 @@ export interface DurableObjectFacets { ): Fetcher; abort(name: string, reason: any): void; delete(name: string): void; + clone(src: string, dst: string): void; } export interface FacetStartupOptions< T extends Rpc.DurableObjectBranded | undefined = undefined, @@ -3855,6 +3861,28 @@ export interface EventSourceEventSourceInit { withCredentials?: boolean; fetcher?: Fetcher; } +export interface ExecOutput { + readonly stdout: ArrayBuffer; + readonly stderr: ArrayBuffer; + readonly exitCode: number; +} +export interface ContainerExecOptions { + cwd?: string; + env?: Record; + user?: string; + stdin?: ReadableStream | "pipe"; + stdout?: "pipe" | "ignore"; + stderr?: "pipe" | "ignore" | "combined"; +} +export interface ExecProcess { + readonly 
stdin: WritableStream | null; + readonly stdout: ReadableStream | null; + readonly stderr: ReadableStream | null; + readonly pid: number; + readonly exitCode: Promise; + output(): Promise; + kill(signal?: number): void; +} export interface Container { get running(): boolean; start(options?: ContainerStartupOptions): void; @@ -3872,6 +3900,7 @@ export interface Container { options: ContainerSnapshotOptions, ): Promise; interceptOutboundHttps(addr: string, binding: Fetcher): Promise; + exec(cmd: string[], options?: ContainerExecOptions): Promise; } export interface ContainerDirectorySnapshot { id: string; @@ -4066,11 +4095,62 @@ export interface Tracing { callback: (span: Span, ...args: A) => T, ...args: A ): T; + startActiveSpan( + name: string, + callback: (span: Span, ...args: A) => T, + ...args: A + ): T; Span: typeof Span; } export declare abstract class Span { get isTraced(): boolean; setAttribute(key: string, value?: boolean | number | string): void; + end(): void; +} +/** + * Represents the identity of a user authenticated via Cloudflare Access. + * This matches the result of calling /cdn-cgi/access/get-identity. + * + * The exact structure of the returned object depends on the identity provider + * configuration for the Access application. The fields below represent commonly + * available properties, but additional provider-specific fields may be present. + */ +export interface CloudflareAccessIdentity extends Record { + /** The user's email address, if available from the identity provider. */ + email?: string; + /** The user's display name. */ + name?: string; + /** The user's unique identifier. */ + user_uuid?: string; + /** The Cloudflare account ID. */ + account_id?: string; + /** Login timestamp (Unix epoch seconds). */ + iat?: number; + /** The user's IP address at authentication time. */ + ip?: string; + /** Authentication methods used (e.g., "pwd"). */ + amr?: string[]; + /** Identity provider information. 
*/ + idp?: { + id: string; + type: string; + }; + /** Geographic information about where the user authenticated. */ + geo?: { + country: string; + }; + /** Group memberships from the identity provider. */ + groups?: Array<{ + id: string; + name: string; + email?: string; + }>; + /** Device posture check results, keyed by check ID. */ + devicePosture?: Record; + /** True if the user connected via Cloudflare WARP. */ + is_warp?: boolean; + /** True if the user is authenticated via Cloudflare Gateway. */ + is_gateway?: boolean; } // ============ AI Search Error Interfaces ============ export interface AiSearchInternalError extends Error {} @@ -11580,6 +11660,17 @@ export interface RequestInitCfProperties extends Record { cacheReserveMinimumFileSize?: number; scrapeShield?: boolean; apps?: boolean; + /** + * Controls whether an outbound gRPC-web subrequest from this Worker is + * converted to gRPC at the Cloudflare edge. + * + * - `"passthrough"`: forward the subrequest unchanged as gRPC-web (default). + * - `"convert"`: convert the gRPC-web subrequest to gRPC at the edge. + * + * Provides per-request control over the same edge conversion behavior + * gated by the `auto_grpc_convert` compatibility flag. + */ + grpcWeb?: "passthrough" | "convert"; image?: RequestInitCfPropertiesImage; minify?: RequestInitCfPropertiesImageMinify; mirage?: boolean; @@ -14647,7 +14738,8 @@ export declare namespace TailStream { | "loadShed" | "responseStreamDisconnected" | "scriptNotFound" - | "internalError"; + | "internalError" + | "exceededWallTime"; interface ScriptVersion { readonly id: string; readonly tag?: string; diff --git a/types/tsconfig.json b/types/tsconfig.json index 118194b89da..8a96fe4ab3b 100644 --- a/types/tsconfig.json +++ b/types/tsconfig.json @@ -11,9 +11,7 @@ "@workerd/*": ["../bazel-bin/src/workerd/*"] }, "checkJs": true, - "composite": true, "skipLibCheck": true, - "exactOptionalPropertyTypes": false, "strictNullChecks": false, "noImplicitReturns": false,