From b5c310a3f17259cfb1fab91e0d1cad394fe937c8 Mon Sep 17 00:00:00 2001 From: atacan Date: Sat, 17 Jan 2026 20:52:47 +0100 Subject: [PATCH 01/10] initial implementation of public func convertToMarkdown --- Sources/Demark/Demark.swift | 79 ++++++++ Sources/Demark/DemarkTypes.swift | 74 +++++++ Sources/Demark/URLLoadingRuntime.swift | 254 +++++++++++++++++++++++++ 3 files changed, 407 insertions(+) create mode 100644 Sources/Demark/URLLoadingRuntime.swift diff --git a/Sources/Demark/Demark.swift b/Sources/Demark/Demark.swift index 32403e2..238af62 100644 --- a/Sources/Demark/Demark.swift +++ b/Sources/Demark/Demark.swift @@ -16,6 +16,14 @@ final class ConversionRuntime { private let logger = Logger(subsystem: "com.demark", category: "conversion") private let turndownRuntime = TurndownRuntime() private let htmlToMdRuntime = HTMLToMdRuntime() + private var _urlLoadingRuntime: URLLoadingRuntime? + + private var urlLoadingRuntime: URLLoadingRuntime { + if _urlLoadingRuntime == nil { + _urlLoadingRuntime = URLLoadingRuntime() + } + return _urlLoadingRuntime! + } // MARK: - Public Methods @@ -57,6 +65,22 @@ final class ConversionRuntime { return normalizeMarkdown(rawMarkdown, bulletMarker: options.bulletListMarker) } + /// Load URL and convert to Markdown + func urlToMarkdown(_ url: URL, options: DemarkOptions, loadingOptions: URLLoadingOptions) async throws -> String { + logger.info("Loading URL for conversion: \(url.absoluteString)") + + // Validate URL scheme + guard url.scheme == "http" || url.scheme == "https" else { + throw DemarkError.invalidURLScheme("Only http and https URLs are supported, got: \(url.scheme ?? 
"nil")") + } + + // Load and extract HTML + let html = try await urlLoadingRuntime.loadAndExtract(url: url, options: loadingOptions) + + // Convert using existing pipeline + return try await htmlToMarkdown(html, options: options) + } + // MARK: - Normalization helpers /// Normalize list markers to match expectations in tests (single space after marker) @@ -196,4 +220,59 @@ public final class Demark { public func convertToMarkdown(_ html: String, options: DemarkOptions = DemarkOptions()) async throws -> String { try await conversionRuntime.htmlToMarkdown(html, options: options) } + + /// Convert a website URL to Markdown format + /// + /// Loads the URL in a WebView, waits for JavaScript execution to complete, + /// extracts the rendered HTML, and converts it to Markdown. + /// + /// - Parameters: + /// - url: The URL to load and convert + /// - options: Configuration options for the HTML to Markdown conversion process + /// - loadingOptions: Configuration options for URL loading behavior + /// - Returns: The converted Markdown string + /// - Throws: DemarkError if loading or conversion fails + /// + /// ## Example + /// + /// ```swift + /// let demark = Demark() + /// let url = URL(string: "https://example.com")! + /// + /// // Basic usage with defaults + /// let markdown = try await demark.convertToMarkdown(url: url) + /// + /// // Extract only article content with custom timeout + /// let loadingOptions = URLLoadingOptions( + /// timeout: 60, + /// contentSelector: "article" + /// ) + /// let markdown = try await demark.convertToMarkdown( + /// url: url, + /// loadingOptions: loadingOptions + /// ) + /// ``` + /// + /// ## Security + /// + /// Uses an ephemeral WebView with non-persistent storage for security. + /// Each URL load creates a fresh WebView to prevent cookie/cache pollution. + /// + /// ## Network Requirements + /// + /// Plain HTTP URLs may require App Transport Security exceptions. + /// Only `http` and `https` URL schemes are supported. 
+ /// + /// ## See Also + /// + /// - `URLLoadingOptions`: Configuration options for URL loading + /// - `DemarkOptions`: Configuration options for HTML to Markdown conversion + /// - `DemarkError`: Error types that can be thrown during loading or conversion + public func convertToMarkdown( + url: URL, + options: DemarkOptions = DemarkOptions(), + loadingOptions: URLLoadingOptions = URLLoadingOptions() + ) async throws -> String { + try await conversionRuntime.urlToMarkdown(url, options: options, loadingOptions: loadingOptions) + } } diff --git a/Sources/Demark/DemarkTypes.swift b/Sources/Demark/DemarkTypes.swift index 17c1db5..071e912 100644 --- a/Sources/Demark/DemarkTypes.swift +++ b/Sources/Demark/DemarkTypes.swift @@ -174,6 +174,68 @@ public struct DemarkOptions: Sendable { } } +// MARK: - URL Loading Options + +/// Options for loading URLs in a WebView before conversion +/// +/// Controls how Demark loads web pages, including timeout behavior, +/// JavaScript idle detection, and content extraction. +/// +/// ## Example Usage +/// +/// ```swift +/// // Basic usage with defaults +/// let markdown = try await demark.convertToMarkdown(url: url) +/// +/// // Extract only article content with custom timeout +/// let options = URLLoadingOptions( +/// timeout: 60, +/// contentSelector: "article" +/// ) +/// let markdown = try await demark.convertToMarkdown(url: url, loadingOptions: options) +/// ``` +public struct URLLoadingOptions: Sendable { + /// Default configuration with sensible settings + public static let `default` = URLLoadingOptions() + + /// Maximum time to wait for page load (seconds) + public var timeout: TimeInterval + + /// Wait for JavaScript to settle after page load + public var waitForIdle: Bool + + /// Additional delay after page appears loaded (seconds) + public var idleDelay: TimeInterval + + /// CSS selector to extract specific content (e.g., "article", "main") + public var contentSelector: String? 
+ + /// Custom user agent string + public var userAgent: String? + + /// Create URL loading options with custom configuration + /// + /// - Parameters: + /// - timeout: Maximum time to wait for page load (default: 30 seconds) + /// - waitForIdle: Wait for JavaScript to settle after page load (default: true) + /// - idleDelay: Additional delay after page appears loaded (default: 0.5 seconds) + /// - contentSelector: CSS selector to extract specific content (default: nil, extracts full page) + /// - userAgent: Custom user agent string (default: nil, uses system default) + public init( + timeout: TimeInterval = 30, + waitForIdle: Bool = true, + idleDelay: TimeInterval = 0.5, + contentSelector: String? = nil, + userAgent: String? = nil + ) { + self.timeout = timeout + self.waitForIdle = waitForIdle + self.idleDelay = idleDelay + self.contentSelector = contentSelector + self.userAgent = userAgent + } +} + // MARK: - Error Types /// Errors that can occur during HTML to Markdown conversion. @@ -189,6 +251,10 @@ public enum DemarkError: LocalizedError, Sendable { case jsException(String) case bundleResourceMissing(String) case webViewInitializationFailed + case urlLoadingTimeout(String) + case urlNavigationFailed(String) + case invalidURLScheme(String) + case contentSelectorNotFound(String) // MARK: Public @@ -217,6 +283,14 @@ public enum DemarkError: LocalizedError, Sendable { "Required bundle resource missing: \(resource)" case .webViewInitializationFailed: "Failed to initialize WKWebView" + case let .urlLoadingTimeout(details): + "URL loading timed out: \(details)" + case let .urlNavigationFailed(details): + "URL navigation failed: \(details)" + case let .invalidURLScheme(details): + "Invalid URL scheme: \(details)" + case let .contentSelectorNotFound(selector): + "Content selector '\(selector)' matched no elements" } } } diff --git a/Sources/Demark/URLLoadingRuntime.swift b/Sources/Demark/URLLoadingRuntime.swift new file mode 100644 index 0000000..28678b3 --- /dev/null 
+++ b/Sources/Demark/URLLoadingRuntime.swift @@ -0,0 +1,254 @@ +// +// URLLoadingRuntime.swift +// Demark +// +// Copyright © 2026 atacan. All rights reserved. +// + +import Foundation +import os.log +import WebKit + +/// WebView-based URL loading runtime for fetching JavaScript-rendered content +/// +/// This implementation uses WKWebView to load URLs and extract rendered HTML: +/// - Real browser DOM environment +/// - JavaScript execution and rendering +/// - Ephemeral storage for security isolation +/// - Main thread execution required for WKWebView +/// - Cross-platform support (iOS, macOS, tvOS, watchOS, visionOS) +@MainActor +final class URLLoadingRuntime { + // MARK: - Properties + + private let logger = Logger(subsystem: "com.demark", category: "url-loading") + private var webView: WKWebView? + private var navigationDelegate: URLNavigationDelegate? + + // MARK: - Lifecycle + + deinit { + logger.info("URLLoadingRuntime being deallocated") + } + + // MARK: - Public Methods + + /// Load a URL in a WebView and extract rendered HTML + /// + /// Creates an ephemeral WebView for each load to ensure isolation between + /// untrusted pages. Supports waiting for JavaScript to settle and extracting + /// specific content via CSS selectors. 
+ /// + /// - Parameters: + /// - url: The URL to load + /// - options: Loading configuration options + /// - Returns: The rendered HTML content + /// - Throws: DemarkError if loading fails + func loadAndExtract(url: URL, options: URLLoadingOptions) async throws -> String { + // Create fresh ephemeral WebView for each load + let webView = createWebView(userAgent: options.userAgent) + self.webView = webView + + defer { + self.webView?.stopLoading() + self.webView = nil + self.navigationDelegate = nil + } + + return try await withTaskCancellationHandler { + try await performLoad(webView: webView, url: url, options: options) + } onCancel: { + Task { @MainActor in + self.webView?.stopLoading() + } + } + } + + // MARK: - Private Methods + + private func createWebView(userAgent: String?) -> WKWebView { + let config = WKWebViewConfiguration() + config.userContentController = WKUserContentController() + + // Use ephemeral storage - no cookies/cache pollution between loads + config.websiteDataStore = .nonPersistent() + + // Platform-specific configuration + #if os(macOS) + config.preferences.javaScriptCanOpenWindowsAutomatically = false + #elseif os(iOS) || os(visionOS) + config.allowsInlineMediaPlayback = false + config.mediaTypesRequiringUserActionForPlayback = .all + #endif + + let webView: WKWebView + #if os(watchOS) || os(tvOS) + webView = WKWebView(frame: CGRect(x: 0, y: 0, width: 100, height: 100), configuration: config) + #else + webView = WKWebView(frame: .zero, configuration: config) + #endif + + // Set user agent before loading + if let userAgent { + webView.customUserAgent = userAgent + } + + return webView + } + + private func performLoad(webView: WKWebView, url: URL, options: URLLoadingOptions) async throws -> String { + try Task.checkCancellation() + + return try await withCheckedThrowingContinuation { continuation in + let delegate = URLNavigationDelegate( + url: url, + options: options, + logger: logger, + continuation: continuation + ) + 
self.navigationDelegate = delegate + webView.navigationDelegate = delegate + + let request = URLRequest(url: url) + webView.load(request) + + // Set up timeout (delegate will cancel this task on completion) + delegate.timeoutTask = Task { + try? await Task.sleep(nanoseconds: UInt64(options.timeout * 1_000_000_000)) + delegate.handleTimeout() + } + } + } +} + +// MARK: - Navigation Delegate + +@MainActor +private final class URLNavigationDelegate: NSObject, WKNavigationDelegate { + private let url: URL + private let options: URLLoadingOptions + private let logger: Logger + private var continuation: CheckedContinuation? + private var hasCompleted = false + + /// Timeout task - cancelled on successful completion to prevent leaks + var timeoutTask: Task? + + init(url: URL, options: URLLoadingOptions, logger: Logger, continuation: CheckedContinuation) { + self.url = url + self.options = options + self.logger = logger + self.continuation = continuation + super.init() + } + + func webView(_ webView: WKWebView, didFinish navigation: WKNavigation!) 
{ + logger.info("Navigation finished for: \(self.url.absoluteString)") + + Task { @MainActor in + do { + try Task.checkCancellation() + + if options.waitForIdle { + try await waitForIdle(webView: webView) + } + + if options.idleDelay > 0 { + try await Task.sleep(nanoseconds: UInt64(options.idleDelay * 1_000_000_000)) + } + + try Task.checkCancellation() + let html = try await extractHTML(from: webView) + complete(with: .success(html)) + } catch { + complete(with: .failure(error)) + } + } + } + + func webView(_ webView: WKWebView, didFail navigation: WKNavigation!, withError error: Error) { + logger.error("Navigation failed: \(error.localizedDescription)") + complete(with: .failure(DemarkError.urlNavigationFailed("\(url.absoluteString): \(error.localizedDescription)"))) + } + + func webView(_ webView: WKWebView, didFailProvisionalNavigation navigation: WKNavigation!, withError error: Error) { + logger.error("Provisional navigation failed: \(error.localizedDescription)") + complete(with: .failure(DemarkError.urlNavigationFailed("\(url.absoluteString): \(error.localizedDescription)"))) + } + + func handleTimeout() { + guard !hasCompleted else { return } + logger.warning("Page load timed out for: \(self.url.absoluteString)") + complete(with: .failure(DemarkError.urlLoadingTimeout("\(url.absoluteString) after \(Int(options.timeout)) seconds"))) + } + + private func waitForIdle(webView: WKWebView) async throws { + var attempts = 0 + let maxAttempts = 50 // 5 seconds max polling + + while attempts < maxAttempts { + try Task.checkCancellation() + let readyState = try await webView.evaluateJavaScript("document.readyState") as? String + logger.debug("Document readyState: \(readyState ?? 
"unknown")") + if readyState == "complete" { + return + } + try await Task.sleep(nanoseconds: 100_000_000) // 100ms + attempts += 1 + } + logger.warning("Document never reached 'complete' state, proceeding anyway") + } + + private func extractHTML(from webView: WKWebView) async throws -> String { + let script: String + if let selector = options.contentSelector { + // Use JSON serialization for proper escaping (handles quotes, newlines, special chars) + let escapedSelector = try escapeForJS(selector) + script = """ + (function() { + var el = document.querySelector(\(escapedSelector)); + return el ? el.outerHTML : null; + })(); + """ + } else { + script = "document.documentElement.outerHTML" + } + + let result = try await webView.evaluateJavaScript(script) + + if options.contentSelector != nil, result == nil || (result as? NSNull) != nil { + throw DemarkError.contentSelectorNotFound(options.contentSelector!) + } + + guard let html = result as? String else { + throw DemarkError.conversionFailed + } + + logger.info("Extracted HTML length: \(html.count) characters") + return html + } + + /// Escape string for JavaScript using JSON serialization (handles all special characters) + private func escapeForJS(_ string: String) throws -> String { + let data = try JSONSerialization.data(withJSONObject: string) + guard let escaped = String(data: data, encoding: .utf8) else { + throw DemarkError.invalidInput("Failed to escape selector: \(string)") + } + return escaped // Returns properly quoted string like "article" or "div[data-id=\"foo\"]" + } + + private func complete(with result: Result) { + guard !hasCompleted else { return } + hasCompleted = true + + // Cancel timeout task to prevent leak + timeoutTask?.cancel() + timeoutTask = nil + + switch result { + case let .success(html): continuation?.resume(returning: html) + case let .failure(error): continuation?.resume(throwing: error) + } + continuation = nil + } +} From cce9a5a301bf78760ef57fb3e9e499df6817f36f Mon Sep 17 
00:00:00 2001 From: atacan Date: Sat, 17 Jan 2026 21:16:37 +0100 Subject: [PATCH 02/10] add tests for url loading --- Sources/Demark/URLLoadingRuntime.swift | 10 +- Tests/DemarkTests/DemarkURLLoadingTests.swift | 369 ++++++++++++++++++ 2 files changed, 376 insertions(+), 3 deletions(-) create mode 100644 Tests/DemarkTests/DemarkURLLoadingTests.swift diff --git a/Sources/Demark/URLLoadingRuntime.swift b/Sources/Demark/URLLoadingRuntime.swift index 28678b3..a4b0c2e 100644 --- a/Sources/Demark/URLLoadingRuntime.swift +++ b/Sources/Demark/URLLoadingRuntime.swift @@ -230,11 +230,15 @@ private final class URLNavigationDelegate: NSObject, WKNavigationDelegate { /// Escape string for JavaScript using JSON serialization (handles all special characters) private func escapeForJS(_ string: String) throws -> String { - let data = try JSONSerialization.data(withJSONObject: string) - guard let escaped = String(data: data, encoding: .utf8) else { + // Wrap in array since JSONSerialization requires a collection as top-level object + let data = try JSONSerialization.data(withJSONObject: [string]) + guard let arrayString = String(data: data, encoding: .utf8) else { throw DemarkError.invalidInput("Failed to escape selector: \(string)") } - return escaped // Returns properly quoted string like "article" or "div[data-id=\"foo\"]" + // Extract the quoted string from the array: ["value"] -> "value" + let startIndex = arrayString.index(after: arrayString.startIndex) // Skip [ + let endIndex = arrayString.index(before: arrayString.endIndex) // Skip ] + return String(arrayString[startIndex ..< endIndex]) } private func complete(with result: Result) { diff --git a/Tests/DemarkTests/DemarkURLLoadingTests.swift b/Tests/DemarkTests/DemarkURLLoadingTests.swift new file mode 100644 index 0000000..fab4266 --- /dev/null +++ b/Tests/DemarkTests/DemarkURLLoadingTests.swift @@ -0,0 +1,369 @@ +import Foundation +import Testing +@testable import Demark + +@MainActor +struct DemarkURLLoadingTests { + // 
MARK: - Unit Tests: Invalid URL Schemes + + @Test("Invalid URL scheme - file:// rejected") + func invalidURLSchemeFile() async { + let service = Demark() + let url = URL(string: "file:///tmp/test.html")! + + do { + _ = try await service.convertToMarkdown(url: url) + #expect(Bool(false), "Expected DemarkError.invalidURLScheme for file:// URL") + } catch DemarkError.invalidURLScheme(let details) { + #expect(details.contains("file")) + #expect(details.contains("Only http and https")) + } catch { + #expect(Bool(false), "Unexpected error: \(error)") + } + } + + @Test("Invalid URL scheme - ftp:// rejected") + func invalidURLSchemeFTP() async { + let service = Demark() + let url = URL(string: "ftp://example.com/file.txt")! + + do { + _ = try await service.convertToMarkdown(url: url) + #expect(Bool(false), "Expected DemarkError.invalidURLScheme for ftp:// URL") + } catch DemarkError.invalidURLScheme(let details) { + #expect(details.contains("ftp")) + } catch { + #expect(Bool(false), "Unexpected error: \(error)") + } + } + + @Test("Invalid URL scheme - custom scheme rejected") + func invalidURLSchemeCustom() async { + let service = Demark() + let url = URL(string: "myapp://page/content")! 
+ + do { + _ = try await service.convertToMarkdown(url: url) + #expect(Bool(false), "Expected DemarkError.invalidURLScheme for custom scheme") + } catch DemarkError.invalidURLScheme(let details) { + #expect(details.contains("myapp")) + } catch { + #expect(Bool(false), "Unexpected error: \(error)") + } + } + + // MARK: - Unit Tests: URLLoadingOptions + + @Test("URLLoadingOptions default values") + func urlLoadingOptionsDefaults() { + let options = URLLoadingOptions() + + #expect(options.timeout == 30) + #expect(options.waitForIdle == true) + #expect(options.idleDelay == 0.5) + #expect(options.contentSelector == nil) + #expect(options.userAgent == nil) + } + + @Test("URLLoadingOptions custom values") + func urlLoadingOptionsCustom() { + let options = URLLoadingOptions( + timeout: 60, + waitForIdle: false, + idleDelay: 1.0, + contentSelector: "article", + userAgent: "TestBot/1.0" + ) + + #expect(options.timeout == 60) + #expect(options.waitForIdle == false) + #expect(options.idleDelay == 1.0) + #expect(options.contentSelector == "article") + #expect(options.userAgent == "TestBot/1.0") + } + + @Test("URLLoadingOptions.default matches init()") + func urlLoadingOptionsDefaultStatic() { + let defaultOptions = URLLoadingOptions.default + let initOptions = URLLoadingOptions() + + #expect(defaultOptions.timeout == initOptions.timeout) + #expect(defaultOptions.waitForIdle == initOptions.waitForIdle) + #expect(defaultOptions.idleDelay == initOptions.idleDelay) + #expect(defaultOptions.contentSelector == initOptions.contentSelector) + #expect(defaultOptions.userAgent == initOptions.userAgent) + } + + // MARK: - Unit Tests: Error Descriptions + + @Test("Error description - urlLoadingTimeout") + func errorDescriptionTimeout() { + let error = DemarkError.urlLoadingTimeout("https://example.com after 30 seconds") + let description = error.errorDescription ?? 
"" + + #expect(description.contains("timed out")) + #expect(description.contains("https://example.com")) + #expect(description.contains("30 seconds")) + } + + @Test("Error description - urlNavigationFailed") + func errorDescriptionNavigation() { + let error = DemarkError.urlNavigationFailed("https://example.com: Connection refused") + let description = error.errorDescription ?? "" + + #expect(description.contains("navigation failed")) + #expect(description.contains("https://example.com")) + } + + @Test("Error description - invalidURLScheme") + func errorDescriptionScheme() { + let error = DemarkError.invalidURLScheme("Only http and https URLs are supported, got: file") + let description = error.errorDescription ?? "" + + #expect(description.contains("Invalid URL scheme")) + #expect(description.contains("file")) + } + + @Test("Error description - contentSelectorNotFound") + func errorDescriptionSelector() { + let error = DemarkError.contentSelectorNotFound("article.main-content") + let description = error.errorDescription ?? "" + + #expect(description.contains("selector")) + #expect(description.contains("article.main-content")) + #expect(description.contains("matched no elements")) + } +} + +// MARK: - Integration Tests + +@MainActor +struct DemarkURLLoadingIntegrationTests { + @Test("Load example.com and convert to markdown") + func loadExampleDotCom() async throws { + let service = Demark() + let url = URL(string: "https://example.com")! + + let loadingOptions = URLLoadingOptions( + timeout: 30, + waitForIdle: true, + idleDelay: 0.5 + ) + + let markdown = try await service.convertToMarkdown(url: url, loadingOptions: loadingOptions) + + // example.com has a simple page with "Example Domain" heading + #expect(markdown.contains("Example Domain")) + #expect(!markdown.isEmpty) + } + + @Test("Load with content selector extracts specific element") + func loadWithContentSelector() async throws { + let service = Demark() + let url = URL(string: "https://example.com")! 
+ + // example.com has a `div`
container with the main content + let loadingOptions = URLLoadingOptions( + timeout: 30, + contentSelector: "div" + ) + + let markdown = try await service.convertToMarkdown(url: url, loadingOptions: loadingOptions) + + #expect(markdown.contains("Example Domain")) + } + + @Test("Content selector not found throws error") + func contentSelectorNotFound() async { + let service = Demark() + let url = URL(string: "https://example.com")! + + let loadingOptions = URLLoadingOptions( + timeout: 30, + contentSelector: "article.nonexistent-class-xyz" + ) + + do { + _ = try await service.convertToMarkdown(url: url, loadingOptions: loadingOptions) + #expect(Bool(false), "Expected DemarkError.contentSelectorNotFound") + } catch DemarkError.contentSelectorNotFound(let selector) { + #expect(selector == "article.nonexistent-class-xyz") + } catch { + #expect(Bool(false), "Unexpected error: \(error)") + } + } + + @Test("Custom user agent is applied") + func customUserAgent() async throws { + let service = Demark() + let url = URL(string: "https://example.com")! + + let loadingOptions = URLLoadingOptions( + timeout: 30, + userAgent: "DemarkTest/1.0" + ) + + // Just verify the request succeeds with custom user agent + let markdown = try await service.convertToMarkdown(url: url, loadingOptions: loadingOptions) + #expect(!markdown.isEmpty) + } + + @Test("Short timeout with slow request") + func shortTimeoutError() async { + let service = Demark() + // Use a URL that will definitely timeout with 0.1s timeout + let url = URL(string: "https://example.com")! 
+ + let loadingOptions = URLLoadingOptions( + timeout: 0.001, // 1ms - will timeout + waitForIdle: false, + idleDelay: 0 + ) + + do { + _ = try await service.convertToMarkdown(url: url, loadingOptions: loadingOptions) + // If it succeeds (very fast network), that's fine too + } catch DemarkError.urlLoadingTimeout(let details) { + #expect(details.contains("example.com")) + } catch { + // Other network errors are acceptable + } + } +} + +// MARK: - Edge Case Tests + +@MainActor +struct DemarkURLLoadingEdgeCaseTests { + @Test("URL with query parameters") + func urlWithQueryParams() async throws { + let service = Demark() + // example.com ignores query params but we're testing URL handling + let url = URL(string: "https://example.com/?foo=bar&baz=123")! + + let markdown = try await service.convertToMarkdown(url: url) + #expect(markdown.contains("Example Domain")) + } + + @Test("URL with fragment") + func urlWithFragment() async throws { + let service = Demark() + let url = URL(string: "https://example.com/#section")! + + let markdown = try await service.convertToMarkdown(url: url) + #expect(markdown.contains("Example Domain")) + } + + @Test("URL with encoded characters") + func urlWithEncodedChars() async throws { + let service = Demark() + // %20 is space, example.com will handle this gracefully + let url = URL(string: "https://example.com/path%20with%20spaces")! + + do { + let markdown = try await service.convertToMarkdown(url: url) + // May get error page but shouldn't crash + #expect(!markdown.isEmpty) + } catch DemarkError.urlNavigationFailed { + // 404 or similar is acceptable + } + } + + @Test("Selector with attribute") + func selectorWithAttribute() async throws { + let service = Demark() + let url = URL(string: "https://example.com")! 
+ + // Test that attribute selectors work + let loadingOptions = URLLoadingOptions( + contentSelector: "a[href]" + ) + + let markdown = try await service.convertToMarkdown(url: url, loadingOptions: loadingOptions) + // example.com has a link - verify we got a markdown link + #expect(markdown.contains("[") && markdown.contains("](")) + } + + @Test("Selector with quotes in attribute") + func selectorWithQuotes() async throws { + let service = Demark() + let url = URL(string: "https://example.com")! + + // Test selector with quoted attribute value - use the actual IANA link + let loadingOptions = URLLoadingOptions( + contentSelector: "a[href*=\"iana.org\"]" + ) + + let markdown = try await service.convertToMarkdown(url: url, loadingOptions: loadingOptions) + // Verify we got the link content + #expect(markdown.contains("iana.org")) + } + + @Test("Minimal timeout and idle settings") + func minimalDelays() async throws { + let service = Demark() + let url = URL(string: "https://example.com")! + + let loadingOptions = URLLoadingOptions( + timeout: 30, + waitForIdle: false, + idleDelay: 0 + ) + + let markdown = try await service.convertToMarkdown(url: url, loadingOptions: loadingOptions) + #expect(!markdown.isEmpty) + } + + @Test("HTTP URL scheme accepted") + func httpSchemeAccepted() async { + let service = Demark() + // Note: May fail due to ATS, but should not throw invalidURLScheme + let url = URL(string: "http://example.com")! + + do { + let markdown = try await service.convertToMarkdown(url: url) + #expect(!markdown.isEmpty) + } catch DemarkError.invalidURLScheme { + #expect(Bool(false), "http:// should be accepted, not rejected as invalid scheme") + } catch { + // Network errors (ATS, connection issues) are acceptable + } + } +} + +// MARK: - Cancellation Tests + +@MainActor +struct DemarkURLLoadingCancellationTests { + @Test("Task cancellation stops loading") + func taskCancellation() async { + let service = Demark() + let url = URL(string: "https://example.com")! 
+ + let loadingOptions = URLLoadingOptions( + timeout: 60, // Long timeout + waitForIdle: true, + idleDelay: 5 // Long delay to ensure we can cancel + ) + + let task = Task { + try await service.convertToMarkdown(url: url, loadingOptions: loadingOptions) + } + + // Cancel quickly + try? await Task.sleep(nanoseconds: 100_000_000) // 100ms + task.cancel() + + let result = await task.result + switch result { + case .success: + // Fast completion before cancel is OK + break + case .failure(let error): + // CancellationError or wrapped version is expected + let isCancellation = error is CancellationError || + String(describing: error).contains("cancel") + #expect(isCancellation || error is DemarkError, "Expected cancellation or Demark error, got: \(error)") + } + } +} From f1c941d550238e062f347ce234924d91c907cc1a Mon Sep 17 00:00:00 2001 From: atacan Date: Sat, 17 Jan 2026 21:35:27 +0100 Subject: [PATCH 03/10] URL conversion option --- .../DemarkExample/ContentView-iOS.swift | 91 +++++++++-- .../Sources/DemarkExample/ContentView.swift | 141 +++++++++++++++++- 2 files changed, 213 insertions(+), 19 deletions(-) diff --git a/Example/Sources/DemarkExample/ContentView-iOS.swift b/Example/Sources/DemarkExample/ContentView-iOS.swift index 0c86ba4..f3e7e4e 100644 --- a/Example/Sources/DemarkExample/ContentView-iOS.swift +++ b/Example/Sources/DemarkExample/ContentView-iOS.swift @@ -10,28 +10,90 @@ import SwiftUI #if os(iOS) extension ContentView { + private var hasValidInputForIOS: Bool { + switch inputMode { + case .html: + return !htmlInput.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty + case .url: + let trimmed = urlInput.trimmingCharacters(in: .whitespacesAndNewlines) + return !trimmed.isEmpty + } + } + + private var iOSContentSelectorBinding: Binding { + Binding( + get: { urlLoadingOptions.contentSelector ?? "" }, + set: { urlLoadingOptions.contentSelector = $0.isEmpty ? 
nil : $0 } + ) + } + var iOSLayout: some View { NavigationStack { ScrollView { VStack(spacing: 20) { - // HTML Input Section + // Input Mode Picker + Picker("Input Mode", selection: $inputMode) { + ForEach(InputMode.allCases, id: \.self) { mode in + Label(mode.rawValue, systemImage: mode.icon).tag(mode) + } + } + .pickerStyle(.segmented) + .padding(.horizontal) + + // Input Section VStack(alignment: .leading, spacing: 12) { HStack { - Label("HTML Input", systemImage: "chevron.left.forwardslash.chevron.right") - .font(.headline) + Label( + inputMode == .html ? "HTML Input" : "URL Input", + systemImage: inputMode.icon + ) + .font(.headline) Spacer() - sampleHTMLMenu + if inputMode == .html { + sampleHTMLMenu + } } - TextEditor(text: $htmlInput) - .font(.system(.body, design: .monospaced)) - .frame(minHeight: 200) - .overlay( - RoundedRectangle(cornerRadius: 8) - .stroke(Color.secondary.opacity(0.3), lineWidth: 1) - ) + if inputMode == .html { + TextEditor(text: $htmlInput) + .font(.system(.body, design: .monospaced)) + .frame(minHeight: 200) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(Color.secondary.opacity(0.3), lineWidth: 1) + ) + } else { + VStack(alignment: .leading, spacing: 16) { + VStack(alignment: .leading, spacing: 8) { + Text("URL") + .font(.caption) + .foregroundColor(.secondary) + + TextField("https://example.com", text: $urlInput) + .font(.system(.body, design: .monospaced)) + .textFieldStyle(.roundedBorder) + .autocapitalization(.none) + .keyboardType(.URL) + } + + VStack(alignment: .leading, spacing: 8) { + Text("Content Selector (optional)") + .font(.caption) + .foregroundColor(.secondary) + + TextField("e.g., article, main", text: iOSContentSelectorBinding) + .font(.system(.body, design: .monospaced)) + .textFieldStyle(.roundedBorder) + .autocapitalization(.none) + + Text("CSS selector to extract specific content") + .font(.caption2) + .foregroundColor(.secondary) + } + } + } } .padding() .background(Color(.secondarySystemBackground)) 
@@ -108,18 +170,18 @@ import SwiftUI .cornerRadius(12) // Convert Button - Button(action: convertHTML) { + Button(action: performConversion) { HStack { Image(systemName: "arrow.right.circle.fill") Text("Convert to Markdown") } .frame(maxWidth: .infinity) .padding() - .background(isConverting || htmlInput.isEmpty ? Color.gray : Color.accentColor) + .background(isConverting || !hasValidInputForIOS ? Color.gray : Color.accentColor) .foregroundColor(.white) .cornerRadius(12) } - .disabled(isConverting || htmlInput.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty) + .disabled(isConverting || !hasValidInputForIOS) // Output Section if !markdownOutput.isEmpty || conversionError != nil { @@ -168,3 +230,4 @@ import SwiftUI } } #endif + diff --git a/Example/Sources/DemarkExample/ContentView.swift b/Example/Sources/DemarkExample/ContentView.swift index fc0384b..4dfd1f2 100644 --- a/Example/Sources/DemarkExample/ContentView.swift +++ b/Example/Sources/DemarkExample/ContentView.swift @@ -14,16 +14,31 @@ import SwiftUI #endif struct ContentView: View { + @State var inputMode: InputMode = .html @State var htmlInput: String = SampleHTML.defaultHTML + @State var urlInput: String = "https://example.com" @State var markdownOutput: String = "" @State var isConverting: Bool = false @State var conversionError: String? 
@State var selectedTab: OutputTab = .source @State var options = DemarkOptions() @State var selectedEngine: ConversionEngine = .turndown + @State var urlLoadingOptions = URLLoadingOptions() private let demark = Demark() + enum InputMode: String, CaseIterable { + case html = "HTML" + case url = "URL" + + var icon: String { + switch self { + case .html: "chevron.left.forwardslash.chevron.right" + case .url: "link" + } + } + } + enum OutputTab: String, CaseIterable { case source = "Source" case rendered = "Rendered" @@ -49,23 +64,47 @@ struct ContentView: View { var inputHeader: some View { VStack(alignment: .leading, spacing: 8) { HStack { - Label("HTML Input", systemImage: "chevron.left.forwardslash.chevron.right") + Label(inputMode == .html ? "HTML Input" : "URL Input", systemImage: inputMode.icon) .font(.title2) .fontWeight(.semibold) Spacer() - sampleHTMLMenu + inputModePicker + + if inputMode == .html { + sampleHTMLMenu + } } - Text("Paste or type your HTML content below") + Text(inputMode == .html ? 
"Paste or type your HTML content below" : "Enter a URL to fetch and convert") .font(.caption) .foregroundColor(.secondary) } .padding() } + var inputModePicker: some View { + Picker("Input Mode", selection: $inputMode) { + ForEach(InputMode.allCases, id: \.self) { mode in + Label(mode.rawValue, systemImage: mode.icon).tag(mode) + } + } + .pickerStyle(.segmented) + .frame(width: 140) + } + + @ViewBuilder var inputEditor: some View { + switch inputMode { + case .html: + htmlInputEditor + case .url: + urlInputEditor + } + } + + var htmlInputEditor: some View { ScrollView { TextEditor(text: $htmlInput) .font(.system(.body, design: .monospaced)) @@ -81,6 +120,51 @@ struct ContentView: View { .padding(.horizontal) } + var urlInputEditor: some View { + VStack(alignment: .leading, spacing: 16) { + VStack(alignment: .leading, spacing: 8) { + Text("URL") + .font(.caption) + .foregroundColor(.secondary) + + TextField("https://example.com", text: $urlInput) + .font(.system(.body, design: .monospaced)) + .textFieldStyle(.roundedBorder) + } + + VStack(alignment: .leading, spacing: 8) { + Text("Content Selector (optional)") + .font(.caption) + .foregroundColor(.secondary) + + TextField("e.g., article, main, .content", text: contentSelectorBinding) + .font(.system(.body, design: .monospaced)) + .textFieldStyle(.roundedBorder) + + Text("CSS selector to extract specific content from the page") + .font(.caption2) + .foregroundColor(.secondary) + } + + Spacer() + } + .padding() + .background(platformBackgroundColor) + .cornerRadius(8) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(Color.secondary.opacity(0.3), lineWidth: 1) + ) + .padding(.horizontal) + } + + private var contentSelectorBinding: Binding { + Binding( + get: { urlLoadingOptions.contentSelector ?? "" }, + set: { urlLoadingOptions.contentSelector = $0.isEmpty ? 
nil : $0 } + ) + } + var sampleHTMLMenu: some View { Menu("Sample HTML") { ForEach(SampleHTML.allCases, id: \.self) { sample in @@ -312,14 +396,23 @@ struct ContentView: View { // MARK: - Action Buttons var convertButton: some View { - Button(action: convertHTML) { + Button(action: performConversion) { HStack { Image(systemName: "arrow.right.circle.fill") Text("Convert") } } .keyboardShortcut(.return, modifiers: .command) - .disabled(isConverting || htmlInput.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty) + .disabled(isConverting || !hasValidInput) + } + + private var hasValidInput: Bool { + switch inputMode { + case .html: + return !htmlInput.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty + case .url: + return URL(string: urlInput) != nil && !urlInput.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty + } } private var copyButton: some View { @@ -332,6 +425,16 @@ struct ContentView: View { // MARK: - Actions + @MainActor + func performConversion() { + switch inputMode { + case .html: + convertHTML() + case .url: + convertURL() + } + } + @MainActor func convertHTML() { guard !htmlInput.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else { return } @@ -353,6 +456,34 @@ struct ContentView: View { } } + @MainActor + func convertURL() { + guard let url = URL(string: urlInput) else { + conversionError = "Invalid URL" + return + } + + isConverting = true + conversionError = nil + + Task { + do { + let result = try await demark.convertToMarkdown( + url: url, + options: options, + loadingOptions: urlLoadingOptions + ) + markdownOutput = result + conversionError = nil + } catch { + conversionError = error.localizedDescription + markdownOutput = "" + } + + isConverting = false + } + } + func copyMarkdown() { #if os(macOS) let pasteboard = NSPasteboard.general From 2170b58362c6b362284754c275658fa57c86b911 Mon Sep 17 00:00:00 2001 From: atacan Date: Sat, 17 Jan 2026 21:43:13 +0100 Subject: [PATCH 04/10] Now when the WebContent process gets 
frozen by iOS and TurndownService disappears, the reinit will either properly restore it or throw a clear error rather than silently failing and falling back to html-to-md. 1. Replaced unreliable sleep with proper page load detection 2. Actually verify TurndownService after loading script 3. Clean up old WebView and verify after reinit --- Sources/Demark/TurndownRuntime.swift | 66 +++++++++++++++++++--------- 1 file changed, 45 insertions(+), 21 deletions(-) diff --git a/Sources/Demark/TurndownRuntime.swift b/Sources/Demark/TurndownRuntime.swift index 38d51f6..96ee8e1 100644 --- a/Sources/Demark/TurndownRuntime.swift +++ b/Sources/Demark/TurndownRuntime.swift @@ -89,11 +89,23 @@ final class TurndownRuntime { guard try await turndownIsAvailable(in: webView) else { logger.warning("TurndownService missing, reinitializing WKWebView...") + + // Clean up old WebView before creating new one + self.webView = nil isInitialized = false + try await initializeJavaScriptEnvironment() + guard let refreshedWebView = self.webView else { throw DemarkError.jsEnvironmentInitializationFailed } + + // Verify TurndownService is available after reinit + guard try await turndownIsAvailable(in: refreshedWebView) else { + logger.error("TurndownService still not available after reinitialization") + throw DemarkError.jsEnvironmentInitializationFailed + } + return refreshedWebView } @@ -249,8 +261,8 @@ final class TurndownRuntime { // Load a blank page first webView.loadHTMLString("", baseURL: nil) - // Wait for page to load - try await Task.sleep(nanoseconds: 100_000_000) // 100ms + // Wait for page to actually be ready (poll document.readyState) + try await waitForDocumentReady(webView: webView) // Load Turndown library logger.info("Loading Turndown from: \(turndownPath)") @@ -261,28 +273,14 @@ final class TurndownRuntime { _ = try await webView.evaluateJavaScript(turndownScript) logger.info("Successfully loaded Turndown JavaScript library") - // Wait a bit for the script to fully initialize 
- try await Task.sleep(nanoseconds: 50_000_000) // 50ms - - // Check what's available in the global scope - let globalCheck = try await webView.evaluateJavaScript(""" - JSON.stringify({ - hasTurndownService: typeof TurndownService !== 'undefined', - hasTurndown: typeof Turndown !== 'undefined', - hasWindowTurndownService: typeof window.TurndownService !== 'undefined', - hasWindowTurndown: typeof window.Turndown !== 'undefined' - }) - """) - - if let checkResult = globalCheck as? String { - logger.info("Global scope check: \(checkResult)") + // Verify TurndownService is actually available + guard try await turndownIsAvailable(in: webView) else { + logger.error("TurndownService not available after loading script") + throw DemarkError.libraryLoadingFailed("TurndownService not available in global scope") } - // Since TurndownService is available, we don't need to do anything else - // The global scope check confirmed it's there - isInitialized = true - logger.info("WKWebView runtime ready with Turndown 🎉") + logger.info("WKWebView runtime ready with Turndown") } catch let error as DemarkError { throw error } catch { @@ -290,4 +288,30 @@ final class TurndownRuntime { throw DemarkError.libraryLoadingFailed(error.localizedDescription) } } + + /// Wait for document to be ready by polling document.readyState + private func waitForDocumentReady(webView: WKWebView) async throws { + let maxAttempts = 50 // 5 seconds max + var attempts = 0 + + while attempts < maxAttempts { + try Task.checkCancellation() + + do { + let readyState = try await webView.evaluateJavaScript("document.readyState") as? String + logger.debug("Document readyState: \(readyState ?? "unknown")") + if readyState == "complete" || readyState == "interactive" { + return + } + } catch { + // If we can't even evaluate JS, the page isn't ready yet + logger.debug("Waiting for document... 
(\(error.localizedDescription))") + } + + try await Task.sleep(nanoseconds: 100_000_000) // 100ms between polls + attempts += 1 + } + + logger.warning("Document never reached ready state, proceeding anyway") + } } From 9a6b1c8b8fac94a0ffde8adcdf1366d1a7f68b03 Mon Sep 17 00:00:00 2001 From: atacan Date: Sat, 17 Jan 2026 22:06:50 +0100 Subject: [PATCH 05/10] use lazy instead of computed dance --- Sources/Demark/Demark.swift | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/Sources/Demark/Demark.swift b/Sources/Demark/Demark.swift index 238af62..d2b6922 100644 --- a/Sources/Demark/Demark.swift +++ b/Sources/Demark/Demark.swift @@ -16,14 +16,7 @@ final class ConversionRuntime { private let logger = Logger(subsystem: "com.demark", category: "conversion") private let turndownRuntime = TurndownRuntime() private let htmlToMdRuntime = HTMLToMdRuntime() - private var _urlLoadingRuntime: URLLoadingRuntime? - - private var urlLoadingRuntime: URLLoadingRuntime { - if _urlLoadingRuntime == nil { - _urlLoadingRuntime = URLLoadingRuntime() - } - return _urlLoadingRuntime! - } + private lazy var urlLoadingRuntime = URLLoadingRuntime() // MARK: - Public Methods From 06f3fd42edd94d9aef2bf0975b4df98e31582a3e Mon Sep 17 00:00:00 2001 From: atacan Date: Sun, 18 Jan 2026 17:34:04 +0100 Subject: [PATCH 06/10] Concurrent URL loads don't cross-cancel --- Sources/Demark/URLLoadingRuntime.swift | 10 ++++---- Tests/DemarkTests/DemarkURLLoadingTests.swift | 23 +++++++++++++++++++ 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/Sources/Demark/URLLoadingRuntime.swift b/Sources/Demark/URLLoadingRuntime.swift index a4b0c2e..ceacdf5 100644 --- a/Sources/Demark/URLLoadingRuntime.swift +++ b/Sources/Demark/URLLoadingRuntime.swift @@ -22,7 +22,6 @@ final class URLLoadingRuntime { // MARK: - Properties private let logger = Logger(subsystem: "com.demark", category: "url-loading") - private var webView: WKWebView? 
private var navigationDelegate: URLNavigationDelegate? // MARK: - Lifecycle @@ -39,6 +38,9 @@ final class URLLoadingRuntime { /// untrusted pages. Supports waiting for JavaScript to settle and extracting /// specific content via CSS selectors. /// + /// This method supports concurrent calls - each invocation uses its own webView + /// and cleanup is isolated to that specific request. + /// /// - Parameters: /// - url: The URL to load /// - options: Loading configuration options @@ -47,11 +49,9 @@ final class URLLoadingRuntime { func loadAndExtract(url: URL, options: URLLoadingOptions) async throws -> String { // Create fresh ephemeral WebView for each load let webView = createWebView(userAgent: options.userAgent) - self.webView = webView defer { - self.webView?.stopLoading() - self.webView = nil + webView.stopLoading() self.navigationDelegate = nil } @@ -59,7 +59,7 @@ final class URLLoadingRuntime { try await performLoad(webView: webView, url: url, options: options) } onCancel: { Task { @MainActor in - self.webView?.stopLoading() + webView.stopLoading() } } } diff --git a/Tests/DemarkTests/DemarkURLLoadingTests.swift b/Tests/DemarkTests/DemarkURLLoadingTests.swift index fab4266..8c208a7 100644 --- a/Tests/DemarkTests/DemarkURLLoadingTests.swift +++ b/Tests/DemarkTests/DemarkURLLoadingTests.swift @@ -367,3 +367,26 @@ struct DemarkURLLoadingCancellationTests { } } } + +// MARK: - Concurrent Load Tests + +@MainActor +struct DemarkURLLoadingConcurrencyTests { + @Test("Concurrent URL loads don't cross-cancel") + func concurrentURLLoads() async throws { + let service = Demark() + let urls = [ + URL(string: "https://example.com")!, + URL(string: "https://example.org")!, + ] + + try await withThrowingTaskGroup(of: String.self) { group in + for url in urls { + group.addTask { try await service.convertToMarkdown(url: url) } + } + for try await result in group { + #expect(!result.isEmpty) + } + } + } +} From eefb6443fabc481a518a0eb692ff9f669ec82113 Mon Sep 17 00:00:00 2001 
From: atacan Date: Sun, 18 Jan 2026 18:36:27 +0100 Subject: [PATCH 07/10] clamp negative values --- Sources/Demark/URLLoadingRuntime.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/Demark/URLLoadingRuntime.swift b/Sources/Demark/URLLoadingRuntime.swift index ceacdf5..df03a1f 100644 --- a/Sources/Demark/URLLoadingRuntime.swift +++ b/Sources/Demark/URLLoadingRuntime.swift @@ -114,7 +114,7 @@ final class URLLoadingRuntime { // Set up timeout (delegate will cancel this task on completion) delegate.timeoutTask = Task { - try? await Task.sleep(nanoseconds: UInt64(options.timeout * 1_000_000_000)) + try? await Task.sleep(nanoseconds: UInt64(max(0, options.timeout) * 1_000_000_000)) delegate.handleTimeout() } } @@ -154,7 +154,7 @@ private final class URLNavigationDelegate: NSObject, WKNavigationDelegate { } if options.idleDelay > 0 { - try await Task.sleep(nanoseconds: UInt64(options.idleDelay * 1_000_000_000)) + try await Task.sleep(nanoseconds: UInt64(max(0, options.idleDelay) * 1_000_000_000)) } try Task.checkCancellation() From b5199e77f85bd9e44a6498c640e48e527584ef9c Mon Sep 17 00:00:00 2001 From: atacan Date: Tue, 27 Jan 2026 21:43:39 +0100 Subject: [PATCH 08/10] clampedNanoseconds --- Sources/Demark/URLLoadingRuntime.swift | 27 ++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/Sources/Demark/URLLoadingRuntime.swift b/Sources/Demark/URLLoadingRuntime.swift index df03a1f..4fe3df3 100644 --- a/Sources/Demark/URLLoadingRuntime.swift +++ b/Sources/Demark/URLLoadingRuntime.swift @@ -113,9 +113,11 @@ final class URLLoadingRuntime { webView.load(request) // Set up timeout (delegate will cancel this task on completion) - delegate.timeoutTask = Task { - try? await Task.sleep(nanoseconds: UInt64(max(0, options.timeout) * 1_000_000_000)) - delegate.handleTimeout() + if let nanoseconds = clampedNanoseconds(options.timeout) { + delegate.timeoutTask = Task { + try? 
await Task.sleep(nanoseconds: nanoseconds) + delegate.handleTimeout() + } } } } @@ -153,8 +155,8 @@ private final class URLNavigationDelegate: NSObject, WKNavigationDelegate { try await waitForIdle(webView: webView) } - if options.idleDelay > 0 { - try await Task.sleep(nanoseconds: UInt64(max(0, options.idleDelay) * 1_000_000_000)) + if let nanoseconds = clampedNanoseconds(options.idleDelay), nanoseconds > 0 { + try await Task.sleep(nanoseconds: nanoseconds) } try Task.checkCancellation() @@ -179,7 +181,13 @@ private final class URLNavigationDelegate: NSObject, WKNavigationDelegate { func handleTimeout() { guard !hasCompleted else { return } logger.warning("Page load timed out for: \(self.url.absoluteString)") - complete(with: .failure(DemarkError.urlLoadingTimeout("\(url.absoluteString) after \(Int(options.timeout)) seconds"))) + let secondsDescription: String + if let nanoseconds = clampedNanoseconds(options.timeout) { + secondsDescription = String(nanoseconds / 1_000_000_000) + } else { + secondsDescription = "∞" + } + complete(with: .failure(DemarkError.urlLoadingTimeout("\(url.absoluteString) after \(secondsDescription) seconds"))) } private func waitForIdle(webView: WKWebView) async throws { @@ -256,3 +264,10 @@ private final class URLNavigationDelegate: NSObject, WKNavigationDelegate { continuation = nil } } + +private func clampedNanoseconds(_ seconds: TimeInterval) -> UInt64? { + guard seconds.isFinite else { return nil } + let maxSeconds = Double(UInt64.max) / 1_000_000_000 + let clampedSeconds = max(0, min(seconds, maxSeconds)) + return UInt64(clampedSeconds * 1_000_000_000) +} From 1d1de29474a394cfead7b03ef9ceca87b4468275 Mon Sep 17 00:00:00 2001 From: atacan Date: Tue, 27 Jan 2026 22:04:32 +0100 Subject: [PATCH 09/10] Now each concurrent loadAndExtract call maintains its own strong reference to its delegate in the dictionary, preventing premature deallocation when multiple requests run simultaneously. 
--- Sources/Demark/URLLoadingRuntime.swift | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Sources/Demark/URLLoadingRuntime.swift b/Sources/Demark/URLLoadingRuntime.swift index 4fe3df3..e8af508 100644 --- a/Sources/Demark/URLLoadingRuntime.swift +++ b/Sources/Demark/URLLoadingRuntime.swift @@ -22,7 +22,7 @@ final class URLLoadingRuntime { // MARK: - Properties private let logger = Logger(subsystem: "com.demark", category: "url-loading") - private var navigationDelegate: URLNavigationDelegate? + private var activeDelegates: [ObjectIdentifier: URLNavigationDelegate] = [:] // MARK: - Lifecycle @@ -52,7 +52,7 @@ final class URLLoadingRuntime { defer { webView.stopLoading() - self.navigationDelegate = nil + self.activeDelegates.removeValue(forKey: ObjectIdentifier(webView)) } return try await withTaskCancellationHandler { @@ -60,6 +60,9 @@ final class URLLoadingRuntime { } onCancel: { Task { @MainActor in webView.stopLoading() + if let delegate = self.activeDelegates[ObjectIdentifier(webView)] { + delegate.cancel() + } } } } @@ -106,7 +109,7 @@ final class URLLoadingRuntime { logger: logger, continuation: continuation ) - self.navigationDelegate = delegate + self.activeDelegates[ObjectIdentifier(webView)] = delegate webView.navigationDelegate = delegate let request = URLRequest(url: url) @@ -190,6 +193,11 @@ private final class URLNavigationDelegate: NSObject, WKNavigationDelegate { complete(with: .failure(DemarkError.urlLoadingTimeout("\(url.absoluteString) after \(secondsDescription) seconds"))) } + func cancel() { + guard !hasCompleted else { return } + complete(with: .failure(CancellationError())) + } + private func waitForIdle(webView: WKWebView) async throws { var attempts = 0 let maxAttempts = 50 // 5 seconds max polling From 41c8637486049519c6a436c4d41c175a4715f694 Mon Sep 17 00:00:00 2001 From: atacan Date: Tue, 27 Jan 2026 22:18:53 +0100 Subject: [PATCH 10/10] =?UTF-8?q?Applied=20URLLoadingOptions.timeout=20to?= 
=?UTF-8?q?=20the=20URLRequest=20when=20it=E2=80=99s=20finite=20and=20>=20?= =?UTF-8?q?0,=20so=20the=20underlying=20load=20honors=20longer=20timeouts.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Sources/Demark/URLLoadingRuntime.swift | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Sources/Demark/URLLoadingRuntime.swift b/Sources/Demark/URLLoadingRuntime.swift index e8af508..8d687a3 100644 --- a/Sources/Demark/URLLoadingRuntime.swift +++ b/Sources/Demark/URLLoadingRuntime.swift @@ -112,7 +112,10 @@ final class URLLoadingRuntime { self.activeDelegates[ObjectIdentifier(webView)] = delegate webView.navigationDelegate = delegate - let request = URLRequest(url: url) + var request = URLRequest(url: url) + if options.timeout.isFinite, options.timeout > 0 { + request.timeoutInterval = options.timeout + } webView.load(request) // Set up timeout (delegate will cancel this task on completion)