From a4ea86f5f1ef5221c091e88ca394552b3606ce02 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 30 Mar 2026 19:35:52 +0200 Subject: [PATCH 1/3] Add OGG support and dynamically query supported audio formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace hardcoded format lists with a dynamic query to AVURLAsset.audiovisualTypes(), which returns every audio/video format the OS can actually decode. This means new codecs Apple adds in future macOS releases are picked up automatically with zero code changes. Closes #213 Previously the supported formats were hardcoded in 4 separate places (9 extensions). They are now centralised in MeetingTranscriptionService and derived at launch from AVFoundation. On macOS 26 this resolves to 52 formats: 3g2, 3gp, aa, aac, aax, ac3, aifc, aiff, aivu, amr, au, avi, awb, caf, dv, eac3, flac, itt, loas, m1a, m2a, m2p, m2v, m3u, m4a, m4b, m4p, m4r, m4v, mod, mov, mp1, mp2, mp3, mp4, mpa, mpg, mqv, mts, ogg, pls, qta, sc2, scc, ts, ttml, vob, vtt, w64, wav, webvtt, xhe Notably this includes OGG (via org.xiph.ogg-audio), which macOS 26 decodes natively through AVFoundation — no third-party dependency needed. 
Changes: - MeetingTranscriptionService: query AVURLAsset.audiovisualTypes() and convert to file extensions via UTType; centralise allowedContentTypes, user-facing description, and drop-error copy - MeetingTranscriptionView: replace all 4 hardcoded format definitions with references to the service's centralised constants # Conflicts: # Sources/Fluid/Services/MeetingTranscriptionService.swift # Sources/Fluid/UI/MeetingTranscriptionView.swift --- .../MeetingTranscriptionService.swift | 24 ++++++++++++++++--- .../Fluid/UI/MeetingTranscriptionView.swift | 16 ++++--------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/Sources/Fluid/Services/MeetingTranscriptionService.swift b/Sources/Fluid/Services/MeetingTranscriptionService.swift index e17c5ed3..f376c1a7 100644 --- a/Sources/Fluid/Services/MeetingTranscriptionService.swift +++ b/Sources/Fluid/Services/MeetingTranscriptionService.swift @@ -2,6 +2,7 @@ import AVFoundation import Combine import CoreMedia import Foundation +import UniformTypeIdentifiers /// Result of a transcription operation struct TranscriptionResult: Identifiable, Sendable, Codable { @@ -67,6 +68,24 @@ final class MeetingTranscriptionService: ObservableObject { @Published var error: String? @Published var result: TranscriptionResult? + // MARK: - Supported Formats + + /// File extensions the OS can actually decode, queried dynamically from AVFoundation. + static let supportedFileExtensions: Set<String> = { + let avTypes = AVURLAsset.audiovisualTypes() + let extensions = avTypes.compactMap { UTType($0.rawValue)?.preferredFilenameExtension } + return Set(extensions) + }() + + /// Content types accepted by the file picker — broad categories so the OS filters naturally. + static let allowedContentTypes: [UTType] = [.audio, .movie] + + /// User-facing description of supported formats (curated for readability). 
+ static let supportedFormatsDescription = "Supported: WAV, MP3, M4A, OGG, MP4, MOV, and more" + + /// Error copy shown when a dropped file is not accepted. + static let dropErrorCopy = "Accepted file types: WAV, MP3, M4A, OGG, MP4, MOV, and more." + /// Share the ASR service instance to avoid loading models twice private let asrService: ASRService @@ -159,11 +178,10 @@ final class MeetingTranscriptionService: ObservableObject { // Check file extension let fileExtension = fileURL.pathExtension.lowercased() - let supportedFormats = ["wav", "mp3", "m4a", "ogg", "aac", "flac", "aiff", "caf", "mp4", "mov"] - guard supportedFormats.contains(fileExtension) else { + guard Self.supportedFileExtensions.contains(fileExtension) else { throw TranscriptionError - .fileNotSupported("Format .\(fileExtension) not supported. Supported: \(supportedFormats.joined(separator: ", "))") + .fileNotSupported("Format .\(fileExtension) not supported. \(Self.supportedFormatsDescription)") } // Get audio duration for progress display diff --git a/Sources/Fluid/UI/MeetingTranscriptionView.swift b/Sources/Fluid/UI/MeetingTranscriptionView.swift index 78a520c0..bea63c77 100644 --- a/Sources/Fluid/UI/MeetingTranscriptionView.swift +++ b/Sources/Fluid/UI/MeetingTranscriptionView.swift @@ -192,7 +192,7 @@ struct MeetingTranscriptionView: View { Text("Choose Audio or Video File") .font(.headline) - Text("Supported: WAV, MP3, M4A, OGG, MP4, MOV, and more") + Text(MeetingTranscriptionService.supportedFormatsDescription) .font(.caption) .foregroundColor(.secondary) } @@ -221,15 +221,7 @@ struct MeetingTranscriptionView: View { } .fileImporter( isPresented: self.$showingFilePicker, - allowedContentTypes: [ - .audio, - .movie, - .mpeg4Movie, - UTType(filenameExtension: "wav") ?? .audio, - UTType(filenameExtension: "mp3") ?? .audio, - UTType(filenameExtension: "m4a") ?? .audio, - UTType(filenameExtension: "ogg") ?? 
.audio, - ], + allowedContentTypes: MeetingTranscriptionService.allowedContentTypes, allowsMultipleSelection: false ) { result in switch result { @@ -545,9 +537,9 @@ struct MeetingTranscriptionView: View { // MARK: - Helper Functions - private static let supportedFileExtensions = ["wav", "mp3", "m4a", "ogg", "aac", "flac", "aiff", "caf", "mp4", "mov"] + private static let supportedFileExtensions = MeetingTranscriptionService.supportedFileExtensions - private static let dropErrorCopy = "Accepted file types: WAV, MP3, M4A, OGG, MP4, MOV, and more." + private static let dropErrorCopy = MeetingTranscriptionService.dropErrorCopy private func handleDrop(providers: [NSItemProvider]) -> Bool { guard let provider = providers.first else { return false } From 756d21714e6b358d2c7868acf44754d1d8049c52 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Tue, 31 Mar 2026 13:38:42 +0200 Subject: [PATCH 2/3] Filter out subtitles, playlists, etc. from supported formats --- Sources/Fluid/Services/MeetingTranscriptionService.swift | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Sources/Fluid/Services/MeetingTranscriptionService.swift b/Sources/Fluid/Services/MeetingTranscriptionService.swift index f376c1a7..dcfb92a4 100644 --- a/Sources/Fluid/Services/MeetingTranscriptionService.swift +++ b/Sources/Fluid/Services/MeetingTranscriptionService.swift @@ -71,9 +71,14 @@ final class MeetingTranscriptionService: ObservableObject { // MARK: - Supported Formats /// File extensions the OS can actually decode, queried dynamically from AVFoundation. + /// Filtered to audio/video types only — excludes subtitles, playlists, etc. static let supportedFileExtensions: Set<String> = { let avTypes = AVURLAsset.audiovisualTypes() let extensions = avTypes.compactMap { fileType -> String? 
in + guard let utType = UTType(fileType.rawValue) else { return nil } + guard utType.conforms(to: .audio) || utType.conforms(to: .movie) else { return nil } + return utType.preferredFilenameExtension + } return Set(extensions) }() From 840fdc6606dc52dea8458caf955667e9beafb624 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Wed, 1 Apr 2026 16:48:40 +0200 Subject: [PATCH 3/3] Make `isVideoContainer` accept any supported video --- Sources/Fluid/Services/MeetingTranscriptionService.swift | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Sources/Fluid/Services/MeetingTranscriptionService.swift b/Sources/Fluid/Services/MeetingTranscriptionService.swift index dcfb92a4..be7391e4 100644 --- a/Sources/Fluid/Services/MeetingTranscriptionService.swift +++ b/Sources/Fluid/Services/MeetingTranscriptionService.swift @@ -204,7 +204,8 @@ final class MeetingTranscriptionService: ObservableObject { DebugLogger.shared.warning("Could not determine audio duration: \(error.localizedDescription)", source: "MeetingTranscriptionService") } - let isVideoContainer = ["mp4", "mov"].contains(fileExtension) + let isVideoContainer = UTType(filenameExtension: fileExtension) + .map { $0.conforms(to: .movie) } ?? false if provider.prefersNativeFileTranscription && !isVideoContainer { self.currentStatus = duration > 0 ? "Transcribing audio (\(Int(duration))s)..." : "Transcribing audio..."