diff --git a/.flutter-plugins-dependencies b/.flutter-plugins-dependencies index 0ebf3d0..7b6ae5d 100644 --- a/.flutter-plugins-dependencies +++ b/.flutter-plugins-dependencies @@ -1 +1 @@ -{"info":"This is a generated file; do not edit or check into version control.","plugins":{"ios":[{"name":"firebase_core","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_core-2.32.0\\\\","native_build":true,"dependencies":[],"dev_dependency":false},{"name":"firebase_database","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_database-10.5.7\\\\","native_build":true,"dependencies":["firebase_core"],"dev_dependency":false},{"name":"onnxruntime","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\onnxruntime-1.4.1\\\\","native_build":true,"dependencies":[],"dev_dependency":false}],"android":[{"name":"firebase_core","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_core-2.32.0\\\\","native_build":true,"dependencies":[],"dev_dependency":false},{"name":"firebase_database","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_database-10.5.7\\\\","native_build":true,"dependencies":["firebase_core"],"dev_dependency":false},{"name":"onnxruntime","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\onnxruntime-1.4.1\\\\","native_build":true,"dependencies":[],"dev_dependency":false}],"macos":[{"name":"firebase_core","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_core-2.32.0\\\\","native_build":true,"dependencies":[],"dev_dependency":false},{"name":"firebase_database","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_database-10.5.7\\\\","native_build":true,"dependencies":["firebase_core"],"dev_dependency":false},{"name":"onnxruntime","p
ath":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\onnxruntime-1.4.1\\\\","native_build":true,"dependencies":[],"dev_dependency":false}],"linux":[{"name":"onnxruntime","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\onnxruntime-1.4.1\\\\","native_build":true,"dependencies":[],"dev_dependency":false}],"windows":[{"name":"firebase_core","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_core-2.32.0\\\\","native_build":true,"dependencies":[],"dev_dependency":false},{"name":"onnxruntime","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\onnxruntime-1.4.1\\\\","native_build":true,"dependencies":[],"dev_dependency":false}],"web":[{"name":"firebase_core_web","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_core_web-2.24.0\\\\","dependencies":[],"dev_dependency":false},{"name":"firebase_database_web","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_database_web-0.2.5+7\\\\","dependencies":["firebase_core_web"],"dev_dependency":false}]},"dependencyGraph":[{"name":"firebase_core","dependencies":["firebase_core_web"]},{"name":"firebase_core_web","dependencies":[]},{"name":"firebase_database","dependencies":["firebase_core","firebase_database_web"]},{"name":"firebase_database_web","dependencies":["firebase_core","firebase_core_web"]},{"name":"onnxruntime","dependencies":[]}],"date_created":"2025-11-25 14:39:01.438376","version":"3.38.2","swift_package_manager_enabled":{"ios":false,"macos":false}} \ No newline at end of file +{"info":"This is a generated file; do not edit or check into version 
control.","plugins":{"ios":[{"name":"firebase_core","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_core-2.32.0/","native_build":true,"dependencies":[],"dev_dependency":false},{"name":"firebase_database","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_database-10.5.7/","native_build":true,"dependencies":["firebase_core"],"dev_dependency":false},{"name":"onnxruntime","path":"/home/jules/.pub-cache/hosted/pub.dev/onnxruntime-1.4.1/","native_build":true,"dependencies":[],"dev_dependency":false}],"android":[{"name":"firebase_core","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_core-2.32.0/","native_build":true,"dependencies":[],"dev_dependency":false},{"name":"firebase_database","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_database-10.5.7/","native_build":true,"dependencies":["firebase_core"],"dev_dependency":false},{"name":"onnxruntime","path":"/home/jules/.pub-cache/hosted/pub.dev/onnxruntime-1.4.1/","native_build":true,"dependencies":[],"dev_dependency":false}],"macos":[{"name":"firebase_core","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_core-2.32.0/","native_build":true,"dependencies":[],"dev_dependency":false},{"name":"firebase_database","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_database-10.5.7/","native_build":true,"dependencies":["firebase_core"],"dev_dependency":false},{"name":"onnxruntime","path":"/home/jules/.pub-cache/hosted/pub.dev/onnxruntime-1.4.1/","native_build":true,"dependencies":[],"dev_dependency":false}],"linux":[{"name":"onnxruntime","path":"/home/jules/.pub-cache/hosted/pub.dev/onnxruntime-1.4.1/","native_build":true,"dependencies":[],"dev_dependency":false}],"windows":[{"name":"firebase_core","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_core-2.32.0/","native_build":true,"dependencies":[],"dev_dependency":false},{"name":"onnxruntime","path":"/home/jules/.pub-cache/hosted/pub.dev/onnxruntime-1.4.1/","native_build":true,"dependencies":[],"dev_dependency":false}],"web":[{"name":"firebas
e_core_web","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_core_web-2.17.5/","dependencies":[],"dev_dependency":false},{"name":"firebase_database_web","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_database_web-0.2.5+7/","dependencies":["firebase_core_web"],"dev_dependency":false}]},"dependencyGraph":[{"name":"firebase_core","dependencies":["firebase_core_web"]},{"name":"firebase_core_web","dependencies":[]},{"name":"firebase_database","dependencies":["firebase_core","firebase_database_web"]},{"name":"firebase_database_web","dependencies":["firebase_core","firebase_core_web"]},{"name":"onnxruntime","dependencies":[]}],"date_created":"2026-04-16 22:26:44.479591","version":"3.41.2","swift_package_manager_enabled":{"ios":false,"macos":false}} \ No newline at end of file diff --git a/lib/isar_agent_memory.dart b/lib/isar_agent_memory.dart index 27b0206..e32dd75 100644 --- a/lib/isar_agent_memory.dart +++ b/lib/isar_agent_memory.dart @@ -12,6 +12,8 @@ export 'src/embeddings_adapter.dart'; export 'src/gemini_embeddings_adapter.dart'; export 'src/fallback_embeddings_adapter.dart'; export 'src/on_device_embeddings_adapter.dart'; +export 'src/embeddings/medical_embeddings_adapter.dart'; +export 'src/utils/medical_tokenizer.dart'; export 'src/vector_index.dart'; export 'src/vector_index_objectbox.dart'; export 'src/hierarchical_graph.dart'; diff --git a/lib/src/embeddings/medical_embeddings_adapter.dart b/lib/src/embeddings/medical_embeddings_adapter.dart new file mode 100644 index 0000000..bde440a --- /dev/null +++ b/lib/src/embeddings/medical_embeddings_adapter.dart @@ -0,0 +1,41 @@ +import 'package:isar_agent_memory/src/embeddings_adapter.dart'; +import 'package:isar_agent_memory/src/utils/medical_tokenizer.dart'; + +/// An [EmbeddingsAdapter] decorator that enhances medical text processing. +/// +/// It uses [MedicalTokenizer] to expand medical abbreviations (Spanish/English) +/// before passing the text to the underlying adapter. 
class MedicalEmbeddingsAdapter implements EmbeddingsAdapter {
  /// The wrapped adapter that produces the actual embedding vectors.
  final EmbeddingsAdapter inner;

  /// The tokenizer used to expand medical abbreviations before embedding.
  final MedicalTokenizer tokenizer;

  /// Creates a [MedicalEmbeddingsAdapter] wrapping an [inner] adapter.
  ///
  /// When [tokenizer] is omitted, a default [MedicalTokenizer] is created.
  MedicalEmbeddingsAdapter(this.inner, {MedicalTokenizer? tokenizer})
      : tokenizer = tokenizer ?? MedicalTokenizer();

  /// Same dimension as [inner]; expanding the text does not alter the vector
  /// size.
  @override
  int get dimension => inner.dimension;

  /// The provider name of [inner], wrapped as `medical_enhanced(...)`.
  @override
  String get providerName => 'medical_enhanced(${inner.providerName})';

  /// Expands medical abbreviations in [text], then delegates to
  /// [EmbeddingsAdapter.embed] on [inner].
  @override
  Future<List<double>> embed(String text) async {
    final expandedText = tokenizer.expandAbbreviations(text);
    return inner.embed(expandedText);
  }

  /// Expands medical abbreviations in [text], then delegates to
  /// [EmbeddingsAdapter.medicalNormalized] on [inner].
  ///
  /// The call is always forwarded to the inner adapter's `medicalNormalized`;
  /// whether that in turn falls back to `embed` is decided by the inner
  /// adapter, not here.
  @override
  Future<List<double>> medicalNormalized(String text) async {
    final expandedText = tokenizer.expandAbbreviations(text);
    return inner.medicalNormalized(expandedText);
  }
}
/// Generates a medical-domain embedding for [text], preferring `primary`.
///
/// `fallback` is used when `primary` throws, or when `primary` returns an
/// empty vector and `fallbackOnEmpty` is set. Errors raised by `fallback`
/// itself propagate to the caller.
@override
Future<List<double>> medicalNormalized(String text) async {
  final List<double> primaryVector;
  try {
    primaryVector = await primary.medicalNormalized(text);
  } catch (_) {
    // Best-effort by design: any primary failure is routed to the fallback.
    return fallback.medicalNormalized(text);
  }

  // Kept outside the try block on purpose: if the fallback throws here, the
  // catch above must not invoke it a second time.
  if (fallbackOnEmpty && primaryVector.isEmpty) {
    return fallback.medicalNormalized(text);
  }
  return primaryVector;
}
/// A utility for pre-processing medical text, specifically expanding
/// abbreviations. Supports both Spanish and English medical terms.
class MedicalTokenizer {
  /// Map of medical abbreviations to their full forms.
  /// Keys are lowercase; matching is case-insensitive.
  static const Map<String, String> _abbreviations = {
    // Spanish
    'ta': 'tensión arterial',
    'fc': 'frecuencia cardíaca',
    'spo2': 'saturación de oxígeno',
    'fr': 'frecuencia respiratoria',
    'tª': 'temperatura',
    'hta': 'hipertensión arterial',
    'dm': 'diabetes mellitus',
    'ecg': 'electrocardiograma',
    'rx': 'radiografía',
    'tac': 'tomografía axial computarizada',
    'rmn': 'resonancia magnética nuclear',
    'scq': 'superficie corporal quemada',
    'avd': 'actividades de la vida diaria',
    'ev': 'vía endovenosa',
    'im': 'vía intramuscular',
    'sc': 'vía subcutánea',
    'sl': 'vía sublingual',

    // English
    'bp': 'blood pressure',
    'hr': 'heart rate',
    'rr': 'respiratory rate',
    'temp': 'temperature',
    'htn': 'hypertension',
    'ekg': 'electrocardiogram',
    'ct': 'computed tomography',
    'mri': 'magnetic resonance imaging',
    'iv': 'intravenous',
    'icu': 'intensive care unit',
    'er': 'emergency room',
    'prn': 'pro re nata (as needed)',
    'bid': 'twice a day',
    'tid': 'three times a day',
    'qid': 'four times a day',
  };

  /// A character that counts as part of a word: any Unicode letter or digit,
  /// or an underscore. Accented letters and `ª` are letters here, unlike with
  /// the ASCII-only `\b` assertion.
  static const String _wordChar = r'[\p{L}\p{N}_]';

  /// The complement of [_wordChar].
  static const String _nonWordChar = r'[^\p{L}\p{N}_]';

  /// Matches any known abbreviation standing on its own.
  ///
  /// Group 1 is the single non-word character before the abbreviation (empty
  /// at the start of the text) and group 2 is the abbreviation itself. The
  /// trailing side is a lookahead, so the separator between two adjacent
  /// abbreviations (for example `BP/HR`) stays available as the leading
  /// character of the next match.
  static final RegExp _abbreviationPattern = RegExp(
    '(^|$_nonWordChar)(${_alternation()})(?!$_wordChar)',
    caseSensitive: false,
    unicode: true,
  );

  /// Builds the `a|b|c` alternation of all abbreviations, longest first so
  /// that a longer key is always tried before any shorter key it starts with.
  static String _alternation() {
    final keys = _abbreviations.keys.toList()
      ..sort((a, b) => b.length.compareTo(a.length));
    return keys.map(RegExp.escape).join('|');
  }

  /// Expands abbreviations in the given [text].
  ///
  /// Handles the Spanish and English abbreviations defined in
  /// [_abbreviations]. Matching is case-insensitive and only applies to
  /// abbreviations that are not embedded in a longer word, so `TA` is
  /// expanded while `taza`, `frío` and `imágenes` are left untouched. The
  /// inserted expansion is always the lowercase form stored in the map.
  ///
  /// Returns [text] unchanged when it is empty or contains no abbreviation.
  String expandAbbreviations(String text) {
    if (text.isEmpty) return text;

    return text.replaceAllMapped(_abbreviationPattern, (match) {
      final leading = match.group(1)!;
      final abbreviation = match.group(2)!;
      // Unicode case folding may match a spelling whose lowercase form is not
      // a map key; such a match is left as written instead of throwing.
      final expansion =
          _abbreviations[abbreviation.toLowerCase()] ?? abbreviation;
      return '$leading$expansion';
    });
  }
}
ICU'), + contains('intensive care unit')); + }); + + test('handles case insensitivity', () { + expect(tokenizer.expandAbbreviations('ta'), contains('tensión arterial')); + expect(tokenizer.expandAbbreviations('TA'), contains('tensión arterial')); + }); + + test('uses word boundaries to avoid partial matches', () { + // 'ta' is an abbreviation, but 'taza' contains 'ta'. It should not be expanded. + expect(tokenizer.expandAbbreviations('taza'), equals('taza')); + expect(tokenizer.expandAbbreviations('estadio'), equals('estadio')); + }); + + test('handles special character Tª', () { + expect(tokenizer.expandAbbreviations('Tª de 38ºC'), + contains('temperatura')); + }); + }); + + group('MedicalEmbeddingsAdapter', () { + test('expands text before calling inner adapter', () async { + final mock = MockEmbeddingsAdapter(); + final adapter = MedicalEmbeddingsAdapter(mock); + + await adapter.embed('Paciente con HTA'); + expect(mock.lastText, contains('hipertensión arterial')); + + await adapter.medicalNormalized('TA normal'); + expect(mock.lastText, contains('normalized:tensión arterial normal')); + }); + + test('preserves dimension and provider name', () { + final mock = MockEmbeddingsAdapter(); + final adapter = MedicalEmbeddingsAdapter(mock); + + expect(adapter.dimension, equals(3)); + expect(adapter.providerName, contains('medical_enhanced(mock)')); + }); + }); +}