-
Notifications
You must be signed in to change notification settings - Fork 1
Medical Domain Embeddings Adapter #45
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1 +1 @@ | ||
| {"info":"This is a generated file; do not edit or check into version control.","plugins":{"ios":[{"name":"firebase_core","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_core-2.32.0\\\\","native_build":true,"dependencies":[],"dev_dependency":false},{"name":"firebase_database","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_database-10.5.7\\\\","native_build":true,"dependencies":["firebase_core"],"dev_dependency":false},{"name":"onnxruntime","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\onnxruntime-1.4.1\\\\","native_build":true,"dependencies":[],"dev_dependency":false}],"android":[{"name":"firebase_core","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_core-2.32.0\\\\","native_build":true,"dependencies":[],"dev_dependency":false},{"name":"firebase_database","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_database-10.5.7\\\\","native_build":true,"dependencies":["firebase_core"],"dev_dependency":false},{"name":"onnxruntime","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\onnxruntime-1.4.1\\\\","native_build":true,"dependencies":[],"dev_dependency":false}],"macos":[{"name":"firebase_core","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_core-2.32.0\\\\","native_build":true,"dependencies":[],"dev_dependency":false},{"name":"firebase_database","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_database-10.5.7\\\\","native_build":true,"dependencies":["firebase_core"],"dev_dependency":false},{"name":"onnxruntime","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\onnxruntime-1.4.1\\\\","native_build":true,"dependencies":[],"dev_dependency":false}],"linux":[{"name
":"onnxruntime","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\onnxruntime-1.4.1\\\\","native_build":true,"dependencies":[],"dev_dependency":false}],"windows":[{"name":"firebase_core","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_core-2.32.0\\\\","native_build":true,"dependencies":[],"dev_dependency":false},{"name":"onnxruntime","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\onnxruntime-1.4.1\\\\","native_build":true,"dependencies":[],"dev_dependency":false}],"web":[{"name":"firebase_core_web","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_core_web-2.24.0\\\\","dependencies":[],"dev_dependency":false},{"name":"firebase_database_web","path":"C:\\\\Users\\\\belal\\\\AppData\\\\Local\\\\Pub\\\\Cache\\\\hosted\\\\pub.dev\\\\firebase_database_web-0.2.5+7\\\\","dependencies":["firebase_core_web"],"dev_dependency":false}]},"dependencyGraph":[{"name":"firebase_core","dependencies":["firebase_core_web"]},{"name":"firebase_core_web","dependencies":[]},{"name":"firebase_database","dependencies":["firebase_core","firebase_database_web"]},{"name":"firebase_database_web","dependencies":["firebase_core","firebase_core_web"]},{"name":"onnxruntime","dependencies":[]}],"date_created":"2025-11-25 14:39:01.438376","version":"3.38.2","swift_package_manager_enabled":{"ios":false,"macos":false}} | ||
| {"info":"This is a generated file; do not edit or check into version control.","plugins":{"ios":[{"name":"firebase_core","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_core-2.32.0/","native_build":true,"dependencies":[],"dev_dependency":false},{"name":"firebase_database","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_database-10.5.7/","native_build":true,"dependencies":["firebase_core"],"dev_dependency":false},{"name":"onnxruntime","path":"/home/jules/.pub-cache/hosted/pub.dev/onnxruntime-1.4.1/","native_build":true,"dependencies":[],"dev_dependency":false}],"android":[{"name":"firebase_core","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_core-2.32.0/","native_build":true,"dependencies":[],"dev_dependency":false},{"name":"firebase_database","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_database-10.5.7/","native_build":true,"dependencies":["firebase_core"],"dev_dependency":false},{"name":"onnxruntime","path":"/home/jules/.pub-cache/hosted/pub.dev/onnxruntime-1.4.1/","native_build":true,"dependencies":[],"dev_dependency":false}],"macos":[{"name":"firebase_core","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_core-2.32.0/","native_build":true,"dependencies":[],"dev_dependency":false},{"name":"firebase_database","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_database-10.5.7/","native_build":true,"dependencies":["firebase_core"],"dev_dependency":false},{"name":"onnxruntime","path":"/home/jules/.pub-cache/hosted/pub.dev/onnxruntime-1.4.1/","native_build":true,"dependencies":[],"dev_dependency":false}],"linux":[{"name":"onnxruntime","path":"/home/jules/.pub-cache/hosted/pub.dev/onnxruntime-1.4.1/","native_build":true,"dependencies":[],"dev_dependency":false}],"windows":[{"name":"firebase_core","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_core-2.32.0/","native_build":true,"dependencies":[],"dev_dependency":false},{"name":"onnxruntime","path":"/home/jules/.pub-cache/hosted/pub.dev/onnxruntime-1.4.1/","native_build":
true,"dependencies":[],"dev_dependency":false}],"web":[{"name":"firebase_core_web","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_core_web-2.17.5/","dependencies":[],"dev_dependency":false},{"name":"firebase_database_web","path":"/home/jules/.pub-cache/hosted/pub.dev/firebase_database_web-0.2.5+7/","dependencies":["firebase_core_web"],"dev_dependency":false}]},"dependencyGraph":[{"name":"firebase_core","dependencies":["firebase_core_web"]},{"name":"firebase_core_web","dependencies":[]},{"name":"firebase_database","dependencies":["firebase_core","firebase_database_web"]},{"name":"firebase_database_web","dependencies":["firebase_core","firebase_core_web"]},{"name":"onnxruntime","dependencies":[]}],"date_created":"2026-04-16 22:26:44.479591","version":"3.41.2","swift_package_manager_enabled":{"ios":false,"macos":false}} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| import 'package:isar_agent_memory/src/embeddings_adapter.dart'; | ||
| import 'package:isar_agent_memory/src/utils/medical_tokenizer.dart'; | ||
|
|
||
| /// Decorator around an [EmbeddingsAdapter] that pre-processes medical text. | ||
| /// | ||
| /// Every input is first run through [MedicalTokenizer.expandAbbreviations] | ||
| /// (Spanish/English abbreviations) and the expanded text is then forwarded to | ||
| /// the wrapped adapter. | ||
| class MedicalEmbeddingsAdapter implements EmbeddingsAdapter { | ||
| /// Wraps [inner]; a default [MedicalTokenizer] is created when [tokenizer] | ||
| /// is omitted or null. | ||
| MedicalEmbeddingsAdapter(this.inner, {MedicalTokenizer? tokenizer}) | ||
| : tokenizer = tokenizer ?? MedicalTokenizer(); | ||
|
|
||
| /// The adapter that actually produces the embeddings. | ||
| final EmbeddingsAdapter inner; | ||
|
|
||
| /// Expands medical abbreviations before text reaches [inner]. | ||
| final MedicalTokenizer tokenizer; | ||
|
|
||
| /// Same dimension as the wrapped adapter. | ||
| @override | ||
| int get dimension => inner.dimension; | ||
|
|
||
| /// The wrapped adapter's provider name, tagged as medically enhanced. | ||
| @override | ||
| String get providerName => 'medical_enhanced(${inner.providerName})'; | ||
|
|
||
| /// Expands abbreviations in [text], then delegates to [inner]'s `embed`. | ||
| @override | ||
| Future<List<double>> embed(String text) async => | ||
| inner.embed(tokenizer.expandAbbreviations(text)); | ||
|
|
||
| /// Expands abbreviations in [text], then delegates to [inner]'s | ||
| /// `medicalNormalized`. | ||
| @override | ||
| Future<List<double>> medicalNormalized(String text) async => | ||
| inner.medicalNormalized(tokenizer.expandAbbreviations(text)); | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,77 @@ | ||
| /// A utility for pre-processing medical text, specifically expanding abbreviations. | ||
| /// Supports both Spanish and English medical terms. | ||
| class MedicalTokenizer { | ||
| /// Map of medical abbreviations to their full forms. | ||
| /// Keys are lowercase; tokens are lowercased before lookup, which makes | ||
| /// matching case-insensitive. | ||
| static const Map<String, String> _abbreviations = { | ||
| // Spanish | ||
| 'ta': 'tensión arterial', | ||
| 'fc': 'frecuencia cardíaca', | ||
| 'spo2': 'saturación de oxígeno', | ||
| 'fr': 'frecuencia respiratoria', | ||
| 'tª': 'temperatura', | ||
| 'hta': 'hipertensión arterial', | ||
| 'dm': 'diabetes mellitus', | ||
| 'ecg': 'electrocardiograma', | ||
| 'rx': 'radiografía', | ||
| 'tac': 'tomografía axial computarizada', | ||
| 'rmn': 'resonancia magnética nuclear', | ||
| 'scq': 'superficie corporal quemada', | ||
| 'avd': 'actividades de la vida diaria', | ||
| 'ev': 'vía endovenosa', | ||
| 'im': 'vía intramuscular', | ||
| 'sc': 'vía subcutánea', | ||
| 'sl': 'vía sublingual', | ||
|
|
||
| // English | ||
| 'bp': 'blood pressure', | ||
| 'hr': 'heart rate', | ||
| 'rr': 'respiratory rate', | ||
| 'temp': 'temperature', | ||
| 'htn': 'hypertension', | ||
| 'ekg': 'electrocardiogram', | ||
| 'ct': 'computed tomography', | ||
| 'mri': 'magnetic resonance imaging', | ||
| 'iv': 'intravenous', | ||
| 'icu': 'intensive care unit', | ||
| 'er': 'emergency room', | ||
| 'prn': 'pro re nata (as needed)', | ||
| 'bid': 'twice a day', | ||
| 'tid': 'three times a day', | ||
| 'qid': 'four times a day', | ||
| }; | ||
|
|
||
| /// Matches one whole token: a maximal run of Unicode letters, combining | ||
| /// marks, digits and underscores. | ||
| /// | ||
| /// Compiled once and shared by all instances. The `unicode` flag matters: | ||
| /// an ASCII-only `\b` treats accented letters as word boundaries, so a word | ||
| /// such as "frágil" would have its "fr" prefix expanded. 'ª' is a Unicode | ||
| /// letter, so "Tª" is matched as a single token and needs no special case. | ||
| static final RegExp _tokenPattern = | ||
| RegExp(r'[\p{L}\p{M}\p{N}_]+', unicode: true); | ||
|
|
||
| /// Expands abbreviations in the given [text]. | ||
| /// | ||
| /// Each whole token of [text] whose lowercase form is a key of | ||
| /// [_abbreviations] is replaced by its full form; every other character is | ||
| /// returned unchanged. Replacement happens in a single pass, so text | ||
| /// inserted by one expansion is never expanded again. | ||
| /// | ||
| /// Returns [text] itself when it is empty. | ||
| String expandAbbreviations(String text) { | ||
| if (text.isEmpty) return text; | ||
|
|
||
| return text.replaceAllMapped(_tokenPattern, (match) { | ||
| final token = match.group(0)!; | ||
| // Tokens that are not known abbreviations are kept verbatim. | ||
| return _abbreviations[token.toLowerCase()] ?? token; | ||
| }); | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| import 'package:test/test.dart'; | ||
| import 'package:isar_agent_memory/isar_agent_memory.dart'; | ||
|
|
||
| /// Test double for [EmbeddingsAdapter] that records the text it was last | ||
| /// asked to embed and always answers with the same 3-element vector. | ||
| class MockEmbeddingsAdapter implements EmbeddingsAdapter { | ||
| /// Text received by the most recent call; `medicalNormalized` stores it | ||
| /// with a `normalized:` prefix so tests can tell the two entry points apart. | ||
| String lastText = ''; | ||
|
|
||
| /// Builds a fresh copy of the canned embedding for every call. | ||
| List<double> _cannedVector() => [1.0, 2.0, 3.0]; | ||
|
|
||
| @override | ||
| String get providerName => 'mock'; | ||
|
|
||
| @override | ||
| int get dimension => 3; | ||
|
|
||
| @override | ||
| Future<List<double>> embed(String text) async { | ||
| lastText = text; | ||
| return _cannedVector(); | ||
| } | ||
|
|
||
| @override | ||
| Future<List<double>> medicalNormalized(String text) async { | ||
| lastText = 'normalized:$text'; | ||
| return _cannedVector(); | ||
| } | ||
| } | ||
|
|
||
| /// Test suite for [MedicalTokenizer] and [MedicalEmbeddingsAdapter]. | ||
| void main() { | ||
| group('MedicalTokenizer', () { | ||
| final tokenizer = MedicalTokenizer(); | ||
|
|
||
| // For each entry, expanding the key must yield text containing the value. | ||
| void expectExpansions(Map<String, String> cases) { | ||
| cases.forEach((input, expected) { | ||
| expect(tokenizer.expandAbbreviations(input), contains(expected)); | ||
| }); | ||
| } | ||
|
|
||
| test('expands Spanish abbreviations', () { | ||
| expectExpansions({ | ||
| 'El paciente tiene TA alta': 'tensión arterial', | ||
| 'FC: 80 lpm': 'frecuencia cardíaca', | ||
| 'SpO2 al 98%': 'saturación de oxígeno', | ||
| 'Se solicita TAC de tórax': 'tomografía axial computarizada', | ||
| }); | ||
| }); | ||
|
|
||
| test('expands English abbreviations', () { | ||
| expectExpansions({ | ||
| 'Patient BP is normal': 'blood pressure', | ||
| 'HR: 72 bpm': 'heart rate', | ||
| 'Admitted to ICU': 'intensive care unit', | ||
| }); | ||
| }); | ||
|
|
||
| test('handles case insensitivity', () { | ||
| expectExpansions({ | ||
| 'ta': 'tensión arterial', | ||
| 'TA': 'tensión arterial', | ||
| }); | ||
| }); | ||
|
|
||
| test('uses word boundaries to avoid partial matches', () { | ||
| // 'ta' is an abbreviation, but words that merely contain it (such as | ||
| // 'taza') must come back unchanged. | ||
| for (final word in ['taza', 'estadio']) { | ||
| expect(tokenizer.expandAbbreviations(word), equals(word)); | ||
| } | ||
| }); | ||
|
|
||
| test('handles special character Tª', () { | ||
| expectExpansions({'Tª de 38ºC': 'temperatura'}); | ||
| }); | ||
| }); | ||
|
|
||
| group('MedicalEmbeddingsAdapter', () { | ||
| test('expands text before calling inner adapter', () async { | ||
| final recorder = MockEmbeddingsAdapter(); | ||
| final wrapped = MedicalEmbeddingsAdapter(recorder); | ||
|
|
||
| await wrapped.embed('Paciente con HTA'); | ||
| expect(recorder.lastText, contains('hipertensión arterial')); | ||
|
|
||
| await wrapped.medicalNormalized('TA normal'); | ||
| expect(recorder.lastText, contains('normalized:tensión arterial normal')); | ||
| }); | ||
|
|
||
| test('preserves dimension and provider name', () { | ||
| final wrapped = MedicalEmbeddingsAdapter(MockEmbeddingsAdapter()); | ||
|
|
||
| expect(wrapped.dimension, equals(3)); | ||
| expect(wrapped.providerName, contains('medical_enhanced(mock)')); | ||
| }); | ||
| }); | ||
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `expandAbbreviations` method is inefficient because it re-calculates the sorted keys and re-compiles multiple `RegExp` objects on every call. This can lead to performance degradation when processing large texts or when called frequently.
Consider pre-calculating the sorted keys and caching the compiled regular expressions as static members of the class. This avoids redundant work and improves the overall performance of the tokenizer.