From 6abda2d4e9052a908f8f74c13a4569bb4c367374 Mon Sep 17 00:00:00 2001 From: Raj Patel Date: Thu, 6 Nov 2025 21:35:14 +0530 Subject: [PATCH 1/2] Repair malformed media columns --- src/formpack/utils/bugfix.py | 99 +++++++++++++++++++++------- src/formpack/utils/expand_content.py | 4 +- tests/test_bugfix.py | 35 +++++++++- 3 files changed, 110 insertions(+), 28 deletions(-) diff --git a/src/formpack/utils/bugfix.py b/src/formpack/utils/bugfix.py index face387..01f4d50 100644 --- a/src/formpack/utils/bugfix.py +++ b/src/formpack/utils/bugfix.py @@ -5,13 +5,15 @@ from ..constants import UNTRANSLATED -def repair_file_column_content_in_place(content) -> bool: +def repair_media_column_content_in_place(content) -> bool: """ #321 introduced a bug where the `file` column gets renamed to `media::file` and treated as a translatable column (see #322). This function repairs that damage. - This function is intended to be run by KPI, which should it to correct + It also handles other `media::` columns similarly affected by the same bug. + + This function is intended to be run by KPI, which should use it to correct `Asset` and `AssetVersion` content. Returns `True` if any change was made @@ -27,47 +29,96 @@ def repair_file_column_content_in_place(content) -> bool: # Do not proceed if content is incomplete return False - try: - updates['translated'].remove('media::file') - except ValueError: - # The invalid column `media::file` inside `translated` is a hallmark of - # the problem this method is intended to fix. Do not proceed if it was - # not found + media_tokens = [ + t for t in updates['translated'] if isinstance(t, str) and t.startswith('media::') + ] + if not media_tokens: + # No media:: in translated, nothing to do return False + # Remove the media tokens from translated + for token in media_tokens: + try: + updates['translated'].remove(token) + except ValueError: + pass + max_label_list_length = 0 any_row_fixed = False for row in updates['survey']: max_label_list_length = max( max_label_list_length, len(row.get('label', [])) ) - bad_file_col = row.get('media::file') - if not bad_file_col: - continue - if not isinstance(bad_file_col, list): - # All problems of our own making (#322) will result in - # `media::file` being a list (or array when JSON) - continue - for val in bad_file_col: - if val is not None: - row['file'] = val - del row['media::file'] + + # For each media token, handle its value: + # - If value is a list, pick first non-None and migrate it. + # - If scalar, migrate it. + for media_tok in media_tokens: + bad_val = row.get(media_tok) + if bad_val is None: + continue + + # If it's a list (the broken shape), prefer the first non-None element + if isinstance(bad_val, list): + non_nulls = [v for v in bad_val if v is not None] + if not non_nulls: + try: + del row[media_tok] + except KeyError: + pass + any_row_fixed = True + continue + + chosen = non_nulls[0] + short_key = media_tok.split('::', 1)[1] + row[short_key] = chosen + try: + del row[media_tok] + except KeyError: + pass any_row_fixed = True - break + continue + + # Scalar value: migrate it directly + short_key = media_tok.split('::', 1)[1] + row[short_key] = bad_val + try: + del row[media_tok] + except KeyError: + pass + any_row_fixed = True + if not any_row_fixed: return False # Multi-language forms need an additional fix to remove a superfluous null - # translation added by the bogus `media::file` column + # translation added by the bogus `media::*` column(s) if len(updates['translations']) > max_label_list_length: labels_translations_mismatch = True if len(updates['translations']) == max_label_list_length + 1: try: - updates['translations'].remove(UNTRANSLATED) + if UNTRANSLATED is not None: + updates['translations'].remove(UNTRANSLATED) + labels_translations_mismatch = False + else: + # If sentinel not present, remove a media tail name + tails = [m.split('::', 1)[1] for m in media_tokens] + removed = False + for t in tails: + try: + updates['translations'].remove(t) + removed = True + break + except ValueError: + continue + if removed: + labels_translations_mismatch = False + else: + updates['translations'].pop() + labels_translations_mismatch = False except ValueError: pass - else: - labels_translations_mismatch = False # Fixed it! + if labels_translations_mismatch: # This form has uncorrected problems. Bail out instead of modifying # it at all diff --git a/src/formpack/utils/expand_content.py b/src/formpack/utils/expand_content.py index 852e50b..2bdbc2e 100644 --- a/src/formpack/utils/expand_content.py +++ b/src/formpack/utils/expand_content.py @@ -39,7 +39,7 @@ def _expand_translatable_content( special_column_details: Dict[str, Optional[str]], ) -> None: _scd = special_column_details - if 'translation' in _scd: + if 'translation' in _scd and _scd['translation'] is not UNTRANSLATED: translations = content['translations'] cur_translation = _scd['translation'] cur_translation_index = translations.index(cur_translation) @@ -87,7 +87,7 @@ def _get_translations_from_special_cols( ) -> Tuple[List[str], Set[str]]: translated_cols = [] for colname, parsedvals in iter(special_cols.items()): - if 'translation' in parsedvals: + if 'translation' in parsedvals and parsedvals['translation'] is not UNTRANSLATED: translated_cols.append(parsedvals['column']) if parsedvals['translation'] not in translations: translations.append(parsedvals['translation']) diff --git a/tests/test_bugfix.py b/tests/test_bugfix.py index 8b98e6c..a026ef6 100644 --- a/tests/test_bugfix.py +++ b/tests/test_bugfix.py @@ -1,6 +1,8 @@ # coding: utf-8 +from copy import deepcopy -from formpack.utils.bugfix import repair_file_column_content_in_place +from formpack.constants import UNTRANSLATED +from formpack.utils.bugfix import repair_media_column_content_in_place def test_repair_file_column(): @@ -26,7 +28,36 @@ def test_repair_file_column(): 'translated': ['label', 'media::file'], 'translations': ['Ukrainian', 'English', None], } - assert repair_file_column_content_in_place(content) + assert repair_media_column_content_in_place(content) assert content['survey'][1]['file'] == 'oblast.csv' assert content['translated'] == ['label'] assert content['translations'] == ['Ukrainian', 'English'] + + +def test_repair_media_image_none_plus_value_and_translations(): + content = { + 'schema': '1', + 'settings': {}, + 'survey': [ + { + 'label': ['Código de participante'], + 'name': 'repro_media', + 'type': 'text', + 'media::image': [None, 'repro_test.png'], + '$xpath': 'repro_media', + } + ], + 'translated': ['label', 'media::image'], + 'translations': [UNTRANSLATED, 'big-image'], + } + content_copy = deepcopy(content) + + assert repair_media_column_content_in_place(content_copy) is True + assert 'image' in content_copy['survey'][0] + assert content_copy['survey'][0]['image'] == 'repro_test.png' + + # 'media::image' token must be removed from translated + assert 'media::image' not in content_copy['translated'] + assert 'label' in content_copy['translated'] + resulting_translations = content_copy['translations'] + assert resulting_translations in (['big-image'], [UNTRANSLATED], [None]) From 376224bedf54ed668f90f7950597e0572ae7bcae Mon Sep 17 00:00:00 2001 From: Raj Patel Date: Fri, 14 Nov 2025 21:36:09 +0530 Subject: [PATCH 2/2] Handle big-image as valid media type and prevent translation mismatches during export --- src/formpack/constants.py | 2 +- src/formpack/utils/bugfix.py | 99 +++++++------------------- src/formpack/utils/expand_content.py | 4 +- tests/test_big_image_media_handling.py | 71 ++++++++++++++++++ tests/test_bugfix.py | 35 +-------- 5 files changed, 100 insertions(+), 111 deletions(-) create mode 100644 tests/test_big_image_media_handling.py diff --git a/src/formpack/constants.py b/src/formpack/constants.py index 1069d7a..608b013 100644 --- a/src/formpack/constants.py +++ b/src/formpack/constants.py @@ -57,7 +57,7 @@ # video or audio files # … # Media is translatable in the same way as labels and hints… -MEDIA_COLUMN_NAMES = ('audio', 'image', 'video') +MEDIA_COLUMN_NAMES = ('audio', 'image', 'video', 'big-image') # Export Settings EXPORT_SETTING_FIELDS = 'fields' diff --git a/src/formpack/utils/bugfix.py b/src/formpack/utils/bugfix.py index 01f4d50..face387 100644 --- a/src/formpack/utils/bugfix.py +++ b/src/formpack/utils/bugfix.py @@ -5,15 +5,13 @@ from ..constants import UNTRANSLATED -def repair_media_column_content_in_place(content) -> bool: +def repair_file_column_content_in_place(content) -> bool: """ #321 introduced a bug where the `file` column gets renamed to `media::file` and treated as a translatable column (see #322). This function repairs that damage. - It also handles other `media::` columns similarly affected by the same bug. - - This function is intended to be run by KPI, which should use it to correct + This function is intended to be run by KPI, which should it to correct `Asset` and `AssetVersion` content. Returns `True` if any change was made @@ -29,96 +27,47 @@ def repair_media_column_content_in_place(content) -> bool: # Do not proceed if content is incomplete return False - media_tokens = [ - t for t in updates['translated'] if isinstance(t, str) and t.startswith('media::') - ] - if not media_tokens: - # No media:: in translated, nothing to do + try: + updates['translated'].remove('media::file') + except ValueError: + # The invalid column `media::file` inside `translated` is a hallmark of + # the problem this method is intended to fix. Do not proceed if it was + # not found return False - # Remove the media tokens from translated - for token in media_tokens: - try: - updates['translated'].remove(token) - except ValueError: - pass - max_label_list_length = 0 any_row_fixed = False for row in updates['survey']: max_label_list_length = max( max_label_list_length, len(row.get('label', [])) ) - - # For each media token, handle its value: - # - If value is a list, pick first non-None and migrate it. - # - If scalar, migrate it. - for media_tok in media_tokens: - bad_val = row.get(media_tok) - if bad_val is None: - continue - - # If it's a list (the broken shape), prefer the first non-None element - if isinstance(bad_val, list): - non_nulls = [v for v in bad_val if v is not None] - if not non_nulls: - try: - del row[media_tok] - except KeyError: - pass - any_row_fixed = True - continue - - chosen = non_nulls[0] - short_key = media_tok.split('::', 1)[1] - row[short_key] = chosen - try: - del row[media_tok] - except KeyError: - pass + bad_file_col = row.get('media::file') + if not bad_file_col: + continue + if not isinstance(bad_file_col, list): + # All problems of our own making (#322) will result in + # `media::file` being a list (or array when JSON) + continue + for val in bad_file_col: + if val is not None: + row['file'] = val + del row['media::file'] any_row_fixed = True - continue - - # Scalar value: migrate it directly - short_key = media_tok.split('::', 1)[1] - row[short_key] = bad_val - try: - del row[media_tok] - except KeyError: - pass - any_row_fixed = True - + break if not any_row_fixed: return False # Multi-language forms need an additional fix to remove a superfluous null - # translation added by the bogus `media::*` column(s) + # translation added by the bogus `media::file` column if len(updates['translations']) > max_label_list_length: labels_translations_mismatch = True if len(updates['translations']) == max_label_list_length + 1: try: - if UNTRANSLATED is not None: - updates['translations'].remove(UNTRANSLATED) - labels_translations_mismatch = False - else: - # If sentinel not present, remove a media tail name - tails = [m.split('::', 1)[1] for m in media_tokens] - removed = False - for t in tails: - try: - updates['translations'].remove(t) - removed = True - break - except ValueError: - continue - if removed: - labels_translations_mismatch = False - else: - updates['translations'].pop() - labels_translations_mismatch = False + updates['translations'].remove(UNTRANSLATED) except ValueError: pass - + else: + labels_translations_mismatch = False # Fixed it! if labels_translations_mismatch: # This form has uncorrected problems. Bail out instead of modifying # it at all diff --git a/src/formpack/utils/expand_content.py b/src/formpack/utils/expand_content.py index 2bdbc2e..852e50b 100644 --- a/src/formpack/utils/expand_content.py +++ b/src/formpack/utils/expand_content.py @@ -39,7 +39,7 @@ def _expand_translatable_content( special_column_details: Dict[str, Optional[str]], ) -> None: _scd = special_column_details - if 'translation' in _scd and _scd['translation'] is not UNTRANSLATED: + if 'translation' in _scd: translations = content['translations'] cur_translation = _scd['translation'] cur_translation_index = translations.index(cur_translation) @@ -87,7 +87,7 @@ def _get_translations_from_special_cols( ) -> Tuple[List[str], Set[str]]: translated_cols = [] for colname, parsedvals in iter(special_cols.items()): - if 'translation' in parsedvals and parsedvals['translation'] is not UNTRANSLATED: + if 'translation' in parsedvals: translated_cols.append(parsedvals['column']) if parsedvals['translation'] not in translations: translations.append(parsedvals['translation']) diff --git a/tests/test_big_image_media_handling.py b/tests/test_big_image_media_handling.py new file mode 100644 index 0000000..45333f8 --- /dev/null +++ b/tests/test_big_image_media_handling.py @@ -0,0 +1,71 @@ +import pytest + +from formpack import FormPack +from formpack.errors import TranslationError +from formpack.utils.expand_content import expand_content + + +def test_big_image_with_other_media_types_works_correctly(): + content = { + 'survey': [ + { + 'type': 'select_one', + 'name': 'q_multi_media', + 'label': 'Question with multiple media', + 'media::image': 'small.png', + 'big-image': 'large.png', + 'select_from_list_name': 'opts', + } + ], + 'choices': [ + {'list_name': 'opts', 'name': 'a', 'label': 'Option A'}, + ], + 'translations': [None], + } + + expand_content(content, in_place=True) + + # All media should be in translated + translated = content.get('translated', []) + assert 'media::image' in translated + assert 'media::big-image' in translated + + # Only NONE should be in translations + assert 'big-image' not in content['translations'] + assert 'image' not in content['translations'] + + # FormPack should process without errors + fp = FormPack([{'content': content, 'version': 1}], 'Test') + assert len(fp.versions) == 1 + + +def test_formpack_raises_error_when_big_image_not_in_media_names(monkeypatch): + """ + Test that FormPack raises an error when `big-image` is used + but not included in media names + """ + content = { + 'survey': [ + { + 'type': 'select_one', + 'name': 'q_with_big_image', + 'label': 'Question with big image', + 'big-image': 'large.png', + 'select_from_list_name': 'opts', + } + ], + 'choices': [ + {'list_name': 'opts', 'name': 'a', 'label': 'Option A'}, + ], + 'translations': [None], + } + + # Patch MEDIA_COLUMN_NAMES to exclude 'big-image' + monkeypatch.setattr( + 'formpack.utils.expand_content.MEDIA_COLUMN_NAMES', + ('image', 'audio', 'video') + ) + expand_content(content, in_place=True) + + with pytest.raises(TranslationError): + FormPack([{'content': content, 'version': 1}], 'Test') diff --git a/tests/test_bugfix.py b/tests/test_bugfix.py index a026ef6..8b98e6c 100644 --- a/tests/test_bugfix.py +++ b/tests/test_bugfix.py @@ -1,8 +1,6 @@ # coding: utf-8 -from copy import deepcopy -from formpack.constants import UNTRANSLATED -from formpack.utils.bugfix import repair_media_column_content_in_place +from formpack.utils.bugfix import repair_file_column_content_in_place def test_repair_file_column(): @@ -28,36 +26,7 @@ def test_repair_file_column(): 'translated': ['label', 'media::file'], 'translations': ['Ukrainian', 'English', None], } - assert repair_media_column_content_in_place(content) + assert repair_file_column_content_in_place(content) assert content['survey'][1]['file'] == 'oblast.csv' assert content['translated'] == ['label'] assert content['translations'] == ['Ukrainian', 'English'] - - -def test_repair_media_image_none_plus_value_and_translations(): - content = { - 'schema': '1', - 'settings': {}, - 'survey': [ - { - 'label': ['Código de participante'], - 'name': 'repro_media', - 'type': 'text', - 'media::image': [None, 'repro_test.png'], - '$xpath': 'repro_media', - } - ], - 'translated': ['label', 'media::image'], - 'translations': [UNTRANSLATED, 'big-image'], - } - content_copy = deepcopy(content) - - assert repair_media_column_content_in_place(content_copy) is True - assert 'image' in content_copy['survey'][0] - assert content_copy['survey'][0]['image'] == 'repro_test.png' - - # 'media::image' token must be removed from translated - assert 'media::image' not in content_copy['translated'] - assert 'label' in content_copy['translated'] - resulting_translations = content_copy['translations'] - assert resulting_translations in (['big-image'], [UNTRANSLATED], [None])