diff --git a/airbyte_cdk/manifest_migrations/migrations/__init__.py b/airbyte_cdk/manifest_migrations/migrations/__init__.py index 4937a4853..87f7b7523 100644 --- a/airbyte_cdk/manifest_migrations/migrations/__init__.py +++ b/airbyte_cdk/manifest_migrations/migrations/__init__.py @@ -8,6 +8,9 @@ from airbyte_cdk.manifest_migrations.migrations.http_requester_request_body_json_data_to_request_body import ( HttpRequesterRequestBodyJsonDataToRequestBody, ) +from airbyte_cdk.manifest_migrations.migrations.http_requester_request_body_plain_text_json_to_request_body_json import ( + HttpRequesterRequestBodyPlainTextJsonToRequestBodyJson, +) from airbyte_cdk.manifest_migrations.migrations.http_requester_url_base_to_url import ( HttpRequesterUrlBaseToUrl, ) @@ -15,5 +18,6 @@ __all__ = [ "HttpRequesterPathToUrl", "HttpRequesterRequestBodyJsonDataToRequestBody", + "HttpRequesterRequestBodyPlainTextJsonToRequestBodyJson", "HttpRequesterUrlBaseToUrl", ] diff --git a/airbyte_cdk/manifest_migrations/migrations/http_requester_request_body_plain_text_json_to_request_body_json.py b/airbyte_cdk/manifest_migrations/migrations/http_requester_request_body_plain_text_json_to_request_body_json.py new file mode 100644 index 000000000..11dd1c5e1 --- /dev/null +++ b/airbyte_cdk/manifest_migrations/migrations/http_requester_request_body_plain_text_json_to_request_body_json.py @@ -0,0 +1,83 @@ +# +# Copyright (c) 2025 Airbyte, Inc., all rights reserved. +# + + +from airbyte_cdk.manifest_migrations.manifest_migration import ( + TYPE_TAG, + ManifestMigration, + ManifestType, +) + + +class HttpRequesterRequestBodyPlainTextJsonToRequestBodyJson(ManifestMigration): + """Migrate `RequestBodyPlainText` with JSON-like content to `RequestBodyJsonObject`. + + The Connector Builder UI sometimes generates `request_body: {type: RequestBodyPlainText, ...}` + for raw JSON string bodies (Jinja templates containing JSON). After CDK v7.17.1 (PR #971), + `RequestBodyPlainText` is correctly routed to `request_body_data` (form-encoded), but this + broke connectors where the Builder had misclassified JSON content as plain text. + + This migration detects `RequestBodyPlainText` where the value is a JSON-like string and + converts it to `RequestBodyJsonObject` with a string value. `RequestBodyJsonObject` now + accepts both dict and string values, and routes through `InterpolatedNestedRequestInputProvider` + which correctly handles string templates containing JSON. + """ + + component_type = "HttpRequester" + request_body_key = "request_body" + plain_text_type = "RequestBodyPlainText" + json_object_type = "RequestBodyJsonObject" + + def should_migrate(self, manifest: ManifestType) -> bool: + if manifest.get(TYPE_TAG) != self.component_type: + return False + request_body = manifest.get(self.request_body_key) + if not isinstance(request_body, dict): + return False + if request_body.get("type") != self.plain_text_type: + return False + value = request_body.get("value") + if not isinstance(value, str): + return False + return self._is_json_like(value) + + def migrate(self, manifest: ManifestType) -> None: + request_body = manifest[self.request_body_key] + request_body["type"] = self.json_object_type + + def validate(self, manifest: ManifestType) -> bool: + request_body = manifest.get(self.request_body_key) + if not isinstance(request_body, dict): + return False + is_json_object_with_string = ( + request_body.get("type") == self.json_object_type + and isinstance(request_body.get("value"), str) + and self._is_json_like(request_body.get("value", "")) + ) + is_plain_text_json = ( + request_body.get("type") == self.plain_text_type + and isinstance(request_body.get("value"), str) + and self._is_json_like(request_body.get("value", "")) + ) + return is_json_object_with_string and not is_plain_text_json + + @staticmethod + def _is_json_like(value: str) -> bool: + """Check if a string value looks like JSON content. + + Returns `True` when the stripped value starts with `{` or `[`, excluding + Jinja expression openers (`{{`) and Jinja block openers (`{%`). + """ + stripped = value.strip() + if not stripped: + return False + if stripped.startswith("["): + return True + if ( + stripped.startswith("{") + and not stripped.startswith("{{") + and not stripped.startswith("{%") + ): + return True + return False diff --git a/airbyte_cdk/manifest_migrations/migrations/registry.yaml b/airbyte_cdk/manifest_migrations/migrations/registry.yaml index 393f499d7..524a9af08 100644 --- a/airbyte_cdk/manifest_migrations/migrations/registry.yaml +++ b/airbyte_cdk/manifest_migrations/migrations/registry.yaml @@ -20,3 +20,10 @@ manifest_migrations: description: | This migration updates the `request_body_json_data` field in the `http_requester` spec to `request_body`. The `request_body_json_data` field is deprecated and will be removed in a future version. + - name: http_requester_request_body_plain_text_json_to_request_body_json + order: 4 + description: | + This migration converts `RequestBodyPlainText` with JSON-like string content to + `RequestBodyJsonObject` with a string value. The Connector Builder UI sometimes + misclassifies JSON string bodies as plain text, which after CDK v7.17.1 routes them + to form-encoded `request_body_data` instead of JSON. diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index 82e345ed2..e36df2863 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -4699,7 +4699,7 @@ definitions: type: string RequestBodyJsonObject: title: Json Object Body - description: Request body value converted into a JSON object + description: Request body value converted into a JSON object. Can be a dict or a Jinja-interpolated string that evaluates to a JSON object at runtime. type: object required: - type @@ -4709,8 +4709,10 @@ definitions: type: string enum: [RequestBodyJsonObject] value: - type: object - additionalProperties: true + anyOf: + - type: object + additionalProperties: true + - type: string RequestBodyGraphQL: title: GraphQL Body description: Request body value converted into a GraphQL query object diff --git a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index 8bd7b1146..da023c8af 100644 --- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -1401,7 +1401,7 @@ class RequestBodyUrlEncodedForm(BaseModel): class RequestBodyJsonObject(BaseModel): type: Literal["RequestBodyJsonObject"] - value: Dict[str, Any] + value: Union[Dict[str, Any], str] class RequestBodyGraphQlQuery(BaseModel): diff --git a/unit_tests/manifest_migrations/conftest.py b/unit_tests/manifest_migrations/conftest.py index fe9ecf04d..647a3911c 100644 --- a/unit_tests/manifest_migrations/conftest.py +++ b/unit_tests/manifest_migrations/conftest.py @@ -1206,6 +1206,243 @@ def expected_manifest_with_migrated_to_request_body() -> Dict[str, Any]: } +@pytest.fixture +def manifest_with_request_body_plain_text_json() -> Dict[str, Any]: + return { + "version": "0.0.0", + "type": "DeclarativeSource", + "check": { + "type": "CheckStream", + "stream_names": ["A"], + }, + "definitions": { + "streams": { + "A": { + "type": "DeclarativeStream", + "name": "A", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "url": "https://api.example.com/v1/search", + "http_method": "POST", + "request_body": { + "type": "RequestBodyPlainText", + "value": '{"sort": [{"field": "createdAt", "order": "ASC"}], "filter": [{"type": "equals", "field": "active", "value": "true"}]}', + }, + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/A"}, + }, + }, + "B": { + "type": "DeclarativeStream", + "name": "B", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "url": "https://api.example.com/v1/query", + "http_method": "POST", + "request_body": { + "type": "RequestBodyPlainText", + "value": "plain text body that is not JSON", + }, + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/B"}, + }, + }, + }, + }, + "streams": [ + {"$ref": "#/definitions/streams/A"}, + {"$ref": "#/definitions/streams/B"}, + ], + "schemas": { + "A": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_a1": {"type": "string"}}, + }, + "B": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_b1": {"type": "string"}}, + }, + }, + } + + +@pytest.fixture +def expected_manifest_with_plain_text_json_migrated() -> Dict[str, Any]: + return { + "version": "0.0.0", + "type": "DeclarativeSource", + "check": {"type": "CheckStream", "stream_names": ["A"]}, + "definitions": { + "streams": { + "A": { + "type": "DeclarativeStream", + "name": "A", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "url": "https://api.example.com/v1/search", + "http_method": "POST", + "request_body": { + "type": "RequestBodyJsonObject", + "value": '{"sort": [{"field": "createdAt", "order": "ASC"}], "filter": [{"type": "equals", "field": "active", "value": "true"}]}', + }, + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_a1": {"type": "string"}}, + }, + }, + }, + "B": { + "type": "DeclarativeStream", + "name": "B", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "url": "https://api.example.com/v1/query", + "http_method": "POST", + "request_body": { + "type": "RequestBodyPlainText", + "value": "plain text body that is not JSON", + }, + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_b1": {"type": "string"}}, + }, + }, + }, + }, + }, + "streams": [ + { + "type": "DeclarativeStream", + "name": "A", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "url": "https://api.example.com/v1/search", + "http_method": "POST", + "request_body": { + "type": "RequestBodyJsonObject", + "value": '{"sort": [{"field": "createdAt", "order": "ASC"}], "filter": [{"type": "equals", "field": "active", "value": "true"}]}', + }, + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_a1": {"type": "string"}}, + }, + }, + }, + { + "type": "DeclarativeStream", + "name": "B", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "url": "https://api.example.com/v1/query", + "http_method": "POST", + "request_body": { + "type": "RequestBodyPlainText", + "value": "plain text body that is not JSON", + }, + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_b1": {"type": "string"}}, + }, + }, + }, + ], + "schemas": { + "A": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_a1": {"type": "string"}}, + }, + "B": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_b1": {"type": "string"}}, + }, + }, + "metadata": { + "applied_migrations": [ + { + "from_version": "0.0.0", + "to_version": "*", + "migration": "HttpRequesterRequestBodyPlainTextJsonToRequestBodyJson", + "migrated_at": "2025-04-01T00:00:00+00:00", + }, + ] + }, + } + + class DummyMigration(ManifestMigration): def _process_manifest(self, manifest): self.is_migrated = False diff --git a/unit_tests/manifest_migrations/test_manifest_migration.py b/unit_tests/manifest_migrations/test_manifest_migration.py index c5e2cebcc..40dce429b 100644 --- a/unit_tests/manifest_migrations/test_manifest_migration.py +++ b/unit_tests/manifest_migrations/test_manifest_migration.py @@ -12,6 +12,7 @@ from airbyte_cdk.manifest_migrations.migrations import ( HttpRequesterPathToUrl, HttpRequesterRequestBodyJsonDataToRequestBody, + HttpRequesterRequestBodyPlainTextJsonToRequestBodyJson, HttpRequesterUrlBaseToUrl, ) from airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver import ( @@ -69,6 +70,24 @@ def test_manifest_resolve_migrate_request_body_json_and_data_to_request_body( assert migrated_manifest == expected_manifest_with_migrated_to_request_body +@freeze_time("2025-04-01") +@patch.dict( + migrations_registry.MANIFEST_MIGRATIONS, + {"*": [HttpRequesterRequestBodyPlainTextJsonToRequestBodyJson]}, + clear=True, +) +def test_manifest_migrate_request_body_plain_text_json_to_request_body_json( + manifest_with_request_body_plain_text_json, + expected_manifest_with_plain_text_json_migrated, +) -> None: + """Verify that RequestBodyPlainText with JSON content is migrated to RequestBodyJsonObject with string value.""" + + resolved_manifest = resolver.preprocess_manifest(manifest_with_request_body_plain_text_json) + migrated_manifest = ManifestMigrationHandler(dict(resolved_manifest)).apply_migrations() + + assert migrated_manifest == expected_manifest_with_plain_text_json_migrated + + @freeze_time("2025-04-01") @patch.dict(migrations_registry.MANIFEST_MIGRATIONS, {"0.0.0": [DummyMigration]}, clear=True) def test_manifest_resolve_do_not_migrate( diff --git a/unit_tests/manifest_migrations/test_request_body_plain_text_json_migration.py b/unit_tests/manifest_migrations/test_request_body_plain_text_json_migration.py new file mode 100644 index 000000000..f4dd04acd --- /dev/null +++ b/unit_tests/manifest_migrations/test_request_body_plain_text_json_migration.py @@ -0,0 +1,243 @@ +# +# Copyright (c) 2025 Airbyte, Inc., all rights reserved. +# + +import pytest + +from airbyte_cdk.manifest_migrations.migrations.http_requester_request_body_plain_text_json_to_request_body_json import ( + HttpRequesterRequestBodyPlainTextJsonToRequestBodyJson, +) + + +@pytest.mark.parametrize( + "manifest,expected", + [ + pytest.param( + { + "type": "HttpRequester", + "request_body": { + "type": "RequestBodyPlainText", + "value": '{"sort": [{"field": "createdAt"}], "filter": []}', + }, + }, + True, + id="json_object_string", + ), + pytest.param( + { + "type": "HttpRequester", + "request_body": { + "type": "RequestBodyPlainText", + "value": '[{"field": "createdAt"}]', + }, + }, + True, + id="json_array_string", + ), + pytest.param( + { + "type": "HttpRequester", + "request_body": { + "type": "RequestBodyPlainText", + "value": ' {"sort": [{"field": "{{ config[\'sort_field\'] }}"}]} ', + }, + }, + True, + id="json_with_jinja_and_whitespace", + ), + pytest.param( + { + "type": "HttpRequester", + "request_body": { + "type": "RequestBodyPlainText", + "value": "plain text body content", + }, + }, + False, + id="actual_plain_text", + ), + pytest.param( + { + "type": "HttpRequester", + "request_body": { + "type": "RequestBodyPlainText", + "value": "interpolate_me=5&option={{ config['option'] }}", + }, + }, + False, + id="url_encoded_form_string", + ), + pytest.param( + { + "type": "HttpRequester", + "request_body": { + "type": "RequestBodyPlainText", + "value": "{{ config['body'] }}", + }, + }, + False, + id="jinja_expression_not_json", + ), + pytest.param( + { + "type": "HttpRequester", + "request_body": { + "type": "RequestBodyPlainText", + "value": "{% if true %}body{% endif %}", + }, + }, + False, + id="jinja_block_tag_not_json", + ), + pytest.param( + { + "type": "HttpRequester", + "request_body": { + "type": "RequestBodyJsonObject", + "value": {"key": "value"}, + }, + }, + False, + id="json_object_type_not_plain_text", + ), + pytest.param( + { + "type": "HttpRequester", + "request_body": { + "type": "RequestBodyUrlEncodedForm", + "value": {"key": "value"}, + }, + }, + False, + id="url_encoded_type_not_plain_text", + ), + pytest.param( + { + "type": "HttpRequester", + }, + False, + id="no_request_body", + ), + pytest.param( + { + "type": "HttpRequester", + "request_body": { + "type": "RequestBodyPlainText", + "value": "", + }, + }, + False, + id="empty_plain_text_value", + ), + pytest.param( + { + "type": "SomeOtherComponent", + "request_body": { + "type": "RequestBodyPlainText", + "value": '{"key": "value"}', + }, + }, + False, + id="wrong_component_type", + ), + ], +) +def test_should_migrate(manifest, expected): + migration = HttpRequesterRequestBodyPlainTextJsonToRequestBodyJson() + assert migration.should_migrate(manifest) == expected + + +@pytest.mark.parametrize( + "manifest,expected_manifest", + [ + pytest.param( + { + "type": "HttpRequester", + "url": "https://api.example.com/search", + "request_body": { + "type": "RequestBodyPlainText", + "value": '{"sort": [{"field": "createdAt"}], "filter": []}', + }, + }, + { + "type": "HttpRequester", + "url": "https://api.example.com/search", + "request_body": { + "type": "RequestBodyJsonObject", + "value": '{"sort": [{"field": "createdAt"}], "filter": []}', + }, + }, + id="json_object_migrated_to_request_body_json_object", + ), + pytest.param( + { + "type": "HttpRequester", + "url": "https://api.example.com/search", + "request_body": { + "type": "RequestBodyPlainText", + "value": '{"sort": [{"field": "{{ config[\'sort_field\'] }}"}]}', + }, + }, + { + "type": "HttpRequester", + "url": "https://api.example.com/search", + "request_body": { + "type": "RequestBodyJsonObject", + "value": '{"sort": [{"field": "{{ config[\'sort_field\'] }}"}]}', + }, + }, + id="json_with_jinja_migrated_to_request_body_json_object", + ), + pytest.param( + { + "type": "HttpRequester", + "url": "https://api.example.com/search", + "request_body": { + "type": "RequestBodyPlainText", + "value": '[{"id": 1}, {"id": 2}]', + }, + }, + { + "type": "HttpRequester", + "url": "https://api.example.com/search", + "request_body": { + "type": "RequestBodyJsonObject", + "value": '[{"id": 1}, {"id": 2}]', + }, + }, + id="json_array_migrated_to_request_body_json_object", + ), + ], +) +def test_migrate(manifest, expected_manifest): + migration = HttpRequesterRequestBodyPlainTextJsonToRequestBodyJson() + assert migration.should_migrate(manifest) is True + migration.migrate(manifest) + assert manifest == expected_manifest + assert migration.validate(manifest) is True + + +def test_validate_after_migration(): + """Validate returns True when migration was applied correctly.""" + manifest = { + "type": "HttpRequester", + "request_body": { + "type": "RequestBodyJsonObject", + "value": '{"key": "value"}', + }, + } + migration = HttpRequesterRequestBodyPlainTextJsonToRequestBodyJson() + assert migration.validate(manifest) is True + + +def test_validate_before_migration(): + """Validate returns False when migration has not been applied yet.""" + manifest = { + "type": "HttpRequester", + "request_body": { + "type": "RequestBodyPlainText", + "value": '{"key": "value"}', + }, + } + migration = HttpRequesterRequestBodyPlainTextJsonToRequestBodyJson() + assert migration.validate(manifest) is False