From 6235a56ca0117ec1cd3bbe450d8cceaa8c705fb7 Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Tue, 26 May 2026 13:25:44 -0700 Subject: [PATCH 01/19] Add support for REMOTE UDFs. --- .../54a20886-bbd3-484f-837e-888b57bed997.json | 1 + .../gcp/spanner/SpannerResourceManager.java | 3 + .../spanner/AvroSchemaToDdlConverter.java | 5 +- .../cloud/teleport/spanner/AvroUtil.java | 1 + .../spanner/DdlToAvroSchemaConverter.java | 14 ++- .../spanner/ddl/InformationSchemaScanner.java | 84 ++++++++++++-- .../cloud/teleport/spanner/ddl/Udf.java | 95 +++++++++++++-- .../spanner/AvroSchemaToDdlConverterTest.java | 108 +++++++++++++++++- .../cloud/teleport/spanner/CopyDbTest.java | 58 ++++++++++ .../spanner/DdlToAvroSchemaConverterTest.java | 102 +++++++++++++++-- .../teleport/spanner/ExportPipelineIT.java | 44 ++++--- .../teleport/spanner/ImportPipelineIT.java | 16 +++ .../cloud/teleport/spanner/ddl/DdlTest.java | 88 +++++++++++++- .../ddl/InformationSchemaScannerIT.java | 53 ++++++++- .../ddl/InformationSchemaScannerTest.java | 2 +- .../spanner/ddl/RandomDdlGenerator.java | 31 ++++- .../cloud/teleport/spanner/ddl/UdfTest.java | 56 +++++++++ .../ExportPipelineIT/spanner-gsql-ddl.sql | 43 ++++--- .../ExportPipelineIT/spanner-pg-ddl.sql | 34 +++--- .../googlesql/UdfSchema-manifest.json | 6 + .../googlesql/UdfSchema.Remote-manifest.json | 6 + .../googlesql/UdfSchema.Remote.avro | Bin 0 -> 592 bytes .../ImportPipelineIT/googlesql/UdfSchema.avro | Bin 0 -> 236 bytes .../googlesql/spanner-export.json | 7 ++ .../postgres/UdfSchema-manifest.json | 6 + .../ImportPipelineIT/postgres/UdfSchema.avro | Bin 0 -> 236 bytes .../postgres/spanner-export.json | 3 + 27 files changed, 770 insertions(+), 96 deletions(-) create mode 120000 .jetskicli/54a20886-bbd3-484f-837e-888b57bed997.json create mode 100644 v1/src/test/resources/ImportPipelineIT/googlesql/UdfSchema-manifest.json create mode 100644 v1/src/test/resources/ImportPipelineIT/googlesql/UdfSchema.Remote-manifest.json create mode 
100644 v1/src/test/resources/ImportPipelineIT/googlesql/UdfSchema.Remote.avro create mode 100644 v1/src/test/resources/ImportPipelineIT/googlesql/UdfSchema.avro create mode 100644 v1/src/test/resources/ImportPipelineIT/postgres/UdfSchema-manifest.json create mode 100644 v1/src/test/resources/ImportPipelineIT/postgres/UdfSchema.avro diff --git a/.jetskicli/54a20886-bbd3-484f-837e-888b57bed997.json b/.jetskicli/54a20886-bbd3-484f-837e-888b57bed997.json new file mode 120000 index 0000000000..64bc587b0b --- /dev/null +++ b/.jetskicli/54a20886-bbd3-484f-837e-888b57bed997.json @@ -0,0 +1 @@ +/usr/local/google/home/adrw/.gemini/config/projects/54a20886-bbd3-484f-837e-888b57bed997.json \ No newline at end of file diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/spanner/SpannerResourceManager.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/spanner/SpannerResourceManager.java index 7be2e5195a..77ca372143 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/spanner/SpannerResourceManager.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/spanner/SpannerResourceManager.java @@ -49,6 +49,8 @@ import com.google.monitoring.v3.Aggregation.Aligner; import com.google.monitoring.v3.TimeInterval; import com.google.protobuf.Timestamp; +import com.google.spanner.admin.instance.v1.Instance.Edition; + import dev.failsafe.Failsafe; import dev.failsafe.RetryPolicy; import java.time.Duration; @@ -193,6 +195,7 @@ private synchronized void maybeCreateInstance() { InstanceInfo.newBuilder(InstanceId.of(projectId, instanceId)) .setInstanceConfigId(InstanceConfigId.of(projectId, "regional-" + region)) .setDisplayName(instanceId) + .setEdition(Edition.ENTERPRISE_PLUS) // Needed by Full Text Search. 
.setNodeCount(nodeCount) .build(); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/AvroSchemaToDdlConverter.java b/v1/src/main/java/com/google/cloud/teleport/spanner/AvroSchemaToDdlConverter.java index cf6ae4fda6..561b15155a 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/AvroSchemaToDdlConverter.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/AvroSchemaToDdlConverter.java @@ -52,6 +52,7 @@ import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_SEQUENCE_SKIP_RANGE_MIN; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF_DEFINITION; +import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF_LANGUAGE; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF_NAME; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF_PARAMETER; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF_SECURITY; @@ -167,7 +168,9 @@ public Udf toUdf(String functionSpecificName, Schema schema) { .specificName(functionSpecificName) .name(schema.getProp(SPANNER_UDF_NAME)) .type(schema.getProp(SPANNER_UDF_TYPE)) - .definition(schema.getProp(SPANNER_UDF_DEFINITION)); + .language(schema.getProp(SPANNER_UDF_LANGUAGE)) + .definition(schema.getProp(SPANNER_UDF_DEFINITION)) + .options(toOptionsList(schema)); if (schema.getProp(SPANNER_UDF_SECURITY) != null) { builder.security(Udf.SqlSecurity.valueOf(schema.getProp(SPANNER_UDF_SECURITY))); } diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/AvroUtil.java b/v1/src/main/java/com/google/cloud/teleport/spanner/AvroUtil.java index 7ba212247b..4c3e6447a7 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/AvroUtil.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/AvroUtil.java @@ -55,6 +55,7 @@ private AvroUtil() {} public static final String SPANNER_UDF = "spannerUdf"; public static final String SPANNER_UDF_NAME = 
"spannerUdfName"; public static final String SPANNER_UDF_TYPE = "spannerUdfType"; + public static final String SPANNER_UDF_LANGUAGE = "spannerUdfLanguage"; public static final String SPANNER_UDF_DEFINITION = "spannerUdfDefinition"; public static final String SPANNER_UDF_SECURITY = "spannerUdfSecurity"; public static final String SPANNER_UDF_PARAMETER = "spannerUdfParameter_"; diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/DdlToAvroSchemaConverter.java b/v1/src/main/java/com/google/cloud/teleport/spanner/DdlToAvroSchemaConverter.java index c2c4b92433..4a9306dd59 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/DdlToAvroSchemaConverter.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/DdlToAvroSchemaConverter.java @@ -55,6 +55,7 @@ import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_SEQUENCE_SKIP_RANGE_MIN; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF_DEFINITION; +import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF_LANGUAGE; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF_NAME; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF_PARAMETER; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF_SECURITY; @@ -134,10 +135,15 @@ public Collection convert(Ddl ddl) { // Indicate that this is a "CREATE FUNCTION", not a table or a view. 
recordBuilder.prop(SPANNER_ENTITY, SPANNER_UDF); recordBuilder.prop(SPANNER_UDF_NAME, udf.name()); - recordBuilder.prop(SPANNER_UDF_DEFINITION, udf.definition()); + if (udf.definition() != null) { + recordBuilder.prop(SPANNER_UDF_DEFINITION, udf.definition()); + } if (udf.type() != null) { recordBuilder.prop(SPANNER_UDF_TYPE, udf.type()); } + if (udf.language() != null) { + recordBuilder.prop(SPANNER_UDF_LANGUAGE, udf.language()); + } if (udf.security() != null) { recordBuilder.prop(SPANNER_UDF_SECURITY, udf.security().toString()); } @@ -145,6 +151,12 @@ public Collection convert(Ddl ddl) { for (UdfParameter udfParameter : udf.parameters()) { recordBuilder.prop(SPANNER_UDF_PARAMETER + i++, udfParameter.prettyPrint()); } + if (udf.options() != null) { + for (int j = 0; j < udf.options().size(); j++) { + recordBuilder.prop(SPANNER_OPTION + j, udf.options().get(j)); + } + } + schemas.add(recordBuilder.fields().endRecord()); } for (Table table : ddl.allTables()) { diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java index b35c3cde61..853714b61a 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java @@ -90,6 +90,7 @@ public Ddl scan() { if (isUdfSupported()) { listUdfs(builder); listUdfParameters(builder); + listUdfOptions(builder); } listColumns(builder); listColumnOptions(builder); @@ -1032,13 +1033,20 @@ private void listUdfs(Ddl.Builder builder) { case GOOGLE_STANDARD_SQL: queryStatement = Statement.of( - "SELECT r.routine_schema, r.routine_name, r.specific_schema, r.specific_name, " - + "r.data_type, r.routine_definition, r.security_type" + "SELECT r.routine_schema, r.routine_name, r.specific_schema, r.specific_name," + + " r.data_type, r.routine_body, r.routine_definition, r.security_type" + " FROM 
information_schema.routines AS r" - + " WHERE r.routine_schema NOT IN" - + " ('INFORMATION_SCHEMA', 'SPANNER_SYS')" - + " AND r.routine_type = 'FUNCTION'" - + " AND r.routine_body = 'SQL'"); + + " WHERE r.routine_schema NOT IN ('INFORMATION_SCHEMA', 'SPANNER_SYS')" + + " AND r.routine_type = 'FUNCTION'"); + break; + case POSTGRESQL: + queryStatement = + Statement.of( + "SELECT r.routine_schema, r.routine_name, r.specific_schema, r.specific_name," + + " r.data_type, r.routine_body, r.routine_definition, r.security_type" + + " FROM information_schema.routines AS r WHERE" + + " r.routine_schema NOT IN ('information_schema', 'spanner_sys', 'pg_catalog')" + + " AND r.routine_type = 'FUNCTION'"); break; default: throw new IllegalArgumentException( @@ -1055,13 +1063,22 @@ private void listUdfs(Ddl.Builder builder) { String functionSpecificName = getQualifiedName(resultSet.getString(2), resultSet.getString(3)); String functionType = resultSet.isNull(4) ? null : resultSet.getString(4); - String functionDefinition = resultSet.isNull(5) ? null : resultSet.getString(5); - String functionSecurityType = resultSet.isNull(6) ? null : resultSet.getString(6); + String language = resultSet.isNull(5) ? null : resultSet.getString(5); + String functionDefinition = resultSet.isNull(6) ? null : resultSet.getString(6); + String functionSecurityType = resultSet.isNull(7) ? null : resultSet.getString(7); + + // The routine_body is SQL or EXTERNAL and the external_language is not available yet. + // Assume that only available EXTERNAL language is REMOTE. 
+ if (dialect == Dialect.POSTGRESQL && "EXTERNAL".equalsIgnoreCase(language)) { + language = "REMOTE"; + } + LOG.debug("Schema user-defined function {}", functionName); builder .createUdf(functionSpecificName) .name(functionName) .type(functionType) + .language(language) .definition(functionDefinition) .security(Udf.SqlSecurity.valueOf(functionSecurityType)) .endUdf(); @@ -1095,16 +1112,59 @@ private void listUdfParameters(Ddl.Builder builder) { } } + private void listUdfOptions(Ddl.Builder builder) { + // PostgreSQL doesn't have ROUTINE_OPTIONS table. It uses AS DEFINITION for options. + if (dialect == Dialect.POSTGRESQL) { + return; + } + ResultSet resultSet = + context.executeQuery( + Statement.of( + "SELECT t.SPECIFIC_SCHEMA, t.SPECIFIC_NAME, t.OPTION_NAME, t.OPTION_TYPE," + + " t.OPTION_VALUE FROM information_schema.routine_options AS t WHERE" + + " t.SPECIFIC_SCHEMA NOT IN ('INFORMATION_SCHEMA', 'SPANNER_SYS') ORDER BY" + + " t.SPECIFIC_NAME, t.OPTION_NAME")); + + Map> allOptions = Maps.newHashMap(); + while (resultSet.next()) { + String specificName = getQualifiedName(resultSet.getString(0), resultSet.getString(1)); + String optionName = resultSet.getString(2); + String optionType = resultSet.getString(3); + String optionValue = resultSet.getString(4); + + ImmutableList.Builder options = + allOptions.computeIfAbsent(specificName, k -> ImmutableList.builder()); + + if (optionType.equalsIgnoreCase("STRING")) { + options.add( + optionName + + "=" + + GSQL_LITERAL_QUOTE + + OPTION_STRING_ESCAPER.escape(optionValue) + + GSQL_LITERAL_QUOTE); + } else { + options.add(optionName + "=" + optionValue); + } + } + + for (Map.Entry> entry : allOptions.entrySet()) { + String specificName = entry.getKey(); + ImmutableList options = entry.getValue().build(); + builder.createUdf(specificName).options(options).endUdf(); + } + } + @VisibleForTesting Statement listFunctionParametersSQL() { switch (dialect) { case GOOGLE_STANDARD_SQL: return Statement.of( "SELECT 
p.specific_schema, p.specific_name, p.parameter_name, p.data_type," - + " p.parameter_default FROM information_schema.parameters AS p, information_schema.routines AS r" - + " WHERE p.specific_schema NOT IN ('INFORMATION_SCHEMA', 'SPANNER_SYS') and p.specific_name =" - + " r.specific_name and r.routine_type = 'FUNCTION' and r.routine_body = 'SQL' ORDER BY p.specific_schema," - + " p.specific_name, p.ordinal_position"); + + " p.parameter_default FROM information_schema.parameters AS p," + + " information_schema.routines AS r WHERE p.specific_schema NOT IN" + + " ('INFORMATION_SCHEMA', 'SPANNER_SYS') and p.specific_name = r.specific_name and" + + " r.routine_type = 'FUNCTION' ORDER BY" + + " p.specific_schema, p.specific_name, p.ordinal_position"); default: throw new IllegalArgumentException("Unrecognized dialect: " + dialect); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java index e4a91653c3..daa28fedc7 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java @@ -21,6 +21,8 @@ import com.google.cloud.spanner.Dialect; import com.google.common.collect.ImmutableList; import com.google.common.collect.Maps; +import com.google.common.escape.Escaper; +import com.google.common.escape.Escapers; import java.io.IOException; import java.io.Serializable; import java.util.LinkedHashMap; @@ -32,6 +34,11 @@ public abstract class Udf implements Serializable { private static final long serialVersionUID = 1L; + // Remote function body is printed using $$ strings, which are + // unlikely but possible to be present in the function definition. 
+ // https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-ESCAPE + public static final Escaper PG_REMOTE_UDF_BODY_ESCAPER = Escapers.builder().addEscape('$', "\\044").build(); + /** The access rights used by the UDF for underlying data: invoker-rights or definer-rights. */ public enum SqlSecurity { INVOKER, @@ -57,11 +64,16 @@ public enum SqlSecurity { @Nullable public abstract String definition(); + @Nullable + public abstract String language(); + @Nullable public abstract SqlSecurity security(); public abstract ImmutableList parameters(); + public abstract ImmutableList options(); + public void prettyPrint(Appendable appendable) throws IOException { appendable.append("CREATE FUNCTION ").append(quoteIdentifier(name(), dialect())); appendable.append("("); @@ -77,14 +89,67 @@ public void prettyPrint(Appendable appendable) throws IOException { if (type() != null) { appendable.append(" RETURNS ").append(type()); } - SqlSecurity rights = security(); - if (rights != null) { - appendable.append(" SQL SECURITY ").append(rights.toString()); + + // Determinism should be added to INFORMATION_SCHEMA.ROUTINES. + // For now, we infer it from the language. + if (language() != null && language().equalsIgnoreCase("REMOTE")) { + String determinism; + switch (dialect()) { + case GOOGLE_STANDARD_SQL: + determinism = "NOT DETERMINISTIC"; + break; + case POSTGRESQL: + determinism = "VOLATILE"; + break; + default: + throw new IllegalArgumentException(String.format("Unrecognized Dialect: %s.", dialect())); + } + appendable.append(" ").append(determinism); } - if (definition() != null) { - appendable.append(" AS ("); - appendable.append(definition()); - appendable.append(")"); + + if (language() != null && !language().isEmpty()) { + // GSQL does not accept LANGUAGE SQL even though it reports it. 
+ if (dialect() != Dialect.GOOGLE_STANDARD_SQL || !language().equalsIgnoreCase("SQL")) { + appendable.append(" LANGUAGE ").append(language()); + } + } + + if (security() != null) { + // Remote UDFs don't use SQL SECURITY, but it is marked NOT NULL in INFORMATION_SCHEMA. + if (!"REMOTE".equalsIgnoreCase(language())) { + appendable.append(" SQL SECURITY ").append(security().toString()); + } + } + + if (!options().isEmpty()) { + switch (dialect()) { + case GOOGLE_STANDARD_SQL: + appendable.append(" OPTIONS (").append(String.join(", ", options())).append(")"); + break; + case POSTGRESQL: + throw new IllegalArgumentException( + "Options are not supported in PostgreSQL dialect for non-remote UDFs."); + default: + throw new IllegalArgumentException(String.format("Unrecognized Dialect: %s.", dialect())); + } + } + + if (definition() != null && !definition().isEmpty()) { + switch (dialect()) { + case GOOGLE_STANDARD_SQL: + appendable.append(" AS (").append(definition()).append(")"); + break; + case POSTGRESQL: + if (language() == null || language().isEmpty() || "SQL".equalsIgnoreCase(language())) { + appendable.append(" RETURN ").append(definition()); + } else { + // Other languages use AS definition instead of a SQL body.
+ appendable.append(" AS $$").append(PG_REMOTE_UDF_BODY_ESCAPER.escape(definition())).append("$$"); + } + break; + default: + throw new IllegalArgumentException(String.format("Unrecognized Dialect: %s.", dialect())); + } } } @@ -113,6 +178,10 @@ public Builder toBuilder() { if (type() != null) { builder.type(type()); } + if (language() != null) { + builder.language(language()); + } + builder.options(options()); if (definition() != null) { builder.definition(definition()); } @@ -126,7 +195,7 @@ public Builder toBuilder() { } public static Builder builder(Dialect dialect) { - return new AutoValue_Udf.Builder().dialect(dialect).parameters(ImmutableList.of()); + return new AutoValue_Udf.Builder().dialect(dialect).parameters(ImmutableList.of()).options(ImmutableList.of()); } public static Builder builder() { @@ -165,10 +234,18 @@ public Builder ddlBuilder(Ddl.Builder ddlBuilder) { public abstract String definition(); + public abstract Builder language(String language); + + public abstract String language(); + public abstract Builder security(SqlSecurity rights); public abstract SqlSecurity security(); + public abstract Builder options(ImmutableList options); + + public abstract ImmutableList options(); + public abstract Builder parameters(ImmutableList parameters); public ImmutableList parameters() { @@ -208,7 +285,9 @@ public Udf build() { .dialect(dialect()) .type(type()) .definition(definition()) + .language(language()) .security(security()) + .options(options()) .parameters(ImmutableList.copyOf(parameters())) .autoBuild(); } diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/AvroSchemaToDdlConverterTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/AvroSchemaToDdlConverterTest.java index 912e54ebe4..f3761c0e68 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/AvroSchemaToDdlConverterTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/AvroSchemaToDdlConverterTest.java @@ -21,15 +21,17 @@ import static 
org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; -import com.google.cloud.spanner.Dialect; -import com.google.cloud.teleport.spanner.common.Type; -import com.google.cloud.teleport.spanner.ddl.Ddl; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; + import org.apache.avro.Schema; import org.junit.Test; +import com.google.cloud.spanner.Dialect; +import com.google.cloud.teleport.spanner.common.Type; +import com.google.cloud.teleport.spanner.ddl.Ddl; + /** Tests {@link AvroSchemaToDdlConverter}. */ public class AvroSchemaToDdlConverterTest { @@ -857,7 +859,7 @@ public void udfSimple() { } @Test - public void udfAllOptions() { + public void udfSqlAllOptions() { String avroString = "{" + " \"type\" : \"record\"," @@ -887,6 +889,104 @@ public void udfAllOptions() { + " RETURNS STRING SQL SECURITY INVOKER AS (SELECT 1)")); } + @Test + public void pgUdfSqlAllOptions() { + String avroString = + "{" + + " \"type\" : \"record\"," + + " \"name\" : \"spanner.Foo\"," + + " \"spannerEntity\" : \"spannerUdf\", " + + " \"fields\" : []," + + " \"namespace\" : \"spannertest\"," + + " \"googleStorage\" : \"CloudSpanner\"," + + " \"googleFormatVersion\" : \"booleans\"," + + " \"spannerUdfName\" : \"Foo\"," + + " \"spannerUdfType\" : \"TEXT\"," + + " \"spannerUdfSecurity\" : \"INVOKER\"," + + " \"spannerUdfParameter_0\" : \"arg0 TEXT\"," + + " \"spannerUdfParameter_1\" : \"arg1 TEXT DEFAULT \\\"bar\\\"\"," + + " \"spannerUdfDefinition\" : \"SELECT 1\"" + + "}"; + + Schema schema = new Schema.Parser().parse(avroString); + + AvroSchemaToDdlConverter converter = new AvroSchemaToDdlConverter(Dialect.POSTGRESQL); + Ddl ddl = converter.toDdl(Collections.singleton(schema)); + assertThat(ddl.udfs(), hasSize(1)); + assertThat( + ddl.prettyPrint(), + equalToCompressingWhiteSpace( + "CREATE FUNCTION \"Foo\"(\"arg0\" TEXT, \"arg1\" TEXT DEFAULT \"bar\")" + + " RETURNS TEXT SQL SECURITY INVOKER RETURN SELECT 1")); + } + + @Test + public 
void udfRemote() { + String avroString = + "{" + + " \"type\" : \"record\"," + + " \"name\" : \"UdfSchema_Foo\"," + + " \"fields\" : []," + + " \"namespace\" : \"spannertest\"," + + " \"googleStorage\" : \"CloudSpanner\"," + + " \"googleFormatVersion\" : \"booleans\"," + + " \"spannerEntity\" : \"spannerUdf\", " + + " \"spannerName\" : \"UdfSchema.Foo\"," + + " \"spannerUdfName\" : \"UdfSchema.Foo\"," + + " \"spannerUdfType\" : \"STRING\"," + + " \"spannerUdfLanguage\" : \"REMOTE\"," + + " \"spannerUdfParameter_0\" : \"arg0 STRING\"," + + " \"spannerUdfParameter_1\" : \"arg1 STRING DEFAULT \\\"bar\\\"\"," + + " \"spannerOption_0\" : \"endpoint=\\\"https://us-central1-myproject.cloudfunctions.net/myfunc\\\"\"," + + " \"spannerOption_1\" : \"max_batching_rows=50\"" + + "}"; + + Schema schema = new Schema.Parser().parse(avroString); + + AvroSchemaToDdlConverter converter = new AvroSchemaToDdlConverter(); + Ddl ddl = converter.toDdl(Collections.singleton(schema)); + assertThat(ddl.udfs(), hasSize(1)); + assertThat( + ddl.prettyPrint(), + equalToCompressingWhiteSpace( + "CREATE FUNCTION `UdfSchema`.`Foo`(`arg0` STRING, `arg1` STRING DEFAULT \"bar\")" + + " RETURNS STRING NOT DETERMINISTIC LANGUAGE REMOTE" + + " OPTIONS (endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\", max_batching_rows=50)")); + } + + @Test + public void pgUdfRemote() { + String avroString = + "{" + + " \"type\" : \"record\"," + + " \"name\" : \"UdfSchema.Foo\"," + + " \"spannerEntity\" : \"spannerUdf\", " + + " \"fields\" : []," + + " \"namespace\" : \"spannertest\"," + + " \"googleStorage\" : \"CloudSpanner\"," + + " \"googleFormatVersion\" : \"booleans\"," + + " \"spannerName\" : \"UdfSchema.Foo\"," + + " \"spannerUdfName\" : \"UdfSchema.Foo\"," + + " \"spannerUdfType\" : \"STRING\"," + + " \"spannerUdfLanguage\" : \"REMOTE\"," + + " \"spannerUdfParameter_0\" : \"arg0 STRING\"," + + " \"spannerUdfParameter_1\" : \"arg1 STRING DEFAULT \\\"bar\\\"\"," + + " \"spannerUdfDefinition\" 
: \"{\\\"endpoint\\\": \\\"https://us-central1-myproject.cloudfunctions.net/myfunc\\\", \\\"max_batching_rows\\\": 50}\"" + + "}"; + + Schema schema = new Schema.Parser().parse(avroString); + + AvroSchemaToDdlConverter converter = new AvroSchemaToDdlConverter(Dialect.POSTGRESQL); + Ddl ddl = converter.toDdl(Collections.singleton(schema)); + assertThat(ddl.udfs(), hasSize(1)); + assertThat( + ddl.prettyPrint(), + equalToCompressingWhiteSpace( + "CREATE FUNCTION \"UdfSchema\".\"Foo\"(\"arg0\" STRING, \"arg1\" STRING DEFAULT \"bar\")" + + " RETURNS STRING VOLATILE LANGUAGE REMOTE" + + " AS $${\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\": 50}$$")); + } + @Test public void invokerRightsView() { String avroString = diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java index b67f22a13d..5484081ef2 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java @@ -1101,6 +1101,64 @@ public void udfs() throws Exception { UdfParameter.parse( "arg1 STRING DEFAULT 'bar'", "s1.Foo2", Dialect.GOOGLE_STANDARD_SQL)) .endUdf() + .createUdf("s1.Foo2") + .dialect(Dialect.GOOGLE_STANDARD_SQL) + .name("s1.Foo3") + .language("REMOTE") + .type("INT64") + .addParameter( + UdfParameter.parse("arg0 INT64", "s1.Foo3", Dialect.GOOGLE_STANDARD_SQL)) + .options( + ImmutableList.of( + "endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")) + .endUdf() + .build(); + createAndPopulate(ddl, 0); + runTest(); + } + + + @Test + public void pgUdfs() throws Exception { + Ddl.Builder ddlBuilder = Ddl.builder(Dialect.POSTGRESQL); + List dbOptionList = new ArrayList<>(); + dbOptionList.add( + Export.DatabaseOption.newBuilder() + .setOptionName("default_sequence_kind") + .setOptionValue("\"bit_reversed_positive\"") + .build()); + 
ddlBuilder.mergeDatabaseOptions(dbOptionList); + Ddl ddl = + ddlBuilder + .createSchema("s1") + .endNamedSchema() + .createUdf("s1.Foo1") + .dialect(Dialect.POSTGRESQL) + .name("s1.Foo1") + .definition("(SELECT 'bar')") + .endUdf() + .createUdf("s1.Foo2") + .dialect(Dialect.POSTGRESQL) + .name("s1.Foo2") + .definition("(SELECT 'bar')") + .security(SqlSecurity.INVOKER) + .type("TEXT") + .addParameter(UdfParameter.parse("arg0 TEXT", "s1.Foo2", Dialect.POSTGRESQL)) + .addParameter( + UdfParameter.parse( + "arg1 TEXT DEFAULT 'bar'", "s1.Foo2", Dialect.POSTGRESQL)) + .endUdf() + .createUdf("s1.Foo3") + .dialect(Dialect.POSTGRESQL) + .name("s1.Foo2") + .language("REMOTE") + .type("BIGINT") + .addParameter( + UdfParameter.parse("arg0 BIGINT", "s1.Foo3", Dialect.POSTGRESQL)) + .options( + ImmutableList.of( + "endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")) + .endUdf() .build(); createAndPopulate(ddl, 0); runTest(); diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/DdlToAvroSchemaConverterTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/DdlToAvroSchemaConverterTest.java index 2c833e102d..32283dddb4 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/DdlToAvroSchemaConverterTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/DdlToAvroSchemaConverterTest.java @@ -38,6 +38,7 @@ import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_INDEX; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_INTERLEAVE_TYPE; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_LABEL; +import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_NAME; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_NODE_TABLE; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_ON_DELETE_ACTION; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_OPTION; @@ -51,6 +52,7 @@ import static 
com.google.cloud.teleport.spanner.AvroUtil.SPANNER_SEQUENCE_SKIP_RANGE_MAX; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_SEQUENCE_SKIP_RANGE_MIN; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF_DEFINITION; +import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF_LANGUAGE; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF_NAME; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF_PARAMETER; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_UDF_SECURITY; @@ -67,6 +69,16 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; + +import org.apache.avro.LogicalTypes; +import org.apache.avro.Schema; +import org.apache.avro.SchemaBuilder; +import org.junit.Test; + import com.google.cloud.spanner.Dialect; import com.google.cloud.teleport.spanner.common.NumericUtils; import com.google.cloud.teleport.spanner.common.Type; @@ -82,14 +94,7 @@ import com.google.cloud.teleport.spanner.ddl.UdfParameter; import com.google.cloud.teleport.spanner.ddl.View; import com.google.common.collect.ImmutableList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import org.apache.avro.LogicalTypes; -import org.apache.avro.Schema; -import org.apache.avro.SchemaBuilder; -import org.junit.Test; + /** Test for {@link DdlToAvroSchemaConverter}. 
*/ public class DdlToAvroSchemaConverterTest { @@ -733,6 +738,87 @@ public void udfAllOptions() { assertThat(avroUdf.getName(), equalTo("spanner_Foo")); } + @Test + public void udfRemote() { + DdlToAvroSchemaConverter converter = + new DdlToAvroSchemaConverter("spannertest", "booleans", false); + Ddl ddl = + Ddl.builder() + .createUdf("UdfSchema.Foo") + .name("UdfSchema.Foo") + .type("STRING") + .language("REMOTE") + .addParameter(UdfParameter.parse("arg0 STRING", "UdfSchema.Foo", Dialect.GOOGLE_STANDARD_SQL)) + .addParameter( + UdfParameter.parse( + "arg1 STRING DEFAULT \"bar\"", "UdfSchema.Foo", Dialect.GOOGLE_STANDARD_SQL)) + .options(ImmutableList.of("endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"", "max_batching_rows=50")) + .endUdf() + .build(); + + Collection result = converter.convert(ddl); + assertThat(result, hasSize(1)); + Schema avroUdf = result.iterator().next(); + + assertThat(avroUdf, notNullValue()); + + assertThat(avroUdf.getName(), equalTo("UdfSchema_Foo")); + assertThat(avroUdf.getNamespace(), equalTo("spannertest")); + assertThat(avroUdf.getProp(GOOGLE_FORMAT_VERSION), equalTo("booleans")); + assertThat(avroUdf.getProp(GOOGLE_STORAGE), equalTo("CloudSpanner")); + assertThat(avroUdf.getProp(SPANNER_NAME), equalTo("UdfSchema.Foo")); + assertThat(avroUdf.getProp(SPANNER_UDF_NAME), equalTo("UdfSchema.Foo")); + assertThat(avroUdf.getProp(SPANNER_UDF_DEFINITION), nullValue()); + assertThat(avroUdf.getProp(SPANNER_UDF_SECURITY), nullValue()); + assertThat(avroUdf.getProp(SPANNER_UDF_TYPE), equalTo("STRING")); + assertThat(avroUdf.getProp(SPANNER_UDF_LANGUAGE), equalTo("REMOTE")); + assertThat(avroUdf.getProp(SPANNER_UDF_PARAMETER + 0), equalTo("`arg0` STRING")); + assertThat( + avroUdf.getProp(SPANNER_UDF_PARAMETER + 1), equalTo("`arg1` STRING DEFAULT \"bar\"")); + assertThat( + avroUdf.getProp(SPANNER_OPTION + 0), + equalTo("endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")); + 
assertThat(avroUdf.getProp(SPANNER_OPTION + 1), equalTo("max_batching_rows=50")); +} + +@Test +public void pgUdfRemote() { + DdlToAvroSchemaConverter converter = new DdlToAvroSchemaConverter("spannertest", "booleans", false); + Ddl ddl = Ddl.builder(Dialect.POSTGRESQL) + .createUdf("UdfSchema.Foo") + .name("UdfSchema.Foo") + .type("TEXT") + .language("REMOTE") + .definition( + "{\"endpoint\":\"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\":50}") + .addParameter(UdfParameter.parse("arg0 TEXT", "UdfSchema.Foo", Dialect.POSTGRESQL)) + .addParameter( + UdfParameter.parse( + "arg1 TEXT DEFAULT \"bar\"", "spanner.Foo", Dialect.POSTGRESQL)) + .endUdf() + .build(); + + Collection result = converter.convert(ddl); + assertThat(result, hasSize(1)); + Schema avroUdf = result.iterator().next(); + + assertThat(avroUdf, notNullValue()); + + assertThat(avroUdf.getName(), equalTo("UdfSchema_Foo")); + assertThat(avroUdf.getNamespace(), equalTo("spannertest")); + assertThat(avroUdf.getProp(GOOGLE_FORMAT_VERSION), equalTo("booleans")); + assertThat(avroUdf.getProp(GOOGLE_STORAGE), equalTo("CloudSpanner")); + assertThat(avroUdf.getProp(SPANNER_NAME), equalTo("UdfSchema.Foo")); + assertThat(avroUdf.getProp(SPANNER_UDF_NAME), equalTo("UdfSchema.Foo")); + assertThat(avroUdf.getProp(SPANNER_UDF_DEFINITION), equalTo("{\"endpoint\":\"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\":50}")); + assertThat(avroUdf.getProp(SPANNER_UDF_SECURITY), nullValue()); + assertThat(avroUdf.getProp(SPANNER_UDF_TYPE), equalTo("TEXT")); + assertThat(avroUdf.getProp(SPANNER_UDF_LANGUAGE), equalTo("REMOTE")); + assertThat(avroUdf.getProp(SPANNER_UDF_PARAMETER + 0), equalTo("\"arg0\" TEXT")); + assertThat( + avroUdf.getProp(SPANNER_UDF_PARAMETER + 1), equalTo("\"arg1\" TEXT DEFAULT \"bar\"")); + } + @Test public void invokerRightsView() { DdlToAvroSchemaConverter converter = diff --git 
a/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java index 733e00c42c..33550b044f 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java @@ -207,18 +207,19 @@ private void testSpannerToGCSAvroBase( // an empty database without any tables. spannerResourceManager.executeDdlStatement(setDefaultTimeZoneStatement); + String prefix = testName + "_"; String resourceFileName = "ExportPipelineIT/spanner-gsql-ddl.sql"; String ddl = String.join( " ", Resources.readLines( Resources.getResource(resourceFileName), StandardCharsets.UTF_8)) - .replaceAll("%PREFIX%", testName); + .replaceAll("%PREFIX%", prefix); ddl = ddl.trim(); List ddls = Arrays.stream(ddl.split(";")).filter(d -> !d.isBlank()).toList(); spannerResourceManager.executeDdlStatements(ddls); - List expectedData = generateTableRows(String.format("%s_Singers", testName)); + List expectedData = generateTableRows(String.format("%sSingers", prefix)); spannerResourceManager.write(expectedData); PipelineLauncher.LaunchConfig.Builder options = paramsAdder.apply( @@ -238,31 +239,37 @@ private void testSpannerToGCSAvroBase( List singersArtifacts = gcsClient.listArtifacts( - "output/", Pattern.compile(String.format(".*/%s_%s.*\\.avro.*", testName, "Singers"))); + "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Singers"))); List emptyArtifacts = gcsClient.listArtifacts( - "output/", Pattern.compile(String.format(".*/%s_%s.*\\.avro.*", testName, "Empty"))); + "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Empty"))); List modelStructArtifacts = gcsClient.listArtifacts( "output/", - Pattern.compile(String.format(".*/%s_%s.*\\.avro.*", testName, "ModelStruct"))); + Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "ModelStruct"))); + List udfRemoteArtifacts = + 
gcsClient.listArtifacts( + "output/", + Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "UdfSchema.Remote"))); List searchIndexArtifacts = gcsClient.listArtifacts( "output/", - Pattern.compile(String.format(".*/%s_%s.*\\.avro.*", testName, "SearchIndex"))); + Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "SearchIndex"))); List identityArtifacts = gcsClient.listArtifacts( - "output/", Pattern.compile(String.format(".*/%s_%s.*\\.avro.*", testName, "Identity"))); + "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Identity"))); List sequenceArtifacts = gcsClient.listArtifacts( "output/", - Pattern.compile(String.format(".*/%s_%s.*\\.avro.*", testName, "Sequence1"))); + Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Sequence1"))); List sequenceNoKindArtifacts = gcsClient.listArtifacts( "output/", - Pattern.compile(String.format(".*/%s_%s.*\\.avro.*", testName, "Sequence2"))); + Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Sequence2"))); + assertThat(singersArtifacts).isNotEmpty(); assertThat(emptyArtifacts).isNotEmpty(); + assertThat(udfRemoteArtifacts).isNotEmpty(); assertThat(modelStructArtifacts).isNotEmpty(); assertThat(identityArtifacts).isNotEmpty(); assertThat(sequenceArtifacts).isNotEmpty(); @@ -297,8 +304,7 @@ private void testPGSpannerToAvroBase( Function paramsAdder) throws IOException { - String tableNamePrefix = testName.substring(0, 15); - + String prefix = testName.substring(0, 15) + "_"; String setDefaultTimeZoneStatement = "ALTER DATABASE db SET spanner.default_time_zone = 'UTC'"; // Setting default time zone needs to be the first statement because it requires // an empty database without any tables. 
@@ -310,12 +316,12 @@ private void testPGSpannerToAvroBase( " ", Resources.readLines( Resources.getResource(resourceFileName), StandardCharsets.UTF_8)) - .replaceAll("%PREFIX%", tableNamePrefix); + .replaceAll("%PREFIX%", prefix); ddl = ddl.trim(); List ddls = Arrays.stream(ddl.split(";")).filter(d -> !d.isBlank()).toList(); spannerResourceManager.executeDdlStatements(ddls); - List expectedData = generateTableRows(String.format("%s_Singers", tableNamePrefix)); + List expectedData = generateTableRows(String.format("%sSingers", prefix)); spannerResourceManager.write(expectedData); PipelineLauncher.LaunchConfig.Builder options = paramsAdder.apply( @@ -336,27 +342,27 @@ private void testPGSpannerToAvroBase( List singersArtifacts = gcsClient.listArtifacts( "output/", - Pattern.compile(String.format(".*/%s_%s.*\\.avro.*", tableNamePrefix, "Singers"))); + Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Singers"))); List emptyArtifacts = gcsClient.listArtifacts( "output/", - Pattern.compile(String.format(".*/%s_%s.*\\.avro.*", tableNamePrefix, "Empty"))); + Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Empty"))); List searchIndexArtifacts = gcsClient.listArtifacts( "output/", - Pattern.compile(String.format(".*/%s_%s.*\\.avro.*", tableNamePrefix, "SearchIndex"))); + Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "SearchIndex"))); List identityArtifacts = gcsClient.listArtifacts( "output/", - Pattern.compile(String.format(".*/%s_%s.*\\.avro.*", tableNamePrefix, "Identity"))); + Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Identity"))); List sequenceArtifacts = gcsClient.listArtifacts( "output/", - Pattern.compile(String.format(".*/%s_%s.*\\.avro.*", tableNamePrefix, "Sequence1"))); + Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Sequence1"))); List sequenceNoKindArtifacts = gcsClient.listArtifacts( "output/", - Pattern.compile(String.format(".*/%s_%s.*\\.avro.*", tableNamePrefix, "Sequence2"))); + 
Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Sequence2"))); assertThat(singersArtifacts).isNotEmpty(); assertThat(emptyArtifacts).isNotEmpty(); assertThat(identityArtifacts).isNotEmpty(); diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java index 54df2b1a73..a20c5501f4 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java @@ -109,8 +109,24 @@ private void uploadImportPipelineArtifacts(String subdirectory) throws IOExcepti "input/Sequence2-manifest.json", Resources.getResource("ImportPipelineIT/" + subdirectory + "/Sequence2-manifest.json") .getPath()); + gcsClient.uploadArtifact( + "input/UdfSchema.avro-00000-of-00001", + Resources.getResource("ImportPipelineIT/" + subdirectory + "/UdfSchema.avro") + .getPath()); + gcsClient.uploadArtifact( + "input/UdfSchema-manifest.json", + Resources.getResource("ImportPipelineIT/" + subdirectory + "/UdfSchema-manifest.json") + .getPath()); if (Objects.equals(subdirectory, "googlesql")) { + gcsClient.uploadArtifact( + "input/UdfSchema.Remote.avro-00000-of-00001", + Resources.getResource("ImportPipelineIT/" + subdirectory + "/UdfSchema.Remote.avro") + .getPath()); + gcsClient.uploadArtifact( + "input/UdfSchema.Remote-manifest.json", + Resources.getResource("ImportPipelineIT/" + subdirectory + "/UdfSchema.Remote-manifest.json") + .getPath()); gcsClient.uploadArtifact( "input/ModelStruct.avro-00000-of-00001", Resources.getResource("ImportPipelineIT/" + subdirectory + "/ModelStruct.avro") diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/DdlTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/DdlTest.java index 1a75574860..522ce5f1cf 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/DdlTest.java +++ 
b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/DdlTest.java @@ -1370,6 +1370,18 @@ public void udfs() { .addParameter( UdfParameter.parse( "arg1 STRING DEFAULT 'bar'", "spanner.Foo", Dialect.GOOGLE_STANDARD_SQL)) + .endUdf() + .createUdf("spanner.Foo3") + .dialect(Dialect.GOOGLE_STANDARD_SQL) + .name("Foo3") + .type("STRING") + .language("REMOTE") + .addParameter( + UdfParameter.parse("arg0 INT64", "spanner.Foo3", Dialect.GOOGLE_STANDARD_SQL)) + .options( + ImmutableList.of( + "endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"", + "max_batching_rows=50")) .endUdf(); assertThat(ddlBuilder.hasUdf("spanner.Foo1")); assertThat(ddlBuilder.createUdf("spanner.Foo1").name().equals("Foo1")); @@ -1378,11 +1390,13 @@ public void udfs() { String expectedDdlString = "\nCREATE FUNCTION `Foo1`() AS ((SELECT 'bar'))\n" + "CREATE FUNCTION `Foo2`(`arg0` STRING, `arg1` STRING DEFAULT 'bar')" - + " RETURNS STRING SQL SECURITY INVOKER AS ((SELECT 'bar'))"; + + " RETURNS STRING SQL SECURITY INVOKER AS ((SELECT 'bar'))\n" + + "CREATE FUNCTION `Foo3`(`arg0` INT64) RETURNS STRING NOT DETERMINISTIC LANGUAGE REMOTE" + + " OPTIONS (endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\", max_batching_rows=50)"; assertThat(ddl.prettyPrint(), equalToCompressingWhiteSpace(expectedDdlString)); List statements = ddl.statements(); - assertEquals(2, statements.size()); + assertEquals(3, statements.size()); assertThat( statements.get(0), equalToCompressingWhiteSpace("CREATE FUNCTION `Foo1`() AS ((SELECT 'bar'))")); @@ -1391,12 +1405,82 @@ public void udfs() { equalToCompressingWhiteSpace( "CREATE FUNCTION `Foo2`(`arg0` STRING, `arg1` STRING DEFAULT 'bar')" + " RETURNS STRING SQL SECURITY INVOKER AS ((SELECT 'bar'))")); + assertThat( + statements.get(2), + equalToCompressingWhiteSpace( + "CREATE FUNCTION `Foo3`(`arg0` INT64) RETURNS STRING NOT DETERMINISTIC LANGUAGE REMOTE" + + " OPTIONS (endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\", 
max_batching_rows=50)")); + assertNotNull(ddl.hashCode()); + + assertThat( + ddl.toBuilder().build().prettyPrint(), equalToCompressingWhiteSpace(expectedDdlString)); + } + + @Test + public void pgUdfs() { + Ddl.Builder ddlBuilder = + Ddl.builder(Dialect.POSTGRESQL) + .createUdf("spanner.Foo1") + .dialect(Dialect.POSTGRESQL) + .name("Foo1") + .definition("(SELECT 'bar')") + .endUdf() + .createUdf("spanner.Foo2") + .dialect(Dialect.POSTGRESQL) + .name("Foo2") + .definition("(SELECT 'bar')") + .security(SqlSecurity.INVOKER) + .type("STRING") + .addParameter( + UdfParameter.parse("arg0 TEXT", "spanner.Foo", Dialect.POSTGRESQL)) + .addParameter( + UdfParameter.parse( + "arg1 TEXT DEFAULT 'bar'", "spanner.Foo", Dialect.POSTGRESQL)) + .endUdf() + .createUdf("spanner.Foo3") + .dialect(Dialect.POSTGRESQL) + .name("Foo3") + .type("STRING") + .language("REMOTE") + .addParameter( + UdfParameter.parse("arg0 BIGINT", "spanner.Foo3", Dialect.POSTGRESQL)) + .definition( + "{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\": 50}") + .endUdf(); + assertThat(ddlBuilder.hasUdf("spanner.Foo1")); + assertThat(ddlBuilder.createUdf("spanner.Foo1").name().equals("Foo1")); + Ddl ddl = ddlBuilder.build(); + + String expectedDdlString = + "\nCREATE FUNCTION \"Foo1\"() RETURN (SELECT 'bar')\n" + + "CREATE FUNCTION \"Foo2\"(\"arg0\" TEXT, \"arg1\" TEXT DEFAULT 'bar')" + + " RETURNS STRING SQL SECURITY INVOKER RETURN (SELECT 'bar')\n" + + "CREATE FUNCTION \"Foo3\"(\"arg0\" BIGINT) RETURNS STRING VOLATILE LANGUAGE REMOTE" + + " AS $${\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\": 50}$$"; + assertThat(ddl.prettyPrint(), equalToCompressingWhiteSpace(expectedDdlString)); + + List statements = ddl.statements(); + assertEquals(3, statements.size()); + assertThat( + statements.get(0), + equalToCompressingWhiteSpace("CREATE FUNCTION \"Foo1\"() RETURN (SELECT 'bar')")); + assertThat( + statements.get(1), + 
equalToCompressingWhiteSpace( + "CREATE FUNCTION \"Foo2\"(\"arg0\" TEXT, \"arg1\" TEXT DEFAULT 'bar')" + + " RETURNS STRING SQL SECURITY INVOKER RETURN (SELECT 'bar')")); + assertThat( + statements.get(2), + equalToCompressingWhiteSpace( + "CREATE FUNCTION \"Foo3\"(\"arg0\" BIGINT) RETURNS STRING VOLATILE LANGUAGE REMOTE" + + " AS $${\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\": 50}$$")); assertNotNull(ddl.hashCode()); assertThat( ddl.toBuilder().build().prettyPrint(), equalToCompressingWhiteSpace(expectedDdlString)); } + @Test public void sequences() { Ddl ddl = diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java index 0036985f35..30a45bdeec 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java @@ -19,6 +19,7 @@ import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_SEQUENCE_KIND; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_SEQUENCE_SKIP_RANGE_MAX; import static com.google.cloud.teleport.spanner.AvroUtil.SPANNER_SEQUENCE_SKIP_RANGE_MIN; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasItems; import static org.hamcrest.Matchers.hasSize; @@ -635,14 +636,17 @@ public void simpleUdf() throws Exception { + "C STRING DEFAULT 'NULL', " + "D STRING DEFAULT '') " + "RETURNS STRING AS (CONCAT(A, '::', B, '::', C, '::', D))"; + String udfDef3 = "CREATE FUNCTION s1.remote_udf(x INT64, y INT64) " + + "RETURNS INT64 NOT DETERMINISTIC LANGUAGE REMOTE " + + "OPTIONS ( endpoint = 'https://us-central1-myproject.cloudfunctions.net/myfunc' )"; - SPANNER_SERVER.createDatabase(dbId, Arrays.asList(namedSchemaDef, udfDef1, udfDef2)); + 
SPANNER_SERVER.createDatabase(dbId, Arrays.asList(namedSchemaDef, udfDef1, udfDef2, udfDef3)); Ddl ddl = getDatabaseDdl(); assertThat(ddl.schemas(), hasSize(1)); assertThat(ddl.schema("s1"), notNullValue()); - assertThat(ddl.udfs(), hasSize(2)); + assertThat(ddl.udfs(), hasSize(3)); Udf udf1 = ddl.udf("s1.foo"); assertThat(udf1, notNullValue()); assertThat(ddl.udf("S1.FOO"), sameInstance(udf1)); @@ -651,13 +655,21 @@ public void simpleUdf() throws Exception { assertThat(udf2, notNullValue()); assertThat(ddl.udf("S1.DEFault_values"), sameInstance(udf2)); + Udf udf3 = ddl.udf("s1.remote_udf"); + assertThat(udf3, notNullValue()); + assertThat(ddl.udf("S1.REMOTE_UDF"), sameInstance(udf3)); + assertThat(udf1.name(), equalTo("s1.foo")); assertThat(udf1.type(), equalTo("INT64")); + assertEquals(udf1.language(), "SQL"); + assertThat(udf1.options(), empty()); assertThat(udf1.definition(), equalTo("1")); assertEquals(udf1.security(), Udf.SqlSecurity.INVOKER); assertThat(udf2.name(), equalTo("s1.default_values")); assertThat(udf2.type(), equalTo("STRING")); + assertEquals(udf2.language(), "SQL"); + assertThat(udf2.options(), empty()); assertThat(udf2.definition(), equalTo("CONCAT(A, '::', B, '::', C, '::', D)")); assertEquals(udf2.security(), Udf.SqlSecurity.INVOKER); assertThat( @@ -687,6 +699,43 @@ public void simpleUdf() throws Exception { .type("STRING") .defaultExpression("''") .autoBuild())); + + + assertThat(udf3.name(), equalTo("s1.remote_udf")); + assertThat(udf3.type(), equalTo("INT64")); + assertEquals(udf3.language(), "REMOTE"); + assertThat(udf3.options(), hasItems("endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")); + assertEquals(udf3.definition(), ""); + assertEquals(udf3.security(), Udf.SqlSecurity.INVOKER); + assertThat( + udf3.parameters(), + hasItems( + UdfParameter.builder() + .functionSpecificName("s1.remote_udf") + .name("x") + .type("INT64") + .defaultExpression(null) + .autoBuild(), + UdfParameter.builder() + 
.functionSpecificName("s1.remote_udf") + .name("y") + .type("INT64") + .defaultExpression(null) + .autoBuild())); + + } + + @Test + public void pgSimpleUdf() throws Exception { + String namedSchemaDef = "CREATE SCHEMA s1"; + + SPANNER_SERVER.createPgDatabase(dbId, Arrays.asList(namedSchemaDef)); + Ddl ddl = getPgDatabaseDdl(); + + assertThat(ddl.schemas(), hasSize(1)); + assertThat(ddl.schema("s1"), notNullValue()); + + assertThat(ddl.udfs(), hasSize(0)); } @Test diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerTest.java index fe12f1faa9..5284370302 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerTest.java @@ -234,7 +234,7 @@ public void testListFunctionParametersSQL() { "SELECT p.specific_schema, p.specific_name, p.parameter_name, p.data_type," + " p.parameter_default FROM information_schema.parameters AS p, information_schema.routines AS r" + " WHERE p.specific_schema NOT IN ('INFORMATION_SCHEMA', 'SPANNER_SYS') and p.specific_name =" - + " r.specific_name and r.routine_type = 'FUNCTION' and r.routine_body = 'SQL' ORDER BY p.specific_schema," + + " r.specific_name and r.routine_type = 'FUNCTION' ORDER BY p.specific_schema," + " p.specific_name, p.ordinal_position")); assertThrows( diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java index c85cf1c8bd..1dcee9b2ff 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java @@ -102,6 +102,10 @@ public abstract class RandomDdlGenerator { Type.Code.PG_NUMERIC, Type.Code.PG_DATE)); + private static final String[] 
UDF_LANGUAGES = new String[] { + "SQL", "REMOTE" + }; + private static final int MAX_PKS = 16; public abstract Dialect getDialect(); @@ -110,6 +114,8 @@ public abstract class RandomDdlGenerator { public abstract int getArrayChance(); + public abstract int getRemoteUdfChance(); + public abstract int[] getMaxBranchPerLevel(); public abstract int getMaxPkComponents(); @@ -146,6 +152,7 @@ public static Builder builder(Dialect dialect) { .setDialect(dialect) .setRandom(new Random()) .setArrayChance(20) + .setRemoteUdfChance(20) .setMaxPkComponents(3) .setMaxBranchPerLevel(new int[] {2, 2, 1, 1, 1, 1, 1}) .setMaxUdfs(0) @@ -172,6 +179,8 @@ public abstract static class Builder { public abstract Builder setArrayChance(int chance); + public abstract Builder setRemoteUdfChance(int chance); + public abstract Builder setMaxBranchPerLevel(int[] arr); public abstract Builder setMaxPkComponents(int val); @@ -236,16 +245,30 @@ private void generateUdf(Ddl.Builder builder) { .dialect(Dialect.GOOGLE_STANDARD_SQL) .name(name); if (getRandom().nextBoolean()) { - Type type = generateType(PK_TYPES, -1); + Type type = generateType((getDialect() == Dialect.GOOGLE_STANDARD_SQL) ? 
PK_TYPES : PG_PK_TYPES, -1); udfBuilder.type(type.getCode().getName()); } - if (getRandom().nextBoolean()) { - udfBuilder.security(SqlSecurity.INVOKER); + + if (getRandom().nextInt(100) <= getRemoteUdfChance()) { + udfBuilder.language("REMOTE"); } + + if (!"REMOTE".equals(udfBuilder.language())) { + if (getRandom().nextBoolean()) { + udfBuilder.security(SqlSecurity.INVOKER); + } + } else { + if (getDialect() == Dialect.GOOGLE_STANDARD_SQL) { + udfBuilder.options(ImmutableList.of("endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")); + } else { + udfBuilder.definition("\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\""); + } + } + int numUdfParameters = getRandom().nextInt(getMaxUdfParameters() + 1); for (int i = 0; i < numUdfParameters; i++) { String paramName = generateIdentifier(getMaxIdLength()); - Type type = generateType(PK_TYPES, -1); + Type type = generateType((getDialect() == Dialect.GOOGLE_STANDARD_SQL) ? PK_TYPES : PG_PK_TYPES, -1); UdfParameter.Builder udfParameterBuilder = udfBuilder.parameter(paramName).type(type.getCode().getName()); if (getRandom().nextBoolean()) { diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java index e9ed6bd64c..ecb53d3d75 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java @@ -21,6 +21,7 @@ import com.google.cloud.spanner.Dialect; import com.google.cloud.teleport.spanner.ddl.Udf.SqlSecurity; +import com.google.common.collect.ImmutableList; import org.junit.Test; /** Unit tests for Udf class. 
*/ @@ -73,4 +74,59 @@ public void testUdfWithInvalidParameter() { assertThrows(IllegalArgumentException.class, () -> udf.parameter("p1")); } + + @Test + public void testRemoteUdf() { + Udf udf = + Udf.builder() + .name("foo") + .specificName("s1.foo") + .dialect(Dialect.GOOGLE_STANDARD_SQL) + .type("string") + .language("REMOTE") + .addParameter(UdfParameter.parse("p1 int32", "s1.foo", Dialect.GOOGLE_STANDARD_SQL)) + .options( + ImmutableList.of( + "endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"", + "max_batching_rows=50")) + .build(); + + assertThat( + udf.toString(), + equalToCompressingWhiteSpace( + "CREATE FUNCTION `foo`(`p1` int32) RETURNS string NOT DETERMINISTIC LANGUAGE REMOTE" + + " OPTIONS (endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\", max_batching_rows=50)")); + + assertThat( + udf.toBuilder().build().toString(), + equalToCompressingWhiteSpace( + "CREATE FUNCTION `foo`(`p1` int32) RETURNS string NOT DETERMINISTIC LANGUAGE REMOTE" + + " OPTIONS (endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\", max_batching_rows=50)")); + } + + @Test + public void testPgRemoteUdf() { + Udf udf = + Udf.builder() + .name("foo") + .specificName("s1.foo") + .dialect(Dialect.POSTGRESQL) + .type("TEXT") + .language("REMOTE") + .addParameter(UdfParameter.parse("p1 BIGINT", "s1.foo", Dialect.POSTGRESQL)) + .definition("{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/$myfunc\"}") + .build(); + + assertThat( + udf.toString(), + equalToCompressingWhiteSpace( + "CREATE FUNCTION \"foo\"(\"p1\" BIGINT) RETURNS TEXT VOLATILE LANGUAGE REMOTE" + + " AS $${\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/\\044myfunc\"}$$")); + + assertThat( + udf.toBuilder().build().toString(), + equalToCompressingWhiteSpace( + "CREATE FUNCTION \"foo\"(\"p1\" BIGINT) RETURNS TEXT VOLATILE LANGUAGE REMOTE" + + " AS $${\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/\\044myfunc\"}$$")); + } } 
diff --git a/v1/src/test/resources/ExportPipelineIT/spanner-gsql-ddl.sql b/v1/src/test/resources/ExportPipelineIT/spanner-gsql-ddl.sql index c849546c17..a33ec76cca 100644 --- a/v1/src/test/resources/ExportPipelineIT/spanner-gsql-ddl.sql +++ b/v1/src/test/resources/ExportPipelineIT/spanner-gsql-ddl.sql @@ -1,45 +1,52 @@ -DROP TABLE IF EXISTS `%PREFIX%_EmptyTable`; -CREATE TABLE `%PREFIX%_EmptyTable` ( +DROP TABLE IF EXISTS `%PREFIX%EmptyTable`; +CREATE TABLE `%PREFIX%EmptyTable` ( id INT64 NOT NULL ) PRIMARY KEY(id); ALTER DATABASE db SET OPTIONS (default_sequence_kind = 'bit_reversed_positive'); -DROP TABLE IF EXISTS `%PREFIX%_Identity`; -CREATE TABLE `%PREFIX%_Identity` ( +DROP TABLE IF EXISTS `%PREFIX%Identity`; +CREATE TABLE `%PREFIX%Identity` ( Id INT64 NOT NULL GENERATED BY DEFAULT AS IDENTITY (BIT_REVERSED_POSITIVE), NonKeyIdCol1 INT64 NOT NULL GENERATED BY DEFAULT AS IDENTITY, NonKeyIdCol2 INT64 NOT NULL GENERATED BY DEFAULT AS IDENTITY (SKIP RANGE 1000, 2000) ) PRIMARY KEY(Id); -DROP SEQUENCE IF EXISTS `%PREFIX%_Sequence1`; -CREATE SEQUENCE `%PREFIX%_Sequence1` BIT_REVERSED_POSITIVE SKIP RANGE 99, 999; +DROP SEQUENCE IF EXISTS `%PREFIX%Sequence1`; +CREATE SEQUENCE `%PREFIX%Sequence1` BIT_REVERSED_POSITIVE SKIP RANGE 99, 999; -DROP SEQUENCE IF EXISTS `%PREFIX%_Sequence2`; -CREATE SEQUENCE `%PREFIX%_Sequence2`; +DROP SEQUENCE IF EXISTS `%PREFIX%Sequence2`; +CREATE SEQUENCE `%PREFIX%Sequence2`; -DROP TABLE IF EXISTS `%PREFIX%_Root`; -CREATE TABLE `%PREFIX%_Root` ( +DROP TABLE IF EXISTS `%PREFIX%Root`; +CREATE TABLE `%PREFIX%Root` ( Id INT64 NOT NULL ) PRIMARY KEY(Id); -DROP TABLE IF EXISTS `%PREFIX%_Singers`; -CREATE TABLE `%PREFIX%_Singers` ( +DROP TABLE IF EXISTS `%PREFIX%Singers`; +CREATE TABLE `%PREFIX%Singers` ( Id INT64 NOT NULL, FirstName String(1024), LastName String(1024), Rating FLOAT32, Review String(MAX), `MyTokens` TOKENLIST AS (TOKENIZE_FULLTEXT(Review)) HIDDEN -) PRIMARY KEY(Id), INTERLEAVE IN `%PREFIX%_Root`; +) PRIMARY KEY(Id), INTERLEAVE IN 
`%PREFIX%Root`; -DROP MODEL IF EXISTS `%PREFIX%_ModelStruct`; -CREATE MODEL `%PREFIX%_ModelStruct` +DROP MODEL IF EXISTS `%PREFIX%ModelStruct`; +CREATE MODEL `%PREFIX%ModelStruct` INPUT(content STRING(MAX)) OUTPUT (embeddings STRUCT, values ARRAY>) REMOTE OPTIONS (endpoint="//aiplatform.googleapis.com/projects/span-cloud-testing/locations/us-central1/publishers/google/models/textembedding-gecko"); -DROP SEARCH INDEX IF EXISTS `%PREFIX%_SequenceIndex`; -CREATE SEARCH INDEX `%PREFIX%_SearchIndex` - ON `%PREFIX%_Singers`(`MyTokens`) +CREATE SCHEMA `%PREFIX%UdfSchema`; + +CREATE FUNCTION `%PREFIX%UdfSchema`.`Remote`(x INT64, y INT64) RETURNS INT64 NOT DETERMINISTIC LANGUAGE REMOTE OPTIONS ( + endpoint = "https://us-central1-myproject.cloudfunctions.net/myfunc", + max_batching_rows = 10 +); + +DROP SEARCH INDEX IF EXISTS `%PREFIX%SearchIndex`; +CREATE SEARCH INDEX `%PREFIX%SearchIndex` + ON `%PREFIX%Singers`(`MyTokens`) OPTIONS (sort_order_sharding=TRUE); diff --git a/v1/src/test/resources/ExportPipelineIT/spanner-pg-ddl.sql b/v1/src/test/resources/ExportPipelineIT/spanner-pg-ddl.sql index a02970568e..811fc91c82 100644 --- a/v1/src/test/resources/ExportPipelineIT/spanner-pg-ddl.sql +++ b/v1/src/test/resources/ExportPipelineIT/spanner-pg-ddl.sql @@ -1,40 +1,42 @@ -DROP TABLE IF EXISTS "%PREFIX%_EmptyTable"; -CREATE TABLE "%PREFIX%_EmptyTable" ( +DROP TABLE IF EXISTS "%PREFIX%EmptyTable"; +CREATE TABLE "%PREFIX%EmptyTable" ( id bigint NOT NULL, PRIMARY KEY(id) ); ALTER DATABASE db SET spanner.default_sequence_kind = 'bit_reversed_positive'; -DROP TABLE IF EXISTS "%PREFIX%_Identity"; -CREATE TABLE "%PREFIX%_Identity" ( +DROP TABLE IF EXISTS "%PREFIX%Identity"; +CREATE TABLE "%PREFIX%Identity" ( Id bigint NOT NULL GENERATED BY DEFAULT AS IDENTITY (BIT_REVERSED_POSITIVE) PRIMARY KEY, NonKeyIdCol1 bigint NOT NULL GENERATED BY DEFAULT AS IDENTITY, NonKeyIdCol2 bigint NOT NULL GENERATED BY DEFAULT AS IDENTITY (SKIP RANGE 1000 2000) ); -DROP SEQUENCE IF EXISTS 
"%PREFIX%_Sequence1"; -CREATE SEQUENCE "%PREFIX%_Sequence1" BIT_REVERSED_POSITIVE SKIP RANGE 99 999; +DROP SEQUENCE IF EXISTS "%PREFIX%Sequence1"; +CREATE SEQUENCE "%PREFIX%Sequence1" BIT_REVERSED_POSITIVE SKIP RANGE 99 999; -DROP SEQUENCE IF EXISTS "%PREFIX%_Sequence2"; -CREATE SEQUENCE "%PREFIX%_Sequence2"; +DROP SEQUENCE IF EXISTS "%PREFIX%Sequence2"; +CREATE SEQUENCE "%PREFIX%Sequence2"; -DROP TABLE IF EXISTS "%PREFIX%_Root"; -CREATE TABLE "%PREFIX%_Root" ( +DROP TABLE IF EXISTS "%PREFIX%Root"; +CREATE TABLE "%PREFIX%Root" ( "Id" bigint, PRIMARY KEY("Id") ); -DROP TABLE IF EXISTS "%PREFIX%_Singers"; -CREATE TABLE "%PREFIX%_Singers" ( +DROP TABLE IF EXISTS "%PREFIX%Singers"; +CREATE TABLE "%PREFIX%Singers" ( "Id" bigint, "FirstName" character varying(256), "LastName" character varying(256), "Rating" real, "NameTokens" spanner.tokenlist generated always as (spanner.tokenize_fulltext("FirstName")) stored hidden, - PRIMARY KEY("Id")) INTERLEAVE IN "%PREFIX%_Root"; + PRIMARY KEY("Id")) INTERLEAVE IN "%PREFIX%Root"; -DROP SEARCH INDEX IF EXISTS "%PREFIX%_SearchIndex"; -CREATE SEARCH INDEX "%PREFIX%_SearchIndex" - ON "%PREFIX%_Singers"("NameTokens") ORDER BY "Id" WHERE "Id" IS NOT NULL +CREATE SCHEMA "%PREFIX%UdfSchema"; + +DROP SEARCH INDEX IF EXISTS "%PREFIX%SearchIndex"; +CREATE SEARCH INDEX "%PREFIX%SearchIndex" + ON "%PREFIX%Singers"("NameTokens") ORDER BY "Id" WHERE "Id" IS NOT NULL WITH (sort_order_sharding=TRUE); diff --git a/v1/src/test/resources/ImportPipelineIT/googlesql/UdfSchema-manifest.json b/v1/src/test/resources/ImportPipelineIT/googlesql/UdfSchema-manifest.json new file mode 100644 index 0000000000..75dbc2c1b4 --- /dev/null +++ b/v1/src/test/resources/ImportPipelineIT/googlesql/UdfSchema-manifest.json @@ -0,0 +1,6 @@ +{ + "files": [{ + "name": "UdfSchema.avro-00000-of-00001", + "md5": "ru78pNHqG1/4/+Aj3c5bCA\u003d\u003d" + }] +} diff --git a/v1/src/test/resources/ImportPipelineIT/googlesql/UdfSchema.Remote-manifest.json 
b/v1/src/test/resources/ImportPipelineIT/googlesql/UdfSchema.Remote-manifest.json new file mode 100644 index 0000000000..bf8ab801f9 --- /dev/null +++ b/v1/src/test/resources/ImportPipelineIT/googlesql/UdfSchema.Remote-manifest.json @@ -0,0 +1,6 @@ +{ + "files": [{ + "name": "UdfSchema.Remote.avro-00000-of-00001", + "md5": "dmM+e7CpXKLF0w2c5uiQ2w\u003d\u003d" + }] +} diff --git a/v1/src/test/resources/ImportPipelineIT/googlesql/UdfSchema.Remote.avro b/v1/src/test/resources/ImportPipelineIT/googlesql/UdfSchema.Remote.avro new file mode 100644 index 0000000000000000000000000000000000000000..c5f25bf300358ac6f7f52880be62398608aac43a GIT binary patch literal 592 zcmah`O-{l<6joQBz@3SiH53{R3ldgZ6QWXWk&T!#OkYb$JJZa(Dve?5Av}R6aN!|b zdk_!c3NAGM8cs3@w(5B8&>oj8s`1u#uH!9+Dr% zwl>b_EErQ{lZ3~C6!)rQH`heH&N=0AL1R;j=UljbN29S2aSA>PSpml;)UPVr5MNTM zTSAbFBSy2)LATd$9sR*E`;5xpI6kmKRz-Dt1NY4FHZ1zpAFsTt^-hJ%C=Ng>-Gp|w it){hk+6hDnPfSr7CiAV2x2xy<^pUmZ!`JBW<>?dVu+IGe literal 0 HcmV?d00001 diff --git a/v1/src/test/resources/ImportPipelineIT/googlesql/UdfSchema.avro b/v1/src/test/resources/ImportPipelineIT/googlesql/UdfSchema.avro new file mode 100644 index 0000000000000000000000000000000000000000..01cdb837c6699b8c97c718ff910d8d68ad0065da GIT binary patch literal 236 zcmeZI%3@>^ODrqO*DFrWNX<>0z+A0VQdy9yWTjM;nw(#hqNJmgmzWFUho+^ODrqO*DFrWNX<>0z+A0VQdy9yWTjM;nw(#hqNJmgmzWFUho+ Date: Tue, 26 May 2026 13:37:00 -0700 Subject: [PATCH 02/19] Update PG UDF support and fix tests --- .../it/gcp/dataflow/DirectRunnerClient.java | 2 +- it/pom.xml | 33 - pom.xml | 118 ---- v1/pom.xml | 69 -- .../DLPTextToBigQueryStreamingIT.java | 256 -------- .../templates/PubSubToBigQueryIT.java | 597 ------------------ .../templates/PubSubTopicToBigQueryIT.java | 113 ---- .../templates/PubsubToBigQueryLT.java | 252 -------- .../templates/TextToBigQueryStreamLT.java | 274 -------- 9 files changed, 1 insertion(+), 1713 deletions(-) delete mode 100644 
v1/src/test/java/com/google/cloud/teleport/templates/DLPTextToBigQueryStreamingIT.java delete mode 100644 v1/src/test/java/com/google/cloud/teleport/templates/PubSubToBigQueryIT.java delete mode 100644 v1/src/test/java/com/google/cloud/teleport/templates/PubSubTopicToBigQueryIT.java delete mode 100644 v1/src/test/java/com/google/cloud/teleport/templates/PubsubToBigQueryLT.java delete mode 100644 v1/src/test/java/com/google/cloud/teleport/templates/TextToBigQueryStreamLT.java diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java index efce55e16a..8041dc7a80 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java @@ -265,7 +265,7 @@ public void cancel() { currentJob.setCurrentState(JobState.CANCELLED.toString()); try { - this.stop(); + this.interrupt(); } catch (Exception e) { LOG.warn("Error cancelling job", e); } diff --git a/it/pom.xml b/it/pom.xml index 4ab068a5a1..5dd27ef146 100644 --- a/it/pom.xml +++ b/it/pom.xml @@ -137,39 +137,6 @@ - - com.diffplug.spotless - spotless-maven-plugin - ${spotless-maven-plugin.version} - - - - - - *.md - .gitignore - - - - - - - - - 1.17.0 - - - - - - - - - check - - - - diff --git a/pom.xml b/pom.xml index 254447f187..f8fd313d9b 100644 --- a/pom.xml +++ b/pom.xml @@ -410,124 +410,6 @@ - - com.diffplug.spotless - spotless-maven-plugin - ${spotless-maven-plugin.version} - - - - - - *.md - .gitignore - - - - - - - - - src/main/java/org/apache/beam/** - src/test/java/org/apache/beam/** - - - - 1.17.0 - - - - ${licenseHeaderFile} - - - - - - - - check - - - - - - - org.apache.maven.plugins - maven-checkstyle-plugin - ${maven-checkstyle-plugin.version} - - - com.puppycrawl.tools - checkstyle - ${checkstyle.version} - - - - 
checkstyle/checkstyle.xml - checkstyle/suppressions.xml - true - true - false - true - - - - - test-compile - - check - - - - - - org.jacoco - jacoco-maven-plugin - ${jacoco.version} - - - - **/*AutoValue_* - **/*Exception.* - **/constants/** - **/CustomTransformationImplFetcher.* - **/JarFileReader.* - **/CustomTransformationWithShardFor*IT.* - **/CustomTransformationWithCassandraForIT.* - **/models/* - **/exceptions/* - - - - - default-prepare-agent - - prepare-agent - - - - default-report - prepare-package - - report - - - - report-aggregate - verify - - report-aggregate - - - - org.codehaus.mojo exec-maven-plugin diff --git a/v1/pom.xml b/v1/pom.xml index 27d505b078..e2abd5ae21 100644 --- a/v1/pom.xml +++ b/v1/pom.xml @@ -885,75 +885,6 @@ - - org.jacoco - jacoco-maven-plugin - ${jacoco.version} - - - - **/*InformationSchemaScanner.* - - **/*AutoValue_* - - **/*com/google/cloud/teleport/spanner/spannerio/changestreams/**/* - - - - PACKAGE - - com.google.cloud.teleport.spanner - com.google.cloud.teleport.spanner.** - - - - com.google.cloud.teleport.spanner.ddl.InformationSchemaScanner - - com.google.cloud.teleport.spanner.proto - - - - - - - default-prepare-agent - - prepare-agent - - - - default-report - prepare-package - - report - - - - default-check - - check - - - - - - - LINE - COVEREDRATIO - 0.5 - - - BRANCH - COVEREDRATIO - 0.5 - - - - - - - - diff --git a/v1/src/test/java/com/google/cloud/teleport/templates/DLPTextToBigQueryStreamingIT.java b/v1/src/test/java/com/google/cloud/teleport/templates/DLPTextToBigQueryStreamingIT.java deleted file mode 100644 index 3da99c8ba9..0000000000 --- a/v1/src/test/java/com/google/cloud/teleport/templates/DLPTextToBigQueryStreamingIT.java +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright (C) 2024 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. 
You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package com.google.cloud.teleport.templates; - -import static com.google.common.truth.Truth.assertThat; -import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatPipeline; -import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; - -import com.google.cloud.bigquery.TableId; -import com.google.cloud.teleport.metadata.SkipRunnerV2Test; -import com.google.cloud.teleport.metadata.TemplateIntegrationTest; -import com.google.privacy.dlp.v2.CharacterMaskConfig; -import com.google.privacy.dlp.v2.CryptoHashConfig; -import com.google.privacy.dlp.v2.CryptoKey; -import com.google.privacy.dlp.v2.DeidentifyConfig; -import com.google.privacy.dlp.v2.DeidentifyTemplate; -import com.google.privacy.dlp.v2.FieldId; -import com.google.privacy.dlp.v2.FieldTransformation; -import com.google.privacy.dlp.v2.InfoType; -import com.google.privacy.dlp.v2.InfoTypeTransformations; -import com.google.privacy.dlp.v2.InspectConfig; -import com.google.privacy.dlp.v2.InspectTemplate; -import com.google.privacy.dlp.v2.PrimitiveTransformation; -import com.google.privacy.dlp.v2.RecordTransformations; -import com.google.privacy.dlp.v2.TransientCryptoKey; -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.function.Consumer; -import java.util.function.Function; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.apache.beam.it.common.PipelineLauncher; -import org.apache.beam.it.common.PipelineLauncher.LaunchConfig; -import org.apache.beam.it.common.PipelineOperator; 
-import org.apache.beam.it.common.utils.ResourceManagerUtils; -import org.apache.beam.it.gcp.TemplateTestBase; -import org.apache.beam.it.gcp.bigquery.BigQueryResourceManager; -import org.apache.beam.it.gcp.bigquery.conditions.BigQueryRowsCheck; -import org.apache.beam.it.gcp.bigquery.matchers.BigQueryAsserts; -import org.apache.beam.it.gcp.dlp.DlpResourceManager; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Integration test for {@link DLPTextToBigQueryStreaming} (Stream_DLP_GCS_Text_to_BigQuery). */ -@Category({TemplateIntegrationTest.class, SkipRunnerV2Test.class}) -@TemplateIntegrationTest(DLPTextToBigQueryStreaming.class) -@RunWith(JUnit4.class) -public class DLPTextToBigQueryStreamingIT extends TemplateTestBase { - - private static final Logger LOG = LoggerFactory.getLogger(DLPTextToBigQueryStreamingIT.class); - - private BigQueryResourceManager bigQueryClient; - private DlpResourceManager dlpResourceManager; - - @Before - public void setup() throws IOException { - bigQueryClient = BigQueryResourceManager.builder(testName, PROJECT, credentials).build(); - dlpResourceManager = DlpResourceManager.builder(PROJECT, credentialsProvider).build(); - - gcsClient.createArtifact( - "input.csv", - "Card Type Full Name,Issuing Bank,Card Num,Card Holder's Name,CVV/CVV2,Issue Date,Expiry Date,Billing Date,Card PIN,Credit Limit,Comments\n" - + "Visa,Chase,4111111111111111,Frank Q Ortiz,360,09/2019,09/2024,7,1247,103700,Please change my number to 604-406-9050. 
Thanks\n"); - } - - @After - public void tearDown() { - ResourceManagerUtils.cleanResources(bigQueryClient, dlpResourceManager); - } - - @Test - public void testDLPTextToBigQueryInspect() throws IOException { - // Create a template to hash card number + CVV - DeidentifyTemplate deidentifyTemplate = createInfoTypeTemplate(); - - // Create inspect template - InspectTemplate inspectTemplate = createInspectTemplate(); - - testDLPTextToBigQueryBase( - params -> params.addParameter("inspectTemplateName", inspectTemplate.getName()), - (record) -> { - assertThat((String) record.get("Card_Num")).isEqualTo("4111111111111111"); - assertThat((String) record.get("CVVCVV2")).isEqualTo("360"); - assertThat((String) record.get("Comments")) - .isNotEqualTo("Please change my number to 604-406-9050. Thanks"); - }, - deidentifyTemplate); - } - - @Test - public void testDLPTextToBigQuery() throws IOException { - // Create a template to hash card number + CVV - DeidentifyTemplate deidentifyTemplate = createRecordTypeTemplate(); - - testDLPTextToBigQueryBase( - Function.identity(), - (record) -> { - assertThat((String) record.get("Card_Num")).isNotEqualTo("4111111111111111"); - assertThat((String) record.get("CVVCVV2")).isNotEqualTo("360"); - assertThat((String) record.get("Comments")) - .isEqualTo("Please change my number to 604-406-9050. 
Thanks"); - }, - deidentifyTemplate); - } - - public void testDLPTextToBigQueryBase( - Function paramsAdder, - Consumer> recordAsserts, - DeidentifyTemplate deidentifyTemplate) - throws IOException { - // Arrange - String dataset = bigQueryClient.createDataset(REGION); - - // Act - PipelineLauncher.LaunchInfo info = - launchTemplate( - paramsAdder.apply( - LaunchConfig.builder(testName, specPath) - .addParameter("inputFilePattern", getGcsPath("*.csv")) - .addParameter("datasetName", dataset) - .addParameter("batchSize", "1000") - .addParameter("dlpProjectId", PROJECT) - .addParameter("deidentifyTemplateName", deidentifyTemplate.getName()))); - assertThatPipeline(info).isRunning(); - - TableId targetTableId = TableId.of(PROJECT, dataset, "input"); - PipelineOperator.Result result = - pipelineOperator() - // drain doesn't seem to work with the TextIO GCS files watching that the template uses - .waitForConditionAndCancel( - createConfig(info), - BigQueryRowsCheck.builder(bigQueryClient, targetTableId).setMinRows(1).build()); - - // Assert - assertThatResult(result).meetsConditions(); - List> records = - BigQueryAsserts.tableResultToRecords(bigQueryClient.readTable(targetTableId)); - assertThat(records).hasSize(1); - - recordAsserts.accept(records.get(0)); - } - - private InspectTemplate createInspectTemplate() throws IOException { - InspectTemplate inspectTemplate = - dlpResourceManager.createInspectTemplate( - InspectTemplate.newBuilder() - .setName(String.format("projects/%s/inspectTemplates/%s", PROJECT, testId)) - .setDescription("Template for test " + testName) - .setInspectConfig( - InspectConfig.newBuilder() - .addAllInfoTypes( - Stream.of("PHONE_NUMBER") - .map(it -> InfoType.newBuilder().setName(it).build()) - .collect(Collectors.toList())) - .build()) - .build()); - LOG.info("Created inspect template: {}", inspectTemplate.getName()); - - return inspectTemplate; - } - - private DeidentifyTemplate createInfoTypeTemplate() throws IOException { - 
DeidentifyTemplate deidentifyTemplate = - dlpResourceManager.createDeidentifyTemplate( - DeidentifyTemplate.newBuilder() - .setName(String.format("projects/%s/deidentifyTemplates/%s", PROJECT, testId)) - .setDescription("Template for test " + testName) - .setDeidentifyConfig( - DeidentifyConfig.newBuilder() - .setInfoTypeTransformations( - InfoTypeTransformations.newBuilder() - .addTransformations( - InfoTypeTransformations.InfoTypeTransformation.newBuilder() - .setPrimitiveTransformation( - PrimitiveTransformation.newBuilder() - .setCharacterMaskConfig( - CharacterMaskConfig.newBuilder() - .setMaskingCharacter("X") - .setNumberToMask(5) - .build()) - .setCryptoHashConfig( - CryptoHashConfig.newBuilder() - .setCryptoKey( - CryptoKey.newBuilder() - .setTransient( - TransientCryptoKey.newBuilder() - .setName(testId) - .build()) - .build()) - .build()) - .build()) - .build()) - .build()) - .build()) - .build()); - LOG.info("Created deidentify template: {}", deidentifyTemplate.getName()); - - return deidentifyTemplate; - } - - private DeidentifyTemplate createRecordTypeTemplate() throws IOException { - DeidentifyTemplate deidentifyTemplate = - dlpResourceManager.createDeidentifyTemplate( - DeidentifyTemplate.newBuilder() - .setName(String.format("projects/%s/deidentifyTemplates/%s", PROJECT, testId)) - .setDescription("Template for test " + testName) - .setDeidentifyConfig( - DeidentifyConfig.newBuilder() - .setRecordTransformations( - RecordTransformations.newBuilder() - .addFieldTransformations( - FieldTransformation.newBuilder() - .addFields(FieldId.newBuilder().setName("Card Num").build()) - .addFields(FieldId.newBuilder().setName("CVV/CVV2").build()) - .setPrimitiveTransformation( - PrimitiveTransformation.newBuilder() - .setCryptoHashConfig( - CryptoHashConfig.newBuilder() - .setCryptoKey( - CryptoKey.newBuilder() - .setTransient( - TransientCryptoKey.newBuilder() - .setName(testId) - .build()) - .build()) - .build()) - .build()) - .build()) - .build()) - 
.build()) - .build()); - LOG.info("Created deidentify template: {}", deidentifyTemplate.getName()); - - return deidentifyTemplate; - } -} diff --git a/v1/src/test/java/com/google/cloud/teleport/templates/PubSubToBigQueryIT.java b/v1/src/test/java/com/google/cloud/teleport/templates/PubSubToBigQueryIT.java deleted file mode 100644 index 186fa63892..0000000000 --- a/v1/src/test/java/com/google/cloud/teleport/templates/PubSubToBigQueryIT.java +++ /dev/null @@ -1,597 +0,0 @@ -/* - * Copyright (C) 2022 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ -package com.google.cloud.teleport.templates; - -import static com.google.common.truth.Truth.assertThat; -import static org.apache.beam.it.gcp.bigquery.matchers.BigQueryAsserts.assertThatBigQueryRecords; -import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatPipeline; -import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; - -import com.google.cloud.bigquery.Field; -import com.google.cloud.bigquery.LegacySQLTypeName; -import com.google.cloud.bigquery.Schema; -import com.google.cloud.bigquery.StandardSQLTypeName; -import com.google.cloud.bigquery.TableId; -import com.google.cloud.bigquery.TableResult; -import com.google.cloud.teleport.metadata.SkipRunnerV2Test; -import com.google.cloud.teleport.metadata.TemplateIntegrationTest; -import com.google.common.collect.ImmutableMap; -import com.google.protobuf.ByteString; -import com.google.pubsub.v1.SubscriptionName; -import com.google.pubsub.v1.TopicName; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.TimeUnit; -import java.util.function.Function; -import java.util.function.Supplier; -import org.apache.beam.it.common.PipelineLauncher.LaunchConfig; -import org.apache.beam.it.common.PipelineLauncher.LaunchInfo; -import org.apache.beam.it.common.PipelineOperator.Result; -import org.apache.beam.it.common.utils.ResourceManagerUtils; -import org.apache.beam.it.gcp.TemplateTestBase; -import org.apache.beam.it.gcp.bigquery.BigQueryResourceManager; -import org.apache.beam.it.gcp.bigquery.conditions.BigQueryRowsCheck; -import org.apache.beam.it.gcp.pubsub.PubsubResourceManager; -import org.apache.commons.lang3.RandomStringUtils; -import org.json.JSONObject; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import 
org.junit.runners.JUnit4; - -/** Integration test for {@link PubSubToBigQuery} classic template. */ -@Category({TemplateIntegrationTest.class, SkipRunnerV2Test.class}) -@RunWith(JUnit4.class) -public final class PubSubToBigQueryIT extends TemplateTestBase { - - private static final int MESSAGES_COUNT = 10; - private static final int BAD_MESSAGES_COUNT = 3; - - private static final Schema BIG_QUERY_DLQ_SCHEMA = getDlqSchema(); - - private PubsubResourceManager pubsubResourceManager; - private BigQueryResourceManager bigQueryResourceManager; - - @Before - public void setUp() throws IOException { - pubsubResourceManager = - PubsubResourceManager.builder(testName, PROJECT, credentialsProvider).build(); - bigQueryResourceManager = - BigQueryResourceManager.builder(testName, PROJECT, credentials).build(); - - gcsClient.createArtifact( - "udf.js", - "function uppercaseName(value) {\n" - + " const data = JSON.parse(value);\n" - + " data.name = data.name.toUpperCase();\n" - + " return JSON.stringify(data);\n" - + "}"); - } - - @After - public void cleanUp() { - ResourceManagerUtils.cleanResources(pubsubResourceManager, bigQueryResourceManager); - } - - @Test - @TemplateIntegrationTest(value = PubSubToBigQuery.class, template = "PubSub_to_BigQuery") - public void testTopicToBigQueryClassic() throws IOException { - testTopicToBigQueryClassicBase(Function.identity()); - } - - @Test - @TemplateIntegrationTest(value = PubSubToBigQuery.class, template = "PubSub_to_BigQuery") - public void testTopicToBigQueryClassicStreamingEngine() throws IOException { - testTopicToBigQueryClassicBase(this::enableStreamingEngine); - } - - private void testTopicToBigQueryClassicBase( - Function paramsAdder) throws IOException { - // Arrange - List bqSchemaFields = - Arrays.asList( - Field.of("id", StandardSQLTypeName.INT64), - Field.of("job", StandardSQLTypeName.STRING), - Field.of("name", StandardSQLTypeName.STRING)); - Schema bqSchema = Schema.of(bqSchemaFields); - - TopicName topic = 
pubsubResourceManager.createTopic("input"); - bigQueryResourceManager.createDataset(REGION); - TableId table = bigQueryResourceManager.createTable(testName, bqSchema); - - TableId dlqTable = - bigQueryResourceManager.createTable( - table.getTable() + PubSubToBigQuery.DEFAULT_DEADLETTER_TABLE_SUFFIX, - BIG_QUERY_DLQ_SCHEMA); - - // Act - LaunchInfo info = - launchTemplate( - paramsAdder.apply( - LaunchConfig.builder(testName, specPath) - .addParameter("inputTopic", topic.toString()) - .addParameter("outputTableSpec", toTableSpecLegacy(table)) - .addParameter("javascriptTextTransformGcsPath", getGcsPath("udf.js")) - .addParameter("javascriptTextTransformFunctionName", "uppercaseName") - .addParameter("outputDeadletterTable", toTableSpecLegacy(dlqTable)))); - assertThatPipeline(info).isRunning(); - - List> expectedMessages = new ArrayList<>(); - List goodData = new ArrayList<>(); - for (int i = 1; i <= MESSAGES_COUNT; i++) { - Map message = - new HashMap<>( - Map.of("id", i, "job", testName, "name", RandomStringUtils.randomAlphabetic(1, 20))); - ByteString messageData = ByteString.copyFromUtf8(new JSONObject(message).toString()); - goodData.add(messageData); - pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); - message.put("name", message.get("name").toString().toUpperCase()); - expectedMessages.add(message); - } - - List badData = new ArrayList<>(); - for (int i = 1; i <= BAD_MESSAGES_COUNT; i++) { - ByteString messageData = ByteString.copyFromUtf8("bad id " + i); - pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); - badData.add(messageData); - } - - // For tests that run against topics, sending repeatedly will make it work for - // cases in which the on-demand subscription is created after sending messages. 
- Supplier pubSubMessageSender = - () -> { - goodData.forEach( - goodMessage -> pubsubResourceManager.publish(topic, ImmutableMap.of(), goodMessage)); - badData.forEach( - badMessage -> pubsubResourceManager.publish(topic, ImmutableMap.of(), badMessage)); - return true; - }; - - Result result = - pipelineOperator() - .waitForConditionAndCancel( - createConfig(info), - pubSubMessageSender, - BigQueryRowsCheck.builder(bigQueryResourceManager, table) - .setMinRows(MESSAGES_COUNT) - .build(), - BigQueryRowsCheck.builder(bigQueryResourceManager, dlqTable) - .setMinRows(BAD_MESSAGES_COUNT) - .build()); - - // Assert - assertThatResult(result).meetsConditions(); - TableResult records = bigQueryResourceManager.readTable(table); - - // Make sure record can be read and UDF changed name to uppercase - assertThatBigQueryRecords(records).hasRecordsUnordered(expectedMessages); - - TableResult dlqRecords = bigQueryResourceManager.readTable(dlqTable); - assertThat(dlqRecords.getValues().iterator().next().toString()) - .contains("Expected json literal but found"); - assertThat(dlqRecords.getTotalRows()).isAtLeast(BAD_MESSAGES_COUNT); - } - - @Test - @TemplateIntegrationTest( - value = PubSubToBigQuery.class, - template = "PubSub_Subscription_to_BigQuery") - public void testSubscriptionToBigQueryClassic() throws IOException { - // Arrange - List bqSchemaFields = - Arrays.asList( - Field.of("id", StandardSQLTypeName.INT64), - Field.of("job", StandardSQLTypeName.STRING), - Field.of("name", StandardSQLTypeName.STRING)); - Schema bqSchema = Schema.of(bqSchemaFields); - - TopicName topic = pubsubResourceManager.createTopic("input"); - SubscriptionName subscription = pubsubResourceManager.createSubscription(topic, "input-sub-1"); - bigQueryResourceManager.createDataset(REGION); - TableId table = bigQueryResourceManager.createTable(testName, bqSchema); - - TableId dlqTable = - bigQueryResourceManager.createTable( - table.getTable() + PubSubToBigQuery.DEFAULT_DEADLETTER_TABLE_SUFFIX, - 
BIG_QUERY_DLQ_SCHEMA); - - // Act - LaunchInfo info = - launchTemplate( - LaunchConfig.builder(testName, specPath) - .addParameter("inputSubscription", subscription.toString()) - .addParameter("outputTableSpec", toTableSpecLegacy(table)) - .addParameter("javascriptTextTransformGcsPath", getGcsPath("udf.js")) - .addParameter("javascriptTextTransformFunctionName", "uppercaseName") - .addParameter("outputDeadletterTable", toTableSpecLegacy(dlqTable))); - assertThatPipeline(info).isRunning(); - - List> expectedMessages = new ArrayList<>(); - for (int i = 1; i <= MESSAGES_COUNT; i++) { - Map message = - new HashMap<>( - Map.of("id", i, "job", testName, "name", RandomStringUtils.randomAlphabetic(1, 20))); - ByteString messageData = ByteString.copyFromUtf8(new JSONObject(message).toString()); - pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); - message.put("name", message.get("name").toString().toUpperCase()); - expectedMessages.add(message); - } - - for (int i = 1; i <= BAD_MESSAGES_COUNT; i++) { - ByteString messageData = ByteString.copyFromUtf8("bad id " + i); - pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); - } - - Result result = - pipelineOperator() - .waitForConditionAndCancel( - createConfig(info), - BigQueryRowsCheck.builder(bigQueryResourceManager, table) - .setMinRows(MESSAGES_COUNT) - .build(), - BigQueryRowsCheck.builder(bigQueryResourceManager, dlqTable) - .setMinRows(BAD_MESSAGES_COUNT) - .build()); - - // Assert - assertThatResult(result).meetsConditions(); - TableResult records = bigQueryResourceManager.readTable(table); - - // Make sure record can be read and UDF changed name to uppercase - assertThatBigQueryRecords(records).hasRecordsUnordered(expectedMessages); - - TableResult dlqRecords = bigQueryResourceManager.readTable(dlqTable); - assertThat(dlqRecords.getValues().iterator().next().toString()) - .contains("Expected json literal but found"); - 
assertThat(dlqRecords.getTotalRows()).isAtLeast(BAD_MESSAGES_COUNT); - } - - @Test - @TemplateIntegrationTest(value = PubSubToBigQuery.class, template = "PubSub_to_BigQuery") - public void testTopicToBigQueryClassicWithReload() throws IOException, InterruptedException { - // Arrange - List bqSchemaFields = - Arrays.asList( - Field.of("id", StandardSQLTypeName.INT64), - Field.of("job", StandardSQLTypeName.STRING), - Field.of("name", StandardSQLTypeName.STRING)); - Schema bqSchema = Schema.of(bqSchemaFields); - - TopicName topic = pubsubResourceManager.createTopic("input"); - bigQueryResourceManager.createDataset(REGION); - TableId table = bigQueryResourceManager.createTable(testName, bqSchema); - - TableId dlqTable = - bigQueryResourceManager.createTable( - table.getTable() + PubSubToBigQuery.DEFAULT_DEADLETTER_TABLE_SUFFIX, - BIG_QUERY_DLQ_SCHEMA); - - // Act - LaunchInfo info = - launchTemplate( - LaunchConfig.builder(testName, specPath) - .addParameter("inputTopic", topic.toString()) - .addParameter("outputTableSpec", toTableSpecLegacy(table)) - .addParameter("javascriptTextTransformGcsPath", getGcsPath("udf.js")) - .addParameter("javascriptTextTransformFunctionName", "uppercaseName") - .addParameter("javascriptTextTransformReloadIntervalMinutes", "1") - .addParameter("outputDeadletterTable", toTableSpecLegacy(dlqTable))); - assertThatPipeline(info).isRunning(); - - List> expectedUpperMessages = new ArrayList<>(); - List goodUpperData = new ArrayList<>(); - for (int i = 1; i <= MESSAGES_COUNT; i++) { - Map message = - new HashMap<>( - Map.of( - "id", - i, - "job", - testName, - "name", - "UPPER: " + RandomStringUtils.randomAlphabetic(1, 20))); - ByteString messageData = ByteString.copyFromUtf8(new JSONObject(message).toString()); - goodUpperData.add(messageData); - pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); - message.put("name", message.get("name").toString().toUpperCase()); - expectedUpperMessages.add(message); - } - - List badData = 
new ArrayList<>(); - for (int i = 1; i <= BAD_MESSAGES_COUNT; i++) { - ByteString messageData = ByteString.copyFromUtf8("bad id " + i); - pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); - badData.add(messageData); - } - - // For tests that run against topics, sending repeatedly will make it work for - // cases in which the on-demand subscription is created after sending messages. - Supplier pubSubMessageSender = - () -> { - goodUpperData.forEach( - goodMessage -> pubsubResourceManager.publish(topic, ImmutableMap.of(), goodMessage)); - badData.forEach( - badMessage -> pubsubResourceManager.publish(topic, ImmutableMap.of(), badMessage)); - return true; - }; - - BigQueryRowsCheck bigQueryRowsCheck = - BigQueryRowsCheck.builder(bigQueryResourceManager, table) - .setMinRows(MESSAGES_COUNT) - .build(); - BigQueryRowsCheck bigQueryDlqRowsCheck = - BigQueryRowsCheck.builder(bigQueryResourceManager, dlqTable) - .setMinRows(BAD_MESSAGES_COUNT) - .build(); - Result result = - pipelineOperator() - .waitForCondition( - createConfig(info), pubSubMessageSender, bigQueryRowsCheck, bigQueryDlqRowsCheck); - // Assert - assertThatResult(result).meetsConditions(); - TableResult records = bigQueryResourceManager.readTable(table); - - // Make sure record can be read and UDF changed name to uppercase - assertThatBigQueryRecords(records).hasRecordsUnordered(expectedUpperMessages); - - TableResult dlqRecords = bigQueryResourceManager.readTable(dlqTable); - assertThat(dlqRecords.getValues().iterator().next().toString()) - .contains("Expected json literal but found"); - assertThat(dlqRecords.getTotalRows()).isAtLeast(BAD_MESSAGES_COUNT); - - // modify UDF to test reloading - gcsClient.createArtifact( - "udf.js", - "function uppercaseName(value) {\n" - + " const data = JSON.parse(value);\n" - + " data.name = data.name.toLowerCase();\n" - + " return JSON.stringify(data);\n" - + "}"); - - // wait to ensure the reload will take effect. 
- TimeUnit.MINUTES.sleep(2); - List> expectedLowerMessages = new ArrayList<>(); - List goodLowerData = new ArrayList<>(); - for (int i = MESSAGES_COUNT + 1; i <= MESSAGES_COUNT * 2; i++) { - Map message = - new HashMap<>( - Map.of( - "id", - i, - "job", - testName, - "name", - "lower: " + RandomStringUtils.randomAlphabetic(1, 20))); - ByteString messageData = ByteString.copyFromUtf8(new JSONObject(message).toString()); - goodLowerData.add(messageData); - pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); - message.put("name", message.get("name").toString().toLowerCase()); - expectedLowerMessages.add(message); - } - - // For tests that run against topics, sending repeatedly will make it work for - // cases in which the on-demand subscription is created after sending messages. - Supplier pubSubReloadedMessageSender = - () -> { - goodLowerData.forEach( - goodMessage -> pubsubResourceManager.publish(topic, ImmutableMap.of(), goodMessage)); - return true; - }; - - Result reloadedResult = - pipelineOperator() - .waitForConditionAndCancel( - createConfig(info), - pubSubReloadedMessageSender, - BigQueryRowsCheck.builder(bigQueryResourceManager, table) - .setMinRows(bigQueryRowsCheck.getRowCount().intValue() + (MESSAGES_COUNT * 2)) - .build()); - // Assert - assertThatResult(reloadedResult).meetsConditions(); - TableResult reloadedRecords = bigQueryResourceManager.readTable(table); - - // Make sure record can be read and UDF changed name to uppercase and lowercase. 
- assertThatBigQueryRecords(reloadedRecords).hasRecordsUnordered(expectedUpperMessages); - assertThatBigQueryRecords(reloadedRecords).hasRecordsUnordered(expectedLowerMessages); - } - - @Test - @TemplateIntegrationTest(value = PubSubToBigQuery.class, template = "PubSub_to_BigQuery") - public void testTopicToBigQueryClassicWithReloadIntervalZero() - throws IOException, InterruptedException { - // Arrange - List bqSchemaFields = - Arrays.asList( - Field.of("id", StandardSQLTypeName.INT64), - Field.of("job", StandardSQLTypeName.STRING), - Field.of("name", StandardSQLTypeName.STRING)); - Schema bqSchema = Schema.of(bqSchemaFields); - - TopicName topic = pubsubResourceManager.createTopic("input"); - bigQueryResourceManager.createDataset(REGION); - TableId table = bigQueryResourceManager.createTable(testName, bqSchema); - - TableId dlqTable = - bigQueryResourceManager.createTable( - table.getTable() + PubSubToBigQuery.DEFAULT_DEADLETTER_TABLE_SUFFIX, - BIG_QUERY_DLQ_SCHEMA); - - // Act - LaunchInfo info = - launchTemplate( - LaunchConfig.builder(testName, specPath) - .addParameter("inputTopic", topic.toString()) - .addParameter("outputTableSpec", toTableSpecLegacy(table)) - .addParameter("javascriptTextTransformGcsPath", getGcsPath("udf.js")) - .addParameter("javascriptTextTransformFunctionName", "uppercaseName") - .addParameter("javascriptTextTransformReloadIntervalMinutes", "0") - .addParameter("outputDeadletterTable", toTableSpecLegacy(dlqTable))); - assertThatPipeline(info).isRunning(); - - List> expectedUpperMessages = new ArrayList<>(); - List goodUpperData = new ArrayList<>(); - for (int i = 1; i <= MESSAGES_COUNT; i++) { - Map message = - new HashMap<>( - Map.of( - "id", - i, - "job", - testName, - "name", - "UPPER: " + RandomStringUtils.randomAlphabetic(1, 20))); - ByteString messageData = ByteString.copyFromUtf8(new JSONObject(message).toString()); - goodUpperData.add(messageData); - pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); - 
message.put("name", message.get("name").toString().toUpperCase()); - expectedUpperMessages.add(message); - } - - List badData = new ArrayList<>(); - for (int i = 1; i <= BAD_MESSAGES_COUNT; i++) { - ByteString messageData = ByteString.copyFromUtf8("bad id " + i); - pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); - badData.add(messageData); - } - - // For tests that run against topics, sending repeatedly will make it work for - // cases in which the on-demand subscription is created after sending messages. - Supplier pubSubMessageSender = - () -> { - goodUpperData.forEach( - goodMessage -> pubsubResourceManager.publish(topic, ImmutableMap.of(), goodMessage)); - badData.forEach( - badMessage -> pubsubResourceManager.publish(topic, ImmutableMap.of(), badMessage)); - return true; - }; - - BigQueryRowsCheck bigQueryRowsCheck = - BigQueryRowsCheck.builder(bigQueryResourceManager, table) - .setMinRows(MESSAGES_COUNT) - .build(); - BigQueryRowsCheck bigQueryDlqRowsCheck = - BigQueryRowsCheck.builder(bigQueryResourceManager, dlqTable) - .setMinRows(BAD_MESSAGES_COUNT) - .build(); - Result result = - pipelineOperator() - .waitForCondition( - createConfig(info), pubSubMessageSender, bigQueryRowsCheck, bigQueryDlqRowsCheck); - // Assert - assertThatResult(result).meetsConditions(); - TableResult records = bigQueryResourceManager.readTable(table); - - // Make sure record can be read and UDF changed name to uppercase - assertThatBigQueryRecords(records).hasRecordsUnordered(expectedUpperMessages); - - TableResult dlqRecords = bigQueryResourceManager.readTable(dlqTable); - assertThat(dlqRecords.getValues().iterator().next().toString()) - .contains("Expected json literal but found"); - assertThat(dlqRecords.getTotalRows()).isAtLeast(BAD_MESSAGES_COUNT); - - // modify UDF to test reloading - gcsClient.createArtifact( - "udf.js", - "function uppercaseName(value) {\n" - + " const data = JSON.parse(value);\n" - + " data.name = data.name.toLowerCase();\n" - + " 
return JSON.stringify(data);\n" - + "}"); - - // wait to ensure the reload will take effect. - TimeUnit.MINUTES.sleep(2); - List> expectedLowerMessages = new ArrayList<>(); - List goodLowerData = new ArrayList<>(); - for (int i = MESSAGES_COUNT + 1; i <= MESSAGES_COUNT * 2; i++) { - Map message = - new HashMap<>( - Map.of( - "id", - i, - "job", - testName, - "name", - "LOWER: " + RandomStringUtils.randomAlphabetic(1, 20))); - ByteString messageData = ByteString.copyFromUtf8(new JSONObject(message).toString()); - goodLowerData.add(messageData); - pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); - message.put("name", message.get("name").toString().toUpperCase()); - expectedLowerMessages.add(message); - } - - // For tests that run against topics, sending repeatedly will make it work for - // cases in which the on-demand subscription is created after sending messages. - Supplier pubSubReloadedMessageSender = - () -> { - goodLowerData.forEach( - goodMessage -> pubsubResourceManager.publish(topic, ImmutableMap.of(), goodMessage)); - return true; - }; - - Result reloadedResult = - pipelineOperator() - .waitForConditionAndCancel( - createConfig(info), - pubSubReloadedMessageSender, - BigQueryRowsCheck.builder(bigQueryResourceManager, table) - .setMinRows(bigQueryRowsCheck.getRowCount().intValue() + (MESSAGES_COUNT * 2)) - .build()); - // Assert - assertThatResult(reloadedResult).meetsConditions(); - TableResult reloadedRecords = bigQueryResourceManager.readTable(table); - - // Make sure record can be read and UDF changed name to uppercase and lowercase. 
- assertThatBigQueryRecords(reloadedRecords).hasRecordsUnordered(expectedUpperMessages); - assertThatBigQueryRecords(reloadedRecords).hasRecordsUnordered(expectedLowerMessages); - } - - private static Schema getDlqSchema() { - return Schema.of( - Arrays.asList( - Field.newBuilder("timestamp", StandardSQLTypeName.TIMESTAMP) - .setMode(Field.Mode.REQUIRED) - .build(), - Field.newBuilder("payloadString", StandardSQLTypeName.STRING) - .setMode(Field.Mode.REQUIRED) - .build(), - Field.newBuilder("payloadBytes", StandardSQLTypeName.BYTES) - .setMode(Field.Mode.REQUIRED) - .build(), - Field.newBuilder( - "attributes", - LegacySQLTypeName.RECORD, - Field.newBuilder("key", StandardSQLTypeName.STRING) - .setMode(Field.Mode.NULLABLE) - .build(), - Field.newBuilder("value", StandardSQLTypeName.STRING) - .setMode(Field.Mode.NULLABLE) - .build()) - .setMode(Field.Mode.REPEATED) - .build(), - Field.newBuilder("errorMessage", StandardSQLTypeName.STRING) - .setMode(Field.Mode.NULLABLE) - .build(), - Field.newBuilder("stacktrace", StandardSQLTypeName.STRING) - .setMode(Field.Mode.NULLABLE) - .build())); - } -} diff --git a/v1/src/test/java/com/google/cloud/teleport/templates/PubSubTopicToBigQueryIT.java b/v1/src/test/java/com/google/cloud/teleport/templates/PubSubTopicToBigQueryIT.java deleted file mode 100644 index 3169641134..0000000000 --- a/v1/src/test/java/com/google/cloud/teleport/templates/PubSubTopicToBigQueryIT.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (C) 2022 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package com.google.cloud.teleport.templates; - -import static org.apache.beam.it.gcp.bigquery.matchers.BigQueryAsserts.assertThatBigQueryRecords; -import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatPipeline; -import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; - -import com.google.cloud.bigquery.Field; -import com.google.cloud.bigquery.Schema; -import com.google.cloud.bigquery.StandardSQLTypeName; -import com.google.cloud.bigquery.TableId; -import com.google.cloud.teleport.metadata.SkipRunnerV2Test; -import com.google.cloud.teleport.metadata.TemplateIntegrationTest; -import com.google.common.collect.ImmutableMap; -import com.google.protobuf.ByteString; -import com.google.pubsub.v1.TopicName; -import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import org.apache.beam.it.common.PipelineLauncher.LaunchConfig; -import org.apache.beam.it.common.PipelineLauncher.LaunchInfo; -import org.apache.beam.it.common.PipelineOperator.Result; -import org.apache.beam.it.common.utils.ResourceManagerUtils; -import org.apache.beam.it.gcp.TemplateTestBase; -import org.apache.beam.it.gcp.bigquery.BigQueryResourceManager; -import org.apache.beam.it.gcp.bigquery.conditions.BigQueryRowsCheck; -import org.apache.beam.it.gcp.pubsub.PubsubResourceManager; -import org.json.JSONObject; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Integration test for {@link PubSubToBigQuery} PubSub Topic to Bigquery. 
*/ -@Category({TemplateIntegrationTest.class, SkipRunnerV2Test.class}) -@TemplateIntegrationTest(value = PubSubToBigQuery.class, template = "PubSub_to_BigQuery") -@RunWith(JUnit4.class) -public final class PubSubTopicToBigQueryIT extends TemplateTestBase { - private PubsubResourceManager pubsubResourceManager; - private BigQueryResourceManager bigQueryResourceManager; - - @Before - public void setUp() throws IOException { - pubsubResourceManager = - PubsubResourceManager.builder(testName, PROJECT, credentialsProvider).build(); - bigQueryResourceManager = - BigQueryResourceManager.builder(testName, PROJECT, credentials).build(); - } - - @After - public void cleanUp() { - ResourceManagerUtils.cleanResources(pubsubResourceManager, bigQueryResourceManager); - } - - @Test - public void testTopicToBigQuery() throws IOException { - // Arrange - Map message = Map.of("job", testId, "msg", "message"); - List bqSchemaFields = - Arrays.asList( - Field.of("job", StandardSQLTypeName.STRING), - Field.of("msg", StandardSQLTypeName.STRING)); - Schema bqSchema = Schema.of(bqSchemaFields); - - TopicName topic = pubsubResourceManager.createTopic("input"); - bigQueryResourceManager.createDataset(REGION); - TableId table = bigQueryResourceManager.createTable(testName, bqSchema); - - LaunchConfig.Builder options = - LaunchConfig.builder(testName, specPath) - .addParameter("inputTopic", topic.toString()) - .addParameter("outputTableSpec", toTableSpecLegacy(table)); - - // Act - LaunchInfo info = launchTemplate(options); - assertThatPipeline(info).isRunning(); - - Result result = - pipelineOperator() - .waitForConditionAndFinish( - createConfig(info), - () -> { - ByteString messageData = - ByteString.copyFromUtf8(new JSONObject(message).toString()); - pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); - return BigQueryRowsCheck.builder(bigQueryResourceManager, table) - .setMinRows(1) - .build() - .get(); - }); - - // Assert - assertThatResult(result).meetsConditions(); - 
assertThatBigQueryRecords(bigQueryResourceManager.readTable(table)).allMatch(message); - } -} diff --git a/v1/src/test/java/com/google/cloud/teleport/templates/PubsubToBigQueryLT.java b/v1/src/test/java/com/google/cloud/teleport/templates/PubsubToBigQueryLT.java deleted file mode 100644 index 4e765d9bf0..0000000000 --- a/v1/src/test/java/com/google/cloud/teleport/templates/PubsubToBigQueryLT.java +++ /dev/null @@ -1,252 +0,0 @@ -/* - * Copyright (C) 2022 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ -package com.google.cloud.teleport.templates; - -import static org.apache.beam.it.common.TestProperties.getProperty; -import static org.apache.beam.it.gcp.bigquery.BigQueryResourceManagerUtils.toTableSpec; -import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatPipeline; -import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; - -import com.google.cloud.bigquery.Field; -import com.google.cloud.bigquery.FieldValueList; -import com.google.cloud.bigquery.Schema; -import com.google.cloud.bigquery.StandardSQLTypeName; -import com.google.cloud.bigquery.TableId; -import com.google.cloud.bigquery.TableResult; -import com.google.cloud.teleport.metadata.TemplateLoadTest; -import com.google.common.base.MoreObjects; -import com.google.pubsub.v1.SubscriptionName; -import com.google.pubsub.v1.TopicName; -import java.io.IOException; -import java.text.ParseException; -import java.time.Duration; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Map; -import java.util.function.Function; -import org.apache.beam.it.common.PipelineLauncher.LaunchConfig; -import org.apache.beam.it.common.PipelineLauncher.LaunchInfo; -import org.apache.beam.it.common.PipelineOperator.Result; -import org.apache.beam.it.common.TestProperties; -import org.apache.beam.it.common.utils.ResourceManagerUtils; -import org.apache.beam.it.gcp.TemplateLoadTestBase; -import org.apache.beam.it.gcp.bigquery.BigQueryResourceManager; -import org.apache.beam.it.gcp.bigquery.conditions.BigQueryRowsCheck; -import org.apache.beam.it.gcp.datagenerator.DataGenerator; -import org.apache.beam.it.gcp.pubsub.PubsubResourceManager; -import org.junit.After; -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Performance tests for {@link PubSubToBigQuery PubSub to BigQuery} template. 
*/ -@Category(TemplateLoadTest.class) -@TemplateLoadTest(PubSubToBigQuery.class) -@RunWith(JUnit4.class) -public class PubsubToBigQueryLT extends TemplateLoadTestBase { - private static final String SPEC_PATH = - MoreObjects.firstNonNull( - TestProperties.specPath(), - "gs://dataflow-templates/latest/PubSub_Subscription_to_BigQuery"); - // 35,000,000 messages of the given schema make up approximately 10GB - private static final int NUM_MESSAGES = 35_000_000; - // schema should match schema supplied to generate fake records. - private static final Schema SCHEMA = - Schema.of( - Field.of("eventId", StandardSQLTypeName.STRING), - Field.of("eventTimestamp", StandardSQLTypeName.INT64), - Field.of("ipv4", StandardSQLTypeName.STRING), - Field.of("ipv6", StandardSQLTypeName.STRING), - Field.of("country", StandardSQLTypeName.STRING), - Field.of("username", StandardSQLTypeName.STRING), - Field.of("quest", StandardSQLTypeName.STRING), - Field.of("score", StandardSQLTypeName.INT64), - Field.of("completed", StandardSQLTypeName.BOOL), - // add a insert timestamp column to query latency values - Field.newBuilder("_metadata_insert_timestamp", StandardSQLTypeName.TIMESTAMP) - .setDefaultValueExpression("CURRENT_TIMESTAMP()") - .build()); - private static final String INPUT_PCOLLECTION = - "ReadPubSubSubscription/PubsubUnboundedSource.out0"; - private static final String OUTPUT_PCOLLECTION = - "WriteSuccessfulRecords/StreamingInserts/StreamingWriteTables/StripShardId/Map.out0"; - private static PubsubResourceManager pubsubResourceManager; - private static BigQueryResourceManager bigQueryResourceManager; - - @Before - public void setup() throws IOException { - pubsubResourceManager = - PubsubResourceManager.builder(testName, project, CREDENTIALS_PROVIDER).build(); - bigQueryResourceManager = - BigQueryResourceManager.builder(testName, project, CREDENTIALS).build(); - } - - @After - public void tearDown() { - ResourceManagerUtils.cleanResources(pubsubResourceManager, 
bigQueryResourceManager); - } - - @Test - public void testBacklog10gb() throws IOException, ParseException, InterruptedException { - testBacklog(this::disableRunnerV2); - } - - @Test - public void testSteadyState1hr() throws ParseException, IOException, InterruptedException { - testSteadyState1hr(this::disableRunnerV2); - } - - @Test - public void testSteadyState1hrUsingStreamingEngine() - throws ParseException, IOException, InterruptedException { - testSteadyState1hr(this::enableStreamingEngine); - } - - @Ignore("RunnerV2 is disabled on streaming templates.") - @Test - public void testSteadyState1hrUsingRunnerV2() - throws ParseException, IOException, InterruptedException { - testSteadyState1hr(this::enableRunnerV2); - } - - @Test - public void testSteadyState1hrUsingAtLeastOnceMode() - throws ParseException, IOException, InterruptedException { - ArrayList experiments = new ArrayList<>(); - experiments.add("streaming_mode_at_least_once"); - testSteadyState1hr( - b -> - b.addEnvironment("additionalExperiments", experiments) - .addEnvironment("enableStreamingEngine", true)); - } - - public void testBacklog(Function paramsAdder) - throws IOException, ParseException, InterruptedException { - // Arrange - TopicName backlogTopic = pubsubResourceManager.createTopic("backlog-input"); - SubscriptionName backlogSubscription = - pubsubResourceManager.createSubscription(backlogTopic, "backlog-subscription"); - TableId table = bigQueryResourceManager.createTable(testName, SCHEMA); - // Generate fake data to table - DataGenerator dataGenerator = - DataGenerator.builderWithSchemaTemplate(testName, "GAME_EVENT") - .setQPS("1000000") - .setMessagesLimit(String.valueOf(NUM_MESSAGES)) - .setTopic(backlogTopic.toString()) - .setNumWorkers("50") - .setMaxNumWorkers("100") - .build(); - dataGenerator.execute(Duration.ofMinutes(30)); - LaunchConfig options = - paramsAdder - .apply( - LaunchConfig.builder(testName, SPEC_PATH) - .addEnvironment("maxWorkers", 5) - 
.addEnvironment("numWorkers", 4) - .addParameter("inputSubscription", backlogSubscription.toString()) - .addParameter("outputTableSpec", toTableSpec(project, table))) - .build(); - - // Act - LaunchInfo info = pipelineLauncher.launch(project, region, options); - assertThatPipeline(info).isRunning(); - Result result = - pipelineOperator.waitForConditionAndCancel( - createConfig(info, Duration.ofMinutes(40)), - BigQueryRowsCheck.builder(bigQueryResourceManager, table) - .setMinRows(NUM_MESSAGES) - .build()); - - // Assert - assertThatResult(result).meetsConditions(); - - // export results - exportMetricsToBigQuery(info, getMetrics(info, INPUT_PCOLLECTION, OUTPUT_PCOLLECTION)); - } - - public void testSteadyState1hr(Function paramsAdder) - throws ParseException, IOException, InterruptedException { - // Arrange - String qps = getProperty("qps", "100000", TestProperties.Type.PROPERTY); - TopicName inputTopic = pubsubResourceManager.createTopic("steady-state-input"); - SubscriptionName inputSubscription = - pubsubResourceManager.createSubscription(inputTopic, "steady-state-subscription"); - TableId table = - bigQueryResourceManager.createTable( - testName, SCHEMA, System.currentTimeMillis() + 7200000); // expire in 2 hrs - DataGenerator dataGenerator = - DataGenerator.builderWithSchemaTemplate(testName, "GAME_EVENT") - .setQPS(qps) - .setTopic(inputTopic.toString()) - .setNumWorkers("10") - .setMaxNumWorkers("100") - .build(); - - LaunchConfig options = - paramsAdder - .apply( - LaunchConfig.builder(testName, SPEC_PATH) - .addEnvironment("maxWorkers", 10) - .addEnvironment("numWorkers", 7) - .addEnvironment("additionalUserLabels", Collections.singletonMap("qps", qps)) - .addParameter("inputSubscription", inputSubscription.toString()) - .addParameter("outputTableSpec", toTableSpec(project, table))) - .build(); - - // Act - LaunchInfo info = pipelineLauncher.launch(project, region, options); - assertThatPipeline(info).isRunning(); - // ElementCount metric in dataflow is 
approximate, allow for 1% difference - Integer expectedMessages = (int) (dataGenerator.execute(Duration.ofMinutes(60)) * 0.99); - Result result = - pipelineOperator.waitForConditionAndCancel( - createConfig(info, Duration.ofMinutes(20)), - BigQueryRowsCheck.builder(bigQueryResourceManager, table) - .setMinRows(expectedMessages) - .build()); - // Assert - assertThatResult(result).meetsConditions(); - - Map metrics = getMetrics(info, INPUT_PCOLLECTION, OUTPUT_PCOLLECTION); - // Query end to end latency metrics from BigQuery - TableResult latencyResult = - bigQueryResourceManager.runQuery( - String.format( - "WITH difference AS (SELECT\n" - + " TIMESTAMP_DIFF(_metadata_insert_timestamp,\n" - + " TIMESTAMP_MILLIS(eventTimestamp), SECOND) AS latency,\n" - + " FROM %s.%s)\n" - + " SELECT\n" - + " PERCENTILE_CONT(difference.latency, 0.5) OVER () AS median,\n" - + " PERCENTILE_CONT(difference.latency, 0.9) OVER () as percentile_90,\n" - + " PERCENTILE_CONT(difference.latency, 0.95) OVER () as percentile_95,\n" - + " PERCENTILE_CONT(difference.latency, 0.99) OVER () as percentile_99\n" - + " FROM difference LIMIT 1", - bigQueryResourceManager.getDatasetId(), testName)); - - FieldValueList latencyValues = latencyResult.getValues().iterator().next(); - metrics.put("median_latency", latencyValues.get(0).getDoubleValue()); - metrics.put("percentile_90_latency", latencyValues.get(1).getDoubleValue()); - metrics.put("percentile_95_latency", latencyValues.get(2).getDoubleValue()); - metrics.put("percentile_99_latency", latencyValues.get(3).getDoubleValue()); - - // export results - exportMetricsToBigQuery(info, metrics); - } -} diff --git a/v1/src/test/java/com/google/cloud/teleport/templates/TextToBigQueryStreamLT.java b/v1/src/test/java/com/google/cloud/teleport/templates/TextToBigQueryStreamLT.java deleted file mode 100644 index 56f5c06c38..0000000000 --- a/v1/src/test/java/com/google/cloud/teleport/templates/TextToBigQueryStreamLT.java +++ /dev/null @@ -1,274 +0,0 @@ -/* - * 
Copyright (C) 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package com.google.cloud.teleport.templates; - -import static org.apache.beam.it.gcp.artifacts.utils.ArtifactUtils.getFullGcsPath; -import static org.apache.beam.it.gcp.bigquery.BigQueryResourceManagerUtils.toTableSpec; -import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatPipeline; -import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; - -import com.google.cloud.bigquery.Field; -import com.google.cloud.bigquery.Schema; -import com.google.cloud.bigquery.StandardSQLTypeName; -import com.google.cloud.bigquery.TableId; -import com.google.cloud.teleport.metadata.TemplateLoadTest; -import com.google.common.base.MoreObjects; -import com.google.common.io.Resources; -import java.io.IOException; -import java.text.ParseException; -import java.time.Duration; -import java.util.function.Function; -import org.apache.beam.it.common.PipelineLauncher; -import org.apache.beam.it.common.PipelineOperator; -import org.apache.beam.it.common.TestProperties; -import org.apache.beam.it.common.utils.ResourceManagerUtils; -import org.apache.beam.it.gcp.TemplateLoadTestBase; -import org.apache.beam.it.gcp.artifacts.ArtifactClient; -import org.apache.beam.it.gcp.bigquery.BigQueryResourceManager; -import org.apache.beam.it.gcp.bigquery.conditions.BigQueryRowsCheck; -import org.apache.beam.it.gcp.datagenerator.DataGenerator; -import 
org.apache.beam.it.gcp.storage.GcsResourceManager; -import org.junit.After; -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Performance tests for {@link TextToBigQueryStreamLT GCS Text to BigQuery} template. */ -@Category(TemplateLoadTest.class) -@TemplateLoadTest(TextToBigQueryStreaming.class) -@RunWith(JUnit4.class) -public class TextToBigQueryStreamLT extends TemplateLoadTestBase { - private static final String SPEC_PATH = - MoreObjects.firstNonNull( - TestProperties.specPath(), "gs://dataflow-templates/latest/Stream_GCS_Text_to_BigQuery"); - private static final String ARTIFACT_BUCKET = TestProperties.artifactBucket(); - private static final String TEST_ROOT_DIR = - TextToBigQueryStreamLT.class.getSimpleName().toLowerCase(); - private static final String INPUT_PCOLLECTION = - "ReadFromSource/Via ReadFiles/Read all via FileBasedSource/Read ranges.out0"; - private static final String OUTPUT_PCOLLECTION = - "InsertIntoBigQuery/StreamingInserts/StreamingWriteTables/StripShardId/Map.out0"; - // schema should match schema supplied to generate fake records. 
- private static final Schema SCHEMA = - Schema.of( - Field.of("eventId", StandardSQLTypeName.STRING), - Field.of("eventTimestamp", StandardSQLTypeName.INT64), - Field.of("ipv4", StandardSQLTypeName.STRING), - Field.of("ipv6", StandardSQLTypeName.STRING), - Field.of("country", StandardSQLTypeName.STRING), - Field.of("username", StandardSQLTypeName.STRING), - Field.of("quest", StandardSQLTypeName.STRING), - Field.of("score", StandardSQLTypeName.INT64), - Field.of("completed", StandardSQLTypeName.BOOL)); - - // 35,000,000 messages of the given schema make up approximately 10GB - private static final int NUM_MESSAGES_FOR_10GB = 35000000; - private static final long TIMEOUT_FOR_10_GB_TEST_MINUTES = 30; - private static final long TIMEOUT_FOR_1_HOUR_TEST_MINUTES = 60; - - private static ArtifactClient gcsClient; - private static BigQueryResourceManager bigQueryResourceManager; - private String jsonPath; - private String udfPath; - - @Before - public void setup() throws IOException { - bigQueryResourceManager = - BigQueryResourceManager.builder(testName, project, CREDENTIALS).build(); - - gcsClient = GcsResourceManager.builder(ARTIFACT_BUCKET, TEST_ROOT_DIR, CREDENTIALS).build(); - // upload schema files and save path - jsonPath = - getFullGcsPath( - ARTIFACT_BUCKET, - gcsClient - .uploadArtifact( - "input/schema.json", - Resources.getResource("TextIOToBigQueryTest/schema.json").getPath()) - .name()); - - udfPath = - getFullGcsPath( - ARTIFACT_BUCKET, - gcsClient - .uploadArtifact( - "input/udf.js", Resources.getResource("TextIOToBigQueryTest/udf.js").getPath()) - .name()); - } - - @After - public void teardown() { - ResourceManagerUtils.cleanResources(bigQueryResourceManager, gcsClient); - } - - @Test - public void testBacklog10gb() throws IOException, ParseException, InterruptedException { - testBacklog(this::disableRunnerV2); - } - - @Test - public void testSteadyState1hr() throws IOException, ParseException, InterruptedException { - 
testSteadyState1hr(this::disableRunnerV2); - } - - @Test - public void testSteadyState1hrUsingStreamingEngine() - throws IOException, ParseException, InterruptedException { - testSteadyState1hr(this::enableStreamingEngine); - } - - @Ignore("RunnerV2 is disabled on streaming templates.") - @Test - public void testSteadyState1hrUsingRunnerV2() - throws IOException, ParseException, InterruptedException { - testSteadyState1hr(this::enableRunnerV2); - } - - private void testBacklog( - Function - paramsAdder) - throws IOException, ParseException, InterruptedException { - - DataGenerator dataGenerator = - DataGenerator.builderWithSchemaTemplate(testName, "GAME_EVENT") - .setQPS("1000000") - .setMessagesLimit(String.valueOf(NUM_MESSAGES_FOR_10GB)) - .setSinkType("GCS") - .setOutputDirectory(getTestMethodDirPath()) - .setNumShards("20") - .setNumWorkers("50") - .setMaxNumWorkers("100") - .build(); - // Executes the data generator - dataGenerator.execute(Duration.ofMinutes(TIMEOUT_FOR_10_GB_TEST_MINUTES)); - - /* - * This table will automatically expire 1 h after creation if not cleaned up manually or by - * calling the {@link BigQueryResourceManager#cleanupAll()} method. 
- */ - TableId table = bigQueryResourceManager.createTable(testName, SCHEMA); - - PipelineLauncher.LaunchConfig options = - paramsAdder - .apply( - PipelineLauncher.LaunchConfig.builder(testName, SPEC_PATH) - .addEnvironment("maxWorkers", 10) - .addEnvironment("numWorkers", 5) - .addParameter("outputTable", toTableSpec(project, table)) - .addParameter("inputFilePattern", getTestMethodDirPath() + "/*") - .addParameter("JSONPath", jsonPath) - .addParameter( - "bigQueryLoadingTemporaryDirectory", getTestMethodDirPath() + "/temp") - .addParameter("javascriptTextTransformGcsPath", udfPath) - .addParameter("javascriptTextTransformFunctionName", "identity")) - .build(); - - // Act - PipelineLauncher.LaunchInfo info = pipelineLauncher.launch(project, region, options); - assertThatPipeline(info).isRunning(); - PipelineOperator.Result result = - // The method waitForConditionAndCancel was used because the streaming pipeline template - // includes a call to Splittable DoFn. Invoking a splittable DoFn causes the job to remain - // in the Draining state indefinitely. 
- // @see - // Important information about draining a job - pipelineOperator.waitForConditionAndCancel( - createConfig(info, Duration.ofMinutes(TIMEOUT_FOR_10_GB_TEST_MINUTES)), - BigQueryRowsCheck.builder(bigQueryResourceManager, table) - .setMinRows(NUM_MESSAGES_FOR_10GB) - .build()); - - // Assert - assertThatResult(result).meetsConditions(); - - // export results - exportMetricsToBigQuery(info, getMetrics(info, INPUT_PCOLLECTION, OUTPUT_PCOLLECTION)); - } - - private void testSteadyState1hr( - Function - paramsAdder) - throws IOException, ParseException, InterruptedException { - - DataGenerator dataGenerator = - DataGenerator.builderWithSchemaTemplate(testName, "GAME_EVENT") - .setQPS("100000") - .setSinkType("GCS") - .setOutputDirectory(getTestMethodDirPath()) - .setNumShards("20") - .setNumWorkers("10") - .setMaxNumWorkers("15") - .build(); - - /* - * This table will automatically expire 2h after creation if not cleaned up manually or by - * calling the {@link BigQueryResourceManager#cleanupAll()} method. 
- */ - TableId table = - bigQueryResourceManager.createTable(testName, SCHEMA, System.currentTimeMillis() + 7200000); - - PipelineLauncher.LaunchConfig options = - paramsAdder - .apply( - PipelineLauncher.LaunchConfig.builder(testName, SPEC_PATH) - .addEnvironment("maxWorkers", 10) - .addEnvironment("numWorkers", 5) - .addParameter("outputTable", toTableSpec(project, table)) - .addParameter("inputFilePattern", getTestMethodDirPath() + "/*") - .addParameter("JSONPath", jsonPath) - .addParameter( - "bigQueryLoadingTemporaryDirectory", getTestMethodDirPath() + "/temp") - .addParameter("javascriptTextTransformGcsPath", udfPath) - .addParameter("javascriptTextTransformFunctionName", "identity")) - .build(); - - // Act - PipelineLauncher.LaunchInfo info = pipelineLauncher.launch(project, region, options); - assertThatPipeline(info).isRunning(); - - // Executes the data generator and return approximate number of messages - // ElementCount metric in dataflow is approximate, allow for 1% difference - int expectedMessages = - (int) (dataGenerator.execute(Duration.ofMinutes(TIMEOUT_FOR_1_HOUR_TEST_MINUTES)) * 0.99); - - PipelineOperator.Result result = - // The method waitForConditionAndCancel was used because the streaming pipeline template - // includes a call to Splittable DoFn. Invoking a splittable DoFn causes the job to remain - // in the Draining state indefinitely. 
- // @see - // Important information about draining a job - pipelineOperator.waitForConditionAndCancel( - createConfig(info, Duration.ofMinutes(10)), - BigQueryRowsCheck.builder(bigQueryResourceManager, table) - .setMinRows(expectedMessages) - .build()); - - // Assert - assertThatResult(result).meetsConditions(); - - // export results - exportMetricsToBigQuery(info, getMetrics(info, INPUT_PCOLLECTION, OUTPUT_PCOLLECTION)); - } - - private String getTestMethodDirPath() { - return getFullGcsPath(ARTIFACT_BUCKET, TEST_ROOT_DIR, gcsClient.runId(), testName); - } -} From 001f595022874ec038a24d385aaa7c81f14157a9 Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Tue, 26 May 2026 13:25:44 -0700 Subject: [PATCH 03/19] Add support for REMOTE UDFs. --- .../com/google/cloud/teleport/spanner/ExportPipelineIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java index 33550b044f..23c8209782 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java @@ -321,7 +321,7 @@ private void testPGSpannerToAvroBase( List ddls = Arrays.stream(ddl.split(";")).filter(d -> !d.isBlank()).toList(); spannerResourceManager.executeDdlStatements(ddls); - List expectedData = generateTableRows(String.format("%Singers", prefix)); + List expectedData = generateTableRows(String.format("%sSingers", prefix)); spannerResourceManager.write(expectedData); PipelineLauncher.LaunchConfig.Builder options = paramsAdder.apply( From 95c093879af19d0f4c5366eb1c27f754ab379b05 Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Tue, 26 May 2026 13:25:44 -0700 Subject: [PATCH 04/19] Add support for REMOTE UDFs. 
--- .../it/gcp/dataflow/DirectRunnerClient.java | 2 +- it/pom.xml | 33 + pom.xml | 118 ++++ v1/pom.xml | 69 ++ .../DLPTextToBigQueryStreamingIT.java | 256 ++++++++ .../templates/PubSubToBigQueryIT.java | 597 ++++++++++++++++++ .../templates/PubSubTopicToBigQueryIT.java | 113 ++++ .../templates/PubsubToBigQueryLT.java | 252 ++++++++ .../templates/TextToBigQueryStreamLT.java | 274 ++++++++ 9 files changed, 1713 insertions(+), 1 deletion(-) create mode 100644 v1/src/test/java/com/google/cloud/teleport/templates/DLPTextToBigQueryStreamingIT.java create mode 100644 v1/src/test/java/com/google/cloud/teleport/templates/PubSubToBigQueryIT.java create mode 100644 v1/src/test/java/com/google/cloud/teleport/templates/PubSubTopicToBigQueryIT.java create mode 100644 v1/src/test/java/com/google/cloud/teleport/templates/PubsubToBigQueryLT.java create mode 100644 v1/src/test/java/com/google/cloud/teleport/templates/TextToBigQueryStreamLT.java diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java index 8041dc7a80..efce55e16a 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java @@ -265,7 +265,7 @@ public void cancel() { currentJob.setCurrentState(JobState.CANCELLED.toString()); try { - this.interrupt(); + this.stop(); } catch (Exception e) { LOG.warn("Error cancelling job", e); } diff --git a/it/pom.xml b/it/pom.xml index 5dd27ef146..4ab068a5a1 100644 --- a/it/pom.xml +++ b/it/pom.xml @@ -137,6 +137,39 @@ + + com.diffplug.spotless + spotless-maven-plugin + ${spotless-maven-plugin.version} + + + + + + *.md + .gitignore + + + + + + + + + 1.17.0 + + + + + + + + + check + + + + diff --git a/pom.xml b/pom.xml index f8fd313d9b..254447f187 100644 --- a/pom.xml +++ b/pom.xml @@ -410,6 
+410,124 @@ + + com.diffplug.spotless + spotless-maven-plugin + ${spotless-maven-plugin.version} + + + + + + *.md + .gitignore + + + + + + + + + src/main/java/org/apache/beam/** + src/test/java/org/apache/beam/** + + + + 1.17.0 + + + + ${licenseHeaderFile} + + + + + + + + check + + + + + + + org.apache.maven.plugins + maven-checkstyle-plugin + ${maven-checkstyle-plugin.version} + + + com.puppycrawl.tools + checkstyle + ${checkstyle.version} + + + + checkstyle/checkstyle.xml + checkstyle/suppressions.xml + true + true + false + true + + + + + test-compile + + check + + + + + + org.jacoco + jacoco-maven-plugin + ${jacoco.version} + + + + **/*AutoValue_* + **/*Exception.* + **/constants/** + **/CustomTransformationImplFetcher.* + **/JarFileReader.* + **/CustomTransformationWithShardFor*IT.* + **/CustomTransformationWithCassandraForIT.* + **/models/* + **/exceptions/* + + + + + default-prepare-agent + + prepare-agent + + + + default-report + prepare-package + + report + + + + report-aggregate + verify + + report-aggregate + + + + org.codehaus.mojo exec-maven-plugin diff --git a/v1/pom.xml b/v1/pom.xml index e2abd5ae21..27d505b078 100644 --- a/v1/pom.xml +++ b/v1/pom.xml @@ -885,6 +885,75 @@ + + org.jacoco + jacoco-maven-plugin + ${jacoco.version} + + + + **/*InformationSchemaScanner.* + + **/*AutoValue_* + + **/*com/google/cloud/teleport/spanner/spannerio/changestreams/**/* + + + + PACKAGE + + com.google.cloud.teleport.spanner + com.google.cloud.teleport.spanner.** + + + + com.google.cloud.teleport.spanner.ddl.InformationSchemaScanner + + com.google.cloud.teleport.spanner.proto + + + + + + + default-prepare-agent + + prepare-agent + + + + default-report + prepare-package + + report + + + + default-check + + check + + + + + + + LINE + COVEREDRATIO + 0.5 + + + BRANCH + COVEREDRATIO + 0.5 + + + + + + + + diff --git a/v1/src/test/java/com/google/cloud/teleport/templates/DLPTextToBigQueryStreamingIT.java 
b/v1/src/test/java/com/google/cloud/teleport/templates/DLPTextToBigQueryStreamingIT.java new file mode 100644 index 0000000000..3da99c8ba9 --- /dev/null +++ b/v1/src/test/java/com/google/cloud/teleport/templates/DLPTextToBigQueryStreamingIT.java @@ -0,0 +1,256 @@ +/* + * Copyright (C) 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.templates; + +import static com.google.common.truth.Truth.assertThat; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatPipeline; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; + +import com.google.cloud.bigquery.TableId; +import com.google.cloud.teleport.metadata.SkipRunnerV2Test; +import com.google.cloud.teleport.metadata.TemplateIntegrationTest; +import com.google.privacy.dlp.v2.CharacterMaskConfig; +import com.google.privacy.dlp.v2.CryptoHashConfig; +import com.google.privacy.dlp.v2.CryptoKey; +import com.google.privacy.dlp.v2.DeidentifyConfig; +import com.google.privacy.dlp.v2.DeidentifyTemplate; +import com.google.privacy.dlp.v2.FieldId; +import com.google.privacy.dlp.v2.FieldTransformation; +import com.google.privacy.dlp.v2.InfoType; +import com.google.privacy.dlp.v2.InfoTypeTransformations; +import com.google.privacy.dlp.v2.InspectConfig; +import com.google.privacy.dlp.v2.InspectTemplate; +import com.google.privacy.dlp.v2.PrimitiveTransformation; +import com.google.privacy.dlp.v2.RecordTransformations; 
+import com.google.privacy.dlp.v2.TransientCryptoKey; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.beam.it.common.PipelineLauncher; +import org.apache.beam.it.common.PipelineLauncher.LaunchConfig; +import org.apache.beam.it.common.PipelineOperator; +import org.apache.beam.it.common.utils.ResourceManagerUtils; +import org.apache.beam.it.gcp.TemplateTestBase; +import org.apache.beam.it.gcp.bigquery.BigQueryResourceManager; +import org.apache.beam.it.gcp.bigquery.conditions.BigQueryRowsCheck; +import org.apache.beam.it.gcp.bigquery.matchers.BigQueryAsserts; +import org.apache.beam.it.gcp.dlp.DlpResourceManager; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Integration test for {@link DLPTextToBigQueryStreaming} (Stream_DLP_GCS_Text_to_BigQuery). 
*/ +@Category({TemplateIntegrationTest.class, SkipRunnerV2Test.class}) +@TemplateIntegrationTest(DLPTextToBigQueryStreaming.class) +@RunWith(JUnit4.class) +public class DLPTextToBigQueryStreamingIT extends TemplateTestBase { + + private static final Logger LOG = LoggerFactory.getLogger(DLPTextToBigQueryStreamingIT.class); + + private BigQueryResourceManager bigQueryClient; + private DlpResourceManager dlpResourceManager; + + @Before + public void setup() throws IOException { + bigQueryClient = BigQueryResourceManager.builder(testName, PROJECT, credentials).build(); + dlpResourceManager = DlpResourceManager.builder(PROJECT, credentialsProvider).build(); + + gcsClient.createArtifact( + "input.csv", + "Card Type Full Name,Issuing Bank,Card Num,Card Holder's Name,CVV/CVV2,Issue Date,Expiry Date,Billing Date,Card PIN,Credit Limit,Comments\n" + + "Visa,Chase,4111111111111111,Frank Q Ortiz,360,09/2019,09/2024,7,1247,103700,Please change my number to 604-406-9050. Thanks\n"); + } + + @After + public void tearDown() { + ResourceManagerUtils.cleanResources(bigQueryClient, dlpResourceManager); + } + + @Test + public void testDLPTextToBigQueryInspect() throws IOException { + // Create a template to hash card number + CVV + DeidentifyTemplate deidentifyTemplate = createInfoTypeTemplate(); + + // Create inspect template + InspectTemplate inspectTemplate = createInspectTemplate(); + + testDLPTextToBigQueryBase( + params -> params.addParameter("inspectTemplateName", inspectTemplate.getName()), + (record) -> { + assertThat((String) record.get("Card_Num")).isEqualTo("4111111111111111"); + assertThat((String) record.get("CVVCVV2")).isEqualTo("360"); + assertThat((String) record.get("Comments")) + .isNotEqualTo("Please change my number to 604-406-9050. 
Thanks"); + }, + deidentifyTemplate); + } + + @Test + public void testDLPTextToBigQuery() throws IOException { + // Create a template to hash card number + CVV + DeidentifyTemplate deidentifyTemplate = createRecordTypeTemplate(); + + testDLPTextToBigQueryBase( + Function.identity(), + (record) -> { + assertThat((String) record.get("Card_Num")).isNotEqualTo("4111111111111111"); + assertThat((String) record.get("CVVCVV2")).isNotEqualTo("360"); + assertThat((String) record.get("Comments")) + .isEqualTo("Please change my number to 604-406-9050. Thanks"); + }, + deidentifyTemplate); + } + + public void testDLPTextToBigQueryBase( + Function paramsAdder, + Consumer> recordAsserts, + DeidentifyTemplate deidentifyTemplate) + throws IOException { + // Arrange + String dataset = bigQueryClient.createDataset(REGION); + + // Act + PipelineLauncher.LaunchInfo info = + launchTemplate( + paramsAdder.apply( + LaunchConfig.builder(testName, specPath) + .addParameter("inputFilePattern", getGcsPath("*.csv")) + .addParameter("datasetName", dataset) + .addParameter("batchSize", "1000") + .addParameter("dlpProjectId", PROJECT) + .addParameter("deidentifyTemplateName", deidentifyTemplate.getName()))); + assertThatPipeline(info).isRunning(); + + TableId targetTableId = TableId.of(PROJECT, dataset, "input"); + PipelineOperator.Result result = + pipelineOperator() + // drain doesn't seem to work with the TextIO GCS files watching that the template uses + .waitForConditionAndCancel( + createConfig(info), + BigQueryRowsCheck.builder(bigQueryClient, targetTableId).setMinRows(1).build()); + + // Assert + assertThatResult(result).meetsConditions(); + List> records = + BigQueryAsserts.tableResultToRecords(bigQueryClient.readTable(targetTableId)); + assertThat(records).hasSize(1); + + recordAsserts.accept(records.get(0)); + } + + private InspectTemplate createInspectTemplate() throws IOException { + InspectTemplate inspectTemplate = + dlpResourceManager.createInspectTemplate( + 
InspectTemplate.newBuilder() + .setName(String.format("projects/%s/inspectTemplates/%s", PROJECT, testId)) + .setDescription("Template for test " + testName) + .setInspectConfig( + InspectConfig.newBuilder() + .addAllInfoTypes( + Stream.of("PHONE_NUMBER") + .map(it -> InfoType.newBuilder().setName(it).build()) + .collect(Collectors.toList())) + .build()) + .build()); + LOG.info("Created inspect template: {}", inspectTemplate.getName()); + + return inspectTemplate; + } + + private DeidentifyTemplate createInfoTypeTemplate() throws IOException { + DeidentifyTemplate deidentifyTemplate = + dlpResourceManager.createDeidentifyTemplate( + DeidentifyTemplate.newBuilder() + .setName(String.format("projects/%s/deidentifyTemplates/%s", PROJECT, testId)) + .setDescription("Template for test " + testName) + .setDeidentifyConfig( + DeidentifyConfig.newBuilder() + .setInfoTypeTransformations( + InfoTypeTransformations.newBuilder() + .addTransformations( + InfoTypeTransformations.InfoTypeTransformation.newBuilder() + .setPrimitiveTransformation( + PrimitiveTransformation.newBuilder() + .setCharacterMaskConfig( + CharacterMaskConfig.newBuilder() + .setMaskingCharacter("X") + .setNumberToMask(5) + .build()) + .setCryptoHashConfig( + CryptoHashConfig.newBuilder() + .setCryptoKey( + CryptoKey.newBuilder() + .setTransient( + TransientCryptoKey.newBuilder() + .setName(testId) + .build()) + .build()) + .build()) + .build()) + .build()) + .build()) + .build()) + .build()); + LOG.info("Created deidentify template: {}", deidentifyTemplate.getName()); + + return deidentifyTemplate; + } + + private DeidentifyTemplate createRecordTypeTemplate() throws IOException { + DeidentifyTemplate deidentifyTemplate = + dlpResourceManager.createDeidentifyTemplate( + DeidentifyTemplate.newBuilder() + .setName(String.format("projects/%s/deidentifyTemplates/%s", PROJECT, testId)) + .setDescription("Template for test " + testName) + .setDeidentifyConfig( + DeidentifyConfig.newBuilder() + 
.setRecordTransformations( + RecordTransformations.newBuilder() + .addFieldTransformations( + FieldTransformation.newBuilder() + .addFields(FieldId.newBuilder().setName("Card Num").build()) + .addFields(FieldId.newBuilder().setName("CVV/CVV2").build()) + .setPrimitiveTransformation( + PrimitiveTransformation.newBuilder() + .setCryptoHashConfig( + CryptoHashConfig.newBuilder() + .setCryptoKey( + CryptoKey.newBuilder() + .setTransient( + TransientCryptoKey.newBuilder() + .setName(testId) + .build()) + .build()) + .build()) + .build()) + .build()) + .build()) + .build()) + .build()); + LOG.info("Created deidentify template: {}", deidentifyTemplate.getName()); + + return deidentifyTemplate; + } +} diff --git a/v1/src/test/java/com/google/cloud/teleport/templates/PubSubToBigQueryIT.java b/v1/src/test/java/com/google/cloud/teleport/templates/PubSubToBigQueryIT.java new file mode 100644 index 0000000000..186fa63892 --- /dev/null +++ b/v1/src/test/java/com/google/cloud/teleport/templates/PubSubToBigQueryIT.java @@ -0,0 +1,597 @@ +/* + * Copyright (C) 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.templates; + +import static com.google.common.truth.Truth.assertThat; +import static org.apache.beam.it.gcp.bigquery.matchers.BigQueryAsserts.assertThatBigQueryRecords; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatPipeline; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; + +import com.google.cloud.bigquery.Field; +import com.google.cloud.bigquery.LegacySQLTypeName; +import com.google.cloud.bigquery.Schema; +import com.google.cloud.bigquery.StandardSQLTypeName; +import com.google.cloud.bigquery.TableId; +import com.google.cloud.bigquery.TableResult; +import com.google.cloud.teleport.metadata.SkipRunnerV2Test; +import com.google.cloud.teleport.metadata.TemplateIntegrationTest; +import com.google.common.collect.ImmutableMap; +import com.google.protobuf.ByteString; +import com.google.pubsub.v1.SubscriptionName; +import com.google.pubsub.v1.TopicName; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.function.Supplier; +import org.apache.beam.it.common.PipelineLauncher.LaunchConfig; +import org.apache.beam.it.common.PipelineLauncher.LaunchInfo; +import org.apache.beam.it.common.PipelineOperator.Result; +import org.apache.beam.it.common.utils.ResourceManagerUtils; +import org.apache.beam.it.gcp.TemplateTestBase; +import org.apache.beam.it.gcp.bigquery.BigQueryResourceManager; +import org.apache.beam.it.gcp.bigquery.conditions.BigQueryRowsCheck; +import org.apache.beam.it.gcp.pubsub.PubsubResourceManager; +import org.apache.commons.lang3.RandomStringUtils; +import org.json.JSONObject; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import 
org.junit.runners.JUnit4; + +/** Integration test for {@link PubSubToBigQuery} classic template. */ +@Category({TemplateIntegrationTest.class, SkipRunnerV2Test.class}) +@RunWith(JUnit4.class) +public final class PubSubToBigQueryIT extends TemplateTestBase { + + private static final int MESSAGES_COUNT = 10; + private static final int BAD_MESSAGES_COUNT = 3; + + private static final Schema BIG_QUERY_DLQ_SCHEMA = getDlqSchema(); + + private PubsubResourceManager pubsubResourceManager; + private BigQueryResourceManager bigQueryResourceManager; + + @Before + public void setUp() throws IOException { + pubsubResourceManager = + PubsubResourceManager.builder(testName, PROJECT, credentialsProvider).build(); + bigQueryResourceManager = + BigQueryResourceManager.builder(testName, PROJECT, credentials).build(); + + gcsClient.createArtifact( + "udf.js", + "function uppercaseName(value) {\n" + + " const data = JSON.parse(value);\n" + + " data.name = data.name.toUpperCase();\n" + + " return JSON.stringify(data);\n" + + "}"); + } + + @After + public void cleanUp() { + ResourceManagerUtils.cleanResources(pubsubResourceManager, bigQueryResourceManager); + } + + @Test + @TemplateIntegrationTest(value = PubSubToBigQuery.class, template = "PubSub_to_BigQuery") + public void testTopicToBigQueryClassic() throws IOException { + testTopicToBigQueryClassicBase(Function.identity()); + } + + @Test + @TemplateIntegrationTest(value = PubSubToBigQuery.class, template = "PubSub_to_BigQuery") + public void testTopicToBigQueryClassicStreamingEngine() throws IOException { + testTopicToBigQueryClassicBase(this::enableStreamingEngine); + } + + private void testTopicToBigQueryClassicBase( + Function paramsAdder) throws IOException { + // Arrange + List bqSchemaFields = + Arrays.asList( + Field.of("id", StandardSQLTypeName.INT64), + Field.of("job", StandardSQLTypeName.STRING), + Field.of("name", StandardSQLTypeName.STRING)); + Schema bqSchema = Schema.of(bqSchemaFields); + + TopicName topic = 
pubsubResourceManager.createTopic("input"); + bigQueryResourceManager.createDataset(REGION); + TableId table = bigQueryResourceManager.createTable(testName, bqSchema); + + TableId dlqTable = + bigQueryResourceManager.createTable( + table.getTable() + PubSubToBigQuery.DEFAULT_DEADLETTER_TABLE_SUFFIX, + BIG_QUERY_DLQ_SCHEMA); + + // Act + LaunchInfo info = + launchTemplate( + paramsAdder.apply( + LaunchConfig.builder(testName, specPath) + .addParameter("inputTopic", topic.toString()) + .addParameter("outputTableSpec", toTableSpecLegacy(table)) + .addParameter("javascriptTextTransformGcsPath", getGcsPath("udf.js")) + .addParameter("javascriptTextTransformFunctionName", "uppercaseName") + .addParameter("outputDeadletterTable", toTableSpecLegacy(dlqTable)))); + assertThatPipeline(info).isRunning(); + + List> expectedMessages = new ArrayList<>(); + List goodData = new ArrayList<>(); + for (int i = 1; i <= MESSAGES_COUNT; i++) { + Map message = + new HashMap<>( + Map.of("id", i, "job", testName, "name", RandomStringUtils.randomAlphabetic(1, 20))); + ByteString messageData = ByteString.copyFromUtf8(new JSONObject(message).toString()); + goodData.add(messageData); + pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); + message.put("name", message.get("name").toString().toUpperCase()); + expectedMessages.add(message); + } + + List badData = new ArrayList<>(); + for (int i = 1; i <= BAD_MESSAGES_COUNT; i++) { + ByteString messageData = ByteString.copyFromUtf8("bad id " + i); + pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); + badData.add(messageData); + } + + // For tests that run against topics, sending repeatedly will make it work for + // cases in which the on-demand subscription is created after sending messages. 
+ Supplier pubSubMessageSender = + () -> { + goodData.forEach( + goodMessage -> pubsubResourceManager.publish(topic, ImmutableMap.of(), goodMessage)); + badData.forEach( + badMessage -> pubsubResourceManager.publish(topic, ImmutableMap.of(), badMessage)); + return true; + }; + + Result result = + pipelineOperator() + .waitForConditionAndCancel( + createConfig(info), + pubSubMessageSender, + BigQueryRowsCheck.builder(bigQueryResourceManager, table) + .setMinRows(MESSAGES_COUNT) + .build(), + BigQueryRowsCheck.builder(bigQueryResourceManager, dlqTable) + .setMinRows(BAD_MESSAGES_COUNT) + .build()); + + // Assert + assertThatResult(result).meetsConditions(); + TableResult records = bigQueryResourceManager.readTable(table); + + // Make sure record can be read and UDF changed name to uppercase + assertThatBigQueryRecords(records).hasRecordsUnordered(expectedMessages); + + TableResult dlqRecords = bigQueryResourceManager.readTable(dlqTable); + assertThat(dlqRecords.getValues().iterator().next().toString()) + .contains("Expected json literal but found"); + assertThat(dlqRecords.getTotalRows()).isAtLeast(BAD_MESSAGES_COUNT); + } + + @Test + @TemplateIntegrationTest( + value = PubSubToBigQuery.class, + template = "PubSub_Subscription_to_BigQuery") + public void testSubscriptionToBigQueryClassic() throws IOException { + // Arrange + List bqSchemaFields = + Arrays.asList( + Field.of("id", StandardSQLTypeName.INT64), + Field.of("job", StandardSQLTypeName.STRING), + Field.of("name", StandardSQLTypeName.STRING)); + Schema bqSchema = Schema.of(bqSchemaFields); + + TopicName topic = pubsubResourceManager.createTopic("input"); + SubscriptionName subscription = pubsubResourceManager.createSubscription(topic, "input-sub-1"); + bigQueryResourceManager.createDataset(REGION); + TableId table = bigQueryResourceManager.createTable(testName, bqSchema); + + TableId dlqTable = + bigQueryResourceManager.createTable( + table.getTable() + PubSubToBigQuery.DEFAULT_DEADLETTER_TABLE_SUFFIX, + 
BIG_QUERY_DLQ_SCHEMA); + + // Act + LaunchInfo info = + launchTemplate( + LaunchConfig.builder(testName, specPath) + .addParameter("inputSubscription", subscription.toString()) + .addParameter("outputTableSpec", toTableSpecLegacy(table)) + .addParameter("javascriptTextTransformGcsPath", getGcsPath("udf.js")) + .addParameter("javascriptTextTransformFunctionName", "uppercaseName") + .addParameter("outputDeadletterTable", toTableSpecLegacy(dlqTable))); + assertThatPipeline(info).isRunning(); + + List> expectedMessages = new ArrayList<>(); + for (int i = 1; i <= MESSAGES_COUNT; i++) { + Map message = + new HashMap<>( + Map.of("id", i, "job", testName, "name", RandomStringUtils.randomAlphabetic(1, 20))); + ByteString messageData = ByteString.copyFromUtf8(new JSONObject(message).toString()); + pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); + message.put("name", message.get("name").toString().toUpperCase()); + expectedMessages.add(message); + } + + for (int i = 1; i <= BAD_MESSAGES_COUNT; i++) { + ByteString messageData = ByteString.copyFromUtf8("bad id " + i); + pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); + } + + Result result = + pipelineOperator() + .waitForConditionAndCancel( + createConfig(info), + BigQueryRowsCheck.builder(bigQueryResourceManager, table) + .setMinRows(MESSAGES_COUNT) + .build(), + BigQueryRowsCheck.builder(bigQueryResourceManager, dlqTable) + .setMinRows(BAD_MESSAGES_COUNT) + .build()); + + // Assert + assertThatResult(result).meetsConditions(); + TableResult records = bigQueryResourceManager.readTable(table); + + // Make sure record can be read and UDF changed name to uppercase + assertThatBigQueryRecords(records).hasRecordsUnordered(expectedMessages); + + TableResult dlqRecords = bigQueryResourceManager.readTable(dlqTable); + assertThat(dlqRecords.getValues().iterator().next().toString()) + .contains("Expected json literal but found"); + 
assertThat(dlqRecords.getTotalRows()).isAtLeast(BAD_MESSAGES_COUNT); + } + + @Test + @TemplateIntegrationTest(value = PubSubToBigQuery.class, template = "PubSub_to_BigQuery") + public void testTopicToBigQueryClassicWithReload() throws IOException, InterruptedException { + // Arrange + List bqSchemaFields = + Arrays.asList( + Field.of("id", StandardSQLTypeName.INT64), + Field.of("job", StandardSQLTypeName.STRING), + Field.of("name", StandardSQLTypeName.STRING)); + Schema bqSchema = Schema.of(bqSchemaFields); + + TopicName topic = pubsubResourceManager.createTopic("input"); + bigQueryResourceManager.createDataset(REGION); + TableId table = bigQueryResourceManager.createTable(testName, bqSchema); + + TableId dlqTable = + bigQueryResourceManager.createTable( + table.getTable() + PubSubToBigQuery.DEFAULT_DEADLETTER_TABLE_SUFFIX, + BIG_QUERY_DLQ_SCHEMA); + + // Act + LaunchInfo info = + launchTemplate( + LaunchConfig.builder(testName, specPath) + .addParameter("inputTopic", topic.toString()) + .addParameter("outputTableSpec", toTableSpecLegacy(table)) + .addParameter("javascriptTextTransformGcsPath", getGcsPath("udf.js")) + .addParameter("javascriptTextTransformFunctionName", "uppercaseName") + .addParameter("javascriptTextTransformReloadIntervalMinutes", "1") + .addParameter("outputDeadletterTable", toTableSpecLegacy(dlqTable))); + assertThatPipeline(info).isRunning(); + + List> expectedUpperMessages = new ArrayList<>(); + List goodUpperData = new ArrayList<>(); + for (int i = 1; i <= MESSAGES_COUNT; i++) { + Map message = + new HashMap<>( + Map.of( + "id", + i, + "job", + testName, + "name", + "UPPER: " + RandomStringUtils.randomAlphabetic(1, 20))); + ByteString messageData = ByteString.copyFromUtf8(new JSONObject(message).toString()); + goodUpperData.add(messageData); + pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); + message.put("name", message.get("name").toString().toUpperCase()); + expectedUpperMessages.add(message); + } + + List badData = 
new ArrayList<>(); + for (int i = 1; i <= BAD_MESSAGES_COUNT; i++) { + ByteString messageData = ByteString.copyFromUtf8("bad id " + i); + pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); + badData.add(messageData); + } + + // For tests that run against topics, sending repeatedly will make it work for + // cases in which the on-demand subscription is created after sending messages. + Supplier pubSubMessageSender = + () -> { + goodUpperData.forEach( + goodMessage -> pubsubResourceManager.publish(topic, ImmutableMap.of(), goodMessage)); + badData.forEach( + badMessage -> pubsubResourceManager.publish(topic, ImmutableMap.of(), badMessage)); + return true; + }; + + BigQueryRowsCheck bigQueryRowsCheck = + BigQueryRowsCheck.builder(bigQueryResourceManager, table) + .setMinRows(MESSAGES_COUNT) + .build(); + BigQueryRowsCheck bigQueryDlqRowsCheck = + BigQueryRowsCheck.builder(bigQueryResourceManager, dlqTable) + .setMinRows(BAD_MESSAGES_COUNT) + .build(); + Result result = + pipelineOperator() + .waitForCondition( + createConfig(info), pubSubMessageSender, bigQueryRowsCheck, bigQueryDlqRowsCheck); + // Assert + assertThatResult(result).meetsConditions(); + TableResult records = bigQueryResourceManager.readTable(table); + + // Make sure record can be read and UDF changed name to uppercase + assertThatBigQueryRecords(records).hasRecordsUnordered(expectedUpperMessages); + + TableResult dlqRecords = bigQueryResourceManager.readTable(dlqTable); + assertThat(dlqRecords.getValues().iterator().next().toString()) + .contains("Expected json literal but found"); + assertThat(dlqRecords.getTotalRows()).isAtLeast(BAD_MESSAGES_COUNT); + + // modify UDF to test reloading + gcsClient.createArtifact( + "udf.js", + "function uppercaseName(value) {\n" + + " const data = JSON.parse(value);\n" + + " data.name = data.name.toLowerCase();\n" + + " return JSON.stringify(data);\n" + + "}"); + + // wait to ensure the reload will take effect. 
+ TimeUnit.MINUTES.sleep(2); + List> expectedLowerMessages = new ArrayList<>(); + List goodLowerData = new ArrayList<>(); + for (int i = MESSAGES_COUNT + 1; i <= MESSAGES_COUNT * 2; i++) { + Map message = + new HashMap<>( + Map.of( + "id", + i, + "job", + testName, + "name", + "lower: " + RandomStringUtils.randomAlphabetic(1, 20))); + ByteString messageData = ByteString.copyFromUtf8(new JSONObject(message).toString()); + goodLowerData.add(messageData); + pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); + message.put("name", message.get("name").toString().toLowerCase()); + expectedLowerMessages.add(message); + } + + // For tests that run against topics, sending repeatedly will make it work for + // cases in which the on-demand subscription is created after sending messages. + Supplier pubSubReloadedMessageSender = + () -> { + goodLowerData.forEach( + goodMessage -> pubsubResourceManager.publish(topic, ImmutableMap.of(), goodMessage)); + return true; + }; + + Result reloadedResult = + pipelineOperator() + .waitForConditionAndCancel( + createConfig(info), + pubSubReloadedMessageSender, + BigQueryRowsCheck.builder(bigQueryResourceManager, table) + .setMinRows(bigQueryRowsCheck.getRowCount().intValue() + (MESSAGES_COUNT * 2)) + .build()); + // Assert + assertThatResult(reloadedResult).meetsConditions(); + TableResult reloadedRecords = bigQueryResourceManager.readTable(table); + + // Make sure record can be read and UDF changed name to uppercase and lowercase. 
+ assertThatBigQueryRecords(reloadedRecords).hasRecordsUnordered(expectedUpperMessages); + assertThatBigQueryRecords(reloadedRecords).hasRecordsUnordered(expectedLowerMessages); + } + + @Test + @TemplateIntegrationTest(value = PubSubToBigQuery.class, template = "PubSub_to_BigQuery") + public void testTopicToBigQueryClassicWithReloadIntervalZero() + throws IOException, InterruptedException { + // Arrange + List bqSchemaFields = + Arrays.asList( + Field.of("id", StandardSQLTypeName.INT64), + Field.of("job", StandardSQLTypeName.STRING), + Field.of("name", StandardSQLTypeName.STRING)); + Schema bqSchema = Schema.of(bqSchemaFields); + + TopicName topic = pubsubResourceManager.createTopic("input"); + bigQueryResourceManager.createDataset(REGION); + TableId table = bigQueryResourceManager.createTable(testName, bqSchema); + + TableId dlqTable = + bigQueryResourceManager.createTable( + table.getTable() + PubSubToBigQuery.DEFAULT_DEADLETTER_TABLE_SUFFIX, + BIG_QUERY_DLQ_SCHEMA); + + // Act + LaunchInfo info = + launchTemplate( + LaunchConfig.builder(testName, specPath) + .addParameter("inputTopic", topic.toString()) + .addParameter("outputTableSpec", toTableSpecLegacy(table)) + .addParameter("javascriptTextTransformGcsPath", getGcsPath("udf.js")) + .addParameter("javascriptTextTransformFunctionName", "uppercaseName") + .addParameter("javascriptTextTransformReloadIntervalMinutes", "0") + .addParameter("outputDeadletterTable", toTableSpecLegacy(dlqTable))); + assertThatPipeline(info).isRunning(); + + List> expectedUpperMessages = new ArrayList<>(); + List goodUpperData = new ArrayList<>(); + for (int i = 1; i <= MESSAGES_COUNT; i++) { + Map message = + new HashMap<>( + Map.of( + "id", + i, + "job", + testName, + "name", + "UPPER: " + RandomStringUtils.randomAlphabetic(1, 20))); + ByteString messageData = ByteString.copyFromUtf8(new JSONObject(message).toString()); + goodUpperData.add(messageData); + pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); + 
message.put("name", message.get("name").toString().toUpperCase()); + expectedUpperMessages.add(message); + } + + List badData = new ArrayList<>(); + for (int i = 1; i <= BAD_MESSAGES_COUNT; i++) { + ByteString messageData = ByteString.copyFromUtf8("bad id " + i); + pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); + badData.add(messageData); + } + + // For tests that run against topics, sending repeatedly will make it work for + // cases in which the on-demand subscription is created after sending messages. + Supplier pubSubMessageSender = + () -> { + goodUpperData.forEach( + goodMessage -> pubsubResourceManager.publish(topic, ImmutableMap.of(), goodMessage)); + badData.forEach( + badMessage -> pubsubResourceManager.publish(topic, ImmutableMap.of(), badMessage)); + return true; + }; + + BigQueryRowsCheck bigQueryRowsCheck = + BigQueryRowsCheck.builder(bigQueryResourceManager, table) + .setMinRows(MESSAGES_COUNT) + .build(); + BigQueryRowsCheck bigQueryDlqRowsCheck = + BigQueryRowsCheck.builder(bigQueryResourceManager, dlqTable) + .setMinRows(BAD_MESSAGES_COUNT) + .build(); + Result result = + pipelineOperator() + .waitForCondition( + createConfig(info), pubSubMessageSender, bigQueryRowsCheck, bigQueryDlqRowsCheck); + // Assert + assertThatResult(result).meetsConditions(); + TableResult records = bigQueryResourceManager.readTable(table); + + // Make sure record can be read and UDF changed name to uppercase + assertThatBigQueryRecords(records).hasRecordsUnordered(expectedUpperMessages); + + TableResult dlqRecords = bigQueryResourceManager.readTable(dlqTable); + assertThat(dlqRecords.getValues().iterator().next().toString()) + .contains("Expected json literal but found"); + assertThat(dlqRecords.getTotalRows()).isAtLeast(BAD_MESSAGES_COUNT); + + // modify UDF to test reloading + gcsClient.createArtifact( + "udf.js", + "function uppercaseName(value) {\n" + + " const data = JSON.parse(value);\n" + + " data.name = data.name.toLowerCase();\n" + + " 
return JSON.stringify(data);\n" + + "}"); + + // wait to ensure the reload will take effect. + TimeUnit.MINUTES.sleep(2); + List> expectedLowerMessages = new ArrayList<>(); + List goodLowerData = new ArrayList<>(); + for (int i = MESSAGES_COUNT + 1; i <= MESSAGES_COUNT * 2; i++) { + Map message = + new HashMap<>( + Map.of( + "id", + i, + "job", + testName, + "name", + "LOWER: " + RandomStringUtils.randomAlphabetic(1, 20))); + ByteString messageData = ByteString.copyFromUtf8(new JSONObject(message).toString()); + goodLowerData.add(messageData); + pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); + message.put("name", message.get("name").toString().toUpperCase()); + expectedLowerMessages.add(message); + } + + // For tests that run against topics, sending repeatedly will make it work for + // cases in which the on-demand subscription is created after sending messages. + Supplier pubSubReloadedMessageSender = + () -> { + goodLowerData.forEach( + goodMessage -> pubsubResourceManager.publish(topic, ImmutableMap.of(), goodMessage)); + return true; + }; + + Result reloadedResult = + pipelineOperator() + .waitForConditionAndCancel( + createConfig(info), + pubSubReloadedMessageSender, + BigQueryRowsCheck.builder(bigQueryResourceManager, table) + .setMinRows(bigQueryRowsCheck.getRowCount().intValue() + (MESSAGES_COUNT * 2)) + .build()); + // Assert + assertThatResult(reloadedResult).meetsConditions(); + TableResult reloadedRecords = bigQueryResourceManager.readTable(table); + + // Make sure record can be read and UDF changed name to uppercase and lowercase. 
+ assertThatBigQueryRecords(reloadedRecords).hasRecordsUnordered(expectedUpperMessages); + assertThatBigQueryRecords(reloadedRecords).hasRecordsUnordered(expectedLowerMessages); + } + + private static Schema getDlqSchema() { + return Schema.of( + Arrays.asList( + Field.newBuilder("timestamp", StandardSQLTypeName.TIMESTAMP) + .setMode(Field.Mode.REQUIRED) + .build(), + Field.newBuilder("payloadString", StandardSQLTypeName.STRING) + .setMode(Field.Mode.REQUIRED) + .build(), + Field.newBuilder("payloadBytes", StandardSQLTypeName.BYTES) + .setMode(Field.Mode.REQUIRED) + .build(), + Field.newBuilder( + "attributes", + LegacySQLTypeName.RECORD, + Field.newBuilder("key", StandardSQLTypeName.STRING) + .setMode(Field.Mode.NULLABLE) + .build(), + Field.newBuilder("value", StandardSQLTypeName.STRING) + .setMode(Field.Mode.NULLABLE) + .build()) + .setMode(Field.Mode.REPEATED) + .build(), + Field.newBuilder("errorMessage", StandardSQLTypeName.STRING) + .setMode(Field.Mode.NULLABLE) + .build(), + Field.newBuilder("stacktrace", StandardSQLTypeName.STRING) + .setMode(Field.Mode.NULLABLE) + .build())); + } +} diff --git a/v1/src/test/java/com/google/cloud/teleport/templates/PubSubTopicToBigQueryIT.java b/v1/src/test/java/com/google/cloud/teleport/templates/PubSubTopicToBigQueryIT.java new file mode 100644 index 0000000000..3169641134 --- /dev/null +++ b/v1/src/test/java/com/google/cloud/teleport/templates/PubSubTopicToBigQueryIT.java @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.templates; + +import static org.apache.beam.it.gcp.bigquery.matchers.BigQueryAsserts.assertThatBigQueryRecords; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatPipeline; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; + +import com.google.cloud.bigquery.Field; +import com.google.cloud.bigquery.Schema; +import com.google.cloud.bigquery.StandardSQLTypeName; +import com.google.cloud.bigquery.TableId; +import com.google.cloud.teleport.metadata.SkipRunnerV2Test; +import com.google.cloud.teleport.metadata.TemplateIntegrationTest; +import com.google.common.collect.ImmutableMap; +import com.google.protobuf.ByteString; +import com.google.pubsub.v1.TopicName; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import org.apache.beam.it.common.PipelineLauncher.LaunchConfig; +import org.apache.beam.it.common.PipelineLauncher.LaunchInfo; +import org.apache.beam.it.common.PipelineOperator.Result; +import org.apache.beam.it.common.utils.ResourceManagerUtils; +import org.apache.beam.it.gcp.TemplateTestBase; +import org.apache.beam.it.gcp.bigquery.BigQueryResourceManager; +import org.apache.beam.it.gcp.bigquery.conditions.BigQueryRowsCheck; +import org.apache.beam.it.gcp.pubsub.PubsubResourceManager; +import org.json.JSONObject; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Integration test for {@link PubSubToBigQuery} PubSub Topic to Bigquery. 
*/ +@Category({TemplateIntegrationTest.class, SkipRunnerV2Test.class}) +@TemplateIntegrationTest(value = PubSubToBigQuery.class, template = "PubSub_to_BigQuery") +@RunWith(JUnit4.class) +public final class PubSubTopicToBigQueryIT extends TemplateTestBase { + private PubsubResourceManager pubsubResourceManager; + private BigQueryResourceManager bigQueryResourceManager; + + @Before + public void setUp() throws IOException { + pubsubResourceManager = + PubsubResourceManager.builder(testName, PROJECT, credentialsProvider).build(); + bigQueryResourceManager = + BigQueryResourceManager.builder(testName, PROJECT, credentials).build(); + } + + @After + public void cleanUp() { + ResourceManagerUtils.cleanResources(pubsubResourceManager, bigQueryResourceManager); + } + + @Test + public void testTopicToBigQuery() throws IOException { + // Arrange + Map message = Map.of("job", testId, "msg", "message"); + List bqSchemaFields = + Arrays.asList( + Field.of("job", StandardSQLTypeName.STRING), + Field.of("msg", StandardSQLTypeName.STRING)); + Schema bqSchema = Schema.of(bqSchemaFields); + + TopicName topic = pubsubResourceManager.createTopic("input"); + bigQueryResourceManager.createDataset(REGION); + TableId table = bigQueryResourceManager.createTable(testName, bqSchema); + + LaunchConfig.Builder options = + LaunchConfig.builder(testName, specPath) + .addParameter("inputTopic", topic.toString()) + .addParameter("outputTableSpec", toTableSpecLegacy(table)); + + // Act + LaunchInfo info = launchTemplate(options); + assertThatPipeline(info).isRunning(); + + Result result = + pipelineOperator() + .waitForConditionAndFinish( + createConfig(info), + () -> { + ByteString messageData = + ByteString.copyFromUtf8(new JSONObject(message).toString()); + pubsubResourceManager.publish(topic, ImmutableMap.of(), messageData); + return BigQueryRowsCheck.builder(bigQueryResourceManager, table) + .setMinRows(1) + .build() + .get(); + }); + + // Assert + assertThatResult(result).meetsConditions(); + 
assertThatBigQueryRecords(bigQueryResourceManager.readTable(table)).allMatch(message); + } +} diff --git a/v1/src/test/java/com/google/cloud/teleport/templates/PubsubToBigQueryLT.java b/v1/src/test/java/com/google/cloud/teleport/templates/PubsubToBigQueryLT.java new file mode 100644 index 0000000000..4e765d9bf0 --- /dev/null +++ b/v1/src/test/java/com/google/cloud/teleport/templates/PubsubToBigQueryLT.java @@ -0,0 +1,252 @@ +/* + * Copyright (C) 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.templates; + +import static org.apache.beam.it.common.TestProperties.getProperty; +import static org.apache.beam.it.gcp.bigquery.BigQueryResourceManagerUtils.toTableSpec; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatPipeline; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; + +import com.google.cloud.bigquery.Field; +import com.google.cloud.bigquery.FieldValueList; +import com.google.cloud.bigquery.Schema; +import com.google.cloud.bigquery.StandardSQLTypeName; +import com.google.cloud.bigquery.TableId; +import com.google.cloud.bigquery.TableResult; +import com.google.cloud.teleport.metadata.TemplateLoadTest; +import com.google.common.base.MoreObjects; +import com.google.pubsub.v1.SubscriptionName; +import com.google.pubsub.v1.TopicName; +import java.io.IOException; +import java.text.ParseException; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Map; +import java.util.function.Function; +import org.apache.beam.it.common.PipelineLauncher.LaunchConfig; +import org.apache.beam.it.common.PipelineLauncher.LaunchInfo; +import org.apache.beam.it.common.PipelineOperator.Result; +import org.apache.beam.it.common.TestProperties; +import org.apache.beam.it.common.utils.ResourceManagerUtils; +import org.apache.beam.it.gcp.TemplateLoadTestBase; +import org.apache.beam.it.gcp.bigquery.BigQueryResourceManager; +import org.apache.beam.it.gcp.bigquery.conditions.BigQueryRowsCheck; +import org.apache.beam.it.gcp.datagenerator.DataGenerator; +import org.apache.beam.it.gcp.pubsub.PubsubResourceManager; +import org.junit.After; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Performance tests for {@link PubSubToBigQuery PubSub to BigQuery} template. 
*/ +@Category(TemplateLoadTest.class) +@TemplateLoadTest(PubSubToBigQuery.class) +@RunWith(JUnit4.class) +public class PubsubToBigQueryLT extends TemplateLoadTestBase { + private static final String SPEC_PATH = + MoreObjects.firstNonNull( + TestProperties.specPath(), + "gs://dataflow-templates/latest/PubSub_Subscription_to_BigQuery"); + // 35,000,000 messages of the given schema make up approximately 10GB + private static final int NUM_MESSAGES = 35_000_000; + // schema should match schema supplied to generate fake records. + private static final Schema SCHEMA = + Schema.of( + Field.of("eventId", StandardSQLTypeName.STRING), + Field.of("eventTimestamp", StandardSQLTypeName.INT64), + Field.of("ipv4", StandardSQLTypeName.STRING), + Field.of("ipv6", StandardSQLTypeName.STRING), + Field.of("country", StandardSQLTypeName.STRING), + Field.of("username", StandardSQLTypeName.STRING), + Field.of("quest", StandardSQLTypeName.STRING), + Field.of("score", StandardSQLTypeName.INT64), + Field.of("completed", StandardSQLTypeName.BOOL), + // add an insert timestamp column to query latency values + Field.newBuilder("_metadata_insert_timestamp", StandardSQLTypeName.TIMESTAMP) + .setDefaultValueExpression("CURRENT_TIMESTAMP()") + .build()); + private static final String INPUT_PCOLLECTION = + "ReadPubSubSubscription/PubsubUnboundedSource.out0"; + private static final String OUTPUT_PCOLLECTION = + "WriteSuccessfulRecords/StreamingInserts/StreamingWriteTables/StripShardId/Map.out0"; + private static PubsubResourceManager pubsubResourceManager; + private static BigQueryResourceManager bigQueryResourceManager; + + @Before + public void setup() throws IOException { + pubsubResourceManager = + PubsubResourceManager.builder(testName, project, CREDENTIALS_PROVIDER).build(); + bigQueryResourceManager = + BigQueryResourceManager.builder(testName, project, CREDENTIALS).build(); + } + + @After + public void tearDown() { + ResourceManagerUtils.cleanResources(pubsubResourceManager, 
bigQueryResourceManager); + } + + @Test + public void testBacklog10gb() throws IOException, ParseException, InterruptedException { + testBacklog(this::disableRunnerV2); + } + + @Test + public void testSteadyState1hr() throws ParseException, IOException, InterruptedException { + testSteadyState1hr(this::disableRunnerV2); + } + + @Test + public void testSteadyState1hrUsingStreamingEngine() + throws ParseException, IOException, InterruptedException { + testSteadyState1hr(this::enableStreamingEngine); + } + + @Ignore("RunnerV2 is disabled on streaming templates.") + @Test + public void testSteadyState1hrUsingRunnerV2() + throws ParseException, IOException, InterruptedException { + testSteadyState1hr(this::enableRunnerV2); + } + + @Test + public void testSteadyState1hrUsingAtLeastOnceMode() + throws ParseException, IOException, InterruptedException { + ArrayList experiments = new ArrayList<>(); + experiments.add("streaming_mode_at_least_once"); + testSteadyState1hr( + b -> + b.addEnvironment("additionalExperiments", experiments) + .addEnvironment("enableStreamingEngine", true)); + } + + public void testBacklog(Function paramsAdder) + throws IOException, ParseException, InterruptedException { + // Arrange + TopicName backlogTopic = pubsubResourceManager.createTopic("backlog-input"); + SubscriptionName backlogSubscription = + pubsubResourceManager.createSubscription(backlogTopic, "backlog-subscription"); + TableId table = bigQueryResourceManager.createTable(testName, SCHEMA); + // Generate fake data and publish it to the backlog topic + DataGenerator dataGenerator = + DataGenerator.builderWithSchemaTemplate(testName, "GAME_EVENT") + .setQPS("1000000") + .setMessagesLimit(String.valueOf(NUM_MESSAGES)) + .setTopic(backlogTopic.toString()) + .setNumWorkers("50") + .setMaxNumWorkers("100") + .build(); + dataGenerator.execute(Duration.ofMinutes(30)); + LaunchConfig options = + paramsAdder + .apply( + LaunchConfig.builder(testName, SPEC_PATH) + .addEnvironment("maxWorkers", 5) + 
.addEnvironment("numWorkers", 4) + .addParameter("inputSubscription", backlogSubscription.toString()) + .addParameter("outputTableSpec", toTableSpec(project, table))) + .build(); + + // Act + LaunchInfo info = pipelineLauncher.launch(project, region, options); + assertThatPipeline(info).isRunning(); + Result result = + pipelineOperator.waitForConditionAndCancel( + createConfig(info, Duration.ofMinutes(40)), + BigQueryRowsCheck.builder(bigQueryResourceManager, table) + .setMinRows(NUM_MESSAGES) + .build()); + + // Assert + assertThatResult(result).meetsConditions(); + + // export results + exportMetricsToBigQuery(info, getMetrics(info, INPUT_PCOLLECTION, OUTPUT_PCOLLECTION)); + } + + public void testSteadyState1hr(Function paramsAdder) + throws ParseException, IOException, InterruptedException { + // Arrange + String qps = getProperty("qps", "100000", TestProperties.Type.PROPERTY); + TopicName inputTopic = pubsubResourceManager.createTopic("steady-state-input"); + SubscriptionName inputSubscription = + pubsubResourceManager.createSubscription(inputTopic, "steady-state-subscription"); + TableId table = + bigQueryResourceManager.createTable( + testName, SCHEMA, System.currentTimeMillis() + 7200000); // expire in 2 hrs + DataGenerator dataGenerator = + DataGenerator.builderWithSchemaTemplate(testName, "GAME_EVENT") + .setQPS(qps) + .setTopic(inputTopic.toString()) + .setNumWorkers("10") + .setMaxNumWorkers("100") + .build(); + + LaunchConfig options = + paramsAdder + .apply( + LaunchConfig.builder(testName, SPEC_PATH) + .addEnvironment("maxWorkers", 10) + .addEnvironment("numWorkers", 7) + .addEnvironment("additionalUserLabels", Collections.singletonMap("qps", qps)) + .addParameter("inputSubscription", inputSubscription.toString()) + .addParameter("outputTableSpec", toTableSpec(project, table))) + .build(); + + // Act + LaunchInfo info = pipelineLauncher.launch(project, region, options); + assertThatPipeline(info).isRunning(); + // ElementCount metric in dataflow is 
approximate, allow for 1% difference + Integer expectedMessages = (int) (dataGenerator.execute(Duration.ofMinutes(60)) * 0.99); + Result result = + pipelineOperator.waitForConditionAndCancel( + createConfig(info, Duration.ofMinutes(20)), + BigQueryRowsCheck.builder(bigQueryResourceManager, table) + .setMinRows(expectedMessages) + .build()); + // Assert + assertThatResult(result).meetsConditions(); + + Map metrics = getMetrics(info, INPUT_PCOLLECTION, OUTPUT_PCOLLECTION); + // Query end to end latency metrics from BigQuery + TableResult latencyResult = + bigQueryResourceManager.runQuery( + String.format( + "WITH difference AS (SELECT\n" + + " TIMESTAMP_DIFF(_metadata_insert_timestamp,\n" + + " TIMESTAMP_MILLIS(eventTimestamp), SECOND) AS latency,\n" + + " FROM %s.%s)\n" + + " SELECT\n" + + " PERCENTILE_CONT(difference.latency, 0.5) OVER () AS median,\n" + + " PERCENTILE_CONT(difference.latency, 0.9) OVER () as percentile_90,\n" + + " PERCENTILE_CONT(difference.latency, 0.95) OVER () as percentile_95,\n" + + " PERCENTILE_CONT(difference.latency, 0.99) OVER () as percentile_99\n" + + " FROM difference LIMIT 1", + bigQueryResourceManager.getDatasetId(), testName)); + + FieldValueList latencyValues = latencyResult.getValues().iterator().next(); + metrics.put("median_latency", latencyValues.get(0).getDoubleValue()); + metrics.put("percentile_90_latency", latencyValues.get(1).getDoubleValue()); + metrics.put("percentile_95_latency", latencyValues.get(2).getDoubleValue()); + metrics.put("percentile_99_latency", latencyValues.get(3).getDoubleValue()); + + // export results + exportMetricsToBigQuery(info, metrics); + } +} diff --git a/v1/src/test/java/com/google/cloud/teleport/templates/TextToBigQueryStreamLT.java b/v1/src/test/java/com/google/cloud/teleport/templates/TextToBigQueryStreamLT.java new file mode 100644 index 0000000000..56f5c06c38 --- /dev/null +++ b/v1/src/test/java/com/google/cloud/teleport/templates/TextToBigQueryStreamLT.java @@ -0,0 +1,274 @@ +/* + * 
Copyright (C) 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.templates; + +import static org.apache.beam.it.gcp.artifacts.utils.ArtifactUtils.getFullGcsPath; +import static org.apache.beam.it.gcp.bigquery.BigQueryResourceManagerUtils.toTableSpec; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatPipeline; +import static org.apache.beam.it.truthmatchers.PipelineAsserts.assertThatResult; + +import com.google.cloud.bigquery.Field; +import com.google.cloud.bigquery.Schema; +import com.google.cloud.bigquery.StandardSQLTypeName; +import com.google.cloud.bigquery.TableId; +import com.google.cloud.teleport.metadata.TemplateLoadTest; +import com.google.common.base.MoreObjects; +import com.google.common.io.Resources; +import java.io.IOException; +import java.text.ParseException; +import java.time.Duration; +import java.util.function.Function; +import org.apache.beam.it.common.PipelineLauncher; +import org.apache.beam.it.common.PipelineOperator; +import org.apache.beam.it.common.TestProperties; +import org.apache.beam.it.common.utils.ResourceManagerUtils; +import org.apache.beam.it.gcp.TemplateLoadTestBase; +import org.apache.beam.it.gcp.artifacts.ArtifactClient; +import org.apache.beam.it.gcp.bigquery.BigQueryResourceManager; +import org.apache.beam.it.gcp.bigquery.conditions.BigQueryRowsCheck; +import org.apache.beam.it.gcp.datagenerator.DataGenerator; +import 
org.apache.beam.it.gcp.storage.GcsResourceManager; +import org.junit.After; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Performance tests for {@link TextToBigQueryStreaming GCS Text to BigQuery} template. */ +@Category(TemplateLoadTest.class) +@TemplateLoadTest(TextToBigQueryStreaming.class) +@RunWith(JUnit4.class) +public class TextToBigQueryStreamLT extends TemplateLoadTestBase { + private static final String SPEC_PATH = + MoreObjects.firstNonNull( + TestProperties.specPath(), "gs://dataflow-templates/latest/Stream_GCS_Text_to_BigQuery"); + private static final String ARTIFACT_BUCKET = TestProperties.artifactBucket(); + private static final String TEST_ROOT_DIR = + TextToBigQueryStreamLT.class.getSimpleName().toLowerCase(); + private static final String INPUT_PCOLLECTION = + "ReadFromSource/Via ReadFiles/Read all via FileBasedSource/Read ranges.out0"; + private static final String OUTPUT_PCOLLECTION = + "InsertIntoBigQuery/StreamingInserts/StreamingWriteTables/StripShardId/Map.out0"; + // schema should match schema supplied to generate fake records. 
+ private static final Schema SCHEMA = + Schema.of( + Field.of("eventId", StandardSQLTypeName.STRING), + Field.of("eventTimestamp", StandardSQLTypeName.INT64), + Field.of("ipv4", StandardSQLTypeName.STRING), + Field.of("ipv6", StandardSQLTypeName.STRING), + Field.of("country", StandardSQLTypeName.STRING), + Field.of("username", StandardSQLTypeName.STRING), + Field.of("quest", StandardSQLTypeName.STRING), + Field.of("score", StandardSQLTypeName.INT64), + Field.of("completed", StandardSQLTypeName.BOOL)); + + // 35,000,000 messages of the given schema make up approximately 10GB + private static final int NUM_MESSAGES_FOR_10GB = 35000000; + private static final long TIMEOUT_FOR_10_GB_TEST_MINUTES = 30; + private static final long TIMEOUT_FOR_1_HOUR_TEST_MINUTES = 60; + + private static ArtifactClient gcsClient; + private static BigQueryResourceManager bigQueryResourceManager; + private String jsonPath; + private String udfPath; + + @Before + public void setup() throws IOException { + bigQueryResourceManager = + BigQueryResourceManager.builder(testName, project, CREDENTIALS).build(); + + gcsClient = GcsResourceManager.builder(ARTIFACT_BUCKET, TEST_ROOT_DIR, CREDENTIALS).build(); + // upload schema files and save path + jsonPath = + getFullGcsPath( + ARTIFACT_BUCKET, + gcsClient + .uploadArtifact( + "input/schema.json", + Resources.getResource("TextIOToBigQueryTest/schema.json").getPath()) + .name()); + + udfPath = + getFullGcsPath( + ARTIFACT_BUCKET, + gcsClient + .uploadArtifact( + "input/udf.js", Resources.getResource("TextIOToBigQueryTest/udf.js").getPath()) + .name()); + } + + @After + public void teardown() { + ResourceManagerUtils.cleanResources(bigQueryResourceManager, gcsClient); + } + + @Test + public void testBacklog10gb() throws IOException, ParseException, InterruptedException { + testBacklog(this::disableRunnerV2); + } + + @Test + public void testSteadyState1hr() throws IOException, ParseException, InterruptedException { + 
testSteadyState1hr(this::disableRunnerV2); + } + + @Test + public void testSteadyState1hrUsingStreamingEngine() + throws IOException, ParseException, InterruptedException { + testSteadyState1hr(this::enableStreamingEngine); + } + + @Ignore("RunnerV2 is disabled on streaming templates.") + @Test + public void testSteadyState1hrUsingRunnerV2() + throws IOException, ParseException, InterruptedException { + testSteadyState1hr(this::enableRunnerV2); + } + + private void testBacklog( + Function + paramsAdder) + throws IOException, ParseException, InterruptedException { + + DataGenerator dataGenerator = + DataGenerator.builderWithSchemaTemplate(testName, "GAME_EVENT") + .setQPS("1000000") + .setMessagesLimit(String.valueOf(NUM_MESSAGES_FOR_10GB)) + .setSinkType("GCS") + .setOutputDirectory(getTestMethodDirPath()) + .setNumShards("20") + .setNumWorkers("50") + .setMaxNumWorkers("100") + .build(); + // Executes the data generator + dataGenerator.execute(Duration.ofMinutes(TIMEOUT_FOR_10_GB_TEST_MINUTES)); + + /* + * This table will automatically expire 1 h after creation if not cleaned up manually or by + * calling the {@link BigQueryResourceManager#cleanupAll()} method. 
+ */ + TableId table = bigQueryResourceManager.createTable(testName, SCHEMA); + + PipelineLauncher.LaunchConfig options = + paramsAdder + .apply( + PipelineLauncher.LaunchConfig.builder(testName, SPEC_PATH) + .addEnvironment("maxWorkers", 10) + .addEnvironment("numWorkers", 5) + .addParameter("outputTable", toTableSpec(project, table)) + .addParameter("inputFilePattern", getTestMethodDirPath() + "/*") + .addParameter("JSONPath", jsonPath) + .addParameter( + "bigQueryLoadingTemporaryDirectory", getTestMethodDirPath() + "/temp") + .addParameter("javascriptTextTransformGcsPath", udfPath) + .addParameter("javascriptTextTransformFunctionName", "identity")) + .build(); + + // Act + PipelineLauncher.LaunchInfo info = pipelineLauncher.launch(project, region, options); + assertThatPipeline(info).isRunning(); + PipelineOperator.Result result = + // The method waitForConditionAndCancel was used because the streaming pipeline template + // includes a call to Splittable DoFn. Invoking a splittable DoFn causes the job to remain + // in the Draining state indefinitely. 
+ // @see + // Important information about draining a job + pipelineOperator.waitForConditionAndCancel( + createConfig(info, Duration.ofMinutes(TIMEOUT_FOR_10_GB_TEST_MINUTES)), + BigQueryRowsCheck.builder(bigQueryResourceManager, table) + .setMinRows(NUM_MESSAGES_FOR_10GB) + .build()); + + // Assert + assertThatResult(result).meetsConditions(); + + // export results + exportMetricsToBigQuery(info, getMetrics(info, INPUT_PCOLLECTION, OUTPUT_PCOLLECTION)); + } + + private void testSteadyState1hr( + Function + paramsAdder) + throws IOException, ParseException, InterruptedException { + + DataGenerator dataGenerator = + DataGenerator.builderWithSchemaTemplate(testName, "GAME_EVENT") + .setQPS("100000") + .setSinkType("GCS") + .setOutputDirectory(getTestMethodDirPath()) + .setNumShards("20") + .setNumWorkers("10") + .setMaxNumWorkers("15") + .build(); + + /* + * This table will automatically expire 2h after creation if not cleaned up manually or by + * calling the {@link BigQueryResourceManager#cleanupAll()} method. 
+ */ + TableId table = + bigQueryResourceManager.createTable(testName, SCHEMA, System.currentTimeMillis() + 7200000); + + PipelineLauncher.LaunchConfig options = + paramsAdder + .apply( + PipelineLauncher.LaunchConfig.builder(testName, SPEC_PATH) + .addEnvironment("maxWorkers", 10) + .addEnvironment("numWorkers", 5) + .addParameter("outputTable", toTableSpec(project, table)) + .addParameter("inputFilePattern", getTestMethodDirPath() + "/*") + .addParameter("JSONPath", jsonPath) + .addParameter( + "bigQueryLoadingTemporaryDirectory", getTestMethodDirPath() + "/temp") + .addParameter("javascriptTextTransformGcsPath", udfPath) + .addParameter("javascriptTextTransformFunctionName", "identity")) + .build(); + + // Act + PipelineLauncher.LaunchInfo info = pipelineLauncher.launch(project, region, options); + assertThatPipeline(info).isRunning(); + + // Executes the data generator and return approximate number of messages + // ElementCount metric in dataflow is approximate, allow for 1% difference + int expectedMessages = + (int) (dataGenerator.execute(Duration.ofMinutes(TIMEOUT_FOR_1_HOUR_TEST_MINUTES)) * 0.99); + + PipelineOperator.Result result = + // The method waitForConditionAndCancel was used because the streaming pipeline template + // includes a call to Splittable DoFn. Invoking a splittable DoFn causes the job to remain + // in the Draining state indefinitely. 
+ // @see + // Important information about draining a job + pipelineOperator.waitForConditionAndCancel( + createConfig(info, Duration.ofMinutes(10)), + BigQueryRowsCheck.builder(bigQueryResourceManager, table) + .setMinRows(expectedMessages) + .build()); + + // Assert + assertThatResult(result).meetsConditions(); + + // export results + exportMetricsToBigQuery(info, getMetrics(info, INPUT_PCOLLECTION, OUTPUT_PCOLLECTION)); + } + + private String getTestMethodDirPath() { + return getFullGcsPath(ARTIFACT_BUCKET, TEST_ROOT_DIR, gcsClient.runId(), testName); + } +} From 34a37845e9ae6b60b1409b10c9754c229d1b61a8 Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Tue, 26 May 2026 13:25:44 -0700 Subject: [PATCH 05/19] Add support for REMOTE UDFs. --- .jetskicli/54a20886-bbd3-484f-837e-888b57bed997.json | 1 - 1 file changed, 1 deletion(-) delete mode 120000 .jetskicli/54a20886-bbd3-484f-837e-888b57bed997.json diff --git a/.jetskicli/54a20886-bbd3-484f-837e-888b57bed997.json b/.jetskicli/54a20886-bbd3-484f-837e-888b57bed997.json deleted file mode 120000 index 64bc587b0b..0000000000 --- a/.jetskicli/54a20886-bbd3-484f-837e-888b57bed997.json +++ /dev/null @@ -1 +0,0 @@ -/usr/local/google/home/adrw/.gemini/config/projects/54a20886-bbd3-484f-837e-888b57bed997.json \ No newline at end of file From 91bcc4fe14a5bb38cef3ee90d7c976d274ea36ad Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Tue, 26 May 2026 13:25:44 -0700 Subject: [PATCH 06/19] Add support for REMOTE UDFs. 
--- pom.xml | 2 +- .../teleport/spanner/AvroRecordConverter.java | 20 +++---- .../spanner/ddl/InformationSchemaScanner.java | 2 +- .../cloud/teleport/spanner/ddl/Udf.java | 17 ++++-- .../spanner/spannerio/StructUtils.java | 12 ++-- .../spanner/AvroSchemaToDdlConverterTest.java | 8 +-- .../cloud/teleport/spanner/CopyDbTest.java | 10 +--- .../spanner/DdlToAvroSchemaConverterTest.java | 55 ++++++++++--------- .../teleport/spanner/ExportPipelineIT.java | 30 ++++------ .../teleport/spanner/ImportPipelineIT.java | 6 +- .../cloud/teleport/spanner/ddl/DdlTest.java | 14 ++--- .../ddl/InformationSchemaScannerIT.java | 9 +-- .../spanner/ddl/RandomDdlGenerator.java | 21 ++++--- .../cloud/teleport/spanner/ddl/UdfTest.java | 3 +- 14 files changed, 103 insertions(+), 106 deletions(-) diff --git a/pom.xml b/pom.xml index 254447f187..f399d6a25d 100644 --- a/pom.xml +++ b/pom.xml @@ -434,7 +434,7 @@ - 1.17.0 + 1.27.0 diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/AvroRecordConverter.java b/v1/src/main/java/com/google/cloud/teleport/spanner/AvroRecordConverter.java index e55f76979b..8599db65cd 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/AvroRecordConverter.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/AvroRecordConverter.java @@ -401,8 +401,8 @@ static Optional> readFloat32Array( return Optional.empty(); } switch (avroType) { - // For type check at compile time, the type of x has to be specified (as cast) so that - // convertability to float can be verified. + // For type check at compile time, the type of x has to be specified (as cast) so that + // convertability to float can be verified. case FLOAT: return Optional.of((List) fieldValue); case STRING: @@ -413,9 +413,9 @@ static Optional> readFloat32Array( .map(x -> x == null ? null : Float.valueOf(x.toString())) .collect(Collectors.toList())); } - // Avoid decoding integers as not all 32 bit integers can be represented in float32. 
+ // Avoid decoding integers as not all 32 bit integers can be represented in float32. case INT: - // Avoid decoding 64 bit values into 32 bit space as this will cause a precision loss. + // Avoid decoding 64 bit values into 32 bit space as this will cause a precision loss. case LONG: case DOUBLE: default: @@ -432,8 +432,8 @@ static Optional> readFloat64Array( return Optional.empty(); } switch (avroType) { - // For type check at compile time, the type of x has to be specified (as cast) so that - // convertability to double can be verified. + // For type check at compile time, the type of x has to be specified (as cast) so that + // convertability to double can be verified. case DOUBLE: return Optional.of((List) fieldValue); case FLOAT: @@ -482,8 +482,8 @@ static Optional> readInt64Array( return Optional.empty(); } switch (avroType) { - // For type check at compile time, the type of x has to be specified (as cast) so that - // convertability to long can be verified. + // For type check at compile time, the type of x has to be specified (as cast) so that + // convertability to long can be verified. case LONG: return Optional.of((List) fieldValue); case INT: @@ -637,9 +637,9 @@ private static Optional readFloat32( return Optional.ofNullable((Utf8) record.get(fieldName)) .map(Utf8::toString) .map(Float::valueOf); - // Avoid decoding integers as not all 32 bit integers can be represented in float32. + // Avoid decoding integers as not all 32 bit integers can be represented in float32. case INT: - // Avoid decoding 64 bit values into 32 bit space as this will cause a precision loss. + // Avoid decoding 64 bit values into 32 bit space as this will cause a precision loss. 
case LONG: case DOUBLE: default: diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java index 853714b61a..694618bda4 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java @@ -1070,7 +1070,7 @@ private void listUdfs(Ddl.Builder builder) { // The routine_body is SQL or EXTERNAL and the external_language is not available yet. // Assume that only available EXTERNAL language is REMOTE. if (dialect == Dialect.POSTGRESQL && "EXTERNAL".equalsIgnoreCase(language)) { - language = "REMOTE"; + language = "REMOTE"; } LOG.debug("Schema user-defined function {}", functionName); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java index daa28fedc7..3d57ea8839 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java @@ -37,7 +37,8 @@ public abstract class Udf implements Serializable { // Remote function body is printed using $$ strings, which are // unlikely but possible to be present in the function definition. // https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-ESCAPE - public static final Escaper PG_REMOTE_UDF_BODY_ESCAPER = Escapers.builder().addEscape('$', "\\044").build(); + public static final Escaper PG_REMOTE_UDF_BODY_ESCAPER = + Escapers.builder().addEscape('$', "\\044").build(); /** The access rights used by the UDF for underlying data: invoker-rights or definer-rights. 
*/ public enum SqlSecurity { @@ -128,7 +129,7 @@ public void prettyPrint(Appendable appendable) throws IOException { break; case POSTGRESQL: throw new IllegalArgumentException( - "Options are not supported in PostgreSQL dialect for non-remote UDFs."); + "Options are not supported in PostgreSQL dialect for UDFs."); default: throw new IllegalArgumentException(String.format("Unrecognized Dialect: %s.", dialect())); } @@ -143,8 +144,11 @@ public void prettyPrint(Appendable appendable) throws IOException { if (language() == null || language().isEmpty() || "SQL".equalsIgnoreCase(language())) { appendable.append(" RETURN ").append(definition()); } else { - // Other langugges use AS definition instead of sql body. - appendable.append(" AS $$").append(PG_REMOTE_UDF_BODY_ESCAPER.escape(definition())).append("$$"); + // Other languages use AS definition instead of sql body. + appendable + .append(" AS $$") + .append(PG_REMOTE_UDF_BODY_ESCAPER.escape(definition())) + .append("$$"); } break; default: @@ -195,7 +199,10 @@ public Builder toBuilder() { } public static Builder builder(Dialect dialect) { - return new AutoValue_Udf.Builder().dialect(dialect).parameters(ImmutableList.of()).options(ImmutableList.of()); + return new AutoValue_Udf.Builder() + .dialect(dialect) + .parameters(ImmutableList.of()) + .options(ImmutableList.of()); } public static Builder builder() { diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/spannerio/StructUtils.java b/v1/src/main/java/com/google/cloud/teleport/spanner/spannerio/StructUtils.java index 69f90677d1..3857957e05 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/spannerio/StructUtils.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/spannerio/StructUtils.java @@ -262,7 +262,7 @@ private static Type simpleBeamTypeToSpannerType(Schema.FieldType beamType) { return Type.string(); case BOOLEAN: return Type.bool(); - // TODO: implement logical type date 
and timestamp case DATETIME: return Type.timestamp(); default: @@ -335,7 +335,7 @@ private static void addIterableToStructBuilder( case STRING: structBuilder.set(column).toStringArray((Iterable) ((Object) iterable)); break; - // TODO: implement logical date and datetime + // TODO: implement logical date and datetime case DATETIME: if (iterable == null) { structBuilder.set(column).toTimestampArray(null); @@ -367,10 +367,10 @@ private static void addIterableToStructBuilder( return struct.getBoolean(column); case BYTES: return struct.getBytes(column).toByteArray(); - // TODO: implement logical datetime + // TODO: implement logical datetime case TIMESTAMP: return Instant.ofEpochSecond(struct.getTimestamp(column).getSeconds()).toDateTime(); - // TODO: implement logical date + // TODO: implement logical date case DATE: return DateTime.parse(struct.getDate(column).toString()); case INT64: @@ -411,12 +411,12 @@ private static void addIterableToStructBuilder( return struct.getBooleanList(column); case BYTES: return struct.getBytesList(column); - // TODO: implement logical datetime + // TODO: implement logical datetime case TIMESTAMP: return struct.getTimestampList(column).stream() .map(timestamp -> Instant.ofEpochSecond(timestamp.getSeconds()).toDateTime()) .collect(toList()); - // TODO: implement logical date + // TODO: implement logical date case DATE: return struct.getDateList(column).stream() .map(date -> DateTime.parse(date.toString())) diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/AvroSchemaToDdlConverterTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/AvroSchemaToDdlConverterTest.java index f3761c0e68..15c17c3142 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/AvroSchemaToDdlConverterTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/AvroSchemaToDdlConverterTest.java @@ -21,17 +21,15 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; +import 
com.google.cloud.spanner.Dialect; +import com.google.cloud.teleport.spanner.common.Type; +import com.google.cloud.teleport.spanner.ddl.Ddl; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; - import org.apache.avro.Schema; import org.junit.Test; -import com.google.cloud.spanner.Dialect; -import com.google.cloud.teleport.spanner.common.Type; -import com.google.cloud.teleport.spanner.ddl.Ddl; - /** Tests {@link AvroSchemaToDdlConverter}. */ public class AvroSchemaToDdlConverterTest { diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java index 5484081ef2..1b8f9ffa0c 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java @@ -1106,8 +1106,7 @@ public void udfs() throws Exception { .name("s1.Foo3") .language("REMOTE") .type("INT64") - .addParameter( - UdfParameter.parse("arg0 INT64", "s1.Foo3", Dialect.GOOGLE_STANDARD_SQL)) + .addParameter(UdfParameter.parse("arg0 INT64", "s1.Foo3", Dialect.GOOGLE_STANDARD_SQL)) .options( ImmutableList.of( "endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")) @@ -1117,7 +1116,6 @@ public void udfs() throws Exception { runTest(); } - @Test public void pgUdfs() throws Exception { Ddl.Builder ddlBuilder = Ddl.builder(Dialect.POSTGRESQL); @@ -1145,16 +1143,14 @@ public void pgUdfs() throws Exception { .type("TEXT") .addParameter(UdfParameter.parse("arg0 TEXT", "s1.Foo2", Dialect.POSTGRESQL)) .addParameter( - UdfParameter.parse( - "arg1 TEXT DEFAULT 'bar'", "s1.Foo2", Dialect.POSTGRESQL)) + UdfParameter.parse("arg1 TEXT DEFAULT 'bar'", "s1.Foo2", Dialect.POSTGRESQL)) .endUdf() .createUdf("s1.Foo3") .dialect(Dialect.POSTGRESQL) .name("s1.Foo2") .language("REMOTE") .type("BIGINT") - .addParameter( - UdfParameter.parse("arg0 BIGINT", "s1.Foo3", Dialect.POSTGRESQL)) + 
.addParameter(UdfParameter.parse("arg0 BIGINT", "s1.Foo3", Dialect.POSTGRESQL)) .options( ImmutableList.of( "endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")) diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/DdlToAvroSchemaConverterTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/DdlToAvroSchemaConverterTest.java index 32283dddb4..81e39e5f3a 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/DdlToAvroSchemaConverterTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/DdlToAvroSchemaConverterTest.java @@ -69,16 +69,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; -import java.util.Arrays; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; - -import org.apache.avro.LogicalTypes; -import org.apache.avro.Schema; -import org.apache.avro.SchemaBuilder; -import org.junit.Test; - import com.google.cloud.spanner.Dialect; import com.google.cloud.teleport.spanner.common.NumericUtils; import com.google.cloud.teleport.spanner.common.Type; @@ -94,7 +84,14 @@ import com.google.cloud.teleport.spanner.ddl.UdfParameter; import com.google.cloud.teleport.spanner.ddl.View; import com.google.common.collect.ImmutableList; - +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import org.apache.avro.LogicalTypes; +import org.apache.avro.Schema; +import org.apache.avro.SchemaBuilder; +import org.junit.Test; /** Test for {@link DdlToAvroSchemaConverter}. 
*/ public class DdlToAvroSchemaConverterTest { @@ -744,15 +741,19 @@ public void udfRemote() { new DdlToAvroSchemaConverter("spannertest", "booleans", false); Ddl ddl = Ddl.builder() - .createUdf("UdfSchema.Foo") - .name("UdfSchema.Foo") + .createUdf("UdfSchema.Foo") + .name("UdfSchema.Foo") .type("STRING") .language("REMOTE") - .addParameter(UdfParameter.parse("arg0 STRING", "UdfSchema.Foo", Dialect.GOOGLE_STANDARD_SQL)) + .addParameter( + UdfParameter.parse("arg0 STRING", "UdfSchema.Foo", Dialect.GOOGLE_STANDARD_SQL)) .addParameter( UdfParameter.parse( - "arg1 STRING DEFAULT \"bar\"", "UdfSchema.Foo", Dialect.GOOGLE_STANDARD_SQL)) - .options(ImmutableList.of("endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"", "max_batching_rows=50")) + "arg1 STRING DEFAULT \"bar\"", "UdfSchema.Foo", Dialect.GOOGLE_STANDARD_SQL)) + .options( + ImmutableList.of( + "endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"", + "max_batching_rows=50")) .endUdf() .build(); @@ -779,22 +780,23 @@ public void udfRemote() { avroUdf.getProp(SPANNER_OPTION + 0), equalTo("endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")); assertThat(avroUdf.getProp(SPANNER_OPTION + 1), equalTo("max_batching_rows=50")); -} + } -@Test -public void pgUdfRemote() { - DdlToAvroSchemaConverter converter = new DdlToAvroSchemaConverter("spannertest", "booleans", false); - Ddl ddl = Ddl.builder(Dialect.POSTGRESQL) + @Test + public void pgUdfRemote() { + DdlToAvroSchemaConverter converter = + new DdlToAvroSchemaConverter("spannertest", "booleans", false); + Ddl ddl = + Ddl.builder(Dialect.POSTGRESQL) .createUdf("UdfSchema.Foo") .name("UdfSchema.Foo") .type("TEXT") .language("REMOTE") .definition( - "{\"endpoint\":\"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\":50}") + "{\"endpoint\":\"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\":50}") .addParameter(UdfParameter.parse("arg0 TEXT", "UdfSchema.Foo", 
Dialect.POSTGRESQL)) .addParameter( - UdfParameter.parse( - "arg1 TEXT DEFAULT \"bar\"", "spanner.Foo", Dialect.POSTGRESQL)) + UdfParameter.parse("arg1 TEXT DEFAULT \"bar\"", "spanner.Foo", Dialect.POSTGRESQL)) .endUdf() .build(); @@ -810,7 +812,10 @@ public void pgUdfRemote() { assertThat(avroUdf.getProp(GOOGLE_STORAGE), equalTo("CloudSpanner")); assertThat(avroUdf.getProp(SPANNER_NAME), equalTo("UdfSchema.Foo")); assertThat(avroUdf.getProp(SPANNER_UDF_NAME), equalTo("UdfSchema.Foo")); - assertThat(avroUdf.getProp(SPANNER_UDF_DEFINITION), equalTo("{\"endpoint\":\"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\":50}")); + assertThat( + avroUdf.getProp(SPANNER_UDF_DEFINITION), + equalTo( + "{\"endpoint\":\"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\":50}")); assertThat(avroUdf.getProp(SPANNER_UDF_SECURITY), nullValue()); assertThat(avroUdf.getProp(SPANNER_UDF_TYPE), equalTo("TEXT")); assertThat(avroUdf.getProp(SPANNER_UDF_LANGUAGE), equalTo("REMOTE")); diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java index 23c8209782..9c375ae404 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java @@ -245,27 +245,23 @@ private void testSpannerToGCSAvroBase( "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Empty"))); List modelStructArtifacts = gcsClient.listArtifacts( - "output/", - Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "ModelStruct"))); + "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "ModelStruct"))); List udfRemoteArtifacts = gcsClient.listArtifacts( "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "UdfSchema.Remote"))); List searchIndexArtifacts = gcsClient.listArtifacts( - "output/", - 
Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "SearchIndex"))); + "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "SearchIndex"))); List identityArtifacts = gcsClient.listArtifacts( "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Identity"))); List sequenceArtifacts = gcsClient.listArtifacts( - "output/", - Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Sequence1"))); + "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Sequence1"))); List sequenceNoKindArtifacts = gcsClient.listArtifacts( - "output/", - Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Sequence2"))); + "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Sequence2"))); assertThat(singersArtifacts).isNotEmpty(); assertThat(emptyArtifacts).isNotEmpty(); @@ -341,28 +337,22 @@ private void testPGSpannerToAvroBase( List singersArtifacts = gcsClient.listArtifacts( - "output/", - Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Singers"))); + "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Singers"))); List emptyArtifacts = gcsClient.listArtifacts( - "output/", - Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Empty"))); + "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Empty"))); List searchIndexArtifacts = gcsClient.listArtifacts( - "output/", - Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "SearchIndex"))); + "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "SearchIndex"))); List identityArtifacts = gcsClient.listArtifacts( - "output/", - Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Identity"))); + "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Identity"))); List sequenceArtifacts = gcsClient.listArtifacts( - "output/", - Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Sequence1"))); + "output/", 
Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Sequence1"))); List sequenceNoKindArtifacts = gcsClient.listArtifacts( - "output/", - Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Sequence2"))); + "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Sequence2"))); assertThat(singersArtifacts).isNotEmpty(); assertThat(emptyArtifacts).isNotEmpty(); assertThat(identityArtifacts).isNotEmpty(); diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java index a20c5501f4..2df8ff9bb4 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java @@ -111,8 +111,7 @@ private void uploadImportPipelineArtifacts(String subdirectory) throws IOExcepti .getPath()); gcsClient.uploadArtifact( "input/UdfSchema.avro-00000-of-00001", - Resources.getResource("ImportPipelineIT/" + subdirectory + "/UdfSchema.avro") - .getPath()); + Resources.getResource("ImportPipelineIT/" + subdirectory + "/UdfSchema.avro").getPath()); gcsClient.uploadArtifact( "input/UdfSchema-manifest.json", Resources.getResource("ImportPipelineIT/" + subdirectory + "/UdfSchema-manifest.json") @@ -125,7 +124,8 @@ private void uploadImportPipelineArtifacts(String subdirectory) throws IOExcepti .getPath()); gcsClient.uploadArtifact( "input/UdfSchema.Remote-manifest.json", - Resources.getResource("ImportPipelineIT/" + subdirectory + "/UdfSchema.Remote-manifest.json") + Resources.getResource( + "ImportPipelineIT/" + subdirectory + "/UdfSchema.Remote-manifest.json") .getPath()); gcsClient.uploadArtifact( "input/ModelStruct.avro-00000-of-00001", diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/DdlTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/DdlTest.java index 522ce5f1cf..06ae927b70 100644 --- 
a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/DdlTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/DdlTest.java @@ -1431,21 +1431,18 @@ public void pgUdfs() { .definition("(SELECT 'bar')") .security(SqlSecurity.INVOKER) .type("STRING") + .addParameter(UdfParameter.parse("arg0 TEXT", "spanner.Foo", Dialect.POSTGRESQL)) .addParameter( - UdfParameter.parse("arg0 TEXT", "spanner.Foo", Dialect.POSTGRESQL)) - .addParameter( - UdfParameter.parse( - "arg1 TEXT DEFAULT 'bar'", "spanner.Foo", Dialect.POSTGRESQL)) + UdfParameter.parse("arg1 TEXT DEFAULT 'bar'", "spanner.Foo", Dialect.POSTGRESQL)) .endUdf() .createUdf("spanner.Foo3") .dialect(Dialect.POSTGRESQL) .name("Foo3") .type("STRING") .language("REMOTE") - .addParameter( - UdfParameter.parse("arg0 BIGINT", "spanner.Foo3", Dialect.POSTGRESQL)) - .definition( - "{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\": 50}") + .addParameter(UdfParameter.parse("arg0 BIGINT", "spanner.Foo3", Dialect.POSTGRESQL)) + .definition( + "{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\": 50}") .endUdf(); assertThat(ddlBuilder.hasUdf("spanner.Foo1")); assertThat(ddlBuilder.createUdf("spanner.Foo1").name().equals("Foo1")); @@ -1480,7 +1477,6 @@ public void pgUdfs() { ddl.toBuilder().build().prettyPrint(), equalToCompressingWhiteSpace(expectedDdlString)); } - @Test public void sequences() { Ddl ddl = diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java index 30a45bdeec..9e25d424bc 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java @@ -636,7 +636,8 @@ public void simpleUdf() throws Exception { + "C STRING DEFAULT 'NULL', " + "D STRING DEFAULT 
'') " + "RETURNS STRING AS (CONCAT(A, '::', B, '::', C, '::', D))"; - String udfDef3 = "CREATE FUNCTION s1.remote_udf(x INT64, y INT64) " + String udfDef3 = + "CREATE FUNCTION s1.remote_udf(x INT64, y INT64) " + "RETURNS INT64 NOT DETERMINISTIC LANGUAGE REMOTE " + "OPTIONS ( endpoint = 'https://us-central1-myproject.cloudfunctions.net/myfunc' )"; @@ -700,11 +701,12 @@ public void simpleUdf() throws Exception { .defaultExpression("''") .autoBuild())); - assertThat(udf3.name(), equalTo("s1.remote_udf")); assertThat(udf3.type(), equalTo("INT64")); assertEquals(udf3.language(), "REMOTE"); - assertThat(udf3.options(), hasItems("endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")); + assertThat( + udf3.options(), + hasItems("endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")); assertEquals(udf3.definition(), ""); assertEquals(udf3.security(), Udf.SqlSecurity.INVOKER); assertThat( @@ -722,7 +724,6 @@ public void simpleUdf() throws Exception { .type("INT64") .defaultExpression(null) .autoBuild())); - } @Test diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java index 1dcee9b2ff..7cd5bf58b8 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java @@ -102,9 +102,7 @@ public abstract class RandomDdlGenerator { Type.Code.PG_NUMERIC, Type.Code.PG_DATE)); - private static final String[] UDF_LANGUAGES = new String[] { - "SQL", "REMOTE" - }; + private static final String[] UDF_LANGUAGES = new String[] {"SQL", "REMOTE"}; private static final int MAX_PKS = 16; @@ -245,7 +243,8 @@ private void generateUdf(Ddl.Builder builder) { .dialect(Dialect.GOOGLE_STANDARD_SQL) .name(name); if (getRandom().nextBoolean()) { - Type type = generateType((getDialect() == Dialect.GOOGLE_STANDARD_SQL) ? 
PK_TYPES : PG_PK_TYPES, -1); + Type type = + generateType((getDialect() == Dialect.GOOGLE_STANDARD_SQL) ? PK_TYPES : PG_PK_TYPES, -1); udfBuilder.type(type.getCode().getName()); } @@ -255,20 +254,24 @@ private void generateUdf(Ddl.Builder builder) { if (!"REMOTE".equals(udfBuilder.language())) { if (getRandom().nextBoolean()) { - udfBuilder.security(SqlSecurity.INVOKER); - } + udfBuilder.security(SqlSecurity.INVOKER); + } } else { if (getDialect() == Dialect.GOOGLE_STANDARD_SQL) { - udfBuilder.options(ImmutableList.of("endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")); + udfBuilder.options( + ImmutableList.of( + "endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")); } else { - udfBuilder.definition("\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\""); + udfBuilder.definition( + "\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\""); } } int numUdfParameters = getRandom().nextInt(getMaxUdfParameters() + 1); for (int i = 0; i < numUdfParameters; i++) { String paramName = generateIdentifier(getMaxIdLength()); - Type type = generateType((getDialect() == Dialect.GOOGLE_STANDARD_SQL) ? PK_TYPES : PG_PK_TYPES, -1); + Type type = + generateType((getDialect() == Dialect.GOOGLE_STANDARD_SQL) ? 
PK_TYPES : PG_PK_TYPES, -1); UdfParameter.Builder udfParameterBuilder = udfBuilder.parameter(paramName).type(type.getCode().getName()); if (getRandom().nextBoolean()) { diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java index ecb53d3d75..960679d401 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java @@ -114,7 +114,8 @@ public void testPgRemoteUdf() { .type("TEXT") .language("REMOTE") .addParameter(UdfParameter.parse("p1 BIGINT", "s1.foo", Dialect.POSTGRESQL)) - .definition("{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/$myfunc\"}") + .definition( + "{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/$myfunc\"}") .build(); assertThat( From 082e1b7dd22d9c51ceba369566ef74da84d98478 Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Tue, 26 May 2026 13:25:44 -0700 Subject: [PATCH 07/19] Add support for REMOTE UDFs. 
--- .../spanner/DdlToAvroSchemaConverter.java | 6 +- .../spanner/ddl/InformationSchemaScanner.java | 6 +- .../cloud/teleport/spanner/ddl/Udf.java | 8 +- .../cloud/teleport/spanner/ddl/Udf.java.orig | 318 ++++ .../cloud/teleport/spanner/CopyDbTest.java | 5 +- .../teleport/spanner/CopyDbTest.java.orig | 1362 +++++++++++++++++ .../spanner/ddl/RandomDdlGenerator.java | 2 +- .../spanner/ddl/RandomDdlGenerator.java.orig | 751 +++++++++ .../ExportPipelineIT/spanner-gsql-ddl.sql | 2 +- 9 files changed, 2444 insertions(+), 16 deletions(-) create mode 100644 v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java.orig create mode 100644 v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java.orig create mode 100644 v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java.orig diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/DdlToAvroSchemaConverter.java b/v1/src/main/java/com/google/cloud/teleport/spanner/DdlToAvroSchemaConverter.java index 4a9306dd59..a001fce20e 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/DdlToAvroSchemaConverter.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/DdlToAvroSchemaConverter.java @@ -151,10 +151,8 @@ public Collection convert(Ddl ddl) { for (UdfParameter udfParameter : udf.parameters()) { recordBuilder.prop(SPANNER_UDF_PARAMETER + i++, udfParameter.prettyPrint()); } - if (udf.options() != null) { - for (int j = 0; j < udf.options().size(); j++) { - recordBuilder.prop(SPANNER_OPTION + j, udf.options().get(j)); - } + for (int j = 0; j < udf.options().size(); j++) { + recordBuilder.prop(SPANNER_OPTION + j, udf.options().get(j)); } schemas.add(recordBuilder.fields().endRecord()); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java index 694618bda4..5af96c48db 100644 --- 
a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java @@ -1128,9 +1128,9 @@ private void listUdfOptions(Ddl.Builder builder) { Map> allOptions = Maps.newHashMap(); while (resultSet.next()) { String specificName = getQualifiedName(resultSet.getString(0), resultSet.getString(1)); - String optionName = resultSet.getString(2); - String optionType = resultSet.getString(3); - String optionValue = resultSet.getString(4); + String optionName = resultSet.isNull(2) ? "" : resultSet.getString(2); + String optionType = resultSet.isNull(3) ? "" : resultSet.getString(3); + String optionValue = resultSet.isNull(4) ? "" : resultSet.getString(4); ImmutableList.Builder options = allOptions.computeIfAbsent(specificName, k -> ImmutableList.builder()); diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java index 3d57ea8839..bb444f69d3 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java @@ -37,7 +37,7 @@ public abstract class Udf implements Serializable { // Remote function body is printed using $$ strings, which are // unlikely but possible to be present in the function definition. // https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-ESCAPE - public static final Escaper PG_REMOTE_UDF_BODY_ESCAPER = + public static final Escaper PG_DEFINITION_ESCAPER = Escapers.builder().addEscape('$', "\\044").build(); /** The access rights used by the UDF for underlying data: invoker-rights or definer-rights. 
*/ @@ -129,7 +129,7 @@ public void prettyPrint(Appendable appendable) throws IOException { break; case POSTGRESQL: throw new IllegalArgumentException( - "Options are not supported in PostgreSQL dialect for UDFs."); + "Options are not supported in PostgreSQL dialect for non-remote UDFs."); default: throw new IllegalArgumentException(String.format("Unrecognized Dialect: %s.", dialect())); } @@ -144,10 +144,10 @@ public void prettyPrint(Appendable appendable) throws IOException { if (language() == null || language().isEmpty() || "SQL".equalsIgnoreCase(language())) { appendable.append(" RETURN ").append(definition()); } else { - // Other langugges use AS definition instead of sql body. + // Other languages use AS definition instead of sql body. appendable .append(" AS $$") - .append(PG_REMOTE_UDF_BODY_ESCAPER.escape(definition())) + .append(PG_DEFINITION_ESCAPER.escape(definition())) .append("$$"); } break; diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java.orig b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java.orig new file mode 100644 index 0000000000..9f62342e20 --- /dev/null +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java.orig @@ -0,0 +1,318 @@ +/* + * Copyright (C) 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.spanner.ddl; + +import static com.google.cloud.teleport.spanner.common.NameUtils.quoteIdentifier; + +import com.google.auto.value.AutoValue; +import com.google.cloud.spanner.Dialect; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Maps; +import com.google.common.escape.Escaper; +import com.google.common.escape.Escapers; +import java.io.IOException; +import java.io.Serializable; +import java.util.LinkedHashMap; +import javax.annotation.Nullable; + +/** Cloud Spanner user-defined function. */ +@AutoValue +public abstract class Udf implements Serializable { + + private static final long serialVersionUID = 1L; + + // Remote function body is printed using $$ strings, which are + // unlikely but possible to be present in the function definition. + // https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-ESCAPE +<<<<<<< HEAD + public static final Escaper PG_REMOTE_UDF_BODY_ESCAPER = +======= + public static final Escaper PG_DEFINITION_ESCAPER = +>>>>>>> ff46924a7 (Add support for REMOTE UDFs.) + Escapers.builder().addEscape('$', "\\044").build(); + + /** The access rights used by the UDF for underlying data: invoker-rights or definer-rights. */ + public enum SqlSecurity { + INVOKER, + DEFINER, + } + + /** + * The specific name uniquely identifies the UDF even if its name is overloaded. It can be used to + * join UDF metadata from multiple sources (e.g. INFORMATION_SCHEMA.ROUTINES and + * INFORMATION_SCHEMA.PARAMETERS). The specific name is not guaranteed to match the user-specified + * name() or be the same after export and import. 
+ */ + public abstract String specificName(); + + @Nullable + public abstract String name(); + + public abstract Dialect dialect(); + + @Nullable + public abstract String type(); + + @Nullable + public abstract String definition(); + + @Nullable + public abstract String language(); + + @Nullable + public abstract SqlSecurity security(); + + public abstract ImmutableList parameters(); + + public abstract ImmutableList options(); + + public void prettyPrint(Appendable appendable) throws IOException { + appendable.append("CREATE FUNCTION ").append(quoteIdentifier(name(), dialect())); + appendable.append("("); + boolean first = true; + for (UdfParameter parameter : parameters()) { + if (!first) { + appendable.append(", "); + } + first = false; + appendable.append(parameter.prettyPrint()); + } + appendable.append(")"); + if (type() != null) { + appendable.append(" RETURNS ").append(type()); + } + + // Determinism should be added to INFORMATION_SCHEMA.ROUTINES. + // For now, we infer it from the language. + if (language() != null && language().equalsIgnoreCase("REMOTE")) { + String determinism; + switch (dialect()) { + case GOOGLE_STANDARD_SQL: + determinism = "NOT DETERMINISTIC"; + break; + case POSTGRESQL: + determinism = "VOLATILE"; + break; + default: + throw new IllegalArgumentException(String.format("Unrecognized Dialect: %s.", dialect())); + } + appendable.append(" ").append(determinism); + } + + if (language() != null && !language().isEmpty()) { + // GSQL does not accept LANGUAGE SQL even though it reports it. + if (dialect() != Dialect.GOOGLE_STANDARD_SQL || !language().equalsIgnoreCase("SQL")) { + appendable.append(" LANGUAGE ").append(language()); + } + } + + if (security() != null) { + // Remote UDF don't use SQL SECURITY, but it is marked NOT NULL in IS. 
+ if (!"REMOTE".equalsIgnoreCase(language())) { + appendable.append(" SQL SECURITY ").append(security().toString()); + } + } + + if (!options().isEmpty()) { + switch (dialect()) { + case GOOGLE_STANDARD_SQL: + appendable.append(" OPTIONS (").append(String.join(", ", options())).append(")"); + break; + case POSTGRESQL: + throw new IllegalArgumentException( + "Options are not supported in PostgreSQL dialect for UDFs."); + default: + throw new IllegalArgumentException(String.format("Unrecognized Dialect: %s.", dialect())); + } + } + + if (definition() != null && !definition().isEmpty()) { + switch (dialect()) { + case GOOGLE_STANDARD_SQL: + appendable.append(" AS (").append(definition()).append(")"); + break; + case POSTGRESQL: + if (language() == null || language().isEmpty() || "SQL".equalsIgnoreCase(language())) { + appendable.append(" RETURN ").append(definition()); + } else { +<<<<<<< HEAD + // Other langugges use AS definition instead of sql body. + appendable + .append(" AS $$") + .append(PG_REMOTE_UDF_BODY_ESCAPER.escape(definition())) +======= + // Other languages use AS definition instead of sql body. + appendable + .append(" AS $$") + .append(PG_DEFINITION_ESCAPER.escape(definition())) +>>>>>>> ff46924a7 (Add support for REMOTE UDFs.) 
+ .append("$$"); + } + break; + default: + throw new IllegalArgumentException(String.format("Unrecognized Dialect: %s.", dialect())); + } + } + } + + public String prettyPrint() { + StringBuilder sb = new StringBuilder(); + try { + prettyPrint(sb); + } catch (IOException e) { + throw new RuntimeException(e); + } + return sb.toString(); + } + + @Override + public String toString() { + return prettyPrint(); + } + + public abstract Builder autoToBuilder(); + + public Builder toBuilder() { + Builder builder = autoToBuilder().specificName(specificName()).dialect(dialect()); + if (name() != null) { + builder.name(name()); + } + if (type() != null) { + builder.type(type()); + } + if (language() != null) { + builder.language(language()); + } + builder.options(options()); + if (definition() != null) { + builder.definition(definition()); + } + if (security() != null) { + builder.security(security()); + } + for (UdfParameter parameter : parameters()) { + builder.addParameter(parameter); + } + return builder; + } + + public static Builder builder(Dialect dialect) { + return new AutoValue_Udf.Builder() + .dialect(dialect) + .parameters(ImmutableList.of()) + .options(ImmutableList.of()); + } + + public static Builder builder() { + return builder(Dialect.GOOGLE_STANDARD_SQL); + } + + /** A builder for {@link Udf}. 
*/ + @AutoValue.Builder + public abstract static class Builder { + private Ddl.Builder ddlBuilder; + private LinkedHashMap parametersMap = Maps.newLinkedHashMap(); + private ImmutableList.Builder parameters = ImmutableList.builder(); + + public Builder ddlBuilder(Ddl.Builder ddlBuilder) { + this.ddlBuilder = ddlBuilder; + return this; + } + + public abstract Builder specificName(String specificName); + + public abstract String specificName(); + + public abstract Builder name(String name); + + public abstract String name(); + + public abstract Builder dialect(Dialect dialect); + + public abstract Dialect dialect(); + + public abstract Builder type(String type); + + public abstract String type(); + + public abstract Builder definition(String definition); + + public abstract String definition(); + + public abstract Builder language(String language); + + public abstract String language(); + + public abstract Builder security(SqlSecurity rights); + + public abstract SqlSecurity security(); + + public abstract Builder options(ImmutableList options); + + public abstract ImmutableList options(); + + public abstract Builder parameters(ImmutableList parameters); + + public ImmutableList parameters() { + return parameters.build(); + } + + public UdfParameter.Builder parameter(String name) { + UdfParameter parameter = parametersMap.get(name.toLowerCase()); + if (parameter != null) { + if (!parameter.functionSpecificName().equals(specificName())) { + throw new IllegalArgumentException( + String.format( + "Parameter %s has a different function specific name %s than the user-defined" + + " function %s.", + name, parameter.functionSpecificName(), specificName())); + } + return parameter.toBuilder().udfBuilder(this); + } + return UdfParameter.builder(dialect()) + .name(name) + .functionSpecificName(specificName()) + .udfBuilder(this); + } + + public Builder addParameter(UdfParameter parameter) { + parameters.add(parameter); + parametersMap.put(parameter.name().toLowerCase(), 
parameter); + return this; + } + + abstract Udf autoBuild(); + + public Udf build() { + return new AutoValue_Udf.Builder() + .specificName(specificName()) + .name(name()) + .dialect(dialect()) + .type(type()) + .definition(definition()) + .language(language()) + .security(security()) + .options(options()) + .parameters(ImmutableList.copyOf(parameters())) + .autoBuild(); + } + + public Ddl.Builder endUdf() { + ddlBuilder.addUdf(build()); + return ddlBuilder; + } + } +} diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java index 1b8f9ffa0c..62ba01995d 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java @@ -1151,9 +1151,8 @@ public void pgUdfs() throws Exception { .language("REMOTE") .type("BIGINT") .addParameter(UdfParameter.parse("arg0 BIGINT", "s1.Foo3", Dialect.POSTGRESQL)) - .options( - ImmutableList.of( - "endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")) + .definition( + "{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\"}") .endUdf() .build(); createAndPopulate(ddl, 0); diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java.orig b/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java.orig new file mode 100644 index 0000000000..68ee56788f --- /dev/null +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java.orig @@ -0,0 +1,1362 @@ +/* + * Copyright (C) 2018 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.teleport.spanner; + +import static org.hamcrest.Matchers.is; +import static org.hamcrest.text.IsEqualCompressingWhiteSpace.equalToCompressingWhiteSpace; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; + +import com.google.cloud.spanner.DatabaseClient; +import com.google.cloud.spanner.Dialect; +import com.google.cloud.spanner.ReadOnlyTransaction; +import com.google.cloud.teleport.spanner.common.Type; +import com.google.cloud.teleport.spanner.common.Type.StructField; +import com.google.cloud.teleport.spanner.ddl.Ddl; +import com.google.cloud.teleport.spanner.ddl.InformationSchemaScanner; +import com.google.cloud.teleport.spanner.ddl.RandomDdlGenerator; +import com.google.cloud.teleport.spanner.ddl.Udf.SqlSecurity; +import com.google.cloud.teleport.spanner.ddl.UdfParameter; +import com.google.cloud.teleport.spanner.proto.ExportProtos.Export; +import com.google.cloud.teleport.spanner.spannerio.SpannerConfig; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.apache.beam.sdk.PipelineResult; +import org.apache.beam.sdk.options.ValueProvider; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.values.PCollection; +import org.junit.After; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import 
org.junit.rules.TemporaryFolder; + +/** + * An end to end test that exports and imports a database and verifies that the content is + * identical. Additionally, this test verifies the behavior of table level export. This requires an + * active GCP project with a Spanner instance. Hence this test can only be run locally with a + * project set up using 'gcloud config'. + */ +@Category(IntegrationTest.class) +public class CopyDbTest { + private final Timestamp timestamp = new Timestamp(System.currentTimeMillis()); + private final long numericTime = timestamp.getTime(); + private final String sourceDb = "copydb-source" + Long.toString(numericTime); + private final String destinationDb = "copydb-dest" + Long.toString(numericTime); + private final String destDbPrefix = "import"; + + @Rule public final transient TestPipeline exportPipeline = TestPipeline.create(); + @Rule public final transient TestPipeline importPipeline = TestPipeline.create(); + @Rule public final transient TestPipeline comparePipeline = TestPipeline.create(); + @Rule public final TemporaryFolder tmpDir = new TemporaryFolder(); + @Rule public final SpannerServerResource spannerServer = new SpannerServerResource(); + + @After + public void teardown() { + spannerServer.dropDatabase(sourceDb); + spannerServer.dropDatabase(destinationDb); + } + + private void createAndPopulate(Ddl ddl, int numBatches) throws Exception { + switch (ddl.dialect()) { + case GOOGLE_STANDARD_SQL: + spannerServer.createDatabase(sourceDb, ddl.statements()); + spannerServer.createDatabase(destinationDb, Collections.emptyList()); + break; + case POSTGRESQL: + spannerServer.createPgDatabase(sourceDb, ddl.statements()); + spannerServer.createPgDatabase(destinationDb, Collections.emptyList()); + break; + default: + throw new IllegalArgumentException("Unrecognized dialect: " + ddl.dialect()); + } + spannerServer.populateRandomData(sourceDb, ddl, numBatches); + } + + @Test + public void allTypesSchema() throws Exception { + // 
spotless:off + Ddl ddl = Ddl.builder() + .createTable("Users") + .column("first_name").string().max().endColumn() + .column("last_name").string().size(5).endColumn() + .column("age").int64().endColumn() + .primaryKey().asc("first_name").desc("last_name").end() + .endTable() + .createTable("AllTYPES") + .column("first_name").string().max().endColumn() + .column("last_name").string().size(5).endColumn() + .column("id").int64().notNull().endColumn() + .column("bool_field").bool().endColumn() + .column("int64_field").int64().endColumn() + .column("float32_field").float32().endColumn() + .column("float64_field").float64().endColumn() + .column("string_field").string().max().endColumn() + .column("bytes_field").bytes().max().endColumn() + .column("timestamp_field").timestamp().endColumn() + .column("date_field").date().endColumn() + .column("arr_bool_field").type(Type.array(Type.bool())).endColumn() + .column("arr_int64_field").type(Type.array(Type.int64())).endColumn() + .column("arr_float32_field").type(Type.array(Type.float32())).endColumn() + .column("arr_float64_field").type(Type.array(Type.float64())).endColumn() + .column("arr_string_field").type(Type.array(Type.string())).max().endColumn() + .column("arr_bytes_field").type(Type.array(Type.bytes())).max().endColumn() + .column("arr_timestamp_field").type(Type.array(Type.timestamp())).endColumn() + .column("arr_date_field").type(Type.array(Type.date())).endColumn() + .primaryKey().asc("first_name").desc("last_name").asc("id").end() + .interleaveInParent("Users") + .onDeleteCascade() + .endTable() + .build(); + // spotless:on + createAndPopulate(ddl, 100); + runTest(); + } + + @Test + public void allPgTypesSchema() throws Exception { + // spotless:off + Ddl ddl = + Ddl.builder(Dialect.POSTGRESQL) + .createTable("Users") + .column("first_name") + .pgVarchar() + .max() + .endColumn() + .column("last_name") + .pgVarchar() + .size(5) + .endColumn() + .column("age") + .pgInt8() + .endColumn() + .primaryKey() + 
.asc("first_name") + .asc("last_name") + .end() + .endTable() + .createTable("AllTYPES") + .column("id") + .pgInt8() + .notNull() + .endColumn() + .column("first_name") + .pgVarchar() + .max() + .endColumn() + .column("last_name") + .pgVarchar() + .size(5) + .endColumn() + .column("bool_field") + .pgBool() + .endColumn() + .column("int_field") + .pgInt8() + .endColumn() + .column("float32_field") + .pgFloat4() + .endColumn() + .column("float64_field") + .pgFloat8() + .endColumn() + .column("string_field") + .pgText() + .endColumn() + .column("bytes_field") + .pgBytea() + .endColumn() + .column("timestamp_field") + .pgTimestamptz() + .endColumn() + .column("numeric_field") + .pgNumeric() + .endColumn() + .column("date_field") + .pgDate() + .endColumn() + .column("arr_bool_field") + .type(Type.pgArray(Type.pgBool())) + .endColumn() + .column("arr_int_field") + .type(Type.pgArray(Type.pgInt8())) + .endColumn() + .column("arr_float32_field") + .type(Type.pgArray(Type.pgFloat4())) + .endColumn() + .column("arr_float64_field") + .type(Type.pgArray(Type.pgFloat8())) + .endColumn() + .column("arr_string_field") + .type(Type.pgArray(Type.pgVarchar())) + .max() + .endColumn() + .column("arr_bytes_field") + .type(Type.pgArray(Type.pgBytea())) + .max() + .endColumn() + .column("arr_timestamp_field") + .type(Type.pgArray(Type.pgTimestamptz())) + .endColumn() + .column("arr_date_field") + .type(Type.pgArray(Type.pgDate())) + .endColumn() + .column("arr_numeric_field") + .type(Type.pgArray(Type.pgNumeric())) + .endColumn() + .primaryKey() + .asc("first_name") + .asc("last_name") + .asc("id") + .asc("float64_field") + .end() + .interleaveInParent("Users") + .onDeleteCascade() + .endTable() + .build(); + // spotless:on + createAndPopulate(ddl, 100); + runTest(Dialect.POSTGRESQL); + } + + @Test + public void emptyTables() throws Exception { + // spotless:off + Ddl ddl = Ddl.builder() + .createTable("Users") + .column("first_name").string().max().endColumn() + 
.column("last_name").string().size(5).endColumn() + .column("age").int64().endColumn() + .primaryKey().asc("first_name").desc("last_name").end() + .endTable() + .createTable("AllTYPES") + .column("first_name").string().max().endColumn() + .column("last_name").string().size(5).endColumn() + .column("id").int64().notNull().endColumn() + .column("bool_field").bool().endColumn() + .column("int64_field").int64().endColumn() + .column("float32_field").float32().endColumn() + .column("float64_field").float64().endColumn() + .column("string_field").string().max().endColumn() + .column("bytes_field").bytes().max().endColumn() + .column("timestamp_field").timestamp().endColumn() + .column("date_field").date().endColumn() + .column("arr_bool_field").type(Type.array(Type.bool())).endColumn() + .column("arr_int64_field").type(Type.array(Type.int64())).endColumn() + .column("arr_float32_field").type(Type.array(Type.float32())).endColumn() + .column("arr_float64_field").type(Type.array(Type.float64())).endColumn() + .column("arr_string_field").type(Type.array(Type.string())).max().endColumn() + .column("arr_bytes_field").type(Type.array(Type.bytes())).max().endColumn() + .column("arr_timestamp_field").type(Type.array(Type.timestamp())).endColumn() + .column("arr_date_field").type(Type.array(Type.date())).endColumn() + .primaryKey().asc("first_name").desc("last_name").asc("id").end() + .interleaveInParent("Users") + .endTable() + .build(); + createAndPopulate(ddl, 10); + + // Add empty tables. 
+ Ddl emptyTables = Ddl.builder() + .createTable("empty_one") + .column("first").string().max().endColumn() + .column("second").string().size(5).endColumn() + .column("value").int64().endColumn() + .primaryKey().asc("first").desc("second").end() + .endTable() + .createTable("empty_two") + .column("first").string().max().endColumn() + .column("second").string().size(5).endColumn() + .column("value").int64().endColumn() + .column("another_value").int64().endColumn() + .primaryKey().asc("first").end() + .endTable() + .build(); + // spotless:on + spannerServer.updateDatabase(sourceDb, emptyTables.createTableStatements()); + runTest(); + } + + @Test + public void emptyPgTables() throws Exception { + // spotless:off + Ddl ddl = + Ddl.builder(Dialect.POSTGRESQL) + .createTable("Users") + .column("first_name") + .pgVarchar() + .max() + .endColumn() + .column("last_name").pgVarchar().size(5).endColumn() + .column("age") + .pgInt8() + .endColumn() + .primaryKey() + .asc("first_name") + .asc("last_name") + .end() + .endTable() + .createTable("AllTYPES") + .column("first_name") + .pgVarchar() + .max() + .endColumn() + .column("last_name").pgVarchar().size(5).endColumn() + .column("id") + .pgInt8() + .notNull() + .endColumn() + .column("bool_field") + .pgBool() + .endColumn() + .column("int_field") + .pgInt8() + .endColumn() + .column("float32_field") + .pgFloat4() + .endColumn() + .column("float64_field") + .pgFloat8() + .endColumn() + .column("string_field") + .pgText() + .endColumn() + .column("bytes_field") + .pgBytea() + .endColumn() + .column("timestamp_field") + .pgTimestamptz() + .endColumn() + .column("numeric_field") + .pgNumeric() + .endColumn() + .primaryKey() + .asc("first_name") + .asc("last_name") + .asc("id") + .end() + .interleaveInParent("Users") + .onDeleteCascade() + .endTable() + .build(); + createAndPopulate(ddl, 10); + + // Add empty tables. 
+ Ddl emptyTables = + Ddl.builder(Dialect.POSTGRESQL) + .createTable("empty_one") + .column("first") + .pgVarchar() + .max() + .endColumn() + .column("second").pgVarchar().size(5).endColumn() + .column("value") + .pgInt8() + .endColumn() + .primaryKey() + .asc("first") + .asc("second") + .end() + .endTable() + .createTable("empty_two") + .column("first") + .pgVarchar() + .max() + .endColumn() + .column("second").pgVarchar().size(5).endColumn() + .column("value") + .pgInt8() + .endColumn() + .column("another_value") + .pgInt8() + .endColumn() + .primaryKey() + .asc("first") + .end() + .endTable() + .build(); + // spotless:on + spannerServer.updateDatabase(sourceDb, emptyTables.createTableStatements()); + runTest(Dialect.POSTGRESQL); + } + + @Test + public void allEmptyTables() throws Exception { + // spotless:off + Ddl ddl = Ddl.builder() + .createTable("Users") + .column("first_name").string().max().endColumn() + .column("last_name").string().size(5).endColumn() + .column("age").int64().endColumn() + .primaryKey().asc("first_name").desc("last_name").end() + .endTable() + .createTable("AllTYPES") + .column("first_name").string().max().endColumn() + .column("last_name").string().size(5).endColumn() + .column("id").int64().notNull().endColumn() + .column("bool_field").bool().endColumn() + .column("int64_field").int64().endColumn() + .column("float32_field").float32().endColumn() + .column("float64_field").float64().endColumn() + .column("string_field").string().max().endColumn() + .column("bytes_field").bytes().max().endColumn() + .column("timestamp_field").timestamp().endColumn() + .column("date_field").date().endColumn() + .column("arr_bool_field").type(Type.array(Type.bool())).endColumn() + .column("arr_int64_field").type(Type.array(Type.int64())).endColumn() + .column("arr_float32_field").type(Type.array(Type.float32())).endColumn() + .column("arr_float64_field").type(Type.array(Type.float64())).endColumn() + 
.column("arr_string_field").type(Type.array(Type.string())).max().endColumn() + .column("arr_bytes_field").type(Type.array(Type.bytes())).max().endColumn() + .column("arr_timestamp_field").type(Type.array(Type.timestamp())).endColumn() + .column("arr_date_field").type(Type.array(Type.date())).endColumn() + .primaryKey().asc("first_name").desc("last_name").asc("id").end() + .interleaveInParent("Users") + .endTable() + .build(); + // spotless:on + createAndPopulate(ddl, 0); + runTest(); + } + + @Test + public void allEmptyPgTables() throws Exception { + // spotless:off + Ddl ddl = + Ddl.builder(Dialect.POSTGRESQL) + .createTable("Users") + .column("first_name") + .pgVarchar() + .max() + .endColumn() + .column("last_name") + .pgVarchar() + .size(5) + .endColumn() + .column("age") + .pgInt8() + .endColumn() + .primaryKey() + .asc("first_name") + .asc("last_name") + .end() + .endTable() + .createTable("AllTYPES") + .column("first_name") + .pgVarchar() + .max() + .endColumn() + .column("last_name") + .pgVarchar() + .size(5) + .endColumn() + .column("id") + .pgInt8() + .notNull() + .endColumn() + .column("bool_field") + .pgBool() + .endColumn() + .column("int_field") + .pgInt8() + .endColumn() + .column("float32_field") + .pgFloat4() + .endColumn() + .column("float64_field") + .pgFloat8() + .endColumn() + .column("string_field") + .pgText() + .endColumn() + .column("bytes_field") + .pgBytea() + .endColumn() + .column("timestamp_field") + .pgTimestamptz() + .endColumn() + .column("numeric_field") + .pgNumeric() + .endColumn() + .primaryKey() + .asc("first_name") + .asc("last_name") + .asc("id") + .end() + .interleaveInParent("Users") + .onDeleteCascade() + .endTable() + .build(); + // spotless:on + createAndPopulate(ddl, 0); + runTest(Dialect.POSTGRESQL); + } + + @Test + public void databaseOptions() throws Exception { + Ddl.Builder ddlBuilder = Ddl.builder(); + // Table Content + // spotless:off + ddlBuilder.createTable("Users") + 
.column("first_name").string().max().endColumn() + .column("last_name").string().size(5).endColumn() + .column("age").int64().endColumn() + .primaryKey().asc("first_name").desc("last_name").end() + .endTable() + .createTable("EmploymentData") + .column("first_name").string().max().endColumn() + .column("last_name").string().size(5).endColumn() + .column("id").int64().notNull().endColumn() + .column("age").int64().endColumn() + .column("address").string().max().endColumn() + .primaryKey().asc("first_name").desc("last_name").asc("id").end() + .interleaveInParent("Users") + .onDeleteCascade() + .endTable(); + // spotless:on + // Allowed and well-formed database option + List dbOptionList = new ArrayList<>(); + dbOptionList.add( + Export.DatabaseOption.newBuilder() + .setOptionName("version_retention_period") + .setOptionValue("\"6d\"") + .build()); + // Disallowed database option + dbOptionList.add( + Export.DatabaseOption.newBuilder() + .setOptionName("optimizer_version") + .setOptionValue("1") + .build()); + // Malformed database option + dbOptionList.add( + Export.DatabaseOption.newBuilder() + .setOptionName("123version") + .setOptionValue("xyz") + .build()); + ddlBuilder.mergeDatabaseOptions(dbOptionList); + Ddl ddl = ddlBuilder.build(); + createAndPopulate(ddl, 100); + runTest(); + Ddl destinationDdl = readDdl(destinationDb, Dialect.GOOGLE_STANDARD_SQL); + List destDbOptions = destinationDdl.setOptionsStatements(destinationDb); + assertThat(destDbOptions.size(), is(1)); + assertThat( + destDbOptions.get(0), + is( + "ALTER DATABASE `" + + destinationDb + + "` SET OPTIONS ( version_retention_period = \"6d\" )")); + } + + @Test + public void pgDatabaseOptions() throws Exception { + Ddl.Builder ddlBuilder = Ddl.builder(Dialect.POSTGRESQL); + // Table Content + // spotless:off + ddlBuilder + .createTable("Users") + .column("first_name") + .pgVarchar() + .max() + .endColumn() + .column("last_name").pgVarchar().size(5).endColumn() + .column("age") + .pgInt8() + 
.endColumn() + .primaryKey() + .asc("first_name") + .asc("last_name") + .end() + .endTable() + .createTable("EmploymentData") + .column("first_name") + .pgVarchar() + .max() + .endColumn() + .column("last_name").pgVarchar().size(5).endColumn() + .column("id") + .pgInt8() + .notNull() + .endColumn() + .column("age") + .pgInt8() + .endColumn() + .column("address") + .pgVarchar() + .max() + .endColumn() + .primaryKey() + .asc("first_name") + .asc("last_name") + .asc("id") + .end() + .interleaveInParent("Users") + .onDeleteCascade() + .endTable(); + // spotless:on + // Allowed and well-formed database option + List dbOptionList = new ArrayList<>(); + dbOptionList.add( + Export.DatabaseOption.newBuilder() + .setOptionName("version_retention_period") + .setOptionValue("'6d'") + .build()); + // Disallowed database option + dbOptionList.add( + Export.DatabaseOption.newBuilder() + .setOptionName("optimizer_version") + .setOptionValue("1") + .build()); + // Malformed database option + dbOptionList.add( + Export.DatabaseOption.newBuilder() + .setOptionName("123version") + .setOptionValue("xyz") + .build()); + ddlBuilder.mergeDatabaseOptions(dbOptionList); + Ddl ddl = ddlBuilder.build(); + createAndPopulate(ddl, 100); + runTest(Dialect.POSTGRESQL); + Ddl destinationDdl = readDdl(destinationDb, Dialect.POSTGRESQL); + List destDbOptions = destinationDdl.setOptionsStatements(destinationDb); + assertThat(destDbOptions.size(), is(1)); + assertThat( + destDbOptions.get(0), + is("ALTER DATABASE \"" + destinationDb + "\" SET spanner.version_retention_period = '6d'")); + } + + @Test + public void emptyDb() throws Exception { + Ddl ddl = Ddl.builder().build(); + createAndPopulate(ddl, 0); + runTest(); + } + + @Test + public void emptyPgDb() throws Exception { + Ddl ddl = Ddl.builder(Dialect.POSTGRESQL).build(); + createAndPopulate(ddl, 0); + runTest(Dialect.POSTGRESQL); + } + + @Test + public void foreignKeys() throws Exception { + // spotless:off + Ddl ddl = Ddl.builder() + 
.createTable("Ref") + .column("id1").int64().endColumn() + .column("id2").int64().endColumn() + .primaryKey().asc("id1").asc("id2").end() + .endTable() + .createTable("Child") + .column("id1").int64().endColumn() + .column("id2").int64().endColumn() + .column("id3").int64().endColumn() + .primaryKey().asc("id1").asc("id2").asc("id3").end() + .interleaveInParent("Ref") + // Add some foreign keys that are guaranteed to be satisfied due to interleaving + .foreignKeys(ImmutableList.of( + "ALTER TABLE `Child` ADD CONSTRAINT `fk1` FOREIGN KEY (`id1`) REFERENCES `Ref` (`id1`)", + "ALTER TABLE `Child` ADD CONSTRAINT `fk2` FOREIGN KEY (`id2`) REFERENCES `Ref` (`id2`)", + "ALTER TABLE `Child` ADD CONSTRAINT `fk3` FOREIGN KEY (`id2`) REFERENCES `Ref` (`id2`)", + "ALTER TABLE `Child` ADD CONSTRAINT `fk4` FOREIGN KEY (`id2`, `id1`) REFERENCES `Ref` (`id2`, `id1`)", + "ALTER TABLE `Child` ADD CONSTRAINT `fk5` FOREIGN KEY (`id2`) REFERENCES `Ref` (`id2`) NOT ENFORCED", + "ALTER TABLE `Child` ADD CONSTRAINT `fk6` FOREIGN KEY (`id2`) REFERENCES `Ref` (`id2`) ENFORCED")) + .endTable() + .build(); + // spotless:on + + createAndPopulate(ddl, 100); + runTest(); + } + + @Test + public void pgForeignKeys() throws Exception { + // spotless:off + Ddl ddl = + Ddl.builder(Dialect.POSTGRESQL) + .createTable("Ref") + .column("id1") + .pgInt8() + .endColumn() + .column("id2") + .pgInt8() + .endColumn() + .primaryKey() + .asc("id1") + .asc("id2") + .end() + .endTable() + .createTable("Child") + .column("id1") + .pgInt8() + .endColumn() + .column("id2") + .pgInt8() + .endColumn() + .column("id3") + .pgInt8() + .endColumn() + .primaryKey() + .asc("id1") + .asc("id2") + .asc("id3") + .end() + .interleaveInParent("Ref") + // Add some foreign keys that are guaranteed to be satisfied due to interleaving + .foreignKeys( + ImmutableList.of( + "ALTER TABLE \"Child\" ADD CONSTRAINT \"fk1\" FOREIGN KEY (\"id1\") REFERENCES" + + " \"Ref\" (\"id1\")", + "ALTER TABLE \"Child\" ADD CONSTRAINT \"fk2\" FOREIGN 
KEY (\"id2\") REFERENCES" + + " \"Ref\" (\"id2\")", + "ALTER TABLE \"Child\" ADD CONSTRAINT \"fk3\" FOREIGN KEY (\"id2\") REFERENCES" + + " \"Ref\" (\"id2\")", + "ALTER TABLE \"Child\" ADD CONSTRAINT \"fk4\" FOREIGN KEY (\"id2\", \"id1\") " + + "REFERENCES \"Ref\" (\"id2\", \"id1\")")) + .endTable() + .build(); + // spotless:on + + createAndPopulate(ddl, 100); + runTest(Dialect.POSTGRESQL); + } + + // TODO: enable this test once CHECK constraints are enabled + // @Test + public void checkConstraints() throws Exception { + // spotless:off + Ddl ddl = Ddl.builder() + .createTable("T") + .column("id").int64().endColumn() + .column("A").int64().endColumn() + .primaryKey().asc("id").end() + .checkConstraints(ImmutableList.of( + "CONSTRAINT `ck` CHECK(TO_HEX(SHA1(CAST(A AS STRING))) <= '~')")) + .endTable().build(); + // spotless:on + + createAndPopulate(ddl, 100); + runTest(); + } + + @Test + public void pgCheckConstraints() throws Exception { + // spotless:off + Ddl ddl = + Ddl.builder(Dialect.POSTGRESQL) + .createTable("T") + .column("id") + .pgInt8() + .endColumn() + .column("A") + .pgInt8() + .endColumn() + .primaryKey() + .asc("id") + .end() + .checkConstraints( + ImmutableList.of( + "CONSTRAINT \"ck\" CHECK(LENGTH(CAST(\"A\" AS VARCHAR)) >= '0'::bigint)")) + .endTable() + .build(); + // spotless:on + + createAndPopulate(ddl, 100); + runTest(Dialect.POSTGRESQL); + } + + @Test + public void models() throws Exception { + // spotless:off + Ddl ddl = + Ddl.builder() + .createModel("Iris") + .remote(true) + .options(ImmutableList.of( + "endpoint=\"//aiplatform.googleapis.com/projects/span-cloud-testing/locations/us-central1/endpoints/4608339105032437760\"")) + .inputColumn("f1").type(Type.float64()).size(-1).endInputColumn() + .inputColumn("f2").type(Type.float64()).size(-1).endInputColumn() + .inputColumn("f3").type(Type.float64()).size(-1).endInputColumn() + .inputColumn("f4").type(Type.float64()).size(-1).endInputColumn() + 
.outputColumn("classes").type(Type.array(Type.string())).size(-1).endOutputColumn() + .outputColumn("scores").type(Type.array(Type.float64())).size(-1).endOutputColumn() + .endModel() + .createModel("TextEmbeddingGecko") + .remote(true) + .options(ImmutableList.of( + "endpoint=\"//aiplatform.googleapis.com/projects/span-cloud-testing/locations/us-central1/publishers/google/models/textembedding-gecko\"")) + .inputColumn("content").type(Type.string()).size(-1).endInputColumn() + .outputColumn("embeddings").type(Type.struct( + StructField.of("statistics", Type.struct(StructField.of("truncated", Type.bool()), + StructField.of("token_count", Type.float64()))), + StructField.of("values", Type.array(Type.float64())))).size(-1).endOutputColumn() + .endModel() + .build(); + // spotless:on + + createAndPopulate(ddl, 0); + runTest(); + } + + @Test + public void changeStreams() throws Exception { + Ddl ddl = + Ddl.builder() + .createTable("T1") + .endTable() + .createTable("T2") + .column("key") + .int64() + .endColumn() + .column("c1") + .int64() + .endColumn() + .column("c2") + .string() + .max() + .endColumn() + .primaryKey() + .asc("key") + .end() + .endTable() + .createTable("T3") + .endTable() + .createChangeStream("ChangeStreamAll") + .forClause("FOR ALL") + .options( + ImmutableList.of( + "retention_period=\"7d\"", "value_capture_type=\"OLD_AND_NEW_VALUES\"")) + .endChangeStream() + .createChangeStream("ChangeStreamEmpty") + .endChangeStream() + .createChangeStream("ChangeStreamTableColumns") + .forClause("FOR `T1`, `T2`(`c1`, `c2`), `T3`()") + .endChangeStream() + .build(); + createAndPopulate(ddl, 0); + runTest(); + } + + // TODO: Enable the test once change streams are supported in PG. 
+ // @Test + public void pgChangeStreams() throws Exception { + Ddl ddl = + Ddl.builder(Dialect.POSTGRESQL) + .createTable("T1") + .column("key") + .pgInt8() + .endColumn() + .primaryKey() + .asc("key") + .end() + .endTable() + .createTable("T2") + .column("key") + .pgInt8() + .endColumn() + .column("c1") + .pgInt8() + .endColumn() + .column("c2") + .pgVarchar() + .max() + .endColumn() + .primaryKey() + .asc("key") + .end() + .endTable() + .createTable("T3") + .column("key") + .pgInt8() + .endColumn() + .primaryKey() + .asc("key") + .end() + .endTable() + .createChangeStream("ChangeStreamAll") + .forClause("FOR ALL") + .options( + ImmutableList.of( + "retention_period='7d'", "value_capture_type='OLD_AND_NEW_VALUES'")) + .endChangeStream() + .createChangeStream("ChangeStreamEmpty") + .endChangeStream() + .createChangeStream("ChangeStreamTableColumns") + .forClause("FOR \"T1\", \"T2\"(\"c1\", \"c2\"), \"T3\"()") + .endChangeStream() + .build(); + createAndPopulate(ddl, 0); + runTest(Dialect.POSTGRESQL); + } + + @Test + public void identityColumn() throws Exception { + // spotless:off + Ddl.Builder ddlBuilder = Ddl.builder(); + List dbOptionList = new ArrayList<>(); + dbOptionList.add( + Export.DatabaseOption.newBuilder() + .setOptionName("default_sequence_kind") + .setOptionValue("\"bit_reversed_positive\"") + .build()); + ddlBuilder.mergeDatabaseOptions(dbOptionList); + Ddl ddl = ddlBuilder + .createTable("IdentityTable") + .column("id") + .int64() + .isIdentityColumn(true) + .sequenceKind("bit_reversed_positive") + .counterStartValue(1000L) + .skipRangeMin(2000L) + .skipRangeMax(3000L) + .endColumn() + .column("non_key_column") + .int64() + .isIdentityColumn(true) + .sequenceKind("bit_reversed_positive") + .counterStartValue(1000L) + .skipRangeMin(2000L) + .skipRangeMax(3000L) + .endColumn() + .column("no_sequence_kind_column") + .int64() + .isIdentityColumn(true) + .sequenceKind("default") + .counterStartValue(1000L) + .skipRangeMin(2000L) + .skipRangeMax(3000L) + 
.endColumn() + .column("value").int64().endColumn() + .primaryKey().asc("id").end() + .endTable() + .build(); + // spotless:on + + createAndPopulate(ddl, 10); + runTest(); + } + + @Test + public void pgIdentityColumn() throws Exception { + // spotless:off + Ddl.Builder ddlBuilder = Ddl.builder(Dialect.POSTGRESQL); + List dbOptionList = new ArrayList<>(); + dbOptionList.add( + Export.DatabaseOption.newBuilder() + .setOptionName("default_sequence_kind") + .setOptionValue("\"bit_reversed_positive\"") + .build()); + ddlBuilder.mergeDatabaseOptions(dbOptionList); + Ddl ddl = ddlBuilder + .createTable("IdentityTable") + .column("id") + .int64() + .isIdentityColumn(true) + .sequenceKind("bit_reversed_positive") + .counterStartValue(1000L) + .skipRangeMin(2000L) + .skipRangeMax(3000L) + .endColumn() + .column("non_key_column") + .int64() + .isIdentityColumn(true) + .sequenceKind("bit_reversed_positive") + .counterStartValue(1000L) + .skipRangeMin(2000L) + .skipRangeMax(3000L) + .endColumn() + .column("no_sequence_kind_column") + .int64() + .isIdentityColumn(true) + .sequenceKind("default") + .counterStartValue(1000L) + .skipRangeMin(2000L) + .skipRangeMax(3000L) + .endColumn() + .column("value").int64().endColumn() + .primaryKey().asc("id").end() + .endTable() + .build(); + // spotless:on + + createAndPopulate(ddl, 10); + runTest(Dialect.POSTGRESQL); + } + + @Test + public void commitTimestampColumns() throws Exception { + // spotless:off + Ddl.Builder ddlBuilder = Ddl.builder(); + List dbOptionList = new ArrayList<>(); + dbOptionList.add( + Export.DatabaseOption.newBuilder() + .setOptionName("default_sequence_kind") + .setOptionValue("\"bit_reversed_positive\"") + .build()); + ddlBuilder.mergeDatabaseOptions(dbOptionList); + Ddl ddl = ddlBuilder + .createTable("CommitTimestampTable") + .column("id") + .int64() + .endColumn() + .column("default_commit_ts") + .type(Type.timestamp()) + .defaultExpression("PENDING_COMMIT_TIMESTAMP()") + 
.columnOptions(ImmutableList.of("allow_commit_timestamp=TRUE")) + .endColumn() + .column("on_update_ts") + .type(Type.timestamp()) + .defaultExpression("PENDING_COMMIT_TIMESTAMP()") + .onUpdateExpression("PENDING_COMMIT_TIMESTAMP()") + .columnOptions(ImmutableList.of("allow_commit_timestamp=TRUE")) + .endColumn() + .primaryKey().asc("id").end() + .endTable() + .build(); + // spotless:on + + createAndPopulate(ddl, 10); + runTest(); + } + + @Test + public void pgCommitTimestampColumns() throws Exception { + // spotless:off + Ddl.Builder ddlBuilder = Ddl.builder(Dialect.POSTGRESQL); + List dbOptionList = new ArrayList<>(); + dbOptionList.add( + Export.DatabaseOption.newBuilder() + .setOptionName("default_sequence_kind") + .setOptionValue("\"bit_reversed_positive\"") + .build()); + ddlBuilder.mergeDatabaseOptions(dbOptionList); + Ddl ddl = ddlBuilder + .createTable("CommitTimestampTable") + .column("id") + .int64() + .endColumn() + .column("default_commit_ts") + .pgSpannerCommitTimestamp() + .defaultExpression("spanner.pending_commit_timestamp()") + .endColumn() + .column("on_update_ts") + .pgSpannerCommitTimestamp() + .defaultExpression("spanner.pending_commit_timestamp()") + .onUpdateExpression("spanner.pending_commit_timestamp()") + .endColumn() + .primaryKey().asc("id").end() + .endTable() + .build(); + // spotless:on + + createAndPopulate(ddl, 10); + runTest(); + } + + @Test + public void udfs() throws Exception { + Ddl.Builder ddlBuilder = Ddl.builder(); + List dbOptionList = new ArrayList<>(); + dbOptionList.add( + Export.DatabaseOption.newBuilder() + .setOptionName("default_sequence_kind") + .setOptionValue("\"bit_reversed_positive\"") + .build()); + ddlBuilder.mergeDatabaseOptions(dbOptionList); + Ddl ddl = + ddlBuilder + .createSchema("s1") + .endNamedSchema() + .createUdf("s1.Foo1") + .dialect(Dialect.GOOGLE_STANDARD_SQL) + .name("s1.Foo1") + .definition("(SELECT 'bar')") + .endUdf() + .createUdf("s1.Foo2") + .dialect(Dialect.GOOGLE_STANDARD_SQL) + 
.name("s1.Foo2")
+            .definition("(SELECT 'bar')")
+            .security(SqlSecurity.INVOKER)
+            .type("STRING")
+            .addParameter(UdfParameter.parse("arg0 STRING", "s1.Foo2", Dialect.GOOGLE_STANDARD_SQL))
+            .addParameter(
+                UdfParameter.parse(
+                    "arg1 STRING DEFAULT 'bar'", "s1.Foo2", Dialect.GOOGLE_STANDARD_SQL))
+            .endUdf()
+            .createUdf("s1.Foo3")
+            .dialect(Dialect.GOOGLE_STANDARD_SQL)
+            .name("s1.Foo3")
+            .language("REMOTE")
+            .type("INT64")
+            .addParameter(UdfParameter.parse("arg0 INT64", "s1.Foo3", Dialect.GOOGLE_STANDARD_SQL))
+            .options(
+                ImmutableList.of(
+                    "endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\""))
+            .endUdf()
+            .build();
+    createAndPopulate(ddl, 0);
+    runTest();
+  }
+
+  /**
+   * Round-trips PostgreSQL-dialect UDFs in schema {@code s1} (two with SQL definitions and one
+   * REMOTE function) through export and import, then compares source and destination DDL.
+   */
+  @Test
+  public void pgUdfs() throws Exception {
+    Ddl.Builder ddlBuilder = Ddl.builder(Dialect.POSTGRESQL);
+    List<Export.DatabaseOption> dbOptionList = new ArrayList<>();
+    dbOptionList.add(
+        Export.DatabaseOption.newBuilder()
+            .setOptionName("default_sequence_kind")
+            .setOptionValue("\"bit_reversed_positive\"")
+            .build());
+    ddlBuilder.mergeDatabaseOptions(dbOptionList);
+    Ddl ddl =
+        ddlBuilder
+            .createSchema("s1")
+            .endNamedSchema()
+            .createUdf("s1.Foo1")
+            .dialect(Dialect.POSTGRESQL)
+            .name("s1.Foo1")
+            .definition("(SELECT 'bar')")
+            .endUdf()
+            .createUdf("s1.Foo2")
+            .dialect(Dialect.POSTGRESQL)
+            .name("s1.Foo2")
+            .definition("(SELECT 'bar')")
+            .security(SqlSecurity.INVOKER)
+            .type("TEXT")
+            .addParameter(UdfParameter.parse("arg0 TEXT", "s1.Foo2", Dialect.POSTGRESQL))
+            .addParameter(
+                UdfParameter.parse("arg1 TEXT DEFAULT 'bar'", "s1.Foo2", Dialect.POSTGRESQL))
+            .endUdf()
+            .createUdf("s1.Foo3")
+            .dialect(Dialect.POSTGRESQL)
+            .name("s1.Foo3")
+            .language("REMOTE")
+            .type("BIGINT")
+            .addParameter(UdfParameter.parse("arg0 BIGINT", "s1.Foo3", Dialect.POSTGRESQL))
+            // Merge conflict resolved to the incoming side: the REMOTE endpoint is given as a
+            // JSON definition, the form RandomDdlGenerator uses for non-GoogleSQL dialects.
+            .definition(
+                "{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\"}")
+            .endUdf()
+            .build();
+    createAndPopulate(ddl, 0);
+    runTest(Dialect.POSTGRESQL);
+  }
+
+  @Test
+  public void sequences() throws Exception {
+    Ddl.Builder ddlBuilder = Ddl.builder();
+    List<Export.DatabaseOption> dbOptionList = new ArrayList<>();
+    dbOptionList.add(
+        Export.DatabaseOption.newBuilder()
+            .setOptionName("default_sequence_kind")
+            .setOptionValue("\"bit_reversed_positive\"")
+            .build());
+    ddlBuilder.mergeDatabaseOptions(dbOptionList);
+    Ddl ddl =
+        ddlBuilder
+            .createSequence("Sequence1")
+            .options(
+                ImmutableList.of(
+                    "sequence_kind=\"bit_reversed_positive\"",
+                    "skip_range_min=0",
+                    "skip_range_max=1000",
+                    "start_with_counter=50"))
+            .endSequence()
+            .createSequence("Sequence2")
+            .options(
+                ImmutableList.of(
+                    "sequence_kind=\"bit_reversed_positive\"", "start_with_counter=9999"))
+            .endSequence()
+            .createSequence("Sequence3")
+            .options(ImmutableList.of("sequence_kind=\"bit_reversed_positive\""))
+            .endSequence()
+            .createSequence("Sequence4")
+            .options(
+                ImmutableList.of(
+                    "sequence_kind=\"default\"",
+                    "skip_range_min=0",
+                    "skip_range_max=1000",
+                    "start_with_counter=50"))
+            .endSequence()
+            .createTable("UsersWithSequenceId")
+            .column("id")
+            .int64()
+            .notNull()
+            .defaultExpression("GET_NEXT_SEQUENCE_VALUE(SEQUENCE Sequence3)")
+            .endColumn()
+            .column("first_name")
+            .string()
+            .size(10)
+            .endColumn()
+            .primaryKey()
+            .asc("id")
+            .end()
+            .endTable()
+            .build();
+    createAndPopulate(ddl, 0);
+    runTest();
+  }
+
+  @Test
+  public void pgSequences() throws Exception {
+    Ddl.Builder ddlBuilder = Ddl.builder(Dialect.POSTGRESQL);
+    List<Export.DatabaseOption> dbOptionList = new ArrayList<>();
+    dbOptionList.add(
+        Export.DatabaseOption.newBuilder()
+            .setOptionName("default_sequence_kind")
+            .setOptionValue("\"bit_reversed_positive\"")
+            .build());
+    ddlBuilder.mergeDatabaseOptions(dbOptionList);
+    Ddl ddl =
+        ddlBuilder
+            .createSequence("PGSequence1")
+
.sequenceKind("bit_reversed_positive") + .counterStartValue(Long.valueOf(50)) + .skipRangeMin(Long.valueOf(0)) + .skipRangeMax(Long.valueOf(1000)) + .endSequence() + .createSequence("PGSequence2") + .sequenceKind("bit_reversed_positive") + .counterStartValue(Long.valueOf(9999)) + .endSequence() + .createSequence("PGSequence3") + .sequenceKind("bit_reversed_positive") + .endSequence() + .createSequence("PGSequence4") + .sequenceKind("default") + .counterStartValue(Long.valueOf(50)) + .skipRangeMin(Long.valueOf(0)) + .skipRangeMax(Long.valueOf(1000)) + .endSequence() + .createTable("PGUsersWithSequenceId") + .column("id") + .pgInt8() + .notNull() + .defaultExpression("nextval('\"PGSequence3\"')") + .endColumn() + .column("first_name") + .pgVarchar() + .size(10) + .endColumn() + .primaryKey() + .asc("id") + .end() + .endTable() + .build(); + + createAndPopulate(ddl, 0); + runTest(Dialect.POSTGRESQL); + } + + @Test + public void randomSchema() throws Exception { + Ddl ddl = RandomDdlGenerator.builder().build().generate(); + createAndPopulate(ddl, 100); + runTest(); + } + + @Test + public void randomPgSchema() throws Exception { + Ddl ddl = RandomDdlGenerator.builder(Dialect.POSTGRESQL).setMaxViews(2).build().generate(); + System.out.println(ddl.prettyPrint()); + createAndPopulate(ddl, 100); + runTest(Dialect.POSTGRESQL); + } + + @Test + public void randomSchemaNoData() throws Exception { + Ddl ddl = RandomDdlGenerator.builder().build().generate(); + createAndPopulate(ddl, 0); + runTest(); + } + + @Test + public void randomPgSchemaNoData() throws Exception { + Ddl ddl = RandomDdlGenerator.builder(Dialect.POSTGRESQL).setMaxViews(2).build().generate(); + createAndPopulate(ddl, 0); + runTest(Dialect.POSTGRESQL); + } + + private void runTest() { + runTest(Dialect.GOOGLE_STANDARD_SQL); + } + + private void runTest(Dialect dialect) { + String tmpDirPath = tmpDir.getRoot().getAbsolutePath(); + ValueProvider.StaticValueProvider destination = + 
ValueProvider.StaticValueProvider.of(tmpDirPath); + ValueProvider.StaticValueProvider jobId = ValueProvider.StaticValueProvider.of("jobid"); + ValueProvider.StaticValueProvider source = + ValueProvider.StaticValueProvider.of(tmpDirPath + "/jobid"); + + SpannerConfig sourceConfig = spannerServer.getSpannerConfig(sourceDb); + exportPipeline.apply("Export", new ExportTransform(sourceConfig, destination, jobId)); + PipelineResult exportResult = exportPipeline.run(); + exportResult.waitUntilFinish(); + + SpannerConfig destConfig = spannerServer.getSpannerConfig(destinationDb); + importPipeline.apply( + "Import", + new ImportTransform( + destConfig, + source, + ValueProvider.StaticValueProvider.of(true), + ValueProvider.StaticValueProvider.of(true), + ValueProvider.StaticValueProvider.of(true), + ValueProvider.StaticValueProvider.of(true), + ValueProvider.StaticValueProvider.of(true), + ValueProvider.StaticValueProvider.of(30), + ValueProvider.StaticValueProvider.of(40))); + PipelineResult importResult = importPipeline.run(); + importResult.waitUntilFinish(); + + PCollection mismatchCount = + comparePipeline.apply("Compare", new CompareDatabases(sourceConfig, destConfig)); + PAssert.that(mismatchCount) + .satisfies( + (x) -> { + assertEquals(Lists.newArrayList(x), Lists.newArrayList(0L)); + return null; + }); + PipelineResult compareResult = comparePipeline.run(); + compareResult.waitUntilFinish(); + + Ddl sourceDdl = readDdl(sourceDb, dialect); + Ddl destinationDdl = readDdl(destinationDb, dialect); + + assertThat(sourceDdl.prettyPrint(), equalToCompressingWhiteSpace(destinationDdl.prettyPrint())); + } + + /* Returns the Ddl representing a Spanner database for given a String for the database name */ + private Ddl readDdl(String db, Dialect dialect) { + DatabaseClient dbClient = spannerServer.getDbClient(db); + Ddl ddl; + try (ReadOnlyTransaction ctx = dbClient.readOnlyTransaction()) { + ddl = new InformationSchemaScanner(ctx, dialect).scan(); + } + return ddl; + } +} 
diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java index 7cd5bf58b8..0e9ab7bd52 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java @@ -263,7 +263,7 @@ private void generateUdf(Ddl.Builder builder) { "endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")); } else { udfBuilder.definition( - "\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\""); + "{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\"}"); } } diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java.orig b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java.orig new file mode 100644 index 0000000000..ff1cb26828 --- /dev/null +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java.orig @@ -0,0 +1,751 @@ +/* + * Copyright (C) 2018 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.teleport.spanner.ddl; + +import com.google.auto.value.AutoValue; +import com.google.cloud.spanner.Dialect; +import com.google.cloud.teleport.spanner.common.Type; +import com.google.cloud.teleport.spanner.ddl.ForeignKey.ReferentialAction; +import com.google.cloud.teleport.spanner.ddl.Table.InterleaveType; +import com.google.cloud.teleport.spanner.ddl.Udf.SqlSecurity; +import com.google.common.base.Optional; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Sets; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.Random; +import java.util.Set; +import java.util.stream.Collectors; + +/** Generates a random {@link Ddl}. */ +@AutoValue +public abstract class RandomDdlGenerator { + + // No bytes, no floats. + private static final Type.Code[] PK_TYPES = + new Type.Code[] { + Type.Code.BOOL, Type.Code.INT64, Type.Code.STRING, Type.Code.TIMESTAMP, Type.Code.DATE + }; + + private static final Type.Code[] PG_PK_TYPES = + new Type.Code[] { + Type.Code.PG_BOOL, + Type.Code.PG_INT8, + Type.Code.PG_FLOAT8, + Type.Code.PG_TEXT, + Type.Code.PG_VARCHAR, + Type.Code.PG_TIMESTAMPTZ, + Type.Code.PG_DATE + }; + + private static final Type.Code[] COLUMN_TYPES = + new Type.Code[] { + Type.Code.BOOL, + Type.Code.INT64, + Type.Code.FLOAT32, + Type.Code.FLOAT64, + Type.Code.STRING, + Type.Code.BYTES, + Type.Code.TIMESTAMP, + Type.Code.DATE + }; + + private static final Type.Code[] PG_COLUMN_TYPES = + new Type.Code[] { + Type.Code.PG_BOOL, + Type.Code.PG_INT8, + Type.Code.PG_FLOAT4, + Type.Code.PG_FLOAT8, + Type.Code.PG_VARCHAR, + Type.Code.PG_BYTEA, + Type.Code.PG_TIMESTAMPTZ, + Type.Code.PG_NUMERIC, + Type.Code.PG_DATE + }; + + // Types that could be used by check constraint + private static final Set CHECK_CONSTRAINT_TYPES = + new HashSet<>( + Arrays.asList( + Type.Code.BOOL, + Type.Code.INT64, + Type.Code.FLOAT32, + Type.Code.FLOAT64, + Type.Code.STRING, + Type.Code.TIMESTAMP, 
+ Type.Code.DATE)); + + private static final Set PG_CHECK_CONSTRAINT_TYPES = + new HashSet<>( + Arrays.asList( + Type.Code.PG_BOOL, + Type.Code.PG_INT8, + Type.Code.PG_FLOAT4, + Type.Code.PG_FLOAT8, + Type.Code.PG_TEXT, + Type.Code.PG_VARCHAR, + Type.Code.PG_TIMESTAMPTZ, + Type.Code.PG_NUMERIC, + Type.Code.PG_DATE)); + + private static final String[] UDF_LANGUAGES = new String[] {"SQL", "REMOTE"}; + + private static final int MAX_PKS = 16; + + public abstract Dialect getDialect(); + + public abstract Random getRandom(); + + public abstract int getArrayChance(); + + public abstract int getRemoteUdfChance(); + + public abstract int[] getMaxBranchPerLevel(); + + public abstract int getMaxPkComponents(); + + public abstract int getMaxColumns(); + + public abstract int getMaxIdLength(); + + public abstract int getMaxIndex(); + + public abstract int getMaxForeignKeys(); + + public abstract boolean getEnableGeneratedColumns(); + + public abstract boolean getEnableDefaultColumns(); + + public abstract boolean getEnableCheckConstraints(); + + public abstract int getMaxUdfs(); + + public abstract int getMaxUdfParameters(); + + public abstract int getMaxViews(); + + public abstract int getMaxChangeStreams(); + + public static Builder builder() { + return builder(Dialect.GOOGLE_STANDARD_SQL); + } + + public static Builder builder(Dialect dialect) { + + return new AutoValue_RandomDdlGenerator.Builder() + .setDialect(dialect) + .setRandom(new Random()) + .setArrayChance(20) + .setRemoteUdfChance(20) + .setMaxPkComponents(3) + .setMaxBranchPerLevel(new int[] {2, 2, 1, 1, 1, 1, 1}) + .setMaxUdfs(0) + .setMaxUdfParameters(2) + .setMaxViews(0) + .setMaxIndex(2) + .setMaxForeignKeys(2) + .setEnableCheckConstraints(true) + .setMaxColumns(8) + .setMaxIdLength(11) + .setEnableGeneratedColumns(true) + .setEnableDefaultColumns(true) + // Change stream is only supported in GoogleSQL, not in PostgreSQL. + .setMaxChangeStreams(dialect == Dialect.GOOGLE_STANDARD_SQL ? 
2 : 0); + } + + /** A builder for {@link RandomDdlGenerator}. */ + @AutoValue.Builder + public abstract static class Builder { + + public abstract Builder setDialect(Dialect dialect); + + public abstract Builder setRandom(Random rnd); + + public abstract Builder setArrayChance(int chance); + + public abstract Builder setRemoteUdfChance(int chance); + + public abstract Builder setMaxBranchPerLevel(int[] arr); + + public abstract Builder setMaxPkComponents(int val); + + public abstract Builder setMaxIdLength(int val); + + public abstract Builder setMaxColumns(int val); + + public abstract RandomDdlGenerator build(); + + public abstract Builder setMaxIndex(int indexes); + + public abstract Builder setMaxForeignKeys(int foreignKeys); + + public abstract Builder setEnableGeneratedColumns(boolean enable); + + public abstract Builder setEnableDefaultColumns(boolean enable); + + public abstract Builder setEnableCheckConstraints(boolean checkConstraints); + + public abstract Builder setMaxUdfs(int maxUdfs); + + public abstract Builder setMaxUdfParameters(int maxUdfParameters); + + public abstract Builder setMaxViews(int maxViews); + + public abstract Builder setMaxChangeStreams(int maxChangeStreams); + } + + public abstract Builder toBuilder(); + + private Set allIdentifiers = Sets.newHashSet(); + + public Ddl generate() { + Ddl.Builder builder = Ddl.builder(getDialect()); + int numParentTables = 1 + getRandom().nextInt(getMaxBranchPerLevel()[0]); + for (int i = 0; i < numParentTables; i++) { + generateTable(builder, null, 0); + } + int numUdfs = getRandom().nextInt(getMaxUdfs() + 1); + for (int i = 0; i < numUdfs; i++) { + generateUdf(builder); + } + int numViews = getRandom().nextInt(getMaxViews() + 1); + for (int i = 0; i < numViews; i++) { + generateView(builder); + } + int numChangeStreams = getRandom().nextInt(getMaxChangeStreams() + 1); + for (int i = 0; i < numChangeStreams; i++) { + generateChangeStream(builder); + } + + return builder.build(); + } + + private void 
generateUdf(Ddl.Builder builder) {
+    String name = generateIdentifier(getMaxIdLength());
+    // NOTE(review): the UDF dialect is pinned to GoogleSQL even when getDialect() is POSTGRESQL;
+    // confirm that this is intended.
+    Udf.Builder udfBuilder =
+        builder
+            .createUdf(name)
+            .definition("select 1")
+            .dialect(Dialect.GOOGLE_STANDARD_SQL)
+            .name(name);
+    if (getRandom().nextBoolean()) {
+      Type type =
+          generateType((getDialect() == Dialect.GOOGLE_STANDARD_SQL) ? PK_TYPES : PG_PK_TYPES, -1);
+      udfBuilder.type(type.getCode().getName());
+    }
+
+    if (getRandom().nextInt(100) <= getRemoteUdfChance()) {
+      udfBuilder.language("REMOTE");
+    }
+
+    if (!"REMOTE".equals(udfBuilder.language())) {
+      if (getRandom().nextBoolean()) {
+        udfBuilder.security(SqlSecurity.INVOKER);
+      }
+    } else {
+      if (getDialect() == Dialect.GOOGLE_STANDARD_SQL) {
+        udfBuilder.options(
+            ImmutableList.of(
+                "endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\""));
+      } else {
+        // Merge conflict resolved to the incoming side: for non-GoogleSQL dialects the endpoint
+        // is passed as a brace-wrapped JSON object in the definition rather than via options.
+        udfBuilder.definition(
+            "{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\"}");
+      }
+    }
+
+    int numUdfParameters = getRandom().nextInt(getMaxUdfParameters() + 1);
+    for (int i = 0; i < numUdfParameters; i++) {
+      String paramName = generateIdentifier(getMaxIdLength());
+      Type type =
+          generateType((getDialect() == Dialect.GOOGLE_STANDARD_SQL) ?
PK_TYPES : PG_PK_TYPES, -1); + UdfParameter.Builder udfParameterBuilder = + udfBuilder.parameter(paramName).type(type.getCode().getName()); + if (getRandom().nextBoolean()) { + udfParameterBuilder.defaultExpression(addDefaultValueToColumn(type)); + } + udfParameterBuilder.endUdfParameter(); + } + udfBuilder.endUdf(); + } + + private void generateView(Ddl.Builder builder) { + String name = generateIdentifier(getMaxIdLength()); + View.Builder viewBuilder = builder.createView(name); + if (getRandom().nextBoolean()) { + viewBuilder.security(View.SqlSecurity.INVOKER); + } else { + viewBuilder.security(View.SqlSecurity.DEFINER); + } + + Table sourceTable = selectRandomTable(builder); + if (sourceTable == null) { + viewBuilder.query("select 1"); + } else { + StringBuilder queryBuilder = new StringBuilder("select "); + boolean firstIncluded = true; + for (Column column : sourceTable.columns()) { + if (getRandom().nextBoolean()) { + if (!firstIncluded) { + queryBuilder.append(", "); + } + if (getDialect() == Dialect.POSTGRESQL) { + queryBuilder.append("\""); + } + queryBuilder.append(column.name()); + if (getDialect() == Dialect.POSTGRESQL) { + queryBuilder.append("\""); + } + firstIncluded = false; + } + } + if (firstIncluded) { + queryBuilder.append("1"); + } + queryBuilder.append(" from "); + if (getDialect() == Dialect.POSTGRESQL) { + queryBuilder.append("\""); + } + queryBuilder.append(sourceTable.name()); + if (getDialect() == Dialect.POSTGRESQL) { + queryBuilder.append("\""); + } + viewBuilder.query(queryBuilder.toString()); + } + + viewBuilder.endView(); + } + + private void generateChangeStream(Ddl.Builder builder) { + if (getDialect() == Dialect.POSTGRESQL) { + throw new IllegalArgumentException("Change stream is not supported in PostgreSQL dialect."); + } + + String name = generateIdentifier(getMaxIdLength()); + ChangeStream.Builder changeStreamBuilder = builder.createChangeStream(name); + + generateChangeStreamForClause(builder, changeStreamBuilder); + + 
ImmutableList.Builder options = ImmutableList.builder(); + if (getRandom().nextBoolean()) { + options.add("retention_period=\"7d\""); + } + if (getRandom().nextBoolean()) { + options.add("value_capture_type=\"OLD_AND_NEW_VALUES\""); + } + changeStreamBuilder.options(options.build()); + + changeStreamBuilder.endChangeStream(); + } + + private void generateChangeStreamForClause( + Ddl.Builder builder, ChangeStream.Builder changeStreamBuilder) { + boolean forAll = getRandom().nextBoolean(); + if (forAll) { + changeStreamBuilder.forClause("FOR ALL"); + return; + } + + Table table = selectRandomTable(builder); + if (table == null) { + return; + } + + StringBuilder forClause = new StringBuilder("FOR `").append(table.name()).append("`"); + boolean allColumns = getRandom().nextBoolean(); + if (allColumns) { + changeStreamBuilder.forClause(forClause.toString()); + return; + } + + // Select a random set of watched columns, excluding primary keys and generated columns. + Set watchedColumns = Sets.newHashSet(); + Set primaryKeys = + table.primaryKeys().stream().map(pk -> pk.name()).collect(Collectors.toSet()); + for (Column column : table.columns()) { + if (getRandom().nextBoolean() + && !primaryKeys.contains(column.name()) + && !column.isGenerated()) { + watchedColumns.add("`" + column.name() + "`"); + } + } + forClause.append("(").append(String.join(", ", watchedColumns)).append(")"); + changeStreamBuilder.forClause(forClause.toString()); + } + + private void generateTable(Ddl.Builder builder, Table parent, int level) { + String name = generateIdentifier(getMaxIdLength()); + Table.Builder tableBuilder = builder.createTable(name); + + Random rnd = getRandom(); + int pkSize = 0; + if (parent != null) { + tableBuilder.interleaveInParent(parent.name()); + tableBuilder.interleaveType( + getDialect() == Dialect.GOOGLE_STANDARD_SQL && rnd.nextBoolean() + ? 
InterleaveType.IN + : InterleaveType.IN_PARENT); + for (IndexColumn pk : parent.primaryKeys()) { + Column pkColumn = parent.column(pk.name()); + tableBuilder.addColumn(pkColumn); + tableBuilder.primaryKey().set(pk).end(); + pkSize++; + } + } + + int numPks = Math.min(1 + rnd.nextInt(getMaxPkComponents()), MAX_PKS - pkSize); + for (int i = 0; i < numPks; i++) { + Column pkColumn = + generateColumn( + (getDialect() == Dialect.GOOGLE_STANDARD_SQL) ? PK_TYPES : PG_PK_TYPES, -1); + tableBuilder.addColumn(pkColumn); + + IndexColumn.Order order = rnd.nextBoolean() ? IndexColumn.Order.ASC : IndexColumn.Order.DESC; + if (getDialect() == Dialect.POSTGRESQL) { + order = IndexColumn.Order.ASC; + } + IndexColumn pk = IndexColumn.create(pkColumn.name(), order, getDialect()); + tableBuilder.primaryKey().set(pk).end(); + } + + int numColumns = rnd.nextInt(getMaxColumns()); + + for (int i = 0; i < numColumns; i++) { + Column column = + generateColumn( + (getDialect() == Dialect.GOOGLE_STANDARD_SQL) ? COLUMN_TYPES : PG_COLUMN_TYPES, + getArrayChance()); + tableBuilder.addColumn(column); + } + + Table table = tableBuilder.build(); + + if (getEnableGeneratedColumns()) { + // Add a generated column + Column depColumn = table.columns().get(rnd.nextInt(table.columns().size())); + String expr = depColumn.name(); + if (getDialect() == Dialect.POSTGRESQL) { + expr = "\"" + expr + "\""; + } + Column generatedColumn = + Column.builder(getDialect()) + .name("generated") + .type(depColumn.type()) + .max() + .notNull(depColumn.notNull()) + .generatedAs(expr) + .stored() + .autoBuild(); + tableBuilder.addColumn(generatedColumn); + table = tableBuilder.build(); + } + + int numIndexes = rnd.nextInt(getMaxIndex()); + ImmutableList.Builder indexes = ImmutableList.builder(); + for (int i = 0; i < numIndexes; i++) { + Index.Builder index = + Index.builder(getDialect()).name(generateIdentifier(getMaxIdLength())).table(name); + IndexColumn.IndexColumnsBuilder columns = index.columns(); + 
ImmutableList.Builder filters = ImmutableList.builder(); + boolean interleaved = rnd.nextBoolean(); + Set pks = Sets.newHashSet(); + // Do not interleave indexes at the last table level. + // This causes tests to fail as generated schema exceeds interleaving limit. + int finalLevel = getMaxBranchPerLevel().length - 1; + if (interleaved && level < finalLevel) { + index.interleaveIn(table.name()); + } + for (IndexColumn pk : table.primaryKeys()) { + if (interleaved) { + columns.set(pk); + if (rnd.nextBoolean()) { + filters.add("\"" + pk.name() + "\" IS NOT NULL"); + } + } + pks.add(pk.name()); + } + + int maxNumIndexColumns = MAX_PKS - pks.size(); + int indexColumns = 0; + for (int j = 0; j < table.columns().size(); j++) { + Column cm = table.columns().get(j); + String columnName = cm.name(); + if (indexColumns >= maxNumIndexColumns) { + break; + } + // Already added. + if (interleaved && pks.contains(columnName)) { + continue; + } + if (cm.type().getCode() == Type.Code.ARRAY || cm.type().getCode() == Type.Code.PG_ARRAY) { + continue; + } + // Skip the types that may generate NaN value, as NaN cannot be used as a key + if (cm.type().getCode() == Type.Code.FLOAT32 + || cm.type().getCode() == Type.Code.PG_FLOAT4 + || cm.type().getCode() == Type.Code.FLOAT64 + || cm.type().getCode() == Type.Code.PG_FLOAT8 + || cm.type().getCode() == Type.Code.PG_NUMERIC) { + continue; + } + int val = rnd.nextInt(4); + switch (val) { + case 0: + columns.create().name(columnName).asc(); + if (!pks.contains(columnName)) { + indexColumns++; + } + break; + case 1: + columns.create().name(columnName).desc(); + if (!pks.contains(columnName)) { + indexColumns++; + } + break; + case 2: + if (!pks.contains(columnName)) { + columns.create().name(columnName).storing(); + } + break; + default: + // skip this column + } + // skip the primary key column if it is randomed to storing + if (val < 2 || (val < 3 && !pks.contains(columnName))) { + if (getDialect() == Dialect.POSTGRESQL) { + if 
(rnd.nextBoolean()) { + columns.nullsFirst(); + } else { + columns.nullsLast(); + } + } + columns.endIndexColumn(); + if (rnd.nextBoolean()) { + filters.add("\"" + columnName + "\" IS NOT NULL"); + } + } + } + columns.end(); + index.nullFiltered(rnd.nextBoolean()); + index.filter(String.join(" AND ", filters.build())); + // index.unique(rnd.nextBoolean()); + if (indexColumns > 0) { + indexes.add(index.build().prettyPrint()); + } + } + tableBuilder.indexes(indexes.build()); + + if (parent != null) { + // Create redundant foreign keys to the parent table. + int numForeignKeys = rnd.nextInt(getMaxForeignKeys()); + ImmutableList.Builder foreignKeys = ImmutableList.builder(); + for (int i = 0; i < numForeignKeys; i++) { + ForeignKey.Builder foreignKeyBuilder = + ForeignKey.builder(getDialect()) + .name(generateIdentifier(getMaxIdLength())) + .table(name) + .referencedTable(parent.name()); + for (IndexColumn pk : parent.primaryKeys()) { + foreignKeyBuilder.columnsBuilder().add(pk.name()); + foreignKeyBuilder.referencedColumnsBuilder().add(pk.name()); + } + if (rnd.nextBoolean()) { + foreignKeyBuilder.referentialAction(Optional.of(generateRandomReferentialAction(rnd))); + } + if (rnd.nextBoolean()) { + foreignKeyBuilder.isEnforced(rnd.nextBoolean()); + } + ForeignKey foreignKey = foreignKeyBuilder.build(); + if (foreignKey.columns().size() > 0) { + foreignKeys.add(foreignKey.prettyPrint()); + } + } + tableBuilder.foreignKeys(foreignKeys.build()); + } + + while (getEnableCheckConstraints()) { + ImmutableList.Builder checkConstraints = ImmutableList.builder(); + // Pick a random column to add check constraint on. 
+ ImmutableList columns = table.columns(); + int colIndex = rnd.nextInt(columns.size()); + Column column = columns.get(colIndex); + if (getDialect() == Dialect.GOOGLE_STANDARD_SQL + && !CHECK_CONSTRAINT_TYPES.contains(column.type().getCode())) { + continue; + } + if (getDialect() == Dialect.POSTGRESQL + && !PG_CHECK_CONSTRAINT_TYPES.contains(column.type().getCode())) { + continue; + } + // An expression that won't be trivially optimized away by query optimizer. + + String expr = "TO_HEX(SHA1(CAST(" + column.name() + " AS STRING))) <= '~'"; + String checkName = generateIdentifier(getMaxIdLength()); + if (getDialect() == Dialect.POSTGRESQL) { + expr = "LENGTH(CAST(\"" + column.name() + "\" AS VARCHAR)) > '-1'::bigint"; + checkName = "\"" + checkName + "\""; + } + checkConstraints.add("CONSTRAINT " + checkName + " CHECK(" + expr + ")"); + tableBuilder.checkConstraints(checkConstraints.build()); + break; + } + + tableBuilder.endTable(); + + table = tableBuilder.build(); + + int nextLevel = level + 1; + int[] maxBranchPerLevel = getMaxBranchPerLevel(); + if (nextLevel < maxBranchPerLevel.length + && maxBranchPerLevel[nextLevel] > 0 + && table.primaryKeys().size() < MAX_PKS) { + generateTable(builder, table, nextLevel); + } + } + + private ReferentialAction generateRandomReferentialAction(Random rnd) { + return rnd.nextBoolean() + ? 
ReferentialAction.ON_DELETE_CASCADE + : ReferentialAction.ON_DELETE_NO_ACTION; + } + + private String addDefaultValueToColumn(Type type) { + String expr = null; + if (getEnableDefaultColumns()) { + // Generate default values to columns with certain types only: + switch (type.getCode()) { + case BOOL: + case PG_BOOL: + expr = "(false)"; + break; + case INT64: + expr = "(100)"; + break; + case PG_INT8: + expr = "'100'::bigint"; + break; + case STRING: + case PG_VARCHAR: + expr = "'John'"; + break; + } + } + return expr; + } + + private Column generateColumn(Type.Code[] codes, int arrayPercentage) { + int length = 1 + getRandom().nextInt(getMaxIdLength()); + String name = generateIdentifier(length); + Type type = generateType(codes, arrayPercentage); + int size = -1; + boolean nullable = getRandom().nextBoolean(); + String expr = addDefaultValueToColumn(type); + return Column.builder(getDialect()) + .name(name) + .type(type) + .size(size) + .notNull(nullable) + .defaultExpression(expr) + .autoBuild(); + } + + private String generateIdentifier(int length) { + String id; + while (true) { + id = RandomUtils.randomAlphanumeric(length); + if (!allIdentifiers.contains(id.toLowerCase())) { + break; + } + } + allIdentifiers.add(id.toLowerCase()); + return id; + } + + private Type generateType(Type.Code[] codes, int arrayPercentage) { + boolean isArray = getRandom().nextInt(100) <= arrayPercentage; + Type.Code code = randomCode(codes); + if (isArray) { + if (getDialect() == Dialect.POSTGRESQL) { + return Type.pgArray(typeOf(code)); + } + return Type.array(typeOf(code)); + } + return typeOf(code); + } + + private Table selectRandomTable(Ddl.Builder builder) { + Collection tables = builder.tables(); + int tablesToSkip = getRandom().nextInt(tables.size()); + for (Table table : tables) { + if (tablesToSkip > 0) { + --tablesToSkip; + } else { + return table; + } + } + return null; + } + + private Type typeOf(Type.Code code) { + switch (code) { + case BOOL: + return Type.bool(); + 
case FLOAT32: + return Type.float32(); + case FLOAT64: + return Type.float64(); + case STRING: + return Type.string(); + case BYTES: + return Type.bytes(); + case TIMESTAMP: + return Type.timestamp(); + case DATE: + return Type.date(); + case INT64: + return Type.int64(); + case PG_BOOL: + return Type.pgBool(); + case PG_INT8: + return Type.pgInt8(); + case PG_FLOAT4: + return Type.pgFloat4(); + case PG_FLOAT8: + return Type.pgFloat8(); + case PG_TEXT: + return Type.pgText(); + case PG_VARCHAR: + return Type.pgVarchar(); + case PG_BYTEA: + return Type.pgBytea(); + case PG_TIMESTAMPTZ: + return Type.pgTimestamptz(); + case PG_NUMERIC: + return Type.pgNumeric(); + case PG_DATE: + return Type.pgDate(); + } + throw new IllegalArgumentException("Arrays and Structs are not supported"); + } + + private Type.Code randomCode(Type.Code[] codes) { + return codes[getRandom().nextInt(codes.length)]; + } +} diff --git a/v1/src/test/resources/ExportPipelineIT/spanner-gsql-ddl.sql b/v1/src/test/resources/ExportPipelineIT/spanner-gsql-ddl.sql index a33ec76cca..342476dd5d 100644 --- a/v1/src/test/resources/ExportPipelineIT/spanner-gsql-ddl.sql +++ b/v1/src/test/resources/ExportPipelineIT/spanner-gsql-ddl.sql @@ -42,7 +42,7 @@ CREATE MODEL `%PREFIX%ModelStruct` CREATE SCHEMA `%PREFIX%UdfSchema`; CREATE FUNCTION `%PREFIX%UdfSchema`.`Remote`(x INT64, y INT64) RETURNS INT64 NOT DETERMINISTIC LANGUAGE REMOTE OPTIONS ( - endpoint = `https://us-central1-myproject.cloudfunctions.net/myfunc`, + endpoint = "https://us-central1-myproject.cloudfunctions.net/myfunc", max_batching_rows = 10 ); From 503617780fb5ffdc59aa6583959316e835ca8892 Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Tue, 26 May 2026 13:25:44 -0700 Subject: [PATCH 08/19] Add support for REMOTE UDFs. 
--- .../org/apache/beam/it/gcp/spanner/SpannerResourceManager.java | 1 - .../test/java/com/google/cloud/teleport/spanner/CopyDbTest.java | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/spanner/SpannerResourceManager.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/spanner/SpannerResourceManager.java index 77ca372143..052f6e60ee 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/spanner/SpannerResourceManager.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/spanner/SpannerResourceManager.java @@ -50,7 +50,6 @@ import com.google.monitoring.v3.TimeInterval; import com.google.protobuf.Timestamp; import com.google.spanner.admin.instance.v1.Instance.Edition; - import dev.failsafe.Failsafe; import dev.failsafe.RetryPolicy; import java.time.Duration; diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java index 62ba01995d..c1a64702e0 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java @@ -1101,7 +1101,7 @@ public void udfs() throws Exception { UdfParameter.parse( "arg1 STRING DEFAULT 'bar'", "s1.Foo2", Dialect.GOOGLE_STANDARD_SQL)) .endUdf() - .createUdf("s1.Foo2") + .createUdf("s1.Foo3") .dialect(Dialect.GOOGLE_STANDARD_SQL) .name("s1.Foo3") .language("REMOTE") From 14de2c4db73749af8f1957d9c3c2921a5b4dea0a Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Tue, 26 May 2026 13:25:44 -0700 Subject: [PATCH 09/19] Add support for REMOTE UDFs. 
--- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index f399d6a25d..254447f187 100644 --- a/pom.xml +++ b/pom.xml @@ -434,7 +434,7 @@ - 1.27.0 + 1.17.0 From e32609880aa8ae51fc2e1f581463d4cf1418f37d Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Tue, 26 May 2026 15:02:47 -0700 Subject: [PATCH 10/19] Spotless fixes. --- .../teleport/spanner/AvroRecordConverter.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/AvroRecordConverter.java b/v1/src/main/java/com/google/cloud/teleport/spanner/AvroRecordConverter.java index 8599db65cd..e55f76979b 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/AvroRecordConverter.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/AvroRecordConverter.java @@ -401,8 +401,8 @@ static Optional> readFloat32Array( return Optional.empty(); } switch (avroType) { - // For type check at compile time, the type of x has to be specified (as cast) so that - // convertability to float can be verified. + // For type check at compile time, the type of x has to be specified (as cast) so that + // convertability to float can be verified. case FLOAT: return Optional.of((List) fieldValue); case STRING: @@ -413,9 +413,9 @@ static Optional> readFloat32Array( .map(x -> x == null ? null : Float.valueOf(x.toString())) .collect(Collectors.toList())); } - // Avoid decoding integers as not all 32 bit integers can be represented in float32. + // Avoid decoding integers as not all 32 bit integers can be represented in float32. case INT: - // Avoid decoding 64 bit values into 32 bit space as this will cause a precision loss. + // Avoid decoding 64 bit values into 32 bit space as this will cause a precision loss. 
case LONG: case DOUBLE: default: @@ -432,8 +432,8 @@ static Optional> readFloat64Array( return Optional.empty(); } switch (avroType) { - // For type check at compile time, the type of x has to be specified (as cast) so that - // convertability to double can be verified. + // For type check at compile time, the type of x has to be specified (as cast) so that + // convertability to double can be verified. case DOUBLE: return Optional.of((List) fieldValue); case FLOAT: @@ -482,8 +482,8 @@ static Optional> readInt64Array( return Optional.empty(); } switch (avroType) { - // For type check at compile time, the type of x has to be specified (as cast) so that - // convertability to long can be verified. + // For type check at compile time, the type of x has to be specified (as cast) so that + // convertability to long can be verified. case LONG: return Optional.of((List) fieldValue); case INT: @@ -637,9 +637,9 @@ private static Optional readFloat32( return Optional.ofNullable((Utf8) record.get(fieldName)) .map(Utf8::toString) .map(Float::valueOf); - // Avoid decoding integers as not all 32 bit integers can be represented in float32. + // Avoid decoding integers as not all 32 bit integers can be represented in float32. case INT: - // Avoid decoding 64 bit values into 32 bit space as this will cause a precision loss. + // Avoid decoding 64 bit values into 32 bit space as this will cause a precision loss. case LONG: case DOUBLE: default: From 8b62fd0ad30a2353d9ec20eb7c77c6bf2167798d Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Tue, 26 May 2026 15:06:35 -0700 Subject: [PATCH 11/19] More spotless. 
--- .../teleport/spanner/spannerio/StructUtils.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/spannerio/StructUtils.java b/v1/src/main/java/com/google/cloud/teleport/spanner/spannerio/StructUtils.java index 3857957e05..69f90677d1 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/spannerio/StructUtils.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/spannerio/StructUtils.java @@ -262,7 +262,7 @@ private static Type simpleBeamTypeToSpannerType(Schema.FieldType beamType) { return Type.string(); case BOOLEAN: return Type.bool(); - // TODO: implement logical type date and timestamp + // TODO: implement logical type date and timestamp case DATETIME: return Type.timestamp(); default: @@ -335,7 +335,7 @@ private static void addIterableToStructBuilder( case STRING: structBuilder.set(column).toStringArray((Iterable) ((Object) iterable)); break; - // TODO: implement logical date and datetime + // TODO: implement logical date and datetime case DATETIME: if (iterable == null) { structBuilder.set(column).toTimestampArray(null); @@ -367,10 +367,10 @@ private static void addIterableToStructBuilder( return struct.getBoolean(column); case BYTES: return struct.getBytes(column).toByteArray(); - // TODO: implement logical datetime + // TODO: implement logical datetime case TIMESTAMP: return Instant.ofEpochSecond(struct.getTimestamp(column).getSeconds()).toDateTime(); - // TODO: implement logical date + // TODO: implement logical date case DATE: return DateTime.parse(struct.getDate(column).toString()); case INT64: @@ -411,12 +411,12 @@ private static void addIterableToStructBuilder( return struct.getBooleanList(column); case BYTES: return struct.getBytesList(column); - // TODO: implement logical datetime + // TODO: implement logical datetime case TIMESTAMP: return struct.getTimestampList(column).stream() .map(timestamp -> 
Instant.ofEpochSecond(timestamp.getSeconds()).toDateTime()) .collect(toList()); - // TODO: implement logical date + // TODO: implement logical date case DATE: return struct.getDateList(column).stream() .map(date -> DateTime.parse(date.toString())) From 94aa3eb580d6ded79ab06f6f540cddcdccc001c8 Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Wed, 27 May 2026 15:15:44 -0700 Subject: [PATCH 12/19] Consider CHANGE STREAM created functions when exporting UDF. --- .../spanner/ddl/InformationSchemaScanner.java | 27 +++++++++++-------- .../cloud/teleport/spanner/ddl/Udf.java | 11 ++++---- .../teleport/spanner/ExportPipelineIT.java | 7 +++++ .../ExportPipelineIT/spanner-gsql-ddl.sql | 2 ++ .../ExportPipelineIT/spanner-pg-ddl.sql | 2 ++ 5 files changed, 32 insertions(+), 17 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java index 5af96c48db..198c6882be 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java @@ -1056,10 +1056,10 @@ private void listUdfs(Ddl.Builder builder) { ResultSet resultSet = context.executeQuery(queryStatement); while (resultSet.next()) { - String functionName = - resultSet.isNull(0) || resultSet.isNull(1) + String schema = resultSet.isNull(0) ? null : resultSet.getString(0); + String functionName = resultSet.isNull(1) ? null - : getQualifiedName(resultSet.getString(0), resultSet.getString(1)); + : getQualifiedName(schema, resultSet.getString(1)); String functionSpecificName = getQualifiedName(resultSet.getString(2), resultSet.getString(3)); String functionType = resultSet.isNull(4) ? null : resultSet.getString(4); @@ -1067,10 +1067,10 @@ private void listUdfs(Ddl.Builder builder) { String functionDefinition = resultSet.isNull(6) ? 
null : resultSet.getString(6); String functionSecurityType = resultSet.isNull(7) ? null : resultSet.getString(7); - // The routine_body is SQL or EXTERNAL and the external_language is not available yet. - // Assume that only available EXTERNAL language is REMOTE. - if (dialect == Dialect.POSTGRESQL && "EXTERNAL".equalsIgnoreCase(language)) { - language = "REMOTE"; + // Built-in functions such as Change Stream READ_X are marked as External. + // Skip and do not re-create; they will be automatically added by change streams. + if ("EXTERNAL".equalsIgnoreCase(language)) { + continue; } LOG.debug("Schema user-defined function {}", functionName); @@ -1117,13 +1117,18 @@ private void listUdfOptions(Ddl.Builder builder) { if (dialect == Dialect.POSTGRESQL) { return; } + // Filter out EXTERNAL functions, which are built-in. ResultSet resultSet = context.executeQuery( Statement.of( - "SELECT t.SPECIFIC_SCHEMA, t.SPECIFIC_NAME, t.OPTION_NAME, t.OPTION_TYPE," - + " t.OPTION_VALUE FROM information_schema.routine_options AS t WHERE" - + " t.SPECIFIC_SCHEMA NOT IN ('INFORMATION_SCHEMA', 'SPANNER_SYS') ORDER BY" - + " t.SPECIFIC_NAME, t.OPTION_NAME")); + "SELECT o.SPECIFIC_SCHEMA, o.SPECIFIC_NAME, o.OPTION_NAME, o.OPTION_TYPE," + + " o.OPTION_VALUE" + + " FROM information_schema.routine_options AS o" + + " INNER JOIN information_schema.routines AS r" + + " USING (SPECIFIC_SCHEMA, SPECIFIC_NAME)" + + " WHERE o.SPECIFIC_SCHEMA NOT IN ('INFORMATION_SCHEMA', 'SPANNER_SYS') " + + " AND r.routine_body != 'EXTERNAL'" + + " ORDER BY o.SPECIFIC_NAME, o.OPTION_NAME")); Map> allOptions = Maps.newHashMap(); while (resultSet.next()) { diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java index bb444f69d3..36e5447230 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java @@ -34,11 +34,10 @@ public abstract class Udf
implements Serializable { private static final long serialVersionUID = 1L; - // Remote function body is printed using $$ strings, which are - // unlikely but possible to be present in the function definition. - // https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-ESCAPE + // Remote UDF definition is printed using: AS '{definition}'. + // Quotes inside definition string can be escaped using ''. public static final Escaper PG_DEFINITION_ESCAPER = - Escapers.builder().addEscape('$', "\\044").build(); + Escapers.builder().addEscape('\'', "''").build(); /** The access rights used by the UDF for underlying data: invoker-rights or definer-rights. */ public enum SqlSecurity { @@ -146,9 +145,9 @@ public void prettyPrint(Appendable appendable) throws IOException { } else { // Other languages use AS definition instead of sql body. appendable - .append(" AS $$") + .append(" AS '") .append(PG_DEFINITION_ESCAPER.escape(definition())) - .append("$$"); + .append("'"); } break; default: diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java index 9c375ae404..3283147c6d 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java @@ -263,6 +263,13 @@ private void testSpannerToGCSAvroBase( gcsClient.listArtifacts( "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Sequence2"))); + try { + Thread.sleep(1000 * 60 * 5); + } catch (InterruptedException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + assertThat(singersArtifacts).isNotEmpty(); assertThat(emptyArtifacts).isNotEmpty(); assertThat(udfRemoteArtifacts).isNotEmpty(); diff --git a/v1/src/test/resources/ExportPipelineIT/spanner-gsql-ddl.sql b/v1/src/test/resources/ExportPipelineIT/spanner-gsql-ddl.sql index 342476dd5d..f30df2bed4 100644 ---
a/v1/src/test/resources/ExportPipelineIT/spanner-gsql-ddl.sql +++ b/v1/src/test/resources/ExportPipelineIT/spanner-gsql-ddl.sql @@ -33,6 +33,8 @@ CREATE TABLE `%PREFIX%Singers` ( `MyTokens` TOKENLIST AS (TOKENIZE_FULLTEXT(Review)) HIDDEN ) PRIMARY KEY(Id), INTERLEAVE IN `%PREFIX%Root`; +CREATE CHANGE STREAM `%PREFIX%SingersChanges` FOR `%PREFIX%Singers`; + DROP MODEL IF EXISTS `%PREFIX%ModelStruct`; CREATE MODEL `%PREFIX%ModelStruct` INPUT(content STRING(MAX)) diff --git a/v1/src/test/resources/ExportPipelineIT/spanner-pg-ddl.sql b/v1/src/test/resources/ExportPipelineIT/spanner-pg-ddl.sql index 811fc91c82..14c1882e9a 100644 --- a/v1/src/test/resources/ExportPipelineIT/spanner-pg-ddl.sql +++ b/v1/src/test/resources/ExportPipelineIT/spanner-pg-ddl.sql @@ -34,6 +34,8 @@ CREATE TABLE "%PREFIX%Singers" ( "NameTokens" spanner.tokenlist generated always as (spanner.tokenize_fulltext("FirstName")) stored hidden, PRIMARY KEY("Id")) INTERLEAVE IN "%PREFIX%Root"; +CREATE CHANGE STREAM "%PREFIX%SingersChanges" FOR "%PREFIX%Singers"; + CREATE SCHEMA "%PREFIX%UdfSchema"; DROP SEARCH INDEX IF EXISTS "%PREFIX%SearchIndex"; From 71e282cd93eb6f79dbdd411580d9b7bec22f2e6d Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Thu, 28 May 2026 11:06:56 -0700 Subject: [PATCH 13/19] Fixes for formatting and tests --- .../teleport/spanner/ddl/InformationSchemaScanner.java | 5 ++--- .../teleport/spanner/AvroSchemaToDdlConverterTest.java | 2 +- .../google/cloud/teleport/spanner/ExportPipelineIT.java | 7 ------- .../com/google/cloud/teleport/spanner/ddl/DdlTest.java | 4 ++-- .../com/google/cloud/teleport/spanner/ddl/UdfTest.java | 4 ++-- 5 files changed, 7 insertions(+), 15 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java index 198c6882be..ebfa67bb93 100644 --- 
a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScanner.java @@ -1057,9 +1057,8 @@ private void listUdfs(Ddl.Builder builder) { while (resultSet.next()) { String schema = resultSet.isNull(0) ? null : resultSet.getString(0); - String functionName = resultSet.isNull(1) - ? null - : getQualifiedName(schema, resultSet.getString(1)); + String functionName = + resultSet.isNull(1) ? null : getQualifiedName(schema, resultSet.getString(1)); String functionSpecificName = getQualifiedName(resultSet.getString(2), resultSet.getString(3)); String functionType = resultSet.isNull(4) ? null : resultSet.getString(4); diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/AvroSchemaToDdlConverterTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/AvroSchemaToDdlConverterTest.java index 15c17c3142..2885836762 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/AvroSchemaToDdlConverterTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/AvroSchemaToDdlConverterTest.java @@ -982,7 +982,7 @@ public void pgUdfRemote() { equalToCompressingWhiteSpace( "CREATE FUNCTION \"UdfSchema\".\"Foo\"(\"arg0\" STRING, \"arg1\" STRING DEFAULT \"bar\")" + " RETURNS STRING VOLATILE LANGUAGE REMOTE" - + " AS $${\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\": 50}$$")); + + " AS '{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\": 50}'")); } @Test diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java index 3283147c6d..9c375ae404 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ExportPipelineIT.java @@ -263,13 +263,6 @@ private void 
testSpannerToGCSAvroBase( gcsClient.listArtifacts( "output/", Pattern.compile(String.format(".*/%s%s.*\\.avro.*", prefix, "Sequence2"))); - try { - Thread.sleep(1000 * 60 * 5); - } catch (InterruptedException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - assertThat(singersArtifacts).isNotEmpty(); assertThat(emptyArtifacts).isNotEmpty(); assertThat(udfRemoteArtifacts).isNotEmpty(); diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/DdlTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/DdlTest.java index 06ae927b70..480bf82c34 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/DdlTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/DdlTest.java @@ -1453,7 +1453,7 @@ public void pgUdfs() { + "CREATE FUNCTION \"Foo2\"(\"arg0\" TEXT, \"arg1\" TEXT DEFAULT 'bar')" + " RETURNS STRING SQL SECURITY INVOKER RETURN (SELECT 'bar')\n" + "CREATE FUNCTION \"Foo3\"(\"arg0\" BIGINT) RETURNS STRING VOLATILE LANGUAGE REMOTE" - + " AS $${\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\": 50}$$"; + + " AS '{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\": 50}'"; assertThat(ddl.prettyPrint(), equalToCompressingWhiteSpace(expectedDdlString)); List statements = ddl.statements(); @@ -1470,7 +1470,7 @@ public void pgUdfs() { statements.get(2), equalToCompressingWhiteSpace( "CREATE FUNCTION \"Foo3\"(\"arg0\" BIGINT) RETURNS STRING VOLATILE LANGUAGE REMOTE" - + " AS $${\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\": 50}$$")); + + " AS '{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\", \"max_batching_rows\": 50}'")); assertNotNull(ddl.hashCode()); assertThat( diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java index 
960679d401..3e2e4215ec 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java @@ -122,12 +122,12 @@ public void testPgRemoteUdf() { udf.toString(), equalToCompressingWhiteSpace( "CREATE FUNCTION \"foo\"(\"p1\" BIGINT) RETURNS TEXT VOLATILE LANGUAGE REMOTE" - + " AS $${\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/\\044myfunc\"}$$")); + + " AS '{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/\\044myfunc\"}'")); assertThat( udf.toBuilder().build().toString(), equalToCompressingWhiteSpace( "CREATE FUNCTION \"foo\"(\"p1\" BIGINT) RETURNS TEXT VOLATILE LANGUAGE REMOTE" - + " AS $${\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/\\044myfunc\"}$$")); + + " AS '{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/\\044myfunc\"}'")); } } From f16519ecd68084fb02d04e0221b81386bebd2567 Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Thu, 28 May 2026 11:12:17 -0700 Subject: [PATCH 14/19] More text fixes --- .../java/com/google/cloud/teleport/spanner/ddl/UdfTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java index 3e2e4215ec..90f1931ff0 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java @@ -115,19 +115,19 @@ public void testPgRemoteUdf() { .language("REMOTE") .addParameter(UdfParameter.parse("p1 BIGINT", "s1.foo", Dialect.POSTGRESQL)) .definition( - "{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/$myfunc\"}") + "{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/'myfunc\"}") .build(); assertThat( udf.toString(), equalToCompressingWhiteSpace( "CREATE FUNCTION \"foo\"(\"p1\" BIGINT) RETURNS TEXT VOLATILE 
LANGUAGE REMOTE" - + " AS '{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/\\044myfunc\"}'")); + + " AS '{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/''myfunc\"}'")); assertThat( udf.toBuilder().build().toString(), equalToCompressingWhiteSpace( "CREATE FUNCTION \"foo\"(\"p1\" BIGINT) RETURNS TEXT VOLATILE LANGUAGE REMOTE" - + " AS '{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/\\044myfunc\"}'")); + + " AS '{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/''myfunc\"}'")); } } From 0a590adbeee8fd15bf1607406272c1c94eac0732 Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Mon, 1 Jun 2026 10:18:48 -0700 Subject: [PATCH 15/19] Respond to comments. --- .../it/gcp/dataflow/DirectRunnerClient.java | 2 +- pom.xml | 2 +- .../cloud/teleport/spanner/ddl/Udf.java | 28 +- .../cloud/teleport/spanner/ddl/Udf.java.orig | 318 ---- .../teleport/spanner/CopyDbTest.java.orig | 1362 ----------------- .../teleport/spanner/ImportPipelineIT.java | 7 + .../ddl/InformationSchemaScannerIT.java | 6 +- .../spanner/ddl/RandomDdlGenerator.java.orig | 751 --------- .../cloud/teleport/spanner/ddl/UdfTest.java | 4 + 9 files changed, 24 insertions(+), 2456 deletions(-) delete mode 100644 v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java.orig delete mode 100644 v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java.orig delete mode 100644 v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java.orig diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java index efce55e16a..8041dc7a80 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java @@ -265,7 +265,7 @@ public void cancel() 
{ currentJob.setCurrentState(JobState.CANCELLED.toString()); try { - this.stop(); + this.interrupt(); } catch (Exception e) { LOG.warn("Error cancelling job", e); } diff --git a/pom.xml b/pom.xml index 254447f187..1d3c5d8bbf 100644 --- a/pom.xml +++ b/pom.xml @@ -48,7 +48,7 @@ 3.6.2 3.4.0 1.7.1 - 0.8.13 + 0.8.14 2.73.0 diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java index 36e5447230..8d156b4c8f 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java @@ -94,15 +94,10 @@ public void prettyPrint(Appendable appendable) throws IOException { // For now, we infer it from the language. if (language() != null && language().equalsIgnoreCase("REMOTE")) { String determinism; - switch (dialect()) { - case GOOGLE_STANDARD_SQL: + if (dialect() == Dialect.GOOGLE_STANDARD_SQL) { determinism = "NOT DETERMINISTIC"; - break; - case POSTGRESQL: - determinism = "VOLATILE"; - break; - default: - throw new IllegalArgumentException(String.format("Unrecognized Dialect: %s.", dialect())); + } else { + determinism = "VOLATILE"; } appendable.append(" ").append(determinism); } @@ -122,24 +117,18 @@ public void prettyPrint(Appendable appendable) throws IOException { } if (!options().isEmpty()) { - switch (dialect()) { - case GOOGLE_STANDARD_SQL: + if (dialect() == Dialect.GOOGLE_STANDARD_SQL) { appendable.append(" OPTIONS (").append(String.join(", ", options())).append(")"); - break; - case POSTGRESQL: + } else { throw new IllegalArgumentException( "Options are not supported in PostgreSQL dialect for non-remote UDFs."); - default: - throw new IllegalArgumentException(String.format("Unrecognized Dialect: %s.", dialect())); } } if (definition() != null && !definition().isEmpty()) { - switch (dialect()) { - case GOOGLE_STANDARD_SQL: + if (dialect() == Dialect.GOOGLE_STANDARD_SQL) { appendable.append(" AS 
(").append(definition()).append(")"); - break; - case POSTGRESQL: + } else { if (language() == null || language().isEmpty() || "SQL".equalsIgnoreCase(language())) { appendable.append(" RETURN ").append(definition()); } else { @@ -149,9 +138,6 @@ public void prettyPrint(Appendable appendable) throws IOException { .append(PG_DEFINITION_ESCAPER.escape(definition())) .append("'"); } - break; - default: - throw new IllegalArgumentException(String.format("Unrecognized Dialect: %s.", dialect())); } } } diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java.orig b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java.orig deleted file mode 100644 index 9f62342e20..0000000000 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java.orig +++ /dev/null @@ -1,318 +0,0 @@ -/* - * Copyright (C) 2021 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package com.google.cloud.teleport.spanner.ddl; - -import static com.google.cloud.teleport.spanner.common.NameUtils.quoteIdentifier; - -import com.google.auto.value.AutoValue; -import com.google.cloud.spanner.Dialect; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Maps; -import com.google.common.escape.Escaper; -import com.google.common.escape.Escapers; -import java.io.IOException; -import java.io.Serializable; -import java.util.LinkedHashMap; -import javax.annotation.Nullable; - -/** Cloud Spanner user-defined function. 
*/ -@AutoValue -public abstract class Udf implements Serializable { - - private static final long serialVersionUID = 1L; - - // Remote function body is printed using $$ strings, which are - // unlikely but possible to be present in the function definition. - // https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-ESCAPE -<<<<<<< HEAD - public static final Escaper PG_REMOTE_UDF_BODY_ESCAPER = -======= - public static final Escaper PG_DEFINITION_ESCAPER = ->>>>>>> ff46924a7 (Add support for REMOTE UDFs.) - Escapers.builder().addEscape('$', "\\044").build(); - - /** The access rights used by the UDF for underlying data: invoker-rights or definer-rights. */ - public enum SqlSecurity { - INVOKER, - DEFINER, - } - - /** - * The specific name uniquely identifies the UDF even if its name is overloaded. It can be used to - * join UDF metadata from multiple sources (e.g. INFORMATION_SCHEMA.ROUTINES and - * INFORMATION_SCHEMA.PARAMETERS). The specific name is not guaranteed to match the user-specified - * name() or be the same after export and import. 
- */ - public abstract String specificName(); - - @Nullable - public abstract String name(); - - public abstract Dialect dialect(); - - @Nullable - public abstract String type(); - - @Nullable - public abstract String definition(); - - @Nullable - public abstract String language(); - - @Nullable - public abstract SqlSecurity security(); - - public abstract ImmutableList parameters(); - - public abstract ImmutableList options(); - - public void prettyPrint(Appendable appendable) throws IOException { - appendable.append("CREATE FUNCTION ").append(quoteIdentifier(name(), dialect())); - appendable.append("("); - boolean first = true; - for (UdfParameter parameter : parameters()) { - if (!first) { - appendable.append(", "); - } - first = false; - appendable.append(parameter.prettyPrint()); - } - appendable.append(")"); - if (type() != null) { - appendable.append(" RETURNS ").append(type()); - } - - // Determinism should be added to INFORMATION_SCHEMA.ROUTINES. - // For now, we infer it from the language. - if (language() != null && language().equalsIgnoreCase("REMOTE")) { - String determinism; - switch (dialect()) { - case GOOGLE_STANDARD_SQL: - determinism = "NOT DETERMINISTIC"; - break; - case POSTGRESQL: - determinism = "VOLATILE"; - break; - default: - throw new IllegalArgumentException(String.format("Unrecognized Dialect: %s.", dialect())); - } - appendable.append(" ").append(determinism); - } - - if (language() != null && !language().isEmpty()) { - // GSQL does not accept LANGUAGE SQL even though it reports it. - if (dialect() != Dialect.GOOGLE_STANDARD_SQL || !language().equalsIgnoreCase("SQL")) { - appendable.append(" LANGUAGE ").append(language()); - } - } - - if (security() != null) { - // Remote UDF don't use SQL SECURITY, but it is marked NOT NULL in IS. 
- if (!"REMOTE".equalsIgnoreCase(language())) { - appendable.append(" SQL SECURITY ").append(security().toString()); - } - } - - if (!options().isEmpty()) { - switch (dialect()) { - case GOOGLE_STANDARD_SQL: - appendable.append(" OPTIONS (").append(String.join(", ", options())).append(")"); - break; - case POSTGRESQL: - throw new IllegalArgumentException( - "Options are not supported in PostgreSQL dialect for UDFs."); - default: - throw new IllegalArgumentException(String.format("Unrecognized Dialect: %s.", dialect())); - } - } - - if (definition() != null && !definition().isEmpty()) { - switch (dialect()) { - case GOOGLE_STANDARD_SQL: - appendable.append(" AS (").append(definition()).append(")"); - break; - case POSTGRESQL: - if (language() == null || language().isEmpty() || "SQL".equalsIgnoreCase(language())) { - appendable.append(" RETURN ").append(definition()); - } else { -<<<<<<< HEAD - // Other langugges use AS definition instead of sql body. - appendable - .append(" AS $$") - .append(PG_REMOTE_UDF_BODY_ESCAPER.escape(definition())) -======= - // Other languages use AS definition instead of sql body. - appendable - .append(" AS $$") - .append(PG_DEFINITION_ESCAPER.escape(definition())) ->>>>>>> ff46924a7 (Add support for REMOTE UDFs.) 
- .append("$$"); - } - break; - default: - throw new IllegalArgumentException(String.format("Unrecognized Dialect: %s.", dialect())); - } - } - } - - public String prettyPrint() { - StringBuilder sb = new StringBuilder(); - try { - prettyPrint(sb); - } catch (IOException e) { - throw new RuntimeException(e); - } - return sb.toString(); - } - - @Override - public String toString() { - return prettyPrint(); - } - - public abstract Builder autoToBuilder(); - - public Builder toBuilder() { - Builder builder = autoToBuilder().specificName(specificName()).dialect(dialect()); - if (name() != null) { - builder.name(name()); - } - if (type() != null) { - builder.type(type()); - } - if (language() != null) { - builder.language(language()); - } - builder.options(options()); - if (definition() != null) { - builder.definition(definition()); - } - if (security() != null) { - builder.security(security()); - } - for (UdfParameter parameter : parameters()) { - builder.addParameter(parameter); - } - return builder; - } - - public static Builder builder(Dialect dialect) { - return new AutoValue_Udf.Builder() - .dialect(dialect) - .parameters(ImmutableList.of()) - .options(ImmutableList.of()); - } - - public static Builder builder() { - return builder(Dialect.GOOGLE_STANDARD_SQL); - } - - /** A builder for {@link Udf}. 
*/ - @AutoValue.Builder - public abstract static class Builder { - private Ddl.Builder ddlBuilder; - private LinkedHashMap parametersMap = Maps.newLinkedHashMap(); - private ImmutableList.Builder parameters = ImmutableList.builder(); - - public Builder ddlBuilder(Ddl.Builder ddlBuilder) { - this.ddlBuilder = ddlBuilder; - return this; - } - - public abstract Builder specificName(String specificName); - - public abstract String specificName(); - - public abstract Builder name(String name); - - public abstract String name(); - - public abstract Builder dialect(Dialect dialect); - - public abstract Dialect dialect(); - - public abstract Builder type(String type); - - public abstract String type(); - - public abstract Builder definition(String definition); - - public abstract String definition(); - - public abstract Builder language(String language); - - public abstract String language(); - - public abstract Builder security(SqlSecurity rights); - - public abstract SqlSecurity security(); - - public abstract Builder options(ImmutableList options); - - public abstract ImmutableList options(); - - public abstract Builder parameters(ImmutableList parameters); - - public ImmutableList parameters() { - return parameters.build(); - } - - public UdfParameter.Builder parameter(String name) { - UdfParameter parameter = parametersMap.get(name.toLowerCase()); - if (parameter != null) { - if (!parameter.functionSpecificName().equals(specificName())) { - throw new IllegalArgumentException( - String.format( - "Parameter %s has a different function specific name %s than the user-defined" - + " function %s.", - name, parameter.functionSpecificName(), specificName())); - } - return parameter.toBuilder().udfBuilder(this); - } - return UdfParameter.builder(dialect()) - .name(name) - .functionSpecificName(specificName()) - .udfBuilder(this); - } - - public Builder addParameter(UdfParameter parameter) { - parameters.add(parameter); - parametersMap.put(parameter.name().toLowerCase(), 
parameter); - return this; - } - - abstract Udf autoBuild(); - - public Udf build() { - return new AutoValue_Udf.Builder() - .specificName(specificName()) - .name(name()) - .dialect(dialect()) - .type(type()) - .definition(definition()) - .language(language()) - .security(security()) - .options(options()) - .parameters(ImmutableList.copyOf(parameters())) - .autoBuild(); - } - - public Ddl.Builder endUdf() { - ddlBuilder.addUdf(build()); - return ddlBuilder; - } - } -} diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java.orig b/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java.orig deleted file mode 100644 index 68ee56788f..0000000000 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/CopyDbTest.java.orig +++ /dev/null @@ -1,1362 +0,0 @@ -/* - * Copyright (C) 2018 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ -package com.google.cloud.teleport.spanner; - -import static org.hamcrest.Matchers.is; -import static org.hamcrest.text.IsEqualCompressingWhiteSpace.equalToCompressingWhiteSpace; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertThat; - -import com.google.cloud.spanner.DatabaseClient; -import com.google.cloud.spanner.Dialect; -import com.google.cloud.spanner.ReadOnlyTransaction; -import com.google.cloud.teleport.spanner.common.Type; -import com.google.cloud.teleport.spanner.common.Type.StructField; -import com.google.cloud.teleport.spanner.ddl.Ddl; -import com.google.cloud.teleport.spanner.ddl.InformationSchemaScanner; -import com.google.cloud.teleport.spanner.ddl.RandomDdlGenerator; -import com.google.cloud.teleport.spanner.ddl.Udf.SqlSecurity; -import com.google.cloud.teleport.spanner.ddl.UdfParameter; -import com.google.cloud.teleport.spanner.proto.ExportProtos.Export; -import com.google.cloud.teleport.spanner.spannerio.SpannerConfig; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.options.ValueProvider; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.values.PCollection; -import org.junit.After; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.rules.TemporaryFolder; - -/** - * An end to end test that exports and imports a database and verifies that the content is - * identical. Additionally, this test verifies the behavior of table level export. This requires an - * active GCP project with a Spanner instance. Hence this test can only be run locally with a - * project set up using 'gcloud config'. 
- */ -@Category(IntegrationTest.class) -public class CopyDbTest { - private final Timestamp timestamp = new Timestamp(System.currentTimeMillis()); - private final long numericTime = timestamp.getTime(); - private final String sourceDb = "copydb-source" + Long.toString(numericTime); - private final String destinationDb = "copydb-dest" + Long.toString(numericTime); - private final String destDbPrefix = "import"; - - @Rule public final transient TestPipeline exportPipeline = TestPipeline.create(); - @Rule public final transient TestPipeline importPipeline = TestPipeline.create(); - @Rule public final transient TestPipeline comparePipeline = TestPipeline.create(); - @Rule public final TemporaryFolder tmpDir = new TemporaryFolder(); - @Rule public final SpannerServerResource spannerServer = new SpannerServerResource(); - - @After - public void teardown() { - spannerServer.dropDatabase(sourceDb); - spannerServer.dropDatabase(destinationDb); - } - - private void createAndPopulate(Ddl ddl, int numBatches) throws Exception { - switch (ddl.dialect()) { - case GOOGLE_STANDARD_SQL: - spannerServer.createDatabase(sourceDb, ddl.statements()); - spannerServer.createDatabase(destinationDb, Collections.emptyList()); - break; - case POSTGRESQL: - spannerServer.createPgDatabase(sourceDb, ddl.statements()); - spannerServer.createPgDatabase(destinationDb, Collections.emptyList()); - break; - default: - throw new IllegalArgumentException("Unrecognized dialect: " + ddl.dialect()); - } - spannerServer.populateRandomData(sourceDb, ddl, numBatches); - } - - @Test - public void allTypesSchema() throws Exception { - // spotless:off - Ddl ddl = Ddl.builder() - .createTable("Users") - .column("first_name").string().max().endColumn() - .column("last_name").string().size(5).endColumn() - .column("age").int64().endColumn() - .primaryKey().asc("first_name").desc("last_name").end() - .endTable() - .createTable("AllTYPES") - .column("first_name").string().max().endColumn() - 
.column("last_name").string().size(5).endColumn() - .column("id").int64().notNull().endColumn() - .column("bool_field").bool().endColumn() - .column("int64_field").int64().endColumn() - .column("float32_field").float32().endColumn() - .column("float64_field").float64().endColumn() - .column("string_field").string().max().endColumn() - .column("bytes_field").bytes().max().endColumn() - .column("timestamp_field").timestamp().endColumn() - .column("date_field").date().endColumn() - .column("arr_bool_field").type(Type.array(Type.bool())).endColumn() - .column("arr_int64_field").type(Type.array(Type.int64())).endColumn() - .column("arr_float32_field").type(Type.array(Type.float32())).endColumn() - .column("arr_float64_field").type(Type.array(Type.float64())).endColumn() - .column("arr_string_field").type(Type.array(Type.string())).max().endColumn() - .column("arr_bytes_field").type(Type.array(Type.bytes())).max().endColumn() - .column("arr_timestamp_field").type(Type.array(Type.timestamp())).endColumn() - .column("arr_date_field").type(Type.array(Type.date())).endColumn() - .primaryKey().asc("first_name").desc("last_name").asc("id").end() - .interleaveInParent("Users") - .onDeleteCascade() - .endTable() - .build(); - // spotless:on - createAndPopulate(ddl, 100); - runTest(); - } - - @Test - public void allPgTypesSchema() throws Exception { - // spotless:off - Ddl ddl = - Ddl.builder(Dialect.POSTGRESQL) - .createTable("Users") - .column("first_name") - .pgVarchar() - .max() - .endColumn() - .column("last_name") - .pgVarchar() - .size(5) - .endColumn() - .column("age") - .pgInt8() - .endColumn() - .primaryKey() - .asc("first_name") - .asc("last_name") - .end() - .endTable() - .createTable("AllTYPES") - .column("id") - .pgInt8() - .notNull() - .endColumn() - .column("first_name") - .pgVarchar() - .max() - .endColumn() - .column("last_name") - .pgVarchar() - .size(5) - .endColumn() - .column("bool_field") - .pgBool() - .endColumn() - .column("int_field") - .pgInt8() - 
.endColumn() - .column("float32_field") - .pgFloat4() - .endColumn() - .column("float64_field") - .pgFloat8() - .endColumn() - .column("string_field") - .pgText() - .endColumn() - .column("bytes_field") - .pgBytea() - .endColumn() - .column("timestamp_field") - .pgTimestamptz() - .endColumn() - .column("numeric_field") - .pgNumeric() - .endColumn() - .column("date_field") - .pgDate() - .endColumn() - .column("arr_bool_field") - .type(Type.pgArray(Type.pgBool())) - .endColumn() - .column("arr_int_field") - .type(Type.pgArray(Type.pgInt8())) - .endColumn() - .column("arr_float32_field") - .type(Type.pgArray(Type.pgFloat4())) - .endColumn() - .column("arr_float64_field") - .type(Type.pgArray(Type.pgFloat8())) - .endColumn() - .column("arr_string_field") - .type(Type.pgArray(Type.pgVarchar())) - .max() - .endColumn() - .column("arr_bytes_field") - .type(Type.pgArray(Type.pgBytea())) - .max() - .endColumn() - .column("arr_timestamp_field") - .type(Type.pgArray(Type.pgTimestamptz())) - .endColumn() - .column("arr_date_field") - .type(Type.pgArray(Type.pgDate())) - .endColumn() - .column("arr_numeric_field") - .type(Type.pgArray(Type.pgNumeric())) - .endColumn() - .primaryKey() - .asc("first_name") - .asc("last_name") - .asc("id") - .asc("float64_field") - .end() - .interleaveInParent("Users") - .onDeleteCascade() - .endTable() - .build(); - // spotless:on - createAndPopulate(ddl, 100); - runTest(Dialect.POSTGRESQL); - } - - @Test - public void emptyTables() throws Exception { - // spotless:off - Ddl ddl = Ddl.builder() - .createTable("Users") - .column("first_name").string().max().endColumn() - .column("last_name").string().size(5).endColumn() - .column("age").int64().endColumn() - .primaryKey().asc("first_name").desc("last_name").end() - .endTable() - .createTable("AllTYPES") - .column("first_name").string().max().endColumn() - .column("last_name").string().size(5).endColumn() - .column("id").int64().notNull().endColumn() - .column("bool_field").bool().endColumn() - 
.column("int64_field").int64().endColumn() - .column("float32_field").float32().endColumn() - .column("float64_field").float64().endColumn() - .column("string_field").string().max().endColumn() - .column("bytes_field").bytes().max().endColumn() - .column("timestamp_field").timestamp().endColumn() - .column("date_field").date().endColumn() - .column("arr_bool_field").type(Type.array(Type.bool())).endColumn() - .column("arr_int64_field").type(Type.array(Type.int64())).endColumn() - .column("arr_float32_field").type(Type.array(Type.float32())).endColumn() - .column("arr_float64_field").type(Type.array(Type.float64())).endColumn() - .column("arr_string_field").type(Type.array(Type.string())).max().endColumn() - .column("arr_bytes_field").type(Type.array(Type.bytes())).max().endColumn() - .column("arr_timestamp_field").type(Type.array(Type.timestamp())).endColumn() - .column("arr_date_field").type(Type.array(Type.date())).endColumn() - .primaryKey().asc("first_name").desc("last_name").asc("id").end() - .interleaveInParent("Users") - .endTable() - .build(); - createAndPopulate(ddl, 10); - - // Add empty tables. 
- Ddl emptyTables = Ddl.builder() - .createTable("empty_one") - .column("first").string().max().endColumn() - .column("second").string().size(5).endColumn() - .column("value").int64().endColumn() - .primaryKey().asc("first").desc("second").end() - .endTable() - .createTable("empty_two") - .column("first").string().max().endColumn() - .column("second").string().size(5).endColumn() - .column("value").int64().endColumn() - .column("another_value").int64().endColumn() - .primaryKey().asc("first").end() - .endTable() - .build(); - // spotless:on - spannerServer.updateDatabase(sourceDb, emptyTables.createTableStatements()); - runTest(); - } - - @Test - public void emptyPgTables() throws Exception { - // spotless:off - Ddl ddl = - Ddl.builder(Dialect.POSTGRESQL) - .createTable("Users") - .column("first_name") - .pgVarchar() - .max() - .endColumn() - .column("last_name").pgVarchar().size(5).endColumn() - .column("age") - .pgInt8() - .endColumn() - .primaryKey() - .asc("first_name") - .asc("last_name") - .end() - .endTable() - .createTable("AllTYPES") - .column("first_name") - .pgVarchar() - .max() - .endColumn() - .column("last_name").pgVarchar().size(5).endColumn() - .column("id") - .pgInt8() - .notNull() - .endColumn() - .column("bool_field") - .pgBool() - .endColumn() - .column("int_field") - .pgInt8() - .endColumn() - .column("float32_field") - .pgFloat4() - .endColumn() - .column("float64_field") - .pgFloat8() - .endColumn() - .column("string_field") - .pgText() - .endColumn() - .column("bytes_field") - .pgBytea() - .endColumn() - .column("timestamp_field") - .pgTimestamptz() - .endColumn() - .column("numeric_field") - .pgNumeric() - .endColumn() - .primaryKey() - .asc("first_name") - .asc("last_name") - .asc("id") - .end() - .interleaveInParent("Users") - .onDeleteCascade() - .endTable() - .build(); - createAndPopulate(ddl, 10); - - // Add empty tables. 
- Ddl emptyTables = - Ddl.builder(Dialect.POSTGRESQL) - .createTable("empty_one") - .column("first") - .pgVarchar() - .max() - .endColumn() - .column("second").pgVarchar().size(5).endColumn() - .column("value") - .pgInt8() - .endColumn() - .primaryKey() - .asc("first") - .asc("second") - .end() - .endTable() - .createTable("empty_two") - .column("first") - .pgVarchar() - .max() - .endColumn() - .column("second").pgVarchar().size(5).endColumn() - .column("value") - .pgInt8() - .endColumn() - .column("another_value") - .pgInt8() - .endColumn() - .primaryKey() - .asc("first") - .end() - .endTable() - .build(); - // spotless:on - spannerServer.updateDatabase(sourceDb, emptyTables.createTableStatements()); - runTest(Dialect.POSTGRESQL); - } - - @Test - public void allEmptyTables() throws Exception { - // spotless:off - Ddl ddl = Ddl.builder() - .createTable("Users") - .column("first_name").string().max().endColumn() - .column("last_name").string().size(5).endColumn() - .column("age").int64().endColumn() - .primaryKey().asc("first_name").desc("last_name").end() - .endTable() - .createTable("AllTYPES") - .column("first_name").string().max().endColumn() - .column("last_name").string().size(5).endColumn() - .column("id").int64().notNull().endColumn() - .column("bool_field").bool().endColumn() - .column("int64_field").int64().endColumn() - .column("float32_field").float32().endColumn() - .column("float64_field").float64().endColumn() - .column("string_field").string().max().endColumn() - .column("bytes_field").bytes().max().endColumn() - .column("timestamp_field").timestamp().endColumn() - .column("date_field").date().endColumn() - .column("arr_bool_field").type(Type.array(Type.bool())).endColumn() - .column("arr_int64_field").type(Type.array(Type.int64())).endColumn() - .column("arr_float32_field").type(Type.array(Type.float32())).endColumn() - .column("arr_float64_field").type(Type.array(Type.float64())).endColumn() - 
.column("arr_string_field").type(Type.array(Type.string())).max().endColumn() - .column("arr_bytes_field").type(Type.array(Type.bytes())).max().endColumn() - .column("arr_timestamp_field").type(Type.array(Type.timestamp())).endColumn() - .column("arr_date_field").type(Type.array(Type.date())).endColumn() - .primaryKey().asc("first_name").desc("last_name").asc("id").end() - .interleaveInParent("Users") - .endTable() - .build(); - // spotless:on - createAndPopulate(ddl, 0); - runTest(); - } - - @Test - public void allEmptyPgTables() throws Exception { - // spotless:off - Ddl ddl = - Ddl.builder(Dialect.POSTGRESQL) - .createTable("Users") - .column("first_name") - .pgVarchar() - .max() - .endColumn() - .column("last_name") - .pgVarchar() - .size(5) - .endColumn() - .column("age") - .pgInt8() - .endColumn() - .primaryKey() - .asc("first_name") - .asc("last_name") - .end() - .endTable() - .createTable("AllTYPES") - .column("first_name") - .pgVarchar() - .max() - .endColumn() - .column("last_name") - .pgVarchar() - .size(5) - .endColumn() - .column("id") - .pgInt8() - .notNull() - .endColumn() - .column("bool_field") - .pgBool() - .endColumn() - .column("int_field") - .pgInt8() - .endColumn() - .column("float32_field") - .pgFloat4() - .endColumn() - .column("float64_field") - .pgFloat8() - .endColumn() - .column("string_field") - .pgText() - .endColumn() - .column("bytes_field") - .pgBytea() - .endColumn() - .column("timestamp_field") - .pgTimestamptz() - .endColumn() - .column("numeric_field") - .pgNumeric() - .endColumn() - .primaryKey() - .asc("first_name") - .asc("last_name") - .asc("id") - .end() - .interleaveInParent("Users") - .onDeleteCascade() - .endTable() - .build(); - // spotless:on - createAndPopulate(ddl, 0); - runTest(Dialect.POSTGRESQL); - } - - @Test - public void databaseOptions() throws Exception { - Ddl.Builder ddlBuilder = Ddl.builder(); - // Table Content - // spotless:off - ddlBuilder.createTable("Users") - 
.column("first_name").string().max().endColumn() - .column("last_name").string().size(5).endColumn() - .column("age").int64().endColumn() - .primaryKey().asc("first_name").desc("last_name").end() - .endTable() - .createTable("EmploymentData") - .column("first_name").string().max().endColumn() - .column("last_name").string().size(5).endColumn() - .column("id").int64().notNull().endColumn() - .column("age").int64().endColumn() - .column("address").string().max().endColumn() - .primaryKey().asc("first_name").desc("last_name").asc("id").end() - .interleaveInParent("Users") - .onDeleteCascade() - .endTable(); - // spotless:on - // Allowed and well-formed database option - List dbOptionList = new ArrayList<>(); - dbOptionList.add( - Export.DatabaseOption.newBuilder() - .setOptionName("version_retention_period") - .setOptionValue("\"6d\"") - .build()); - // Disallowed database option - dbOptionList.add( - Export.DatabaseOption.newBuilder() - .setOptionName("optimizer_version") - .setOptionValue("1") - .build()); - // Malformed database option - dbOptionList.add( - Export.DatabaseOption.newBuilder() - .setOptionName("123version") - .setOptionValue("xyz") - .build()); - ddlBuilder.mergeDatabaseOptions(dbOptionList); - Ddl ddl = ddlBuilder.build(); - createAndPopulate(ddl, 100); - runTest(); - Ddl destinationDdl = readDdl(destinationDb, Dialect.GOOGLE_STANDARD_SQL); - List destDbOptions = destinationDdl.setOptionsStatements(destinationDb); - assertThat(destDbOptions.size(), is(1)); - assertThat( - destDbOptions.get(0), - is( - "ALTER DATABASE `" - + destinationDb - + "` SET OPTIONS ( version_retention_period = \"6d\" )")); - } - - @Test - public void pgDatabaseOptions() throws Exception { - Ddl.Builder ddlBuilder = Ddl.builder(Dialect.POSTGRESQL); - // Table Content - // spotless:off - ddlBuilder - .createTable("Users") - .column("first_name") - .pgVarchar() - .max() - .endColumn() - .column("last_name").pgVarchar().size(5).endColumn() - .column("age") - .pgInt8() - 
.endColumn() - .primaryKey() - .asc("first_name") - .asc("last_name") - .end() - .endTable() - .createTable("EmploymentData") - .column("first_name") - .pgVarchar() - .max() - .endColumn() - .column("last_name").pgVarchar().size(5).endColumn() - .column("id") - .pgInt8() - .notNull() - .endColumn() - .column("age") - .pgInt8() - .endColumn() - .column("address") - .pgVarchar() - .max() - .endColumn() - .primaryKey() - .asc("first_name") - .asc("last_name") - .asc("id") - .end() - .interleaveInParent("Users") - .onDeleteCascade() - .endTable(); - // spotless:on - // Allowed and well-formed database option - List dbOptionList = new ArrayList<>(); - dbOptionList.add( - Export.DatabaseOption.newBuilder() - .setOptionName("version_retention_period") - .setOptionValue("'6d'") - .build()); - // Disallowed database option - dbOptionList.add( - Export.DatabaseOption.newBuilder() - .setOptionName("optimizer_version") - .setOptionValue("1") - .build()); - // Malformed database option - dbOptionList.add( - Export.DatabaseOption.newBuilder() - .setOptionName("123version") - .setOptionValue("xyz") - .build()); - ddlBuilder.mergeDatabaseOptions(dbOptionList); - Ddl ddl = ddlBuilder.build(); - createAndPopulate(ddl, 100); - runTest(Dialect.POSTGRESQL); - Ddl destinationDdl = readDdl(destinationDb, Dialect.POSTGRESQL); - List destDbOptions = destinationDdl.setOptionsStatements(destinationDb); - assertThat(destDbOptions.size(), is(1)); - assertThat( - destDbOptions.get(0), - is("ALTER DATABASE \"" + destinationDb + "\" SET spanner.version_retention_period = '6d'")); - } - - @Test - public void emptyDb() throws Exception { - Ddl ddl = Ddl.builder().build(); - createAndPopulate(ddl, 0); - runTest(); - } - - @Test - public void emptyPgDb() throws Exception { - Ddl ddl = Ddl.builder(Dialect.POSTGRESQL).build(); - createAndPopulate(ddl, 0); - runTest(Dialect.POSTGRESQL); - } - - @Test - public void foreignKeys() throws Exception { - // spotless:off - Ddl ddl = Ddl.builder() - 
.createTable("Ref") - .column("id1").int64().endColumn() - .column("id2").int64().endColumn() - .primaryKey().asc("id1").asc("id2").end() - .endTable() - .createTable("Child") - .column("id1").int64().endColumn() - .column("id2").int64().endColumn() - .column("id3").int64().endColumn() - .primaryKey().asc("id1").asc("id2").asc("id3").end() - .interleaveInParent("Ref") - // Add some foreign keys that are guaranteed to be satisfied due to interleaving - .foreignKeys(ImmutableList.of( - "ALTER TABLE `Child` ADD CONSTRAINT `fk1` FOREIGN KEY (`id1`) REFERENCES `Ref` (`id1`)", - "ALTER TABLE `Child` ADD CONSTRAINT `fk2` FOREIGN KEY (`id2`) REFERENCES `Ref` (`id2`)", - "ALTER TABLE `Child` ADD CONSTRAINT `fk3` FOREIGN KEY (`id2`) REFERENCES `Ref` (`id2`)", - "ALTER TABLE `Child` ADD CONSTRAINT `fk4` FOREIGN KEY (`id2`, `id1`) REFERENCES `Ref` (`id2`, `id1`)", - "ALTER TABLE `Child` ADD CONSTRAINT `fk5` FOREIGN KEY (`id2`) REFERENCES `Ref` (`id2`) NOT ENFORCED", - "ALTER TABLE `Child` ADD CONSTRAINT `fk6` FOREIGN KEY (`id2`) REFERENCES `Ref` (`id2`) ENFORCED")) - .endTable() - .build(); - // spotless:on - - createAndPopulate(ddl, 100); - runTest(); - } - - @Test - public void pgForeignKeys() throws Exception { - // spotless:off - Ddl ddl = - Ddl.builder(Dialect.POSTGRESQL) - .createTable("Ref") - .column("id1") - .pgInt8() - .endColumn() - .column("id2") - .pgInt8() - .endColumn() - .primaryKey() - .asc("id1") - .asc("id2") - .end() - .endTable() - .createTable("Child") - .column("id1") - .pgInt8() - .endColumn() - .column("id2") - .pgInt8() - .endColumn() - .column("id3") - .pgInt8() - .endColumn() - .primaryKey() - .asc("id1") - .asc("id2") - .asc("id3") - .end() - .interleaveInParent("Ref") - // Add some foreign keys that are guaranteed to be satisfied due to interleaving - .foreignKeys( - ImmutableList.of( - "ALTER TABLE \"Child\" ADD CONSTRAINT \"fk1\" FOREIGN KEY (\"id1\") REFERENCES" - + " \"Ref\" (\"id1\")", - "ALTER TABLE \"Child\" ADD CONSTRAINT \"fk2\" FOREIGN 
KEY (\"id2\") REFERENCES" - + " \"Ref\" (\"id2\")", - "ALTER TABLE \"Child\" ADD CONSTRAINT \"fk3\" FOREIGN KEY (\"id2\") REFERENCES" - + " \"Ref\" (\"id2\")", - "ALTER TABLE \"Child\" ADD CONSTRAINT \"fk4\" FOREIGN KEY (\"id2\", \"id1\") " - + "REFERENCES \"Ref\" (\"id2\", \"id1\")")) - .endTable() - .build(); - // spotless:on - - createAndPopulate(ddl, 100); - runTest(Dialect.POSTGRESQL); - } - - // TODO: enable this test once CHECK constraints are enabled - // @Test - public void checkConstraints() throws Exception { - // spotless:off - Ddl ddl = Ddl.builder() - .createTable("T") - .column("id").int64().endColumn() - .column("A").int64().endColumn() - .primaryKey().asc("id").end() - .checkConstraints(ImmutableList.of( - "CONSTRAINT `ck` CHECK(TO_HEX(SHA1(CAST(A AS STRING))) <= '~')")) - .endTable().build(); - // spotless:on - - createAndPopulate(ddl, 100); - runTest(); - } - - @Test - public void pgCheckConstraints() throws Exception { - // spotless:off - Ddl ddl = - Ddl.builder(Dialect.POSTGRESQL) - .createTable("T") - .column("id") - .pgInt8() - .endColumn() - .column("A") - .pgInt8() - .endColumn() - .primaryKey() - .asc("id") - .end() - .checkConstraints( - ImmutableList.of( - "CONSTRAINT \"ck\" CHECK(LENGTH(CAST(\"A\" AS VARCHAR)) >= '0'::bigint)")) - .endTable() - .build(); - // spotless:on - - createAndPopulate(ddl, 100); - runTest(Dialect.POSTGRESQL); - } - - @Test - public void models() throws Exception { - // spotless:off - Ddl ddl = - Ddl.builder() - .createModel("Iris") - .remote(true) - .options(ImmutableList.of( - "endpoint=\"//aiplatform.googleapis.com/projects/span-cloud-testing/locations/us-central1/endpoints/4608339105032437760\"")) - .inputColumn("f1").type(Type.float64()).size(-1).endInputColumn() - .inputColumn("f2").type(Type.float64()).size(-1).endInputColumn() - .inputColumn("f3").type(Type.float64()).size(-1).endInputColumn() - .inputColumn("f4").type(Type.float64()).size(-1).endInputColumn() - 
.outputColumn("classes").type(Type.array(Type.string())).size(-1).endOutputColumn() - .outputColumn("scores").type(Type.array(Type.float64())).size(-1).endOutputColumn() - .endModel() - .createModel("TextEmbeddingGecko") - .remote(true) - .options(ImmutableList.of( - "endpoint=\"//aiplatform.googleapis.com/projects/span-cloud-testing/locations/us-central1/publishers/google/models/textembedding-gecko\"")) - .inputColumn("content").type(Type.string()).size(-1).endInputColumn() - .outputColumn("embeddings").type(Type.struct( - StructField.of("statistics", Type.struct(StructField.of("truncated", Type.bool()), - StructField.of("token_count", Type.float64()))), - StructField.of("values", Type.array(Type.float64())))).size(-1).endOutputColumn() - .endModel() - .build(); - // spotless:on - - createAndPopulate(ddl, 0); - runTest(); - } - - @Test - public void changeStreams() throws Exception { - Ddl ddl = - Ddl.builder() - .createTable("T1") - .endTable() - .createTable("T2") - .column("key") - .int64() - .endColumn() - .column("c1") - .int64() - .endColumn() - .column("c2") - .string() - .max() - .endColumn() - .primaryKey() - .asc("key") - .end() - .endTable() - .createTable("T3") - .endTable() - .createChangeStream("ChangeStreamAll") - .forClause("FOR ALL") - .options( - ImmutableList.of( - "retention_period=\"7d\"", "value_capture_type=\"OLD_AND_NEW_VALUES\"")) - .endChangeStream() - .createChangeStream("ChangeStreamEmpty") - .endChangeStream() - .createChangeStream("ChangeStreamTableColumns") - .forClause("FOR `T1`, `T2`(`c1`, `c2`), `T3`()") - .endChangeStream() - .build(); - createAndPopulate(ddl, 0); - runTest(); - } - - // TODO: Enable the test once change streams are supported in PG. 
- // @Test - public void pgChangeStreams() throws Exception { - Ddl ddl = - Ddl.builder(Dialect.POSTGRESQL) - .createTable("T1") - .column("key") - .pgInt8() - .endColumn() - .primaryKey() - .asc("key") - .end() - .endTable() - .createTable("T2") - .column("key") - .pgInt8() - .endColumn() - .column("c1") - .pgInt8() - .endColumn() - .column("c2") - .pgVarchar() - .max() - .endColumn() - .primaryKey() - .asc("key") - .end() - .endTable() - .createTable("T3") - .column("key") - .pgInt8() - .endColumn() - .primaryKey() - .asc("key") - .end() - .endTable() - .createChangeStream("ChangeStreamAll") - .forClause("FOR ALL") - .options( - ImmutableList.of( - "retention_period='7d'", "value_capture_type='OLD_AND_NEW_VALUES'")) - .endChangeStream() - .createChangeStream("ChangeStreamEmpty") - .endChangeStream() - .createChangeStream("ChangeStreamTableColumns") - .forClause("FOR \"T1\", \"T2\"(\"c1\", \"c2\"), \"T3\"()") - .endChangeStream() - .build(); - createAndPopulate(ddl, 0); - runTest(Dialect.POSTGRESQL); - } - - @Test - public void identityColumn() throws Exception { - // spotless:off - Ddl.Builder ddlBuilder = Ddl.builder(); - List dbOptionList = new ArrayList<>(); - dbOptionList.add( - Export.DatabaseOption.newBuilder() - .setOptionName("default_sequence_kind") - .setOptionValue("\"bit_reversed_positive\"") - .build()); - ddlBuilder.mergeDatabaseOptions(dbOptionList); - Ddl ddl = ddlBuilder - .createTable("IdentityTable") - .column("id") - .int64() - .isIdentityColumn(true) - .sequenceKind("bit_reversed_positive") - .counterStartValue(1000L) - .skipRangeMin(2000L) - .skipRangeMax(3000L) - .endColumn() - .column("non_key_column") - .int64() - .isIdentityColumn(true) - .sequenceKind("bit_reversed_positive") - .counterStartValue(1000L) - .skipRangeMin(2000L) - .skipRangeMax(3000L) - .endColumn() - .column("no_sequence_kind_column") - .int64() - .isIdentityColumn(true) - .sequenceKind("default") - .counterStartValue(1000L) - .skipRangeMin(2000L) - .skipRangeMax(3000L) - 
.endColumn() - .column("value").int64().endColumn() - .primaryKey().asc("id").end() - .endTable() - .build(); - // spotless:on - - createAndPopulate(ddl, 10); - runTest(); - } - - @Test - public void pgIdentityColumn() throws Exception { - // spotless:off - Ddl.Builder ddlBuilder = Ddl.builder(Dialect.POSTGRESQL); - List dbOptionList = new ArrayList<>(); - dbOptionList.add( - Export.DatabaseOption.newBuilder() - .setOptionName("default_sequence_kind") - .setOptionValue("\"bit_reversed_positive\"") - .build()); - ddlBuilder.mergeDatabaseOptions(dbOptionList); - Ddl ddl = ddlBuilder - .createTable("IdentityTable") - .column("id") - .int64() - .isIdentityColumn(true) - .sequenceKind("bit_reversed_positive") - .counterStartValue(1000L) - .skipRangeMin(2000L) - .skipRangeMax(3000L) - .endColumn() - .column("non_key_column") - .int64() - .isIdentityColumn(true) - .sequenceKind("bit_reversed_positive") - .counterStartValue(1000L) - .skipRangeMin(2000L) - .skipRangeMax(3000L) - .endColumn() - .column("no_sequence_kind_column") - .int64() - .isIdentityColumn(true) - .sequenceKind("default") - .counterStartValue(1000L) - .skipRangeMin(2000L) - .skipRangeMax(3000L) - .endColumn() - .column("value").int64().endColumn() - .primaryKey().asc("id").end() - .endTable() - .build(); - // spotless:on - - createAndPopulate(ddl, 10); - runTest(Dialect.POSTGRESQL); - } - - @Test - public void commitTimestampColumns() throws Exception { - // spotless:off - Ddl.Builder ddlBuilder = Ddl.builder(); - List dbOptionList = new ArrayList<>(); - dbOptionList.add( - Export.DatabaseOption.newBuilder() - .setOptionName("default_sequence_kind") - .setOptionValue("\"bit_reversed_positive\"") - .build()); - ddlBuilder.mergeDatabaseOptions(dbOptionList); - Ddl ddl = ddlBuilder - .createTable("CommitTimestampTable") - .column("id") - .int64() - .endColumn() - .column("default_commit_ts") - .type(Type.timestamp()) - .defaultExpression("PENDING_COMMIT_TIMESTAMP()") - 
.columnOptions(ImmutableList.of("allow_commit_timestamp=TRUE")) - .endColumn() - .column("on_update_ts") - .type(Type.timestamp()) - .defaultExpression("PENDING_COMMIT_TIMESTAMP()") - .onUpdateExpression("PENDING_COMMIT_TIMESTAMP()") - .columnOptions(ImmutableList.of("allow_commit_timestamp=TRUE")) - .endColumn() - .primaryKey().asc("id").end() - .endTable() - .build(); - // spotless:on - - createAndPopulate(ddl, 10); - runTest(); - } - - @Test - public void pgCommitTimestampColumns() throws Exception { - // spotless:off - Ddl.Builder ddlBuilder = Ddl.builder(Dialect.POSTGRESQL); - List dbOptionList = new ArrayList<>(); - dbOptionList.add( - Export.DatabaseOption.newBuilder() - .setOptionName("default_sequence_kind") - .setOptionValue("\"bit_reversed_positive\"") - .build()); - ddlBuilder.mergeDatabaseOptions(dbOptionList); - Ddl ddl = ddlBuilder - .createTable("CommitTimestampTable") - .column("id") - .int64() - .endColumn() - .column("default_commit_ts") - .pgSpannerCommitTimestamp() - .defaultExpression("spanner.pending_commit_timestamp()") - .endColumn() - .column("on_update_ts") - .pgSpannerCommitTimestamp() - .defaultExpression("spanner.pending_commit_timestamp()") - .onUpdateExpression("spanner.pending_commit_timestamp()") - .endColumn() - .primaryKey().asc("id").end() - .endTable() - .build(); - // spotless:on - - createAndPopulate(ddl, 10); - runTest(); - } - - @Test - public void udfs() throws Exception { - Ddl.Builder ddlBuilder = Ddl.builder(); - List dbOptionList = new ArrayList<>(); - dbOptionList.add( - Export.DatabaseOption.newBuilder() - .setOptionName("default_sequence_kind") - .setOptionValue("\"bit_reversed_positive\"") - .build()); - ddlBuilder.mergeDatabaseOptions(dbOptionList); - Ddl ddl = - ddlBuilder - .createSchema("s1") - .endNamedSchema() - .createUdf("s1.Foo1") - .dialect(Dialect.GOOGLE_STANDARD_SQL) - .name("s1.Foo1") - .definition("(SELECT 'bar')") - .endUdf() - .createUdf("s1.Foo2") - .dialect(Dialect.GOOGLE_STANDARD_SQL) - 
.name("s1.Foo2") - .definition("(SELECT 'bar')") - .security(SqlSecurity.INVOKER) - .type("STRING") - .addParameter(UdfParameter.parse("arg0 STRING", "s1.Foo2", Dialect.GOOGLE_STANDARD_SQL)) - .addParameter( - UdfParameter.parse( - "arg1 STRING DEFAULT 'bar'", "s1.Foo2", Dialect.GOOGLE_STANDARD_SQL)) - .endUdf() - .createUdf("s1.Foo3") - .dialect(Dialect.GOOGLE_STANDARD_SQL) - .name("s1.Foo3") - .language("REMOTE") - .type("INT64") - .addParameter(UdfParameter.parse("arg0 INT64", "s1.Foo3", Dialect.GOOGLE_STANDARD_SQL)) - .options( - ImmutableList.of( - "endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")) - .endUdf() - .build(); - createAndPopulate(ddl, 0); - runTest(); - } - - @Test - public void pgUdfs() throws Exception { - Ddl.Builder ddlBuilder = Ddl.builder(Dialect.POSTGRESQL); - List dbOptionList = new ArrayList<>(); - dbOptionList.add( - Export.DatabaseOption.newBuilder() - .setOptionName("default_sequence_kind") - .setOptionValue("\"bit_reversed_positive\"") - .build()); - ddlBuilder.mergeDatabaseOptions(dbOptionList); - Ddl ddl = - ddlBuilder - .createSchema("s1") - .endNamedSchema() - .createUdf("s1.Foo1") - .dialect(Dialect.POSTGRESQL) - .name("s1.Foo1") - .definition("(SELECT 'bar')") - .endUdf() - .createUdf("s1.Foo2") - .dialect(Dialect.POSTGRESQL) - .name("s1.Foo2") - .definition("(SELECT 'bar')") - .security(SqlSecurity.INVOKER) - .type("TEXT") - .addParameter(UdfParameter.parse("arg0 TEXT", "s1.Foo2", Dialect.POSTGRESQL)) - .addParameter( - UdfParameter.parse("arg1 TEXT DEFAULT 'bar'", "s1.Foo2", Dialect.POSTGRESQL)) - .endUdf() - .createUdf("s1.Foo3") - .dialect(Dialect.POSTGRESQL) - .name("s1.Foo3") - .language("REMOTE") - .type("BIGINT") - .addParameter(UdfParameter.parse("arg0 BIGINT", "s1.Foo3", Dialect.POSTGRESQL)) -<<<<<<< HEAD - .options( - ImmutableList.of( - "endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")) -======= - .definition( - "{\"endpoint\": 
\"https://us-central1-myproject.cloudfunctions.net/myfunc\"}") ->>>>>>> ff46924a7 (Add support for REMOTE UDFs.) - .endUdf() - .build(); - createAndPopulate(ddl, 0); - runTest(); - } - - @Test - public void sequences() throws Exception { - Ddl.Builder ddlBuilder = Ddl.builder(); - List dbOptionList = new ArrayList<>(); - dbOptionList.add( - Export.DatabaseOption.newBuilder() - .setOptionName("default_sequence_kind") - .setOptionValue("\"bit_reversed_positive\"") - .build()); - ddlBuilder.mergeDatabaseOptions(dbOptionList); - Ddl ddl = - ddlBuilder - .createSequence("Sequence1") - .options( - ImmutableList.of( - "sequence_kind=\"bit_reversed_positive\"", - "skip_range_min=0", - "skip_range_max=1000", - "start_with_counter=50")) - .endSequence() - .createSequence("Sequence2") - .options( - ImmutableList.of( - "sequence_kind=\"bit_reversed_positive\"", "start_with_counter=9999")) - .endSequence() - .createSequence("Sequence3") - .options(ImmutableList.of("sequence_kind=\"bit_reversed_positive\"")) - .endSequence() - .createSequence("Sequence4") - .options( - ImmutableList.of( - "sequence_kind=\"default\"", - "skip_range_min=0", - "skip_range_max=1000", - "start_with_counter=50")) - .endSequence() - .createTable("UsersWithSequenceId") - .column("id") - .int64() - .notNull() - .defaultExpression("GET_NEXT_SEQUENCE_VALUE(SEQUENCE Sequence3)") - .endColumn() - .column("first_name") - .string() - .size(10) - .endColumn() - .primaryKey() - .asc("id") - .end() - .endTable() - .build(); - createAndPopulate(ddl, 0); - runTest(); - } - - @Test - public void pgSequences() throws Exception { - Ddl.Builder ddlBuilder = Ddl.builder(Dialect.POSTGRESQL); - List dbOptionList = new ArrayList<>(); - dbOptionList.add( - Export.DatabaseOption.newBuilder() - .setOptionName("default_sequence_kind") - .setOptionValue("\"bit_reversed_positive\"") - .build()); - ddlBuilder.mergeDatabaseOptions(dbOptionList); - Ddl ddl = - ddlBuilder - .createSequence("PGSequence1") - 
.sequenceKind("bit_reversed_positive") - .counterStartValue(Long.valueOf(50)) - .skipRangeMin(Long.valueOf(0)) - .skipRangeMax(Long.valueOf(1000)) - .endSequence() - .createSequence("PGSequence2") - .sequenceKind("bit_reversed_positive") - .counterStartValue(Long.valueOf(9999)) - .endSequence() - .createSequence("PGSequence3") - .sequenceKind("bit_reversed_positive") - .endSequence() - .createSequence("PGSequence4") - .sequenceKind("default") - .counterStartValue(Long.valueOf(50)) - .skipRangeMin(Long.valueOf(0)) - .skipRangeMax(Long.valueOf(1000)) - .endSequence() - .createTable("PGUsersWithSequenceId") - .column("id") - .pgInt8() - .notNull() - .defaultExpression("nextval('\"PGSequence3\"')") - .endColumn() - .column("first_name") - .pgVarchar() - .size(10) - .endColumn() - .primaryKey() - .asc("id") - .end() - .endTable() - .build(); - - createAndPopulate(ddl, 0); - runTest(Dialect.POSTGRESQL); - } - - @Test - public void randomSchema() throws Exception { - Ddl ddl = RandomDdlGenerator.builder().build().generate(); - createAndPopulate(ddl, 100); - runTest(); - } - - @Test - public void randomPgSchema() throws Exception { - Ddl ddl = RandomDdlGenerator.builder(Dialect.POSTGRESQL).setMaxViews(2).build().generate(); - System.out.println(ddl.prettyPrint()); - createAndPopulate(ddl, 100); - runTest(Dialect.POSTGRESQL); - } - - @Test - public void randomSchemaNoData() throws Exception { - Ddl ddl = RandomDdlGenerator.builder().build().generate(); - createAndPopulate(ddl, 0); - runTest(); - } - - @Test - public void randomPgSchemaNoData() throws Exception { - Ddl ddl = RandomDdlGenerator.builder(Dialect.POSTGRESQL).setMaxViews(2).build().generate(); - createAndPopulate(ddl, 0); - runTest(Dialect.POSTGRESQL); - } - - private void runTest() { - runTest(Dialect.GOOGLE_STANDARD_SQL); - } - - private void runTest(Dialect dialect) { - String tmpDirPath = tmpDir.getRoot().getAbsolutePath(); - ValueProvider.StaticValueProvider destination = - 
ValueProvider.StaticValueProvider.of(tmpDirPath); - ValueProvider.StaticValueProvider jobId = ValueProvider.StaticValueProvider.of("jobid"); - ValueProvider.StaticValueProvider source = - ValueProvider.StaticValueProvider.of(tmpDirPath + "/jobid"); - - SpannerConfig sourceConfig = spannerServer.getSpannerConfig(sourceDb); - exportPipeline.apply("Export", new ExportTransform(sourceConfig, destination, jobId)); - PipelineResult exportResult = exportPipeline.run(); - exportResult.waitUntilFinish(); - - SpannerConfig destConfig = spannerServer.getSpannerConfig(destinationDb); - importPipeline.apply( - "Import", - new ImportTransform( - destConfig, - source, - ValueProvider.StaticValueProvider.of(true), - ValueProvider.StaticValueProvider.of(true), - ValueProvider.StaticValueProvider.of(true), - ValueProvider.StaticValueProvider.of(true), - ValueProvider.StaticValueProvider.of(true), - ValueProvider.StaticValueProvider.of(30), - ValueProvider.StaticValueProvider.of(40))); - PipelineResult importResult = importPipeline.run(); - importResult.waitUntilFinish(); - - PCollection mismatchCount = - comparePipeline.apply("Compare", new CompareDatabases(sourceConfig, destConfig)); - PAssert.that(mismatchCount) - .satisfies( - (x) -> { - assertEquals(Lists.newArrayList(x), Lists.newArrayList(0L)); - return null; - }); - PipelineResult compareResult = comparePipeline.run(); - compareResult.waitUntilFinish(); - - Ddl sourceDdl = readDdl(sourceDb, dialect); - Ddl destinationDdl = readDdl(destinationDb, dialect); - - assertThat(sourceDdl.prettyPrint(), equalToCompressingWhiteSpace(destinationDdl.prettyPrint())); - } - - /* Returns the Ddl representing a Spanner database for given a String for the database name */ - private Ddl readDdl(String db, Dialect dialect) { - DatabaseClient dbClient = spannerServer.getDbClient(db); - Ddl ddl; - try (ReadOnlyTransaction ctx = dbClient.readOnlyTransaction()) { - ddl = new InformationSchemaScanner(ctx, dialect).scan(); - } - return ddl; - } -} 
diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java index 2df8ff9bb4..7d323c97c2 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java @@ -39,6 +39,7 @@ import java.util.Map; import java.util.Objects; import java.util.function.Function; +import java.util.stream.Collectors; import org.apache.beam.it.common.PipelineLauncher; import org.apache.beam.it.common.PipelineOperator; import org.apache.beam.it.common.utils.ResourceManagerUtils; @@ -251,6 +252,10 @@ private void testGoogleSqlImportPipelineBase( assertThat(float32Records).hasSize(9); assertThatStructs(float32Records).hasRecordsUnordered(getFloat32TableExpectedRows()); + + assertThat(spannerResourceManager + .runQuery("SELECT CONCAT(ROUTINE_SCHEMA, '.', ROUTINE_NAME) FROM INFORMATION_SCHEMA.ROUTINES").stream() + .map(row -> row.getString(0)).collect(Collectors.toList())).containsExactly("UdfSchema.Remote"); } @Test @@ -315,6 +320,8 @@ private void testPostgresImportPipelineBase( assertThat(float32Records).hasSize(9); assertThatStructs(float32Records).hasRecordsUnordered(getFloat32TableExpectedRows()); + + // TODO(b/485601737): Add PG UDFs. } // TODO(b/395532087): Consolidate this with other tests after UUID launch. 
diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java index 9e25d424bc..6c606dae40 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java @@ -639,7 +639,7 @@ public void simpleUdf() throws Exception { String udfDef3 = "CREATE FUNCTION s1.remote_udf(x INT64, y INT64) " + "RETURNS INT64 NOT DETERMINISTIC LANGUAGE REMOTE " - + "OPTIONS ( endpoint = 'https://us-central1-myproject.cloudfunctions.net/myfunc' )"; + + "OPTIONS ( endpoint = 'https://us-central1-myproject.cloudfunctions.net/myfunc', max_batching_rows = 50 )"; SPANNER_SERVER.createDatabase(dbId, Arrays.asList(namedSchemaDef, udfDef1, udfDef2, udfDef3)); Ddl ddl = getDatabaseDdl(); @@ -706,7 +706,7 @@ public void simpleUdf() throws Exception { assertEquals(udf3.language(), "REMOTE"); assertThat( udf3.options(), - hasItems("endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")); + hasItems("endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"", "max_batching_rows=50")); assertEquals(udf3.definition(), ""); assertEquals(udf3.security(), Udf.SqlSecurity.INVOKER); assertThat( @@ -736,6 +736,8 @@ public void pgSimpleUdf() throws Exception { assertThat(ddl.schemas(), hasSize(1)); assertThat(ddl.schema("s1"), notNullValue()); + // TODO(b/485601737): Add PG UDFs. 
+ assertThat(ddl.udfs(), hasSize(0)); } diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java.orig b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java.orig deleted file mode 100644 index ff1cb26828..0000000000 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/RandomDdlGenerator.java.orig +++ /dev/null @@ -1,751 +0,0 @@ -/* - * Copyright (C) 2018 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -package com.google.cloud.teleport.spanner.ddl; - -import com.google.auto.value.AutoValue; -import com.google.cloud.spanner.Dialect; -import com.google.cloud.teleport.spanner.common.Type; -import com.google.cloud.teleport.spanner.ddl.ForeignKey.ReferentialAction; -import com.google.cloud.teleport.spanner.ddl.Table.InterleaveType; -import com.google.cloud.teleport.spanner.ddl.Udf.SqlSecurity; -import com.google.common.base.Optional; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Sets; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashSet; -import java.util.Random; -import java.util.Set; -import java.util.stream.Collectors; - -/** Generates a random {@link Ddl}. */ -@AutoValue -public abstract class RandomDdlGenerator { - - // No bytes, no floats. 
- private static final Type.Code[] PK_TYPES = - new Type.Code[] { - Type.Code.BOOL, Type.Code.INT64, Type.Code.STRING, Type.Code.TIMESTAMP, Type.Code.DATE - }; - - private static final Type.Code[] PG_PK_TYPES = - new Type.Code[] { - Type.Code.PG_BOOL, - Type.Code.PG_INT8, - Type.Code.PG_FLOAT8, - Type.Code.PG_TEXT, - Type.Code.PG_VARCHAR, - Type.Code.PG_TIMESTAMPTZ, - Type.Code.PG_DATE - }; - - private static final Type.Code[] COLUMN_TYPES = - new Type.Code[] { - Type.Code.BOOL, - Type.Code.INT64, - Type.Code.FLOAT32, - Type.Code.FLOAT64, - Type.Code.STRING, - Type.Code.BYTES, - Type.Code.TIMESTAMP, - Type.Code.DATE - }; - - private static final Type.Code[] PG_COLUMN_TYPES = - new Type.Code[] { - Type.Code.PG_BOOL, - Type.Code.PG_INT8, - Type.Code.PG_FLOAT4, - Type.Code.PG_FLOAT8, - Type.Code.PG_VARCHAR, - Type.Code.PG_BYTEA, - Type.Code.PG_TIMESTAMPTZ, - Type.Code.PG_NUMERIC, - Type.Code.PG_DATE - }; - - // Types that could be used by check constraint - private static final Set CHECK_CONSTRAINT_TYPES = - new HashSet<>( - Arrays.asList( - Type.Code.BOOL, - Type.Code.INT64, - Type.Code.FLOAT32, - Type.Code.FLOAT64, - Type.Code.STRING, - Type.Code.TIMESTAMP, - Type.Code.DATE)); - - private static final Set PG_CHECK_CONSTRAINT_TYPES = - new HashSet<>( - Arrays.asList( - Type.Code.PG_BOOL, - Type.Code.PG_INT8, - Type.Code.PG_FLOAT4, - Type.Code.PG_FLOAT8, - Type.Code.PG_TEXT, - Type.Code.PG_VARCHAR, - Type.Code.PG_TIMESTAMPTZ, - Type.Code.PG_NUMERIC, - Type.Code.PG_DATE)); - - private static final String[] UDF_LANGUAGES = new String[] {"SQL", "REMOTE"}; - - private static final int MAX_PKS = 16; - - public abstract Dialect getDialect(); - - public abstract Random getRandom(); - - public abstract int getArrayChance(); - - public abstract int getRemoteUdfChance(); - - public abstract int[] getMaxBranchPerLevel(); - - public abstract int getMaxPkComponents(); - - public abstract int getMaxColumns(); - - public abstract int getMaxIdLength(); - - public abstract int 
getMaxIndex(); - - public abstract int getMaxForeignKeys(); - - public abstract boolean getEnableGeneratedColumns(); - - public abstract boolean getEnableDefaultColumns(); - - public abstract boolean getEnableCheckConstraints(); - - public abstract int getMaxUdfs(); - - public abstract int getMaxUdfParameters(); - - public abstract int getMaxViews(); - - public abstract int getMaxChangeStreams(); - - public static Builder builder() { - return builder(Dialect.GOOGLE_STANDARD_SQL); - } - - public static Builder builder(Dialect dialect) { - - return new AutoValue_RandomDdlGenerator.Builder() - .setDialect(dialect) - .setRandom(new Random()) - .setArrayChance(20) - .setRemoteUdfChance(20) - .setMaxPkComponents(3) - .setMaxBranchPerLevel(new int[] {2, 2, 1, 1, 1, 1, 1}) - .setMaxUdfs(0) - .setMaxUdfParameters(2) - .setMaxViews(0) - .setMaxIndex(2) - .setMaxForeignKeys(2) - .setEnableCheckConstraints(true) - .setMaxColumns(8) - .setMaxIdLength(11) - .setEnableGeneratedColumns(true) - .setEnableDefaultColumns(true) - // Change stream is only supported in GoogleSQL, not in PostgreSQL. - .setMaxChangeStreams(dialect == Dialect.GOOGLE_STANDARD_SQL ? 2 : 0); - } - - /** A builder for {@link RandomDdlGenerator}. 
*/ - @AutoValue.Builder - public abstract static class Builder { - - public abstract Builder setDialect(Dialect dialect); - - public abstract Builder setRandom(Random rnd); - - public abstract Builder setArrayChance(int chance); - - public abstract Builder setRemoteUdfChance(int chance); - - public abstract Builder setMaxBranchPerLevel(int[] arr); - - public abstract Builder setMaxPkComponents(int val); - - public abstract Builder setMaxIdLength(int val); - - public abstract Builder setMaxColumns(int val); - - public abstract RandomDdlGenerator build(); - - public abstract Builder setMaxIndex(int indexes); - - public abstract Builder setMaxForeignKeys(int foreignKeys); - - public abstract Builder setEnableGeneratedColumns(boolean enable); - - public abstract Builder setEnableDefaultColumns(boolean enable); - - public abstract Builder setEnableCheckConstraints(boolean checkConstraints); - - public abstract Builder setMaxUdfs(int maxUdfs); - - public abstract Builder setMaxUdfParameters(int maxUdfParameters); - - public abstract Builder setMaxViews(int maxViews); - - public abstract Builder setMaxChangeStreams(int maxChangeStreams); - } - - public abstract Builder toBuilder(); - - private Set allIdentifiers = Sets.newHashSet(); - - public Ddl generate() { - Ddl.Builder builder = Ddl.builder(getDialect()); - int numParentTables = 1 + getRandom().nextInt(getMaxBranchPerLevel()[0]); - for (int i = 0; i < numParentTables; i++) { - generateTable(builder, null, 0); - } - int numUdfs = getRandom().nextInt(getMaxUdfs() + 1); - for (int i = 0; i < numUdfs; i++) { - generateUdf(builder); - } - int numViews = getRandom().nextInt(getMaxViews() + 1); - for (int i = 0; i < numViews; i++) { - generateView(builder); - } - int numChangeStreams = getRandom().nextInt(getMaxChangeStreams() + 1); - for (int i = 0; i < numChangeStreams; i++) { - generateChangeStream(builder); - } - - return builder.build(); - } - - private void generateUdf(Ddl.Builder builder) { - String name = 
generateIdentifier(getMaxIdLength()); - Udf.Builder udfBuilder = - builder - .createUdf(name) - .definition("select 1") - .dialect(Dialect.GOOGLE_STANDARD_SQL) - .name(name); - if (getRandom().nextBoolean()) { - Type type = - generateType((getDialect() == Dialect.GOOGLE_STANDARD_SQL) ? PK_TYPES : PG_PK_TYPES, -1); - udfBuilder.type(type.getCode().getName()); - } - - if (getRandom().nextInt(100) <= getRemoteUdfChance()) { - udfBuilder.language("REMOTE"); - } - - if (!"REMOTE".equals(udfBuilder.language())) { - if (getRandom().nextBoolean()) { - udfBuilder.security(SqlSecurity.INVOKER); - } - } else { - if (getDialect() == Dialect.GOOGLE_STANDARD_SQL) { - udfBuilder.options( - ImmutableList.of( - "endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"")); - } else { - udfBuilder.definition( -<<<<<<< HEAD - "\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\""); -======= - "{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/myfunc\"}"); ->>>>>>> ff46924a7 (Add support for REMOTE UDFs.) - } - } - - int numUdfParameters = getRandom().nextInt(getMaxUdfParameters() + 1); - for (int i = 0; i < numUdfParameters; i++) { - String paramName = generateIdentifier(getMaxIdLength()); - Type type = - generateType((getDialect() == Dialect.GOOGLE_STANDARD_SQL) ? 
PK_TYPES : PG_PK_TYPES, -1); - UdfParameter.Builder udfParameterBuilder = - udfBuilder.parameter(paramName).type(type.getCode().getName()); - if (getRandom().nextBoolean()) { - udfParameterBuilder.defaultExpression(addDefaultValueToColumn(type)); - } - udfParameterBuilder.endUdfParameter(); - } - udfBuilder.endUdf(); - } - - private void generateView(Ddl.Builder builder) { - String name = generateIdentifier(getMaxIdLength()); - View.Builder viewBuilder = builder.createView(name); - if (getRandom().nextBoolean()) { - viewBuilder.security(View.SqlSecurity.INVOKER); - } else { - viewBuilder.security(View.SqlSecurity.DEFINER); - } - - Table sourceTable = selectRandomTable(builder); - if (sourceTable == null) { - viewBuilder.query("select 1"); - } else { - StringBuilder queryBuilder = new StringBuilder("select "); - boolean firstIncluded = true; - for (Column column : sourceTable.columns()) { - if (getRandom().nextBoolean()) { - if (!firstIncluded) { - queryBuilder.append(", "); - } - if (getDialect() == Dialect.POSTGRESQL) { - queryBuilder.append("\""); - } - queryBuilder.append(column.name()); - if (getDialect() == Dialect.POSTGRESQL) { - queryBuilder.append("\""); - } - firstIncluded = false; - } - } - if (firstIncluded) { - queryBuilder.append("1"); - } - queryBuilder.append(" from "); - if (getDialect() == Dialect.POSTGRESQL) { - queryBuilder.append("\""); - } - queryBuilder.append(sourceTable.name()); - if (getDialect() == Dialect.POSTGRESQL) { - queryBuilder.append("\""); - } - viewBuilder.query(queryBuilder.toString()); - } - - viewBuilder.endView(); - } - - private void generateChangeStream(Ddl.Builder builder) { - if (getDialect() == Dialect.POSTGRESQL) { - throw new IllegalArgumentException("Change stream is not supported in PostgreSQL dialect."); - } - - String name = generateIdentifier(getMaxIdLength()); - ChangeStream.Builder changeStreamBuilder = builder.createChangeStream(name); - - generateChangeStreamForClause(builder, changeStreamBuilder); - - 
ImmutableList.Builder options = ImmutableList.builder(); - if (getRandom().nextBoolean()) { - options.add("retention_period=\"7d\""); - } - if (getRandom().nextBoolean()) { - options.add("value_capture_type=\"OLD_AND_NEW_VALUES\""); - } - changeStreamBuilder.options(options.build()); - - changeStreamBuilder.endChangeStream(); - } - - private void generateChangeStreamForClause( - Ddl.Builder builder, ChangeStream.Builder changeStreamBuilder) { - boolean forAll = getRandom().nextBoolean(); - if (forAll) { - changeStreamBuilder.forClause("FOR ALL"); - return; - } - - Table table = selectRandomTable(builder); - if (table == null) { - return; - } - - StringBuilder forClause = new StringBuilder("FOR `").append(table.name()).append("`"); - boolean allColumns = getRandom().nextBoolean(); - if (allColumns) { - changeStreamBuilder.forClause(forClause.toString()); - return; - } - - // Select a random set of watched columns, excluding primary keys and generated columns. - Set watchedColumns = Sets.newHashSet(); - Set primaryKeys = - table.primaryKeys().stream().map(pk -> pk.name()).collect(Collectors.toSet()); - for (Column column : table.columns()) { - if (getRandom().nextBoolean() - && !primaryKeys.contains(column.name()) - && !column.isGenerated()) { - watchedColumns.add("`" + column.name() + "`"); - } - } - forClause.append("(").append(String.join(", ", watchedColumns)).append(")"); - changeStreamBuilder.forClause(forClause.toString()); - } - - private void generateTable(Ddl.Builder builder, Table parent, int level) { - String name = generateIdentifier(getMaxIdLength()); - Table.Builder tableBuilder = builder.createTable(name); - - Random rnd = getRandom(); - int pkSize = 0; - if (parent != null) { - tableBuilder.interleaveInParent(parent.name()); - tableBuilder.interleaveType( - getDialect() == Dialect.GOOGLE_STANDARD_SQL && rnd.nextBoolean() - ? 
InterleaveType.IN - : InterleaveType.IN_PARENT); - for (IndexColumn pk : parent.primaryKeys()) { - Column pkColumn = parent.column(pk.name()); - tableBuilder.addColumn(pkColumn); - tableBuilder.primaryKey().set(pk).end(); - pkSize++; - } - } - - int numPks = Math.min(1 + rnd.nextInt(getMaxPkComponents()), MAX_PKS - pkSize); - for (int i = 0; i < numPks; i++) { - Column pkColumn = - generateColumn( - (getDialect() == Dialect.GOOGLE_STANDARD_SQL) ? PK_TYPES : PG_PK_TYPES, -1); - tableBuilder.addColumn(pkColumn); - - IndexColumn.Order order = rnd.nextBoolean() ? IndexColumn.Order.ASC : IndexColumn.Order.DESC; - if (getDialect() == Dialect.POSTGRESQL) { - order = IndexColumn.Order.ASC; - } - IndexColumn pk = IndexColumn.create(pkColumn.name(), order, getDialect()); - tableBuilder.primaryKey().set(pk).end(); - } - - int numColumns = rnd.nextInt(getMaxColumns()); - - for (int i = 0; i < numColumns; i++) { - Column column = - generateColumn( - (getDialect() == Dialect.GOOGLE_STANDARD_SQL) ? COLUMN_TYPES : PG_COLUMN_TYPES, - getArrayChance()); - tableBuilder.addColumn(column); - } - - Table table = tableBuilder.build(); - - if (getEnableGeneratedColumns()) { - // Add a generated column - Column depColumn = table.columns().get(rnd.nextInt(table.columns().size())); - String expr = depColumn.name(); - if (getDialect() == Dialect.POSTGRESQL) { - expr = "\"" + expr + "\""; - } - Column generatedColumn = - Column.builder(getDialect()) - .name("generated") - .type(depColumn.type()) - .max() - .notNull(depColumn.notNull()) - .generatedAs(expr) - .stored() - .autoBuild(); - tableBuilder.addColumn(generatedColumn); - table = tableBuilder.build(); - } - - int numIndexes = rnd.nextInt(getMaxIndex()); - ImmutableList.Builder indexes = ImmutableList.builder(); - for (int i = 0; i < numIndexes; i++) { - Index.Builder index = - Index.builder(getDialect()).name(generateIdentifier(getMaxIdLength())).table(name); - IndexColumn.IndexColumnsBuilder columns = index.columns(); - 
ImmutableList.Builder filters = ImmutableList.builder(); - boolean interleaved = rnd.nextBoolean(); - Set pks = Sets.newHashSet(); - // Do not interleave indexes at the last table level. - // This causes tests to fail as generated schema exceeds interleaving limit. - int finalLevel = getMaxBranchPerLevel().length - 1; - if (interleaved && level < finalLevel) { - index.interleaveIn(table.name()); - } - for (IndexColumn pk : table.primaryKeys()) { - if (interleaved) { - columns.set(pk); - if (rnd.nextBoolean()) { - filters.add("\"" + pk.name() + "\" IS NOT NULL"); - } - } - pks.add(pk.name()); - } - - int maxNumIndexColumns = MAX_PKS - pks.size(); - int indexColumns = 0; - for (int j = 0; j < table.columns().size(); j++) { - Column cm = table.columns().get(j); - String columnName = cm.name(); - if (indexColumns >= maxNumIndexColumns) { - break; - } - // Already added. - if (interleaved && pks.contains(columnName)) { - continue; - } - if (cm.type().getCode() == Type.Code.ARRAY || cm.type().getCode() == Type.Code.PG_ARRAY) { - continue; - } - // Skip the types that may generate NaN value, as NaN cannot be used as a key - if (cm.type().getCode() == Type.Code.FLOAT32 - || cm.type().getCode() == Type.Code.PG_FLOAT4 - || cm.type().getCode() == Type.Code.FLOAT64 - || cm.type().getCode() == Type.Code.PG_FLOAT8 - || cm.type().getCode() == Type.Code.PG_NUMERIC) { - continue; - } - int val = rnd.nextInt(4); - switch (val) { - case 0: - columns.create().name(columnName).asc(); - if (!pks.contains(columnName)) { - indexColumns++; - } - break; - case 1: - columns.create().name(columnName).desc(); - if (!pks.contains(columnName)) { - indexColumns++; - } - break; - case 2: - if (!pks.contains(columnName)) { - columns.create().name(columnName).storing(); - } - break; - default: - // skip this column - } - // skip the primary key column if it is randomed to storing - if (val < 2 || (val < 3 && !pks.contains(columnName))) { - if (getDialect() == Dialect.POSTGRESQL) { - if 
(rnd.nextBoolean()) { - columns.nullsFirst(); - } else { - columns.nullsLast(); - } - } - columns.endIndexColumn(); - if (rnd.nextBoolean()) { - filters.add("\"" + columnName + "\" IS NOT NULL"); - } - } - } - columns.end(); - index.nullFiltered(rnd.nextBoolean()); - index.filter(String.join(" AND ", filters.build())); - // index.unique(rnd.nextBoolean()); - if (indexColumns > 0) { - indexes.add(index.build().prettyPrint()); - } - } - tableBuilder.indexes(indexes.build()); - - if (parent != null) { - // Create redundant foreign keys to the parent table. - int numForeignKeys = rnd.nextInt(getMaxForeignKeys()); - ImmutableList.Builder foreignKeys = ImmutableList.builder(); - for (int i = 0; i < numForeignKeys; i++) { - ForeignKey.Builder foreignKeyBuilder = - ForeignKey.builder(getDialect()) - .name(generateIdentifier(getMaxIdLength())) - .table(name) - .referencedTable(parent.name()); - for (IndexColumn pk : parent.primaryKeys()) { - foreignKeyBuilder.columnsBuilder().add(pk.name()); - foreignKeyBuilder.referencedColumnsBuilder().add(pk.name()); - } - if (rnd.nextBoolean()) { - foreignKeyBuilder.referentialAction(Optional.of(generateRandomReferentialAction(rnd))); - } - if (rnd.nextBoolean()) { - foreignKeyBuilder.isEnforced(rnd.nextBoolean()); - } - ForeignKey foreignKey = foreignKeyBuilder.build(); - if (foreignKey.columns().size() > 0) { - foreignKeys.add(foreignKey.prettyPrint()); - } - } - tableBuilder.foreignKeys(foreignKeys.build()); - } - - while (getEnableCheckConstraints()) { - ImmutableList.Builder checkConstraints = ImmutableList.builder(); - // Pick a random column to add check constraint on. 
- ImmutableList columns = table.columns(); - int colIndex = rnd.nextInt(columns.size()); - Column column = columns.get(colIndex); - if (getDialect() == Dialect.GOOGLE_STANDARD_SQL - && !CHECK_CONSTRAINT_TYPES.contains(column.type().getCode())) { - continue; - } - if (getDialect() == Dialect.POSTGRESQL - && !PG_CHECK_CONSTRAINT_TYPES.contains(column.type().getCode())) { - continue; - } - // An expression that won't be trivially optimized away by query optimizer. - - String expr = "TO_HEX(SHA1(CAST(" + column.name() + " AS STRING))) <= '~'"; - String checkName = generateIdentifier(getMaxIdLength()); - if (getDialect() == Dialect.POSTGRESQL) { - expr = "LENGTH(CAST(\"" + column.name() + "\" AS VARCHAR)) > '-1'::bigint"; - checkName = "\"" + checkName + "\""; - } - checkConstraints.add("CONSTRAINT " + checkName + " CHECK(" + expr + ")"); - tableBuilder.checkConstraints(checkConstraints.build()); - break; - } - - tableBuilder.endTable(); - - table = tableBuilder.build(); - - int nextLevel = level + 1; - int[] maxBranchPerLevel = getMaxBranchPerLevel(); - if (nextLevel < maxBranchPerLevel.length - && maxBranchPerLevel[nextLevel] > 0 - && table.primaryKeys().size() < MAX_PKS) { - generateTable(builder, table, nextLevel); - } - } - - private ReferentialAction generateRandomReferentialAction(Random rnd) { - return rnd.nextBoolean() - ? 
ReferentialAction.ON_DELETE_CASCADE - : ReferentialAction.ON_DELETE_NO_ACTION; - } - - private String addDefaultValueToColumn(Type type) { - String expr = null; - if (getEnableDefaultColumns()) { - // Generate default values to columns with certain types only: - switch (type.getCode()) { - case BOOL: - case PG_BOOL: - expr = "(false)"; - break; - case INT64: - expr = "(100)"; - break; - case PG_INT8: - expr = "'100'::bigint"; - break; - case STRING: - case PG_VARCHAR: - expr = "'John'"; - break; - } - } - return expr; - } - - private Column generateColumn(Type.Code[] codes, int arrayPercentage) { - int length = 1 + getRandom().nextInt(getMaxIdLength()); - String name = generateIdentifier(length); - Type type = generateType(codes, arrayPercentage); - int size = -1; - boolean nullable = getRandom().nextBoolean(); - String expr = addDefaultValueToColumn(type); - return Column.builder(getDialect()) - .name(name) - .type(type) - .size(size) - .notNull(nullable) - .defaultExpression(expr) - .autoBuild(); - } - - private String generateIdentifier(int length) { - String id; - while (true) { - id = RandomUtils.randomAlphanumeric(length); - if (!allIdentifiers.contains(id.toLowerCase())) { - break; - } - } - allIdentifiers.add(id.toLowerCase()); - return id; - } - - private Type generateType(Type.Code[] codes, int arrayPercentage) { - boolean isArray = getRandom().nextInt(100) <= arrayPercentage; - Type.Code code = randomCode(codes); - if (isArray) { - if (getDialect() == Dialect.POSTGRESQL) { - return Type.pgArray(typeOf(code)); - } - return Type.array(typeOf(code)); - } - return typeOf(code); - } - - private Table selectRandomTable(Ddl.Builder builder) { - Collection
tables = builder.tables(); - int tablesToSkip = getRandom().nextInt(tables.size()); - for (Table table : tables) { - if (tablesToSkip > 0) { - --tablesToSkip; - } else { - return table; - } - } - return null; - } - - private Type typeOf(Type.Code code) { - switch (code) { - case BOOL: - return Type.bool(); - case FLOAT32: - return Type.float32(); - case FLOAT64: - return Type.float64(); - case STRING: - return Type.string(); - case BYTES: - return Type.bytes(); - case TIMESTAMP: - return Type.timestamp(); - case DATE: - return Type.date(); - case INT64: - return Type.int64(); - case PG_BOOL: - return Type.pgBool(); - case PG_INT8: - return Type.pgInt8(); - case PG_FLOAT4: - return Type.pgFloat4(); - case PG_FLOAT8: - return Type.pgFloat8(); - case PG_TEXT: - return Type.pgText(); - case PG_VARCHAR: - return Type.pgVarchar(); - case PG_BYTEA: - return Type.pgBytea(); - case PG_TIMESTAMPTZ: - return Type.pgTimestamptz(); - case PG_NUMERIC: - return Type.pgNumeric(); - case PG_DATE: - return Type.pgDate(); - } - throw new IllegalArgumentException("Arrays and Structs are not supported"); - } - - private Type.Code randomCode(Type.Code[] codes) { - return codes[getRandom().nextInt(codes.length)]; - } -} diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java index 90f1931ff0..8c7310d86a 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java @@ -129,5 +129,9 @@ public void testPgRemoteUdf() { equalToCompressingWhiteSpace( "CREATE FUNCTION \"foo\"(\"p1\" BIGINT) RETURNS TEXT VOLATILE LANGUAGE REMOTE" + " AS '{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/''myfunc\"}'")); + + assertThrows(IllegalArgumentException.class, + () -> Udf.builder().name("bar").specificName("s1.bar").dialect(Dialect.POSTGRESQL).type("TEXT") + 
.language("REMOTE").options(ImmutableList.of("option = value")).build().toString()); } } From d097c3c61a5a723aabf03a8dd038971c55959113 Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Mon, 1 Jun 2026 10:37:34 -0700 Subject: [PATCH 16/19] Fix for presubmits. --- .../it/gcp/dataflow/DirectRunnerClient.java | 2 +- pom.xml | 2 +- .../cloud/teleport/spanner/ddl/Udf.java | 34 +++++++++---------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java index 8041dc7a80..efce55e16a 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java @@ -265,7 +265,7 @@ public void cancel() { currentJob.setCurrentState(JobState.CANCELLED.toString()); try { - this.interrupt(); + this.stop(); } catch (Exception e) { LOG.warn("Error cancelling job", e); } diff --git a/pom.xml b/pom.xml index 1d3c5d8bbf..254447f187 100644 --- a/pom.xml +++ b/pom.xml @@ -48,7 +48,7 @@ 3.6.2 3.4.0 1.7.1 - 0.8.14 + 0.8.13 2.73.0 diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java index 8d156b4c8f..6c0b90d873 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java @@ -95,9 +95,9 @@ public void prettyPrint(Appendable appendable) throws IOException { if (language() != null && language().equalsIgnoreCase("REMOTE")) { String determinism; if (dialect() == Dialect.GOOGLE_STANDARD_SQL) { - determinism = "NOT DETERMINISTIC"; - } else { - determinism = "VOLATILE"; + determinism = "NOT DETERMINISTIC"; + } else { + determinism = "VOLATILE"; } appendable.append(" ").append(determinism); } @@ 
-118,26 +118,26 @@ public void prettyPrint(Appendable appendable) throws IOException { if (!options().isEmpty()) { if (dialect() == Dialect.GOOGLE_STANDARD_SQL) { - appendable.append(" OPTIONS (").append(String.join(", ", options())).append(")"); - } else { - throw new IllegalArgumentException( - "Options are not supported in PostgreSQL dialect for non-remote UDFs."); + appendable.append(" OPTIONS (").append(String.join(", ", options())).append(")"); + } else { + throw new IllegalArgumentException( + "Options are not supported in PostgreSQL dialect for non-remote UDFs."); } } if (definition() != null && !definition().isEmpty()) { if (dialect() == Dialect.GOOGLE_STANDARD_SQL) { - appendable.append(" AS (").append(definition()).append(")"); + appendable.append(" AS (").append(definition()).append(")"); + } else { + if (language() == null || language().isEmpty() || "SQL".equalsIgnoreCase(language())) { + appendable.append(" RETURN ").append(definition()); } else { - if (language() == null || language().isEmpty() || "SQL".equalsIgnoreCase(language())) { - appendable.append(" RETURN ").append(definition()); - } else { - // Other languages use AS definition instead of sql body. - appendable - .append(" AS '") - .append(PG_DEFINITION_ESCAPER.escape(definition())) - .append("'"); - } + // Other languages use AS definition instead of sql body. 
+ appendable + .append(" AS '") + .append(PG_DEFINITION_ESCAPER.escape(definition())) + .append("'"); + } } } } From 7c39e71e68ab67656daa50e257ac548f0106a09e Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Mon, 1 Jun 2026 14:58:20 -0700 Subject: [PATCH 17/19] Spotless --- .../com/google/cloud/teleport/spanner/ddl/Udf.java | 5 +---- .../cloud/teleport/spanner/ImportPipelineIT.java | 11 ++++++++--- .../spanner/ddl/InformationSchemaScannerIT.java | 4 +++- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java index 6c0b90d873..62ec41e094 100644 --- a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java @@ -133,10 +133,7 @@ public void prettyPrint(Appendable appendable) throws IOException { appendable.append(" RETURN ").append(definition()); } else { // Other languages use AS definition instead of sql body. 
- appendable - .append(" AS '") - .append(PG_DEFINITION_ESCAPER.escape(definition())) - .append("'"); + appendable.append("·AS·'").append(PG_DEFINITION_ESCAPER.escape(definition())).append("'"); } } } diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java index 7d323c97c2..82aef50deb 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ImportPipelineIT.java @@ -253,9 +253,14 @@ private void testGoogleSqlImportPipelineBase( assertThat(float32Records).hasSize(9); assertThatStructs(float32Records).hasRecordsUnordered(getFloat32TableExpectedRows()); - assertThat(spannerResourceManager - .runQuery("SELECT CONCAT(ROUTINE_SCHEMA, '.', ROUTINE_NAME) FROM INFORMATION_SCHEMA.ROUTINES").stream() - .map(row -> row.getString(0)).collect(Collectors.toList())).containsExactly("UdfSchema.Remote"); + assertThat( + spannerResourceManager + .runQuery( + "SELECT CONCAT(ROUTINE_SCHEMA, '.', ROUTINE_NAME) FROM INFORMATION_SCHEMA.ROUTINES") + .stream() + .map(row -> row.getString(0)) + .collect(Collectors.toList())) + .containsExactly("UdfSchema.Remote"); } @Test diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java index 6c606dae40..a2fb664747 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/InformationSchemaScannerIT.java @@ -706,7 +706,9 @@ public void simpleUdf() throws Exception { assertEquals(udf3.language(), "REMOTE"); assertThat( udf3.options(), - hasItems("endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"", "max_batching_rows=50")); + hasItems( + "endpoint=\"https://us-central1-myproject.cloudfunctions.net/myfunc\"", + 
"max_batching_rows=50")); assertEquals(udf3.definition(), ""); assertEquals(udf3.security(), Udf.SqlSecurity.INVOKER); assertThat( From a370772b3a2e721c61ac5ee311cd0edcf322c6bc Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Tue, 2 Jun 2026 13:29:33 -0700 Subject: [PATCH 18/19] More spotless. --- .../cloud/teleport/spanner/ddl/UdfTest.java | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java index 8c7310d86a..db8ade30b3 100644 --- a/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java +++ b/v1/src/test/java/com/google/cloud/teleport/spanner/ddl/UdfTest.java @@ -130,8 +130,17 @@ public void testPgRemoteUdf() { "CREATE FUNCTION \"foo\"(\"p1\" BIGINT) RETURNS TEXT VOLATILE LANGUAGE REMOTE" + " AS '{\"endpoint\": \"https://us-central1-myproject.cloudfunctions.net/''myfunc\"}'")); - assertThrows(IllegalArgumentException.class, - () -> Udf.builder().name("bar").specificName("s1.bar").dialect(Dialect.POSTGRESQL).type("TEXT") - .language("REMOTE").options(ImmutableList.of("option = value")).build().toString()); + assertThrows( + IllegalArgumentException.class, + () -> + Udf.builder() + .name("bar") + .specificName("s1.bar") + .dialect(Dialect.POSTGRESQL) + .type("TEXT") + .language("REMOTE") + .options(ImmutableList.of("option = value")) + .build() + .toString()); } } From 72c19babdcff848dfcc03d18fe19364a526fde91 Mon Sep 17 00:00:00 2001 From: Adrian Wisniewski Date: Tue, 2 Jun 2026 14:00:04 -0700 Subject: [PATCH 19/19] Spotless --- v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java index 62ec41e094..9ab5caac2b 100644 --- 
a/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java +++ b/v1/src/main/java/com/google/cloud/teleport/spanner/ddl/Udf.java @@ -133,7 +133,7 @@ public void prettyPrint(Appendable appendable) throws IOException { appendable.append(" RETURN ").append(definition()); } else { // Other languages use AS definition instead of sql body. - appendable.append("·AS·'").append(PG_DEFINITION_ESCAPER.escape(definition())).append("'"); + appendable.append(" AS '").append(PG_DEFINITION_ESCAPER.escape(definition())).append("'"); } } }