From 3a90adcc83ff47226b15a3c95647ed68110b42ce Mon Sep 17 00:00:00 2001 From: shangeyao Date: Tue, 23 Jun 2026 16:48:23 +0800 Subject: [PATCH 1/5] [Feature] Support Apache Flink 2.x (2.0, 2.1, 2.2) Closes #4367 Add support for Apache Flink 2.x series (2.0, 2.1, 2.2) to StreamPark. Key changes: - Extend FlinkVersion.scala to recognize 2.x versions - Create new shims modules: streampark-flink-shims_flink-2.0/2.1/2.2 - Migrate to Java DataStream API (Scala DataStream API removed in Flink 2.x) - Handle ParameterTool relocation from org.apache.flink.api.java.utils to org.apache.flink.util - Implement Flink 2.1 Model API (createModel, fromCall, etc.) - Implement Flink 2.2 FunctionDescriptor and Materialized Tables APIs - Update console-service EnvInitializer regex to match 2.x shims JARs - Add 2.0/2.1/2.2 shims JAR dependencies to console-service pom.xml All shims modules (1.12-1.20 + 2.0-2.2) compile successfully and tests pass. --- .../streampark/common/conf/FlinkVersion.scala | 2 + .../streampark-console-service/pom.xml | 21 + .../console/core/runner/EnvInitializer.java | 2 +- .../streampark-flink-shims/pom.xml | 3 + .../flink/core/EnhancerImplicit.scala | 16 +- .../streampark-flink-shims_flink-2.0/pom.xml | 152 ++++ .../flink/core/StreamEnvConfigFunction.java | 34 + .../flink/core/TableEnvConfigFunction.java | 34 + .../flink/core/EnhancerImplicit.scala | 72 ++ .../flink/core/FlinkClientTrait.scala | 49 ++ .../flink/core/FlinkClusterClient.scala | 64 ++ .../flink/core/FlinkKubernetesClient.scala | 32 + .../core/FlinkKubernetesClientTrait.scala | 38 + .../flink/core/FlinkSqlExecutor.scala | 169 +++++ .../flink/core/FlinkSqlValidator.scala | 152 ++++ .../flink/core/FlinkStreamTableTraitV2.scala | 396 +++++++++++ .../core/FlinkStreamingInitializerV2.scala | 151 ++++ .../flink/core/FlinkTableInitializerV2.scala | 250 +++++++ .../flink/core/FlinkTableTrait.scala | 176 +++++ .../flink/core/SqlCommandParser.scala | 651 ++++++++++++++++++ .../flink/core/StreamEnvConfig.scala | 
27 + .../flink/core/StreamTableContext.scala | 239 +++++++ .../streampark/flink/core/TableContext.scala | 144 ++++ .../streampark/flink/core/TableExt.scala | 39 ++ .../flink/core/conf/FlinkConfiguration.scala | 26 + .../streampark-flink-shims_flink-2.1/pom.xml | 152 ++++ .../flink/core/StreamEnvConfigFunction.java | 34 + .../flink/core/TableEnvConfigFunction.java | 34 + .../flink/core/EnhancerImplicit.scala | 72 ++ .../flink/core/FlinkClientTrait.scala | 49 ++ .../flink/core/FlinkClusterClient.scala | 64 ++ .../flink/core/FlinkKubernetesClient.scala | 32 + .../core/FlinkKubernetesClientTrait.scala | 38 + .../flink/core/FlinkSqlExecutor.scala | 169 +++++ .../flink/core/FlinkSqlValidator.scala | 152 ++++ .../flink/core/FlinkStreamTableTraitV2.scala | 396 +++++++++++ .../core/FlinkStreamingInitializerV2.scala | 151 ++++ .../flink/core/FlinkTableInitializerV2.scala | 250 +++++++ .../flink/core/FlinkTableTrait.scala | 176 +++++ .../flink/core/SqlCommandParser.scala | 651 ++++++++++++++++++ .../flink/core/StreamEnvConfig.scala | 27 + .../flink/core/StreamTableContext.scala | 286 ++++++++ .../streampark/flink/core/TableContext.scala | 190 +++++ .../streampark/flink/core/TableExt.scala | 39 ++ .../flink/core/conf/FlinkConfiguration.scala | 26 + .../streampark-flink-shims_flink-2.2/pom.xml | 152 ++++ .../flink/core/StreamEnvConfigFunction.java | 34 + .../flink/core/TableEnvConfigFunction.java | 34 + .../flink/core/EnhancerImplicit.scala | 72 ++ .../flink/core/FlinkClientTrait.scala | 49 ++ .../flink/core/FlinkClusterClient.scala | 64 ++ .../flink/core/FlinkKubernetesClient.scala | 32 + .../core/FlinkKubernetesClientTrait.scala | 38 + .../flink/core/FlinkSqlExecutor.scala | 169 +++++ .../flink/core/FlinkSqlValidator.scala | 152 ++++ .../flink/core/FlinkStreamTableTraitV2.scala | 396 +++++++++++ .../core/FlinkStreamingInitializerV2.scala | 151 ++++ .../flink/core/FlinkTableInitializerV2.scala | 250 +++++++ .../flink/core/FlinkTableTrait.scala | 176 +++++ 
.../flink/core/SqlCommandParser.scala | 651 ++++++++++++++++++ .../flink/core/StreamEnvConfig.scala | 27 + .../flink/core/StreamTableContext.scala | 317 +++++++++ .../streampark/flink/core/TableContext.scala | 221 ++++++ .../streampark/flink/core/TableExt.scala | 39 ++ .../flink/core/conf/FlinkConfiguration.scala | 26 + 65 files changed, 8974 insertions(+), 3 deletions(-) create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/pom.xml create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/java/org/apache/streampark/flink/core/StreamEnvConfigFunction.java create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/java/org/apache/streampark/flink/core/TableEnvConfigFunction.java create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/EnhancerImplicit.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkClientTrait.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkClusterClient.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClient.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClientTrait.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkSqlExecutor.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkSqlValidator.scala create mode 100644 
streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkStreamTableTraitV2.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkStreamingInitializerV2.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkTableInitializerV2.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkTableTrait.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/SqlCommandParser.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/StreamEnvConfig.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/StreamTableContext.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/TableContext.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/TableExt.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/conf/FlinkConfiguration.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/pom.xml create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/java/org/apache/streampark/flink/core/StreamEnvConfigFunction.java create mode 100644 
streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/java/org/apache/streampark/flink/core/TableEnvConfigFunction.java create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/EnhancerImplicit.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkClientTrait.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkClusterClient.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClient.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClientTrait.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkSqlExecutor.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkSqlValidator.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkStreamTableTraitV2.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkStreamingInitializerV2.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkTableInitializerV2.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkTableTrait.scala create mode 
100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/SqlCommandParser.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/StreamEnvConfig.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/StreamTableContext.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/TableContext.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/TableExt.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/conf/FlinkConfiguration.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/pom.xml create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/java/org/apache/streampark/flink/core/StreamEnvConfigFunction.java create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/java/org/apache/streampark/flink/core/TableEnvConfigFunction.java create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/EnhancerImplicit.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkClientTrait.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkClusterClient.scala create mode 100644 
streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClient.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClientTrait.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkSqlExecutor.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkSqlValidator.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkStreamTableTraitV2.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkStreamingInitializerV2.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkTableInitializerV2.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkTableTrait.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/SqlCommandParser.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/StreamEnvConfig.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/StreamTableContext.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/TableContext.scala create mode 100644 
streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/TableExt.scala create mode 100644 streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/conf/FlinkConfiguration.scala diff --git a/streampark-common/src/main/scala/org/apache/streampark/common/conf/FlinkVersion.scala b/streampark-common/src/main/scala/org/apache/streampark/common/conf/FlinkVersion.scala index 83a09a227c..722b085e44 100644 --- a/streampark-common/src/main/scala/org/apache/streampark/common/conf/FlinkVersion.scala +++ b/streampark-common/src/main/scala/org/apache/streampark/common/conf/FlinkVersion.scala @@ -127,6 +127,7 @@ class FlinkVersion(val flinkHome: String) extends Serializable with Logger { def checkVersion(throwException: Boolean = true): Boolean = { version.split("\\.").map(_.trim.toInt) match { case Array(1, v, _) if v >= 12 && v <= 20 => true + case Array(2, v, _) if v >= 0 => true case _ => if (throwException) { throw new UnsupportedOperationException(s"Unsupported flink version: $version") @@ -139,6 +140,7 @@ class FlinkVersion(val flinkHome: String) extends Serializable with Logger { def checkVersion(sinceVersion: Int): Boolean = { version.split("\\.").map(_.trim.toInt) match { case Array(1, v, _) if v >= sinceVersion => true + case Array(2, _, _) => true /* sinceVersion is a 1.x minor; every 2.x release is newer */ case _ => false } } diff --git a/streampark-console/streampark-console-service/pom.xml b/streampark-console/streampark-console-service/pom.xml index 3b94cd0ca1..52f1347574 100644 --- a/streampark-console/streampark-console-service/pom.xml +++ b/streampark-console/streampark-console-service/pom.xml @@ -587,6 +587,27 @@ ${project.version} ${project.build.directory}/shims + + + org.apache.streampark + streampark-flink-shims_flink-2.0_${scala.binary.version} + ${project.version} + ${project.build.directory}/shims + + + + org.apache.streampark + 
streampark-flink-shims_flink-2.1_${scala.binary.version} + ${project.version} + ${project.build.directory}/shims + + + + org.apache.streampark + streampark-flink-shims_flink-2.2_${scala.binary.version} + ${project.version} + ${project.build.directory}/shims + org.apache.streampark diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/runner/EnvInitializer.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/runner/EnvInitializer.java index c9addb4178..6ae9f86e26 100644 --- a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/runner/EnvInitializer.java +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/runner/EnvInitializer.java @@ -71,7 +71,7 @@ public class EnvInitializer implements ApplicationRunner { private final FileFilter fileFilter = p -> !".gitkeep".equals(p.getName()); private static final Pattern PATTERN_FLINK_SHIMS_JAR = Pattern.compile( - "^streampark-flink-shims_flink-(1.1[2-9]|1\\.2[0-9])_(2.12)-(.*).jar$", + "^streampark-flink-shims_flink-(1\\.1[2-9]|1\\.2[0-9]|2\\.[0-9])_(2\\.12)-(.*)\\.jar$", Pattern.CASE_INSENSITIVE | Pattern.DOTALL); @SneakyThrows diff --git a/streampark-flink/streampark-flink-shims/pom.xml b/streampark-flink/streampark-flink-shims/pom.xml index 16e1764486..a951978853 100644 --- a/streampark-flink/streampark-flink-shims/pom.xml +++ b/streampark-flink/streampark-flink-shims/pom.xml @@ -41,6 +41,9 @@ streampark-flink-shims_flink-1.18 streampark-flink-shims_flink-1.19 streampark-flink-shims_flink-1.20 + streampark-flink-shims_flink-2.0 + streampark-flink-shims_flink-2.1 + streampark-flink-shims_flink-2.2 diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims-base/src/main/scala/org/apache/streampark/flink/core/EnhancerImplicit.scala 
b/streampark-flink/streampark-flink-shims/streampark-flink-shims-base/src/main/scala/org/apache/streampark/flink/core/EnhancerImplicit.scala index d855ee0302..0e6f49c06c 100644 --- a/streampark-flink/streampark-flink-shims/streampark-flink-shims-base/src/main/scala/org/apache/streampark/flink/core/EnhancerImplicit.scala +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims-base/src/main/scala/org/apache/streampark/flink/core/EnhancerImplicit.scala @@ -23,7 +23,8 @@ import org.apache.streampark.common.util.DeflaterUtils import org.apache.flink.api.java.utils.ParameterTool import org.apache.flink.configuration.PipelineOptions import org.apache.flink.table.api.TableEnvironment -import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment +import org.apache.flink.table.api.bridge.scala.{StreamTableEnvironment => ScalaStreamTableEnvironment} import scala.util.Try @@ -58,7 +59,18 @@ object EnhancerImplicit { } - implicit class EnhanceStreamExecutionEnvironment(env: StreamTableEnvironment) { + implicit class EnhanceStreamExecutionEnvironment(env: ScalaStreamTableEnvironment) { + + private[flink] def setAppName(implicit parameter: ParameterTool): ScalaStreamTableEnvironment = { + val appName = parameter.getAppName() + if (appName != null) { + env.getConfig.getConfiguration.setString(PipelineOptions.NAME, appName) + } + env + } + } + + implicit class EnhanceJavaStreamTableEnvironment(env: StreamTableEnvironment) { private[flink] def setAppName(implicit parameter: ParameterTool): StreamTableEnvironment = { val appName = parameter.getAppName() diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/pom.xml b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/pom.xml new file mode 100644 index 0000000000..a34b365441 --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/pom.xml @@ -0,0 +1,152 @@ 
+ + + + 4.0.0 + + + org.apache.streampark + streampark-flink-shims + 2.2.0-SNAPSHOT + + + streampark-flink-shims_flink-2.0_${scala.binary.version} + StreamPark : Flink Shims 2.0 + + + 2.0.2 + + + + + org.apache.streampark + streampark-common_${scala.binary.version} + ${project.version} + + + + + org.apache.flink + flink-core + ${flink.version} + provided + + + + org.apache.flink + flink-streaming-java + ${flink.version} + provided + + + + org.apache.flink + flink-table-api-java + ${flink.version} + provided + + + + org.apache.flink + flink-table-api-java-bridge + ${flink.version} + provided + + + + org.apache.flink + flink-table-planner_${scala.binary.version} + ${flink.version} + provided + + + + org.apache.flink + flink-clients + ${flink.version} + provided + + + + org.apache.flink + flink-kubernetes + ${flink.version} + provided + + + + org.apache.flink + flink-yarn + ${flink.version} + provided + + + + org.apache.flink + flink-statebackend-rocksdb + ${flink.version} + provided + + + + org.apache.hadoop + hadoop-client-api + true + + + + org.apache.hadoop + hadoop-client-runtime + true + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + + shade + + package + + true + ${project.basedir}/target/dependency-reduced-pom.xml + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + + + + diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/java/org/apache/streampark/flink/core/StreamEnvConfigFunction.java b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/java/org/apache/streampark/flink/core/StreamEnvConfigFunction.java new file mode 100644 index 0000000000..d1ed71bb32 --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/java/org/apache/streampark/flink/core/StreamEnvConfigFunction.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.flink.core; + +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.util.ParameterTool; + +@FunctionalInterface +public interface StreamEnvConfigFunction { + + /** + * When used to initialize StreamExecutionEnvironment, it can be used to implement this function + * and customize the parameters to be set... + * + * @param environment + * @param parameterTool + */ + void configuration(StreamExecutionEnvironment environment, ParameterTool parameterTool); +} diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/java/org/apache/streampark/flink/core/TableEnvConfigFunction.java b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/java/org/apache/streampark/flink/core/TableEnvConfigFunction.java new file mode 100644 index 0000000000..0e57b74b5c --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/java/org/apache/streampark/flink/core/TableEnvConfigFunction.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.flink.core; + +import org.apache.flink.table.api.TableConfig; +import org.apache.flink.util.ParameterTool; + +@FunctionalInterface +public interface TableEnvConfigFunction { + + /** + * When used to initialize the TableEnvironment, it can be used to implement this function and + * customize the parameters to be set... + * + * @param tableConfig + * @param parameterTool + */ + void configuration(TableConfig tableConfig, ParameterTool parameterTool); +} diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/EnhancerImplicit.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/EnhancerImplicit.scala new file mode 100644 index 0000000000..0bf7bc15ae --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/EnhancerImplicit.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.flink.core + +import org.apache.streampark.common.conf.ConfigKeys.{KEY_APP_NAME, KEY_FLINK_APP_NAME} +import org.apache.streampark.common.util.DeflaterUtils + +import org.apache.flink.configuration.PipelineOptions +import org.apache.flink.table.api.TableEnvironment +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment +import org.apache.flink.util.ParameterTool + +import scala.util.Try + +object EnhancerImplicit { + + implicit class EnhanceParameterTool(parameterTool: ParameterTool) { + + private[flink] def getAppName(name: String = null, required: Boolean = false): String = { + val appName = name match { + case null => + Try(DeflaterUtils.unzipString(parameterTool.get(KEY_APP_NAME(), null))) + .getOrElse(parameterTool.get(KEY_FLINK_APP_NAME, null)) + case x => x + } + if (required) { + require(appName != null, "[StreamPark] Application name cannot be null") + } + appName + } + + } + + implicit class EnhanceTableEnvironment(env: TableEnvironment) { + + private[flink] def setAppName(implicit parameter: ParameterTool): TableEnvironment = { + val appName = parameter.getAppName() + if (appName != null) { + env.getConfig.getConfiguration.setString(PipelineOptions.NAME.key, appName) + } + env + } + + } + + implicit class EnhanceStreamTableEnvironment(env: StreamTableEnvironment) { + + private[flink] def setAppName(implicit parameter: 
ParameterTool): StreamTableEnvironment = { + val appName = parameter.getAppName() + if (appName != null) { + env.getConfig.getConfiguration.setString(PipelineOptions.NAME.key, appName) + } + env + } + } + +} diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkClientTrait.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkClientTrait.scala new file mode 100644 index 0000000000..a0ebcb274c --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkClientTrait.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.streampark.flink.core + +import org.apache.flink.api.common.JobID +import org.apache.flink.client.program.ClusterClient +import org.apache.flink.core.execution.SavepointFormatType + +import java.util.concurrent.CompletableFuture + +abstract class FlinkClientTrait[T](clusterClient: ClusterClient[T]) { + + def triggerSavepoint( + jobID: JobID, + savepointDir: String, + nativeFormat: Boolean = false): CompletableFuture[String] = { + clusterClient.triggerSavepoint(jobID, savepointDir, SavepointFormatType.DEFAULT) + } + + def cancelWithSavepoint( + jobID: JobID, + savepointDir: String, + nativeFormat: Boolean = false): CompletableFuture[String] = { + clusterClient.cancelWithSavepoint(jobID, savepointDir, SavepointFormatType.DEFAULT) + } + + def stopWithSavepoint( + jobID: JobID, + advanceToEndOfEventTime: Boolean, + savepointDir: String, + nativeFormat: Boolean = false): CompletableFuture[String] = + clusterClient.stopWithSavepoint(jobID, advanceToEndOfEventTime, savepointDir, SavepointFormatType.DEFAULT) + +} diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkClusterClient.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkClusterClient.scala new file mode 100644 index 0000000000..ecfc8dabb3 --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkClusterClient.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.flink.core + +import org.apache.flink.api.common.JobID +import org.apache.flink.client.program.ClusterClient +import org.apache.flink.core.execution.SavepointFormatType + +import java.util.concurrent.CompletableFuture + +class FlinkClusterClient[T](clusterClient: ClusterClient[T]) + extends FlinkClientTrait[T](clusterClient) { + + override def triggerSavepoint( + jobID: JobID, + savepointDir: String, + nativeFormat: Boolean): CompletableFuture[String] = { + clusterClient.triggerSavepoint( + jobID, + savepointDir, + if (nativeFormat) SavepointFormatType.NATIVE + else SavepointFormatType.CANONICAL) + } + + override def cancelWithSavepoint( + jobID: JobID, + savepointDirectory: String, + nativeFormat: Boolean): CompletableFuture[String] = { + clusterClient.cancelWithSavepoint( + jobID, + savepointDirectory, + if (nativeFormat) SavepointFormatType.NATIVE + else SavepointFormatType.CANONICAL) + } + + override def stopWithSavepoint( + jobID: JobID, + advanceToEndOfEventTime: Boolean, + savepointDirectory: String, + nativeFormat: Boolean): CompletableFuture[String] = { + clusterClient.stopWithSavepoint( + jobID, + advanceToEndOfEventTime, + savepointDirectory, + if (nativeFormat) SavepointFormatType.NATIVE + else SavepointFormatType.CANONICAL) + } + +} diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClient.scala 
b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClient.scala new file mode 100644 index 0000000000..707ba43f0b --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClient.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.flink.kubernetes.kubeclient.FlinkKubeClient
+import org.apache.flink.kubernetes.kubeclient.resources.KubernetesService
+
+import java.util.Optional
+
+/**
+ * Concrete Kubernetes client shim that answers service lookups straight from the wrapped
+ * [[FlinkKubeClient]].
+ *
+ * @param kubeClient
+ *   the client every lookup is forwarded to
+ */
+class FlinkKubernetesClient(kubeClient: FlinkKubeClient)
+  extends FlinkKubernetesClientTrait(kubeClient) {
+
+  /**
+   * @param serviceName
+   *   the name of the service to look up
+   * @return
+   *   whatever the wrapped client returns for that name
+   */
+  override def getService(serviceName: String): Optional[KubernetesService] =
+    kubeClient.getService(serviceName)
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClientTrait.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClientTrait.scala
new file mode 100644
index 0000000000..16155bf040
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClientTrait.scala
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.flink.kubernetes.kubeclient.FlinkKubeClient
+import org.apache.flink.kubernetes.kubeclient.resources.KubernetesService
+
+import java.util.Optional
+
+/**
+ * Base class for the Kubernetes client shims; wraps a [[FlinkKubeClient]].
+ *
+ * @param kubeClient
+ *   the client every lookup is forwarded to
+ */
+abstract class FlinkKubernetesClientTrait(kubeClient: FlinkKubeClient) {
+
+  /**
+   * Looks up a Kubernetes service by name through the wrapped kube client.
+   *
+   * @param serviceName
+   *   the name of the service
+   * @return
+   *   the optional Kubernetes service of the specified name, exactly as returned by the client.
+   */
+  def getService(serviceName: String): Optional[KubernetesService] = {
+    val service = kubeClient.getService(serviceName)
+    service
+  }
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkSqlExecutor.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkSqlExecutor.scala
new file mode 100644
index 0000000000..1c0df899f5
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkSqlExecutor.scala
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.conf.ConfigKeys.KEY_FLINK_SQL
+import org.apache.streampark.common.util.{AssertUtils, Logger}
+import org.apache.streampark.flink.core.SqlCommand._
+
+import org.apache.commons.lang3.StringUtils
+import org.apache.flink.configuration.{Configuration, ExecutionOptions}
+import org.apache.flink.table.api.TableEnvironment
+import org.apache.flink.util.ParameterTool
+
+import java.util
+import java.util.concurrent.locks.ReentrantReadWriteLock
+
+import scala.collection.mutable
+import scala.util.Try
+
+/** Runs a Flink SQL script statement by statement against a [[TableEnvironment]]. */
+object FlinkSqlExecutor extends Logger {
+
+  // Serializes the generic `executeSql` fallback branch across threads.
+  private[this] val lock = new ReentrantReadWriteLock().writeLock
+
+  /**
+   * Resolves the SQL script from `parameter`, parses it with `SqlCommandParser.parseSQL` and
+   * handles every parsed command in order. `INSERT` statements are collected into one statement
+   * set that is executed after all other commands have been processed.
+   *
+   * @param sql
+   *   the parameter key under which the script is stored; when blank, `KEY_FLINK_SQL()` is used
+   * @param parameter
+   *   source of the script text and of the runtime mode
+   * @param context
+   *   the table environment the statements run against
+   * @param callbackFunc
+   *   receives the textual result of display commands; when `null` the text is logged instead
+   * @throws IllegalArgumentException
+   *   if the resolved script is blank
+   * @throws RuntimeException
+   *   on a `SELECT` statement, or when the script contains no `INSERT` statement
+   */
+  private[streampark] def executeSql(
+      sql: String,
+      parameter: ParameterTool,
+      context: TableEnvironment)(implicit callbackFunc: String => Unit = null): Unit = {
+
+    val flinkSql: String =
+      if (StringUtils.isBlank(sql)) parameter.get(KEY_FLINK_SQL())
+      else parameter.get(sql)
+    require(StringUtils.isNotBlank(flinkSql), "verify failed: flink sql cannot be empty")
+
+    def callback(r: String): Unit = {
+      callbackFunc match {
+        case null => logInfo(r)
+        case x => x(r)
+      }
+    }
+
+    val runMode = parameter.get(ExecutionOptions.RUNTIME_MODE.key())
+
+    var hasInsert = false
+    val statementSet = context.createStatementSet()
+    SqlCommandParser
+      .parseSQL(flinkSql)
+      .foreach(x => {
+        // First operand, or null for commands that carry none.
+        val args = if (x.operands.isEmpty) null else x.operands.head
+        val command = x.command.name
+        x.command match {
+          // For display sql statement result information
+          case SHOW_CATALOGS =>
+            val catalogs = context.listCatalogs
+            callback(s"$command: ${catalogs.mkString("\n")}")
+          case SHOW_CURRENT_CATALOG =>
+            val catalog = context.getCurrentCatalog
+            callback(s"$command: $catalog")
+          case SHOW_DATABASES =>
+            val databases = context.listDatabases
+            callback(s"$command: ${databases.mkString("\n")}")
+          case SHOW_CURRENT_DATABASE =>
+            val database = context.getCurrentDatabase
+            callback(s"$command: $database")
+          case SHOW_TABLES =>
+            val tables =
+              context.listTables().filter(!_.startsWith("UnnamedTable"))
+            callback(s"$command: ${tables.mkString("\n")}")
+          case SHOW_FUNCTIONS =>
+            val functions = context.listUserDefinedFunctions()
+            callback(s"$command: ${functions.mkString("\n")}")
+          case SHOW_MODULES =>
+            val modules = context.listModules()
+            callback(s"$command: ${modules.mkString("\n")}")
+          case DESC | DESCRIBE =>
+            val schema = context.scan(args).getSchema
+            val builder = new mutable.StringBuilder()
+            builder.append("Column\tType\n")
+            // Field indices are zero-based: the last valid index is getFieldCount - 1, so the
+            // range must be exclusive (`until`); an inclusive range reads one field too many.
+            for (i <- 0 until schema.getFieldCount) {
+              builder.append(
+                schema.getFieldName(i).get() + "\t" + schema
+                  .getFieldDataType(i)
+                  .get() + "\n")
+            }
+            callback(builder.toString())
+          case EXPLAIN =>
+            val tableResult = context.executeSql(x.originSql)
+            val r = tableResult.collect().next().getField(0).toString
+            callback(r)
+          // For specific statement, such as: SET/RESET/INSERT/SELECT
+          case SET =>
+            val operand = x.operands(1)
+            logInfo(s"$command: $args --> $operand")
+            context.getConfig.getConfiguration.setString(args, operand)
+          case RESET | RESET_ALL =>
+            // Configuration exposes no removal API used here, so the backing map is reached
+            // reflectively through its `confData` field.
+            val confDataField =
+              classOf[Configuration].getDeclaredField("confData")
+            confDataField.setAccessible(true)
+            val confData = confDataField
+              .get(context.getConfig.getConfiguration)
+              .asInstanceOf[util.HashMap[String, AnyRef]]
+            confData.synchronized {
+              if (x.command == RESET) {
+                confData.remove(args)
+              } else {
+                confData.clear()
+              }
+            }
+            logInfo(s"$command: $args")
+          case BEGIN_STATEMENT_SET | END_STATEMENT_SET =>
+            logWarn(s"SQL Client Syntax: ${x.command.name} ")
+          case INSERT =>
+            statementSet.addInsertSql(x.originSql)
+            hasInsert = true
+          case SELECT =>
+            logError("StreamPark dose not support 'SELECT' statement now!")
+            throw new RuntimeException("StreamPark dose not support 'select' statement now!")
+          case DELETE | UPDATE =>
+            // NOTE(review): this branch only checks the runtime mode; the statement itself is
+            // never submitted here - confirm whether that is intended.
+            AssertUtils.required(
+              runMode != "STREAMING",
+              s"Currently, ${command.toUpperCase()} statement only supports in batch mode, " +
+                s"and it requires the target table connector implements the SupportsRowLevelDelete, " +
+                s"For more details please refer to: https://nightlies.apache.org/flink/flink-docs-release-1.18/docs/dev/table/sql/$command")
+          case _ =>
+            // Acquire before entering `try` so that `finally` only runs once the lock is held.
+            lock.lock()
+            try {
+              context.executeSql(x.originSql)
+              logInfo(s"$command:$args")
+            } finally {
+              lock.unlock()
+            }
+        }
+      })
+
+    if (hasInsert) {
+      statementSet.execute() match {
+        case t if t != null =>
+          // The job id is only logged when a job client is available.
+          Try(t.getJobClient.get.getJobID).getOrElse(null) match {
+            case x if x != null => logInfo(s"jobId:$x")
+            case _ =>
+          }
+        case _ =>
+      }
+    } else {
+      logError("No 'INSERT' statement to trigger the execution of the Flink job.")
+      throw new RuntimeException("No 'INSERT' statement to trigger the execution of the Flink job.")
+    }
+
+    logInfo(
+      s"\n\n\n==============flinkSql==============\n\n $flinkSql\n\n============================\n\n\n")
+  }
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkSqlValidator.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkSqlValidator.scala
new file mode 100644
index 0000000000..42344c9d7f
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkSqlValidator.scala
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.enums.FlinkSqlValidationFailedType
+import org.apache.streampark.common.util.{ExceptionUtils, Logger}
+import org.apache.streampark.flink.core.SqlCommand._
+
+import org.apache.calcite.config.Lex
+import org.apache.calcite.sql.parser.SqlParser
+import org.apache.calcite.sql.parser.SqlParser.Config
+import org.apache.flink.sql.parser.validate.FlinkSqlConformance
+import org.apache.flink.table.api.SqlDialect
+import org.apache.flink.table.api.SqlDialect.{DEFAULT, HIVE}
+import org.apache.flink.table.api.config.TableConfigOptions
+import org.apache.flink.table.planner.delegation.FlinkSqlParserFactories
+
+import scala.language.existentials
+import scala.util.{Failure, Try}
+
+/** Syntax validation of Flink SQL scripts, performed with the planner's Calcite parser. */
+object FlinkSqlValidator extends Logger {
+
+  private[this] val FLINK112_CALCITE_PARSER_CLASS =
+    "org.apache.flink.table.planner.calcite.CalciteParser"
+
+  private[this] val FLINK113_PLUS_CALCITE_PARSER_CLASS =
+    "org.apache.flink.table.planner.parse.CalciteParser"
+
+  // Captures the line and column numbers out of a parser error message.
+  private[this] val SYNTAX_ERROR_REGEXP =
+    ".*at\\sline\\s(\\d+),\\scolumn\\s(\\d+).*".r
+
+  /** Parser configuration per dialect name (`SqlDialect.name()`), built on first use. */
+  private[this] lazy val sqlParserConfigMap: Map[String, SqlParser.Config] = {
+    def getConfig(sqlDialect: SqlDialect): Config = {
+      // Both supported dialects resolve to the same conformance in this shim; the previous
+      // try/catch around the HIVE branch fell back to the identical value.
+      val conformance = sqlDialect match {
+        case HIVE | DEFAULT => FlinkSqlConformance.DEFAULT
+        case _ =>
+          throw new UnsupportedOperationException(s"Unsupported sqlDialect: $sqlDialect")
+      }
+      SqlParser.config
+        .withParserFactory(FlinkSqlParserFactories.create(conformance))
+        .withConformance(conformance)
+        .withLex(Lex.JAVA)
+        .withIdentifierMaxLength(256)
+    }
+
+    Map(
+      SqlDialect.DEFAULT.name() -> getConfig(SqlDialect.DEFAULT),
+      SqlDialect.HIVE.name() -> getConfig(SqlDialect.HIVE))
+  }
+
+  /**
+   * Validates every statement of `sql` and reports the first failure.
+   *
+   * `SET`/`RESET` are not parsed (a `SET` of the SQL dialect option switches the dialect used for
+   * the following statements); statement-set markers are only logged; every other statement is
+   * handed to the Calcite parser when the dialect is DEFAULT and skipped when it is HIVE.
+   *
+   * @param sql
+   *   the script to validate
+   * @return
+   *   the result produced by the parser callback if splitting the script fails, a failed result
+   *   for the first statement the Calcite parser rejects, a failed result when no `INSERT`
+   *   statement is present, otherwise a default (successful) result
+   */
+  def verifySql(sql: String): FlinkSqlValidationResult = {
+    // The callback performs a non-local return: a failure reported while splitting the script
+    // becomes the result of verifySql itself.
+    val sqlCommands = SqlCommandParser.parseSQL(sql, r => return r)
+    var sqlDialect = SqlDialect.DEFAULT.name().toLowerCase()
+    var hasInsert = false
+    for (call <- sqlCommands) {
+      // Commands may carry no operands (FlinkSqlExecutor guards the same access), so avoid
+      // `head`, which throws on an empty collection.
+      val args = call.operands.headOption.orNull
+      val command = call.command
+      command match {
+        case SET | RESET =>
+          if (command == SET && args == TableConfigOptions.TABLE_SQL_DIALECT.key()) {
+            sqlDialect = call.operands.last
+          }
+        case BEGIN_STATEMENT_SET | END_STATEMENT_SET =>
+          logWarn(s"SQL Client Syntax: ${call.command.name} ")
+        case _ =>
+          if (command == INSERT) {
+            hasInsert = true
+          }
+          Try {
+            // The parser class moved between Flink versions; try the old location first.
+            val calciteClass = Try(Class.forName(FLINK112_CALCITE_PARSER_CLASS))
+              .getOrElse(Class.forName(FLINK113_PLUS_CALCITE_PARSER_CLASS))
+            sqlDialect.toUpperCase() match {
+              // Hive-dialect statements are not parsed here.
+              case "HIVE" =>
+              case "DEFAULT" =>
+                val parser = calciteClass
+                  .getConstructor(Array(classOf[Config]): _*)
+                  .newInstance(sqlParserConfigMap(sqlDialect.toUpperCase()))
+                val method =
+                  parser.getClass.getDeclaredMethod("parse", classOf[String])
+                method.setAccessible(true)
+                method.invoke(parser, call.originSql)
+              case _ =>
+                throw new UnsupportedOperationException(s"unsupported dialect: $sqlDialect")
+            }
+          } match {
+            case Failure(e) =>
+              val exception = ExceptionUtils.stringifyException(e)
+              val causedBy = exception.drop(exception.indexOf("Caused by:"))
+              // Line breaks are stripped so the regex can match the message as a whole.
+              val cleanUpError = exception.replaceAll("[\r\n]", "")
+              cleanUpError match {
+                case SYNTAX_ERROR_REGEXP(line, column) =>
+                  // The parser counts lines within the statement; shift to script coordinates.
+                  val errorLine = call.lineStart + line.toInt - 1
+                  return FlinkSqlValidationResult(
+                    success = false,
+                    failedType = FlinkSqlValidationFailedType.SYNTAX_ERROR,
+                    lineStart = call.lineStart,
+                    lineEnd = call.lineEnd,
+                    errorLine = errorLine,
+                    errorColumn = column.toInt,
+                    sql = call.originSql,
+                    exception = causedBy.replaceAll(s"at\\sline\\s$line", s"at line $errorLine"))
+                case _ =>
+                  return FlinkSqlValidationResult(
+                    success = false,
+                    failedType = FlinkSqlValidationFailedType.SYNTAX_ERROR,
+                    lineStart = call.lineStart,
+                    lineEnd = call.lineEnd,
+                    sql = call.originSql,
+                    exception = causedBy)
+              }
+            case _ =>
+          }
+      }
+    }
+
+    if (hasInsert) {
+      FlinkSqlValidationResult()
+    } else {
+      FlinkSqlValidationResult(
+        success = false,
+        failedType = FlinkSqlValidationFailedType.SYNTAX_ERROR,
+        lineStart = sqlCommands.head.lineStart,
+        lineEnd = sqlCommands.last.lineEnd,
+        exception = "No 'INSERT' statement to trigger the execution of the Flink job.")
+    }
+  }
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkStreamTableTraitV2.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkStreamTableTraitV2.scala
new file mode 100644
index 0000000000..bc8ee999db
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkStreamTableTraitV2.scala
@@ -0,0 +1,396 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.util.Implicits.JavaList
+import org.apache.streampark.common.util.Utils
+import org.apache.streampark.flink.core.EnhancerImplicit._
+
+import org.apache.flink.api.common.{JobExecutionResult, RuntimeExecutionMode}
+import org.apache.flink.api.common.cache.DistributedCache
+import org.apache.flink.api.common.eventtime.WatermarkStrategy
+import org.apache.flink.api.common.io.{FileInputFormat, FilePathFilter, InputFormat}
+import org.apache.flink.api.connector.source.{Source, SourceSplit}
+import org.apache.flink.api.java.tuple
+import org.apache.flink.configuration.ReadableConfig
+import org.apache.flink.core.execution.{JobClient, JobListener}
+import org.apache.flink.streaming.api.CheckpointingMode
+import org.apache.flink.streaming.api.datastream.DataStream
+import org.apache.flink.streaming.api.environment.{CheckpointConfig, StreamExecutionEnvironment}
+import org.apache.flink.streaming.api.functions.source.FileProcessingMode
+import org.apache.flink.streaming.api.graph.StreamGraph
+import org.apache.flink.table.api._
+import org.apache.flink.table.api.bridge.java.StreamTableEnvironment
+import org.apache.flink.table.catalog.Catalog
+import org.apache.flink.table.functions._
+import org.apache.flink.table.module.Module
+import org.apache.flink.types.Row
+import org.apache.flink.util.{ParameterTool, SplittableIterator}
+
+import java.util.Optional
+
+/**
+ * Integration api of stream and table for Flink 2.x (using Java DataStream API instead of removed Scala API).
+ *
+ * Methods prefixed with `$` forward to the wrapped stream environment; the overridden
+ * `StreamTableEnvironment` methods forward to the wrapped table environment.
+ *
+ * @param parameter
+ *   parameter
+ * @param streamEnv
+ *   streamEnv
+ * @param tableEnv
+ *   tableEnv
+ */
+abstract class FlinkStreamTableTraitV2(
+    val parameter: ParameterTool,
+    private val streamEnv: StreamExecutionEnvironment,
+    private val tableEnv: StreamTableEnvironment)
+  extends StreamTableEnvironment {
+
+  /**
+   * Once a Table has been converted to a DataStream, the DataStream job must be executed using the
+   * execute method of the StreamExecutionEnvironment.
+   */
+  var isConvertedToDataStream: Boolean = false
+
+  /** Recommended to use this Api to start tasks */
+  def start(name: String = null): JobExecutionResult = {
+    val appName = parameter.getAppName(name, true)
+    execute(appName)
+  }
+
+  /**
+   * Executes the stream environment under `jobName`, but only if a table has been converted to a
+   * DataStream through this instance; otherwise nothing is submitted and `null` is returned.
+   */
+  @deprecated def execute(jobName: String): JobExecutionResult = {
+    Utils.printLogo(s"FlinkStreamTable $jobName Starting...")
+    if (isConvertedToDataStream) {
+      streamEnv.execute(jobName)
+    } else null
+  }
+
+  /** Runs the SQL script resolved from `parameter` through [[FlinkSqlExecutor]]. */
+  def sql(sql: String = null)(implicit callback: String => Unit = null): Unit =
+    FlinkSqlExecutor.executeSql(sql, parameter, this)
+
+  // ...streamEnv api start...
+
+  def $getCachedFiles: JavaList[tuple.Tuple2[String, DistributedCache.DistributedCacheEntry]] =
+    this.streamEnv.getCachedFiles
+
+  def $getJobListeners: JavaList[JobListener] = this.streamEnv.getJobListeners
+
+  def $setParallelism(parallelism: Int): Unit =
+    this.streamEnv.setParallelism(parallelism)
+
+  def $setRuntimeMode(deployMode: RuntimeExecutionMode): StreamExecutionEnvironment =
+    this.streamEnv.setRuntimeMode(deployMode)
+
+  def $setMaxParallelism(maxParallelism: Int): Unit =
+    this.streamEnv.setMaxParallelism(maxParallelism)
+
+  def $getParallelism: Int = this.streamEnv.getParallelism
+
+  def $getMaxParallelism: Int = this.streamEnv.getMaxParallelism
+
+  def $setBufferTimeout(timeoutMillis: Long): StreamExecutionEnvironment =
+    this.streamEnv.setBufferTimeout(timeoutMillis)
+
+  def $getBufferTimeout: Long = this.streamEnv.getBufferTimeout
+
+  def $disableOperatorChaining(): StreamExecutionEnvironment =
+    this.streamEnv.disableOperatorChaining()
+
+  def $getCheckpointConfig: CheckpointConfig =
+    this.streamEnv.getCheckpointConfig
+
+  def $enableCheckpointing(interval: Long, mode: CheckpointingMode): StreamExecutionEnvironment =
+    this.streamEnv.enableCheckpointing(interval, mode)
+
+  def $enableCheckpointing(interval: Long): StreamExecutionEnvironment =
+    this.streamEnv.enableCheckpointing(interval)
+
+  def $getCheckpointingMode: CheckpointingMode =
+    this.streamEnv.getCheckpointingMode
+
+  def $configure(configuration: ReadableConfig, classLoader: ClassLoader): Unit =
+    this.streamEnv.configure(configuration, classLoader)
+
+  def $fromSequence(from: Long, to: Long): DataStream[java.lang.Long] =
+    this.streamEnv.fromSequence(from, to)
+
+  // fromData with varargs removed in Flink 2.0
+  // NOTE(review): only a single element is accepted by this wrapper - confirm that is intended.
+  def $fromData[T](data: T): DataStream[T] =
+    this.streamEnv.fromData(data)
+
+  def $fromCollection[T](data: java.util.Collection[T]): DataStream[T] =
+    this.streamEnv.fromCollection(data)
+
+  def $fromParallelCollection[T](data: SplittableIterator[T], clazz: Class[T]): DataStream[T] =
+    this.streamEnv.fromParallelCollection(data, clazz)
+
+  def $readFile[T](inputFormat: FileInputFormat[T], filePath: String): DataStream[T] =
+    this.streamEnv.readFile(inputFormat, filePath)
+
+  def $readFile[T](
+      inputFormat: FileInputFormat[T],
+      filePath: String,
+      watchType: FileProcessingMode,
+      interval: Long): DataStream[T] =
+    this.streamEnv.readFile(inputFormat, filePath, watchType, interval)
+
+  def $socketTextStream(
+      hostname: String,
+      port: Int,
+      delimiter: Char,
+      maxRetry: Long): DataStream[String] =
+    this.streamEnv.socketTextStream(hostname, port, delimiter, maxRetry)
+
+  def $createInput[T](inputFormat: InputFormat[T, _]): DataStream[T] =
+    this.streamEnv.createInput(inputFormat)
+
+  def $fromSource[T](
+      source: Source[T, _ <: SourceSplit, _],
+      watermarkStrategy: WatermarkStrategy[T],
+      sourceName: String): DataStream[T] =
+    this.streamEnv.fromSource(source, watermarkStrategy, sourceName)
+
+  def $registerJobListener(jobListener: JobListener): Unit =
+    this.streamEnv.registerJobListener(jobListener)
+
+  def $clearJobListeners(): Unit = this.streamEnv.clearJobListeners()
+
+  def $executeAsync(): JobClient = this.streamEnv.executeAsync()
+
+  def $executeAsync(jobName: String): JobClient =
+    this.streamEnv.executeAsync(jobName)
+
+  def $getExecutionPlan: String = this.streamEnv.getExecutionPlan
+
+  def $getStreamGraph: StreamGraph = this.streamEnv.getStreamGraph
+
+  def $registerCachedFile(filePath: String, name: String): Unit =
+    this.streamEnv.registerCachedFile(filePath, name)
+
+  def $registerCachedFile(filePath: String, name: String, executable: Boolean): Unit =
+    this.streamEnv.registerCachedFile(filePath, name, executable)
+
+  def $isUnalignedCheckpointsEnabled: Boolean =
+    this.streamEnv.isUnalignedCheckpointsEnabled
+
+  def $isForceUnalignedCheckpoints: Boolean =
+    this.streamEnv.isForceUnalignedCheckpoints
+
+  @deprecated def $readFile[T](
+      inputFormat: FileInputFormat[T],
+      filePath: String,
+      watchType: FileProcessingMode,
+      interval: Long,
+      filter: FilePathFilter): DataStream[T] =
+    this.streamEnv.readFile(inputFormat, filePath, watchType, interval, filter)
+
+  // ...streamEnv api end...
+
+  override def fromDataStream[T](dataStream: DataStream[T]): Table =
+    tableEnv.fromDataStream(dataStream)
+
+  override def fromDataStream[T](dataStream: DataStream[T], schema: Schema): Table =
+    tableEnv.fromDataStream(dataStream, schema)
+
+  override def fromChangelogStream(dataStream: DataStream[Row]): Table =
+    tableEnv.fromChangelogStream(dataStream)
+
+  override def fromChangelogStream(dataStream: DataStream[Row], schema: Schema): Table =
+    tableEnv.fromChangelogStream(dataStream, schema)
+
+  override def fromChangelogStream(
+      dataStream: DataStream[Row],
+      schema: Schema,
+      changelogMode: org.apache.flink.table.connector.ChangelogMode): Table =
+    tableEnv.fromChangelogStream(dataStream, schema, changelogMode)
+
+  override def createTemporaryView[T](path: String, dataStream: DataStream[T]): Unit =
+    tableEnv.createTemporaryView(path, dataStream)
+
+  override def createTemporaryView[T](
+      path: String,
+      dataStream: DataStream[T],
+      schema: Schema): Unit =
+    tableEnv.createTemporaryView(path, dataStream, schema)
+
+  // Every Table -> DataStream conversion below records the fact in isConvertedToDataStream so
+  // that `execute` knows the stream environment has to be run.
+
+  override def toDataStream(table: Table): DataStream[Row] = {
+    isConvertedToDataStream = true
+    tableEnv.toDataStream(table)
+  }
+
+  override def toDataStream[T](table: Table, targetClass: Class[T]): DataStream[T] = {
+    isConvertedToDataStream = true
+    tableEnv.toDataStream(table, targetClass)
+  }
+
+  override def toDataStream[T](
+      table: Table,
+      targetDataType: org.apache.flink.table.types.AbstractDataType[_]): DataStream[T] = {
+    isConvertedToDataStream = true
+    tableEnv.toDataStream(table, targetDataType)
+  }
+
+  override def toChangelogStream(table: Table): DataStream[Row] = {
+    isConvertedToDataStream = true
+    tableEnv.toChangelogStream(table)
+  }
+
+  override def toChangelogStream(table: Table, targetSchema: Schema): DataStream[Row] = {
+    isConvertedToDataStream = true
+    tableEnv.toChangelogStream(table, targetSchema)
+  }
+
+  override def toChangelogStream(
+      table: Table,
+      targetSchema: Schema,
+      changelogMode: org.apache.flink.table.connector.ChangelogMode): DataStream[Row] = {
+    isConvertedToDataStream = true
+    tableEnv.toChangelogStream(table, targetSchema, changelogMode)
+  }
+
+  override def createStatementSet(): org.apache.flink.table.api.bridge.java.StreamStatementSet =
+    tableEnv.createStatementSet()
+
+  // ...table env delegation...
+
+  // The varargs must be expanded with `: _*`; passing the Seq itself would hand the delegate a
+  // single value (the Seq) instead of the individual expressions.
+  override def fromValues(values: org.apache.flink.table.expressions.Expression*): Table =
+    tableEnv.fromValues(values: _*)
+
+  override def fromValues(
+      rowType: org.apache.flink.table.types.AbstractDataType[_],
+      values: org.apache.flink.table.expressions.Expression*): Table =
+    tableEnv.fromValues(rowType, values: _*)
+
+  override def fromValues(values: java.lang.Iterable[_]): Table =
+    tableEnv.fromValues(values)
+
+  override def fromValues(
+      rowType: org.apache.flink.table.types.AbstractDataType[_],
+      values: java.lang.Iterable[_]): Table =
+    tableEnv.fromValues(rowType, values)
+
+  override def registerCatalog(catalogName: String, catalog: Catalog): Unit =
+    tableEnv.registerCatalog(catalogName, catalog)
+
+  override def getCatalog(catalogName: String): Optional[Catalog] =
+    tableEnv.getCatalog(catalogName)
+
+  override def loadModule(moduleName: String, module: Module): Unit =
+    tableEnv.loadModule(moduleName, module)
+
+  override def unloadModule(moduleName: String): Unit =
+    tableEnv.unloadModule(moduleName)
+
+  override def createTemporarySystemFunction(
+      name: String,
+      functionClass: Class[_ <: UserDefinedFunction]): Unit =
+    tableEnv.createTemporarySystemFunction(name, functionClass)
+
+  override def createTemporarySystemFunction(
+      name: String,
+      functionInstance: UserDefinedFunction): Unit =
+    tableEnv.createTemporarySystemFunction(name, functionInstance)
+
+  override def dropTemporarySystemFunction(name: String): Boolean =
+    tableEnv.dropTemporarySystemFunction(name)
+
+  override def createFunction(path: String, functionClass: Class[_ <: UserDefinedFunction]): Unit =
+    tableEnv.createFunction(path, functionClass)
+
+  // `ignoreIfExists` is forwarded so the caller's choice is honoured by the delegate.
+  override def createFunction(
+      path: String,
+      functionClass: Class[_ <: UserDefinedFunction],
+      ignoreIfExists: Boolean): Unit =
+    tableEnv.createFunction(path, functionClass, ignoreIfExists)
+
+  override def dropFunction(path: String): Boolean = tableEnv.dropFunction(path)
+
+  override def createTemporaryFunction(
+      path: String,
+      functionClass: Class[_ <: UserDefinedFunction]): Unit =
+    tableEnv.createTemporaryFunction(path, functionClass)
+
+  override def createTemporaryFunction(path: String, functionInstance: UserDefinedFunction): Unit =
+    tableEnv.createTemporaryFunction(path, functionInstance)
+
+  override def dropTemporaryFunction(path: String): Boolean =
+    tableEnv.dropTemporaryFunction(path)
+
+  override def createTemporaryView(path: String, view: Table): Unit =
+    tableEnv.createTemporaryView(path, view)
+
+  override def from(path: String): Table = tableEnv.from(path)
+
+  override def listCatalogs(): Array[String] = tableEnv.listCatalogs()
+
+  override def listModules(): Array[String] = tableEnv.listModules()
+
+  override def listDatabases(): Array[String] = tableEnv.listDatabases()
+
+  override def listTables(): Array[String] = tableEnv.listTables()
+
+  override def listViews(): Array[String] = tableEnv.listViews()
+
+  override def listTemporaryTables(): Array[String] =
+    tableEnv.listTemporaryTables
+
+  override def listTemporaryViews(): Array[String] =
+    tableEnv.listTemporaryViews()
+
+  override def listUserDefinedFunctions(): Array[String] =
+    tableEnv.listUserDefinedFunctions()
+
+  override def listFunctions(): Array[String] = tableEnv.listFunctions()
+
+  override def dropTemporaryTable(path: String): Boolean =
+    tableEnv.dropTemporaryTable(path)
+
+  override def dropTemporaryView(path: String): Boolean =
+    tableEnv.dropTemporaryView(path)
+
+  override def explainSql(statement: String, extraDetails: ExplainDetail*): String =
+    tableEnv.explainSql(statement, extraDetails: _*)
+
+  override def sqlQuery(query: String): Table = tableEnv.sqlQuery(query)
+
+  override def executeSql(statement: String): TableResult =
+    tableEnv.executeSql(statement)
+
+  override def getCurrentCatalog: String = tableEnv.getCurrentCatalog
+
+  override def useCatalog(catalogName: String): Unit =
+    tableEnv.useCatalog(catalogName)
+
+  override def getCurrentDatabase: String = tableEnv.getCurrentDatabase
+
+  override def useDatabase(databaseName: String): Unit =
+    tableEnv.useDatabase(databaseName)
+
+  override def getConfig: TableConfig = tableEnv.getConfig
+
+  @deprecated override def registerFunction(name: String, function: ScalarFunction): Unit =
+    tableEnv.registerFunction(name, function)
+
+  @deprecated override def registerTable(name: String, table: Table): Unit =
+    tableEnv.registerTable(name, table)
+
+  @deprecated override def scan(tablePath: String*): Table =
+    tableEnv.scan(tablePath: _*)
+
+  @deprecated override def getCompletionHints(statement: String, position: Int): Array[String] =
+    tableEnv.getCompletionHints(statement, position)
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkStreamingInitializerV2.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkStreamingInitializerV2.scala
new file mode 100644
index 0000000000..69d1d19d8c
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkStreamingInitializerV2.scala
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.conf.ConfigKeys._
+import org.apache.streampark.common.enums.ApiType
+import org.apache.streampark.common.enums.ApiType.ApiType
+import org.apache.streampark.common.util._
+import org.apache.streampark.common.util.Implicits._
+import org.apache.streampark.flink.core.conf.FlinkConfiguration
+
+import collection.{mutable, Map}
+import org.apache.flink.configuration.Configuration
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
+import org.apache.flink.table.api.TableConfig
+import org.apache.flink.util.ParameterTool
+
+import java.io.File
+
+/** Factory methods that build a [[FlinkStreamingInitializerV2]] and expose its results. */
+private[flink] object FlinkStreamingInitializerV2 {
+
+  /**
+   * Scala-style entry point.
+   *
+   * @param args
+   *   main arguments of the job
+   * @param config
+   *   callback stored as `streamEnvConfFunc` before the environment is first accessed
+   * @return
+   *   the merged parameters and the stream execution environment
+   */
+  def initialize(
+      args: Array[String],
+      config: (StreamExecutionEnvironment, ParameterTool) => Unit): (ParameterTool, StreamExecutionEnvironment) = {
+    val initializer = new FlinkStreamingInitializerV2(args, ApiType.SCALA)
+    initializer.streamEnvConfFunc = config
+    val mergedParameter = initializer.configuration.parameter
+    (mergedParameter, initializer.streamEnv)
+  }
+
+  /**
+   * Java-style entry point.
+   *
+   * @param args
+   *   holder of the main arguments (`args.args`) and the Java callback (`args.conf`)
+   * @return
+   *   the merged parameters and the stream execution environment
+   */
+  def initialize(args: StreamEnvConfig): (ParameterTool, StreamExecutionEnvironment) = {
+    val initializer = new FlinkStreamingInitializerV2(args.args, ApiType.JAVA)
+    initializer.javaStreamEnvConfFunc = args.conf
+    val mergedParameter = initializer.configuration.parameter
+    (mergedParameter, initializer.streamEnv)
+  }
+}
+
+/**
+ * Reads the application config referenced by the main arguments and lazily creates the
+ * [[StreamExecutionEnvironment]] from it.
+ *
+ * @param args
+ *   main arguments of the job
+ * @param apiType
+ *   selects which of the Scala / Java callbacks is applied to the environment
+ */
+private[flink] class FlinkStreamingInitializerV2(args: Array[String], apiType: ApiType)
+  extends Logger {
+
+  var streamEnvConfFunc: (StreamExecutionEnvironment, ParameterTool) => Unit = _
+
+  var tableConfFunc: (TableConfig, ParameterTool) => Unit = _
+
+  var javaStreamEnvConfFunc: StreamEnvConfigFunction = _
+
+  var javaTableEnvConfFunc: TableEnvConfigFunction = _
+
+  // Evaluated in the constructor, so `configuration` (and `initParameter()`) runs at construction.
+  implicit private[flink] val parameter: ParameterTool = configuration.parameter
+
+  /** Environment built from `configuration.envConfig`, customised by the callback for `apiType`. */
+  lazy val streamEnv: StreamExecutionEnvironment = {
+    val env = StreamExecutionEnvironment.getExecutionEnvironment(configuration.envConfig)
+
+    if (apiType == ApiType.JAVA && javaStreamEnvConfFunc != null) {
+      javaStreamEnvConfFunc.configuration(env, configuration.parameter)
+    } else if (apiType == ApiType.SCALA && streamEnvConfFunc != null) {
+      streamEnvConfFunc(env, configuration.parameter)
+    }
+
+    env.getConfig.setGlobalJobParameters(configuration.parameter)
+    env
+  }
+
+  lazy val configuration: FlinkConfiguration = initParameter()
+
+  /**
+   * Builds the job configuration from the config referenced by the `KEY_APP_CONF()` argument.
+   *
+   * @throws ExceptionInInitializerError
+   *   when that argument is missing or empty
+   */
+  def initParameter(): FlinkConfiguration = {
+    val argsMap = ParameterTool.fromArgs(args)
+    val config = argsMap.get(KEY_APP_CONF(), null)
+    if (config == null || config == "") {
+      throw new ExceptionInInitializerError(
+        "[StreamPark] Usage:can't find config,please set \"--conf $path \" in main arguments")
+    }
+
+    val configMap = parseConfig(config)
+    val properConf = extractConfigByPrefix(configMap, KEY_FLINK_PROPERTY_PREFIX)
+    val appConf = extractConfigByPrefix(configMap, KEY_APP_PREFIX)
+
+    // config priority: explicitly specified priority > project profiles > system profiles
+    val systemLayer = ParameterTool.fromSystemProperties()
+    val withFlinkProps = systemLayer.mergeWith(ParameterTool.fromMap(properConf))
+    val withAppProps = withFlinkProps.mergeWith(ParameterTool.fromMap(appConf))
+    val parameter = withAppProps.mergeWith(argsMap)
+
+    val envConfig = Configuration.fromMap(properConf)
+    FlinkConfiguration(parameter, envConfig, null)
+  }
+
+  /**
+   * Loads the config as a flat key/value map and drops entries whose value is empty.
+   *
+   * `yaml://`, `conf://` and `prop://` values carry the content inline (everything after the
+   * 7-character scheme is passed to `DeflaterUtils.unzipString`); `hdfs://` values are read
+   * through `HdfsUtils`; anything else is treated as a local file path.
+   */
+  def parseConfig(config: String): Map[String, String] = {
+
+    // Only evaluated for the inline schemes.
+    lazy val content = DeflaterUtils.unzipString(config.drop(7))
+
+    // Chooses the parser from the extension of `config`.
+    def readConfig(text: String): Map[String, String] = {
+      val format = config.split("\\.").last.toLowerCase
+      if (format == "yml" || format == "yaml") {
+        PropertiesUtils.fromYamlText(text)
+      } else if (format == "conf") {
+        PropertiesUtils.fromHoconText(text)
+      } else if (format == "properties") {
+        PropertiesUtils.fromPropertiesText(text)
+      } else {
+        throw new IllegalArgumentException(
+          "[StreamPark] Usage: application config file error,must be [yaml|conf|properties]")
+      }
+    }
+
+    val map =
+      if (config.startsWith("yaml://")) {
+        PropertiesUtils.fromYamlText(content)
+      } else if (config.startsWith("conf://")) {
+        PropertiesUtils.fromHoconText(content)
+      } else if (config.startsWith("prop://")) {
+        PropertiesUtils.fromPropertiesText(content)
+      } else if (config.startsWith("hdfs://")) {
+        // If the configuration file with the hdfs, user will need to copy the hdfs-related configuration files under the resources dir
+        readConfig(HdfsUtils.read(config))
+      } else {
+        val configFile = new File(config)
+        require(
+          configFile.exists(),
+          s"[StreamPark] Usage: application config file: $configFile is not found!!!")
+        readConfig(FileUtils.readFile(configFile))
+      }
+    map.filter { case (_, value) => value.nonEmpty }
+  }
+
+  /** Returns the entries whose key starts with `prefix`, with that prefix removed from the key. */
+  def extractConfigByPrefix(configMap: Map[String, String], prefix: String): Map[String, String] = {
+    val map = mutable.Map[String, String]()
+    for ((key, value) <- configMap if key.startsWith(prefix)) {
+      map += key.drop(prefix.length) -> value
+    }
+    map
+  }
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkTableInitializerV2.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkTableInitializerV2.scala
new file mode 100644
index 0000000000..9a441db545
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkTableInitializerV2.scala
@@
-0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.conf.ConfigKeys._
+import org.apache.streampark.common.enums.{ApiType, PlannerType}
+import org.apache.streampark.common.enums.ApiType.ApiType
+import org.apache.streampark.common.util.{DeflaterUtils, PropertiesUtils}
+import org.apache.streampark.common.util.Implicits._
+import org.apache.streampark.flink.core.EnhancerImplicit._
+import org.apache.streampark.flink.core.conf.FlinkConfiguration
+
+import org.apache.flink.configuration.Configuration
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
+import org.apache.flink.table.api.{EnvironmentSettings, TableConfig, TableEnvironment}
+import org.apache.flink.table.api.bridge.java.StreamTableEnvironment
+import org.apache.flink.util.ParameterTool
+
+import java.io.File
+
+import scala.collection.{mutable, Map}
+import scala.util.{Failure, Success, Try}
+
+/** Factory methods that build a [[FlinkTableInitializerV2]] and expose its results. */
+private[flink] object FlinkTableInitializerV2 {
+
+  /**
+   * Scala-style entry point for a pure table job.
+   *
+   * @param args
+   *   main arguments of the job
+   * @param config
+   *   callback stored as `tableConfFunc`
+   * @return
+   *   the merged parameters and the batch-mode table environment
+   */
+  def initialize(
+      args: Array[String],
+      config: (TableConfig, ParameterTool) => Unit): (ParameterTool, TableEnvironment) = {
+    val flinkInitializer = new FlinkTableInitializerV2(args, ApiType.SCALA)
+    flinkInitializer.tableConfFunc = config
+    (flinkInitializer.configuration.parameter, flinkInitializer.tableEnv)
+  }
+
+  /** Java-style entry point for a pure table job; the callback comes from `args.conf`. */
+  def initialize(args: TableEnvConfig): (ParameterTool, TableEnvironment) = {
+    val flinkInitializer = new FlinkTableInitializerV2(args.args, ApiType.JAVA)
+    flinkInitializer.javaTableEnvConfFunc = args.conf
+    (flinkInitializer.configuration.parameter, flinkInitializer.tableEnv)
+  }
+
+  /**
+   * Scala-style entry point for a stream + table job.
+   *
+   * @param args
+   *   main arguments of the job
+   * @param configStream
+   *   callback stored as `streamEnvConfFunc`
+   * @param configTable
+   *   callback stored as `tableConfFunc`
+   * @return
+   *   the merged parameters, the stream environment and the stream table environment
+   */
+  def initialize(
+      args: Array[String],
+      configStream: (StreamExecutionEnvironment, ParameterTool) => Unit,
+      configTable: (TableConfig, ParameterTool) => Unit): (ParameterTool, StreamExecutionEnvironment, StreamTableEnvironment) = {
+
+    val flinkInitializer = new FlinkTableInitializerV2(args, ApiType.SCALA)
+    flinkInitializer.streamEnvConfFunc = configStream
+    flinkInitializer.tableConfFunc = configTable
+    (
+      flinkInitializer.configuration.parameter,
+      flinkInitializer.streamEnv,
+      flinkInitializer.streamTableEnv)
+  }
+
+  /** Java-style entry point for a stream + table job; callbacks come from `args`. */
+  def initialize(
+      args: StreamTableEnvConfig): (ParameterTool, StreamExecutionEnvironment, StreamTableEnvironment) = {
+    val flinkInitializer = new FlinkTableInitializerV2(args.args, ApiType.JAVA)
+    flinkInitializer.javaStreamEnvConfFunc = args.streamConfig
+    flinkInitializer.javaTableEnvConfFunc = args.tableConfig
+    (
+      flinkInitializer.configuration.parameter,
+      flinkInitializer.streamEnv,
+      flinkInitializer.streamTableEnv)
+  }
+
+}
+
+/**
+ * Extends [[FlinkStreamingInitializerV2]] with lazily created table environments.
+ *
+ * @param args
+ *   main arguments of the job
+ * @param apiType
+ *   selects which of the Scala / Java callbacks is applied to the table config
+ */
+private[flink] class FlinkTableInitializerV2(args: Array[String], apiType: ApiType)
+  extends FlinkStreamingInitializerV2(args, apiType) {
+
+  /**
+   * Shared `EnvironmentSettings` builder. The planner selector methods are looked up by
+   * reflection and only invoked when the builder class still declares them; otherwise a
+   * warning is logged.
+   */
+  private[this] lazy val envSettings = {
+
+    val builder = EnvironmentSettings.newInstance()
+
+    // An unknown or missing planner name falls back to BLINK.
+    Try(PlannerType.withName(parameter.get(KEY_FLINK_TABLE_PLANNER)))
+      .getOrElse(PlannerType.BLINK) match {
+      case PlannerType.BLINK =>
+        val useBlinkPlanner =
+          Try(builder.getClass.getDeclaredMethod("useBlinkPlanner"))
+            .getOrElse(null)
+        if (useBlinkPlanner == null) {
+          logWarn("useBlinkPlanner deprecated")
+        } else {
+          useBlinkPlanner.setAccessible(true)
+          useBlinkPlanner.invoke(builder)
+          logInfo("blinkPlanner will be used.")
+        }
+      case PlannerType.OLD =>
+        val useOldPlanner = Try(builder.getClass.getDeclaredMethod("useOldPlanner")).getOrElse(null)
+        if (useOldPlanner == null) {
+          logWarn("useOldPlanner deprecated")
+        } else {
+          useOldPlanner.setAccessible(true)
+          useOldPlanner.invoke(builder)
+          logInfo("useOldPlanner will be used.")
+        }
+      case PlannerType.ANY =>
+        val useAnyPlanner = Try(builder.getClass.getDeclaredMethod("useAnyPlanner")).getOrElse(null)
+        if (useAnyPlanner == null) {
+          logWarn("useAnyPlanner deprecated")
+        } else {
+          logInfo("useAnyPlanner will be used.")
+          useAnyPlanner.setAccessible(true)
+          useAnyPlanner.invoke(builder)
+        }
+    }
+
+    // The `KEY_FLINK_CONF()` value is unzipped and parsed as YAML into the builder configuration.
+    parameter.get(KEY_FLINK_CONF(), null) match {
+      case null | "" =>
+        throw new ExceptionInInitializerError(
+          "[StreamPark] Usage:can't find config,please set \"--flink.conf $conf \" in main arguments")
+      case conf => builder.withConfiguration(
+          Configuration.fromMap(PropertiesUtils.fromYamlText(DeflaterUtils.unzipString(conf))))
+    }
+    val buildWith =
+      (parameter.get(KEY_FLINK_TABLE_CATALOG), parameter.get(KEY_FLINK_TABLE_DATABASE))
+    buildWith match {
+      case (x: String, y: String) if x != null && y != null =>
+        logInfo(s"with built in catalog: $x")
+        logInfo(s"with built in database: $y")
+        builder.withBuiltInCatalogName(x)
+        builder.withBuiltInDatabaseName(y)
+      case (x: String, _) if x != null =>
+        logInfo(s"with built in catalog: $x")
+        builder.withBuiltInCatalogName(x)
+      case (_, y: String) if y != null =>
+        logInfo(s"with built in database: $y")
+        builder.withBuiltInDatabaseName(y)
+      case _ =>
+    }
+    builder
+  }
+
+  /** Batch-mode table environment, customised by the table callback matching `apiType`. */
+  lazy val tableEnv: TableEnvironment = {
+    logInfo(s"job working in batch mode")
+    envSettings.inBatchMode()
+    val tableEnv = TableEnvironment.create(envSettings.build()).setAppName
+    apiType match {
+      case ApiType.JAVA if javaTableEnvConfFunc != null =>
+        javaTableEnvConfFunc.configuration(tableEnv.getConfig, parameter)
+      case ApiType.SCALA if tableConfFunc != null =>
+        tableConfFunc(tableEnv.getConfig, parameter)
+      case _ =>
+    }
+    tableEnv
+  }
+
+  /** Streaming-mode table environment created on top of `streamEnv`. */
+  lazy val streamTableEnv: StreamTableEnvironment = {
+    logInfo(s"components should work in streaming mode")
+    envSettings.inStreamingMode()
+    val setting = envSettings.build()
+
+    // NOTE(review): the `streamEnv` initializer in FlinkStreamingInitializerV2 already invokes
+    // the stream callback matching `apiType`, so the calls below run that callback a second
+    // time on the same environment - confirm this is intended.
+    if (streamEnvConfFunc != null) {
+      streamEnvConfFunc(streamEnv, parameter)
+    }
+    if (javaStreamEnvConfFunc != null) {
+      javaStreamEnvConfFunc.configuration(streamEnv, parameter)
+    }
+    val streamTableEnv =
+      StreamTableEnvironment.create(streamEnv, setting).setAppName
+    apiType match {
+      case ApiType.JAVA if javaTableEnvConfFunc != null =>
+        javaTableEnvConfFunc.configuration(streamTableEnv.getConfig, parameter)
+      case ApiType.SCALA if tableConfFunc != null =>
+        tableConfFunc(streamTableEnv.getConfig, parameter)
+      case _ =>
+    }
+    streamTableEnv
+  }
+
+  /**
+   * In case of table SQL, the parameter conf is not required, it depends on the developer.
+   *
+   * Without a `KEY_APP_CONF()` argument only system properties and main arguments are used.
+   * Otherwise the config is parsed and its sql / flink-property / app / table sections are
+   * merged into the parameters. Afterwards the `KEY_FLINK_SQL()` value, when present, is
+   * resolved: first as a deflated string, then as the path of a YAML file. If both attempts
+   * fail, a warning is logged and the configuration is returned with the value unchanged.
+   */
+  override def initParameter(): FlinkConfiguration = {
+    val configuration = {
+      val argsMap = ParameterTool.fromArgs(args)
+      argsMap.get(KEY_APP_CONF(), null) match {
+        case null | "" =>
+          logWarn("Usage:can't find config,you can set \"--conf $path \" in main arguments")
+          val parameter =
+            ParameterTool.fromSystemProperties().mergeWith(argsMap)
+          FlinkConfiguration(parameter, new Configuration(), new Configuration())
+        case file =>
+          val configMap = parseConfig(file)
+          // set sql..
+          val sqlConf = mutable.Map[String, String]()
+          configMap.foreach(x => {
+            if (x._1.startsWith(KEY_SQL_PREFIX)) {
+              sqlConf += x._1.drop(KEY_SQL_PREFIX.length) -> x._2
+            }
+          })
+
+          // config priority: explicitly specified priority > project profiles > system profiles
+          val properConf =
+            extractConfigByPrefix(configMap, KEY_FLINK_PROPERTY_PREFIX)
+          val appConf = extractConfigByPrefix(configMap, KEY_APP_PREFIX)
+          val tableConf =
+            extractConfigByPrefix(configMap, KEY_FLINK_TABLE_PREFIX)
+
+          val tableConfig = Configuration.fromMap(tableConf)
+          val envConfig = Configuration.fromMap(properConf)
+
+          val parameter = ParameterTool
+            .fromSystemProperties()
+            .mergeWith(ParameterTool.fromMap(properConf))
+            .mergeWith(ParameterTool.fromMap(tableConf))
+            .mergeWith(ParameterTool.fromMap(appConf))
+            .mergeWith(ParameterTool.fromMap(sqlConf))
+            .mergeWith(argsMap)
+
+          FlinkConfiguration(parameter, envConfig, tableConfig)
+      }
+    }
+
+    configuration.parameter.get(KEY_FLINK_SQL()) match {
+      case null => configuration
+      case param =>
+        // for streampark-console
+        Try(DeflaterUtils.unzipString(param)) match {
+          case Success(value) =>
+            configuration.copy(parameter = configuration.parameter.mergeWith(
+              ParameterTool.fromMap(Map(KEY_FLINK_SQL() -> value))))
+          case Failure(_) =>
+            val sqlFile = new File(param)
+            Try(PropertiesUtils.fromYamlFile(sqlFile.getAbsolutePath)) match {
+              case Success(value) =>
+                configuration.copy(parameter =
+                  configuration.parameter.mergeWith(ParameterTool.fromMap(value)))
+              case Failure(e) =>
+                // Previously an exception object was created here and discarded, which hid
+                // the failure completely. Keep the best-effort fallback but make it visible.
+                logWarn(s"[StreamPark] init sql error, the sql parameter is kept unchanged. $e")
+                configuration
+            }
+        }
+    }
+  }
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkTableTrait.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkTableTrait.scala
new file mode 100644
index 0000000000..32bef31194
--- /dev/null
+++
b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/FlinkTableTrait.scala
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.util.Utils
+import org.apache.streampark.flink.core.EnhancerImplicit._
+
+import org.apache.flink.api.common.JobExecutionResult
+import org.apache.flink.table.api._
+import org.apache.flink.table.catalog.Catalog
+import org.apache.flink.table.expressions.Expression
+import org.apache.flink.table.functions._
+import org.apache.flink.table.module.Module
+import org.apache.flink.table.types.AbstractDataType
+import org.apache.flink.util.ParameterTool
+
+import java.lang
+import java.util.Optional
+
+/**
+ * Base class that implements `TableEnvironment` by forwarding each call to the wrapped
+ * `tableEnv`, and adds the StreamPark `start` / `execute` / `sql` entry points.
+ *
+ * @param parameter
+ *   job parameters, used to resolve the application name and passed to the SQL executor
+ * @param tableEnv
+ *   the environment every overridden method delegates to
+ */
+abstract class FlinkTableTrait(val parameter: ParameterTool, private val tableEnv: TableEnvironment)
+  extends TableEnvironment {
+
+  /** Resolves the (required) application name from `parameter` and calls `execute` with it. */
+  def start(): JobExecutionResult = {
+    val appName = parameter.getAppName(required = true)
+    execute(appName)
+  }
+
+  /** Prints the start-up logo for `jobName`; this base implementation returns `null`. */
+  def execute(jobName: String): JobExecutionResult = {
+    Utils.printLogo(s"FlinkTable $jobName Starting...")
+    null
+  }
+
+  /** Hands `sql` (may be `null`) together with `parameter` to `FlinkSqlExecutor.executeSql`. */
+  def sql(sql: String = null): Unit =
+    FlinkSqlExecutor.executeSql(sql, parameter, this)
+
+  // The varargs must be expanded with `: _*`; passing `values` directly would forward the
+  // whole Seq as one single argument instead of one argument per expression.
+  override def fromValues(values: Expression*): Table =
+    tableEnv.fromValues(values: _*)
+
+  override def fromValues(rowType: AbstractDataType[_], values: Expression*): Table =
+    tableEnv.fromValues(rowType, values: _*)
+
+  override def fromValues(values: lang.Iterable[_]): Table =
+    tableEnv.fromValues(values)
+
+  override def fromValues(rowType: AbstractDataType[_], values: lang.Iterable[_]): Table =
+    tableEnv.fromValues(rowType, values)
+
+  override def registerCatalog(catalogName: String, catalog: Catalog): Unit =
+    tableEnv.registerCatalog(catalogName, catalog)
+
+  override def getCatalog(catalogName: String): Optional[Catalog] =
+    tableEnv.getCatalog(catalogName)
+
+  override def loadModule(moduleName: String, module: Module): Unit =
+    tableEnv.loadModule(moduleName, module)
+
+  override def unloadModule(moduleName: String): Unit =
+    tableEnv.unloadModule(moduleName)
+
+  override def createTemporarySystemFunction(
+      name: String,
+      functionClass: Class[_ <: UserDefinedFunction]): Unit =
+    tableEnv.createTemporarySystemFunction(name, functionClass)
+
+  override def createTemporarySystemFunction(
+      name: String,
+      functionInstance: UserDefinedFunction): Unit =
+    tableEnv.createTemporarySystemFunction(name, functionInstance)
+
+  override def dropTemporarySystemFunction(name: String): Boolean =
+    tableEnv.dropTemporarySystemFunction(name)
+
+  override def createFunction(path: String, functionClass: Class[_ <: UserDefinedFunction]): Unit =
+    tableEnv.createFunction(path, functionClass)
+
+  // `ignoreIfExists` has to be forwarded; dropping it silently selects the two-argument
+  // overload and ignores the caller's choice.
+  override def createFunction(
+      path: String,
+      functionClass: Class[_ <: UserDefinedFunction],
+      ignoreIfExists: Boolean): Unit =
+    tableEnv.createFunction(path, functionClass, ignoreIfExists)
+
+  override def dropFunction(path: String): Boolean = tableEnv.dropFunction(path)
+
+  override def createTemporaryFunction(
+      path: String,
+      functionClass: Class[_ <: UserDefinedFunction]): Unit =
+    tableEnv.createTemporaryFunction(path, functionClass)
+
+  // Every method below forwards its arguments unchanged to the wrapped `tableEnv`.
+  override def createTemporaryFunction(path: String, functionInstance: UserDefinedFunction): Unit =
+    tableEnv.createTemporaryFunction(path, functionInstance)
+
+  override def dropTemporaryFunction(path: String): Boolean =
+    tableEnv.dropTemporaryFunction(path)
+
+  override def createTemporaryView(path: String, view: Table): Unit =
+    tableEnv.createTemporaryView(path, view)
+
+  override def from(path: String): Table = tableEnv.from(path)
+
+  override def listCatalogs(): Array[String] = tableEnv.listCatalogs()
+
+  override def listModules(): Array[String] = tableEnv.listModules()
+
+  override def listDatabases(): Array[String] = tableEnv.listDatabases()
+
+  override def listTables(): Array[String] = tableEnv.listTables()
+
+  override def listViews(): Array[String] = tableEnv.listViews()
+
+  override def listTemporaryTables(): Array[String] =
+    tableEnv.listTemporaryTables
+
+  override def listTemporaryViews(): Array[String] =
+    tableEnv.listTemporaryViews()
+
+  override def listUserDefinedFunctions(): Array[String] =
+    tableEnv.listUserDefinedFunctions()
+
+  override def listFunctions(): Array[String] = tableEnv.listFunctions()
+
+  override def dropTemporaryTable(path: String): Boolean =
+    tableEnv.dropTemporaryTable(path)
+
+  override def dropTemporaryView(path: String): Boolean =
+    tableEnv.dropTemporaryView(path)
+
+  override def explainSql(statement: String, extraDetails: ExplainDetail*): String =
+    tableEnv.explainSql(statement, extraDetails: _*)
+
+  override def sqlQuery(query: String): Table = tableEnv.sqlQuery(query)
+
+  override def executeSql(statement: String): TableResult =
+    tableEnv.executeSql(statement)
+
+  override def getCurrentCatalog: String = tableEnv.getCurrentCatalog
+
+  override def useCatalog(catalogName: String): Unit =
+    tableEnv.useCatalog(catalogName)
+
+  override def getCurrentDatabase: String = tableEnv.getCurrentDatabase
+
+  override def useDatabase(databaseName: String): Unit =
+    tableEnv.useDatabase(databaseName)
+
+  override def getConfig: TableConfig = tableEnv.getConfig
+
+  override def createStatementSet(): StatementSet =
+    tableEnv.createStatementSet()
+
+  // The remaining overrides are marked deprecated here and are likewise plain forwards.
+  @deprecated override def registerFunction(name: String, function: ScalarFunction): Unit =
+    tableEnv.registerFunction(name, function)
+
+  @deprecated override def registerTable(name: String, table: Table): Unit =
+    tableEnv.registerTable(name, table)
+
+  @deprecated override def scan(tablePath: String*): Table =
+    tableEnv.scan(tablePath: _*)
+
+  @deprecated override def getCompletionHints(statement: String, position: Int): Array[String] =
+    tableEnv.getCompletionHints(statement, position)
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/SqlCommandParser.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/SqlCommandParser.scala
new file mode 100644
index 0000000000..037509af2a
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/SqlCommandParser.scala
@@ -0,0 +1,651 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.conf.ConfigKeys.PARAM_PREFIX
+import org.apache.streampark.common.enums.FlinkSqlValidationFailedType
+import org.apache.streampark.common.util.Logger
+
+import enumeratum.EnumEntry
+import org.apache.commons.lang3.StringUtils
+
+import java.lang.{Boolean => JavaBool}
+import java.util.Scanner
+import java.util.regex.{Matcher, Pattern}
+
+import scala.annotation.tailrec
+import scala.collection.{immutable, mutable}
+import scala.collection.mutable.ListBuffer
+import scala.util.control.Breaks.{break, breakable}
+
+/** Splits a SQL script into statements and classifies each one as a [[SqlCommand]]. */
+object SqlCommandParser extends Logger {
+
+  /**
+   * Parses `sql` into the list of recognised command calls.
+   *
+   * Failures are reported through `validationCallback` when one is supplied; without a
+   * callback they are thrown. With a callback, an empty split result or an empty call list
+   * makes the method return `null`, while an unsupported statement is reported and parsing
+   * continues with the next statement.
+   *
+   * @param sql
+   *   the script to parse
+   * @param validationCallback
+   *   optional receiver of validation failures (default `null`)
+   * @return
+   *   the recognised calls in statement order, or `null` as described above
+   * @throws IllegalArgumentException
+   *   when `sql` is blank, or when no statement is found and no callback is given
+   * @throws UnsupportedOperationException
+   *   when, without a callback, a statement is unsupported or no executable statement remains
+   */
+  def parseSQL(
+      sql: String,
+      validationCallback: FlinkSqlValidationResult => Unit = null): List[SqlCommandCall] = {
+    val sqlEmptyError = "verify failed: flink sql cannot be empty."
+    require(StringUtils.isNotBlank(sql), sqlEmptyError)
+    val sqlSegments = SqlSplitter.splitSql(sql)
+    sqlSegments match {
+      case s if s.isEmpty =>
+        if (validationCallback != null) {
+          validationCallback(
+            FlinkSqlValidationResult(
+              success = false,
+              failedType = FlinkSqlValidationFailedType.VERIFY_FAILED,
+              exception = sqlEmptyError))
+          null
+        } else {
+          throw new IllegalArgumentException(sqlEmptyError)
+        }
+      case segments =>
+        val calls = new ListBuffer[SqlCommandCall]
+        for (segment <- segments) {
+          parseLine(segment) match {
+            case Some(x) => calls += x
+            case _ =>
+              if (validationCallback != null) {
+                validationCallback(
+                  FlinkSqlValidationResult(
+                    success = false,
+                    failedType = FlinkSqlValidationFailedType.UNSUPPORTED_SQL,
+                    lineStart = segment.start,
+                    lineEnd = segment.end,
+                    exception = s"unsupported sql",
+                    sql = segment.sql))
+              } else {
+                throw new UnsupportedOperationException(s"unsupported sql: ${segment.sql}")
+              }
+          }
+        }
+
+        calls.toList match {
+          case c if c.isEmpty =>
+            if (validationCallback != null) {
+              validationCallback(
+                FlinkSqlValidationResult(
+                  success = false,
+                  failedType = FlinkSqlValidationFailedType.VERIFY_FAILED,
+                  exception = "flink sql syntax error, no executable sql"))
+              null
+            } else {
+              throw new UnsupportedOperationException("flink sql syntax error, no executable sql")
+            }
+          case r => r
+        }
+    }
+  }
+
+  /**
+   * Classifies one statement. Returns `None` when no command matches the trimmed statement
+   * or when the command's converter rejects the captured groups.
+   */
+  private[this] def parseLine(sqlSegment: SqlSegment): Option[SqlCommandCall] = {
+    val sqlCommand = SqlCommand.get(sqlSegment.sql.trim)
+    if (sqlCommand == null) None
+    else {
+      // `get` left the successful matcher on the command; copy out capture groups 1..n.
+      val matcher = sqlCommand.matcher
+      val groups = new Array[String](matcher.groupCount)
+      for (i <- groups.indices) {
+        groups(i) = matcher.group(i + 1)
+      }
+      sqlCommand
+        .converter(groups)
+        .map(x =>
+          SqlCommandCall(sqlSegment.start, sqlSegment.end, sqlCommand, x, sqlSegment.sql.trim))
+    }
+  }
+
+}
+
+object Converters {
+  /** Converter for commands without operands: always yields an empty operand array. */
+  val NO_OPERANDS = (_: Array[String]) => Some(Array.empty[String])
+}
+
+/**
+ * One recognisable kind of SQL statement.
+ *
+ * @param name
+ *   display name of the command
+ * @param regex
+ *   pattern the whole statement must match (case-insensitive, `.` also matches line breaks)
+ * @param converter
+ *   turns the captured groups into operands; the default keeps only the first group
+ */
+sealed abstract class SqlCommand(
+    val name: String,
+    private val regex: String,
+    val converter: Array[String] => Option[Array[String]] = (x: Array[String]) =>
+      Some(Array[String](x.head)))
+  extends EnumEntry {
+
+  // NOTE(review): this is mutable state on command objects that are shared singletons, so
+  // concurrent calls to `matches` would overwrite each other's matcher - confirm that parsing
+  // is never done from several threads at once.
+  var matcher: Matcher = _
+
+  // Compiled on first use and then reused; previously the regex was recompiled on every
+  // `matches` call, i.e. for every statement tested against every command.
+  private[this] lazy val pattern: Pattern =
+    Pattern.compile(regex, Pattern.CASE_INSENSITIVE | Pattern.DOTALL)
+
+  /**
+   * Tests whether `input` as a whole matches this command and stores the matcher in
+   * `matcher`. A blank regex never matches and leaves `matcher` untouched.
+   */
+  def matches(input: String): Boolean = {
+    if (StringUtils.isBlank(regex)) false
+    else {
+      matcher = pattern.matcher(input)
+      matcher.matches()
+    }
+  }
+}
+
+object SqlCommand extends enumeratum.Enum[SqlCommand] {
+
+  /** Returns the first command, in declaration order, that matches `stmt`, or `null`. */
+  def get(stmt: String): SqlCommand = {
+    var cmd: SqlCommand = null
+    breakable {
+      this.values.foreach(x => {
+        if (x.matches(stmt)) {
+          cmd = x
+          break()
+        }
+      })
+    }
+    cmd
+  }
+
+  val values: immutable.IndexedSeq[SqlCommand] = findValues
+
+  // ---- SELECT Statements--------------------------------------------------------------------------------------------------------------------------------
+  case object SELECT extends SqlCommand("select", "(SELECT\\s+.+)")
+
+  // ----CREATE Statements--------------------------------------------------------------------------------------------------------------------------------
+
+  /**
+ *
 CREATE [TEMPORARY] TABLE [IF NOT EXISTS] [catalog_name.][db_name.]table_name ( {
+   *  |  |  }[ ,
+   * ...n] [  ] [  ][ , ...n] ) [COMMENT table_comment]
+   * [PARTITIONED BY (partition_column_name1, partition_column_name2, ...)] WITH (key1=val1,
+   * key2=val2, ...) [ LIKE source_table [(  )] ] 
CREATE CATALOG catalog_name WITH (key1=val1, key2=val2, ...) */ + case object CREATE_CATALOG extends SqlCommand("create catalog", "(CREATE\\s+CATALOG\\s+.+)") + + /** + *
 CREATE DATABASE [IF NOT EXISTS] [catalog_name.]db_name
[COMMENT database_comment]
+ * WITH (key1=val1, key2=val2, ...)
+ */ + case object CREATE_DATABASE extends SqlCommand("create database", "(CREATE\\s+DATABASE\\s+.+)") + + /** + *
 CREATE [TEMPORARY] VIEW [IF NOT EXISTS] [catalog_name.][db_name.]view_name [( columnName
+   * [, columnName ]* )] [COMMENT view_comment] AS query_expression< 
CREATE [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF NOT EXISTS] + * [catalog_name.][db_name.]function_name AS identifier [LANGUAGE JAVA|SCALA|PYTHON] DROP statements are used to remove a catalog with the given catalog name or to remove a + * registered table/view/function from the current or specified Catalog. + * + * Flink SQL supports the following DROP statements for now: * DROP CATALOG * DROP TABLE * DROP + * DATABASE * DROP VIEW * DROP FUNCTION + */ + + /** DROP CATALOG [IF EXISTS] catalog_name */ + case object DROP_CATALOG extends SqlCommand("drop catalog", "(DROP\\s+CATALOG\\s+.+)") + + /** DROP [TEMPORARY] TABLE [IF EXISTS] [catalog_name.][db_name.]table_name */ + case object DROP_TABLE extends SqlCommand("drop table", "(DROP\\s+(TEMPORARY\\s+|)TABLE\\s+.+)") + + /** DROP DATABASE [IF EXISTS] [catalog_name.]db_name [ (RESTRICT | CASCADE) ] */ + case object DROP_DATABASE extends SqlCommand("drop database", "(DROP\\s+DATABASE\\s+.+)") + + /** DROP [TEMPORARY] VIEW [IF EXISTS] [catalog_name.][db_name.]view_name */ + case object DROP_VIEW extends SqlCommand("drop view", "(DROP\\s+(TEMPORARY\\s+|)VIEW\\s+.+)") + + /** + * DROP [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF EXISTS] + * [catalog_name.][db_name.]function_name + */ + case object DROP_FUNCTION + extends SqlCommand( + "drop function", + "(DROP\\s+(TEMPORARY\\s+|TEMPORARY\\s+SYSTEM\\s+|)FUNCTION\\s+.+)") + + // ----ALTER Statements-------------------------------------------------------------------------------------------------------------------------------- + + /** + * ALTER TABLE [catalog_name.][db_name.]table_name RENAME TO new_table_name + * + * ALTER TABLE [catalog_name.][db_name.]table_name SET (key1=val1, key2=val2, + * ...) 
+ */ + case object ALTER_TABLE extends SqlCommand("alter table", "(ALTER\\s+TABLE\\s+.+)") + + /** + * ALTER VIEW [catalog_name.][db_name.]view_name RENAME TO new_view_name + * + * ALTER VIEW [catalog_name.][db_name.]view_name AS new_query_expression + */ + case object ALTER_VIEW extends SqlCommand("alter view", "(ALTER\\s+VIEW\\s+.+)") + + /** ALTER DATABASE [catalog_name.]db_name SET (key1=val1, key2=val2, ...) */ + case object ALTER_DATABASE extends SqlCommand("alter database", "(ALTER\\s+DATABASE\\s+.+)") + + /** + * ALTER [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF EXISTS] + * [catalog_name.][db_name.]function_name AS identifier [LANGUAGE JAVA|SCALA|PYTHON] + */ + case object ALTER_FUNCTION + extends SqlCommand( + "alter function", + "(ALTER\\s+(TEMPORARY\\s+|TEMPORARY\\s+SYSTEM\\s+|)FUNCTION\\s+.+)") + + // ---- INSERT Statement-------------------------------------------------------------------------------------------------------------------------------- + + /** + * INSERT { INTO | OVERWRITE } [catalog_name.][db_name.]table_name [PARTITION part_spec] + * [column_list] select_statement INSERT { INTO | OVERWRITE } [catalog_name.][db_name.]table_name + * VALUES values_row [, values_row ...] + */ + case object INSERT extends SqlCommand("insert", "(INSERT\\s+(INTO|OVERWRITE)\\s+.+)") + + // ---- DESCRIBE Statement-------------------------------------------------------------------------------------------------------------------------------- + + /** { DESCRIBE | DESC } [catalog_name.][db_name.]table_name */ + case object DESC extends SqlCommand("desc", "(DESC\\s+.+)") + + /** { DESCRIBE | DESC } [catalog_name.][db_name.]table_name */ + case object DESCRIBE extends SqlCommand("describe", "(DESCRIBE\\s+.+)") + + // ---- EXPLAIN Statement-------------------------------------------------------------------------------------------------------------------------------- + + /** + * For flink-1.13.x: EXPLAIN PLAN FOR ``
For + * flink-1.14.x: EXPLAIN ESTIMATED_COST, CHANGELOG_MODE, JSON_EXECUTION_PLAN + * ``
For flink-1.15.x:
 EXPLAIN
+   * [([ExplainDetail[, ExplainDetail]*]) | PLAN FOR]
+   * 
+   *
+   * statement_set: EXECUTE STATEMENT SET BEGIN insert_statement; ... insert_statement; END; 
+ * Recommended not to use the form of flink-1.15.x + */ + case object EXPLAIN extends SqlCommand("explain", "(EXPLAIN\\s+.+)") + + // ---- USE Statements-------------------------------------------------------------------------------------------------------------------------------- + + /** USE CATALOG catalog_name */ + case object USE_CATALOG extends SqlCommand("use catalog", "(USE\\s+CATALOG\\s+.+)") + + /** USE MODULES module_name1[, module_name2, ...] */ + case object USE_MODULES extends SqlCommand("use modules", "(USE\\s+MODULES\\s+.+)") + + /** USE [catalog_name.]database_name */ + case object USE_DATABASE extends SqlCommand("use database", "(USE\\s+(?!(CATALOG|MODULES)).+)") + + // ----SHOW Statements-------------------------------------------------------------------------------------------------------------------------------- + + /** SHOW CATALOGS */ + case object SHOW_CATALOGS extends SqlCommand("show catalogs", "(SHOW\\s+CATALOGS\\s*)") + + /** SHOW CURRENT CATALOG */ + case object SHOW_CURRENT_CATALOG + extends SqlCommand("show current catalog", "(SHOW\\s+CURRENT\\s+CATALOG\\s*)") + + /** SHOW DATABASES */ + case object SHOW_DATABASES extends SqlCommand("show databases", "(SHOW\\s+DATABASES\\s*)") + + /** SHOW CURRENT DATABASE */ + case object SHOW_CURRENT_DATABASE + extends SqlCommand("show current database", "(SHOW\\s+CURRENT\\s+DATABASE\\s*)") + + /** + * SHOW TABLES,support from flink-1.13.x
SHOW TABLES [ ( FROM | IN )
+   * [catalog_name.]database_name ] [ [NOT] LIKE ` ], support from flink-1.15.x
+   */
+  case object SHOW_TABLES extends SqlCommand("show tables", "(SHOW\\s+TABLES.*)")
+
+  /** SHOW CREATE TABLE, flink-1.14.x support. */
+  case object SHOW_CREATE_TABLE
+    extends SqlCommand("show create table", "(SHOW\\s+CREATE\\s+TABLE\\s+.+)")
+
+  /**
+   * SHOW COLUMNS ( FROM | IN ) [`[`catalog_name.]database.]`` [ [NOT] LIKE
+   * ``],flink-1.15.x support.
+   */
+  case object SHOW_COLUMNS extends SqlCommand("show columns", "(SHOW\\s+COLUMNS\\s+.+)")
+
+  /** SHOW VIEWS */
+  case object SHOW_VIEWS extends SqlCommand("show views", "(SHOW\\s+VIEWS\\s*)")
+
+  /** SHOW CREATE VIEW */
+  case object SHOW_CREATE_VIEW
+    extends SqlCommand("show create view", "(SHOW\\s+CREATE\\s+VIEW\\s+.+)")
+
+  /** SHOW [USER] FUNCTIONS */
+  case object SHOW_FUNCTIONS
+    extends SqlCommand("show functions", "(SHOW\\s+(USER\\s+|)FUNCTIONS\\s*)")
+
+  /** SHOW [FULL] MODULES */
+  case object SHOW_MODULES extends SqlCommand("show modules", "(SHOW\\s+(FULL\\s+|)MODULES\\s*)")
+
+  // ----LOAD Statements--------------------------------------------------------------------------------------------------------------------------------
+
+  /** LOAD MODULE module_name [WITH ('key1' = 'val1', 'key2' = 'val2', ...)] */
+  case object LOAD_MODULE extends SqlCommand("load module", "(LOAD\\s+MODULE\\s+.+)")
+
+  // ----UNLOAD Statements--------------------------------------------------------------------------------------------------------------------------------
+
+  /** UNLOAD MODULE module_name */
+  case object UNLOAD_MODULE extends SqlCommand("unload module", "(UNLOAD\\s+MODULE\\s+.+)")
+
+  // ----SET Statements--------------------------------------------------------------------------------------------------------------------------------
+
+  /**
+   * SET ('key' = 'value')
+   *
+   * The operands are the cleaned-up key and value. A bare `SET` (no `key = value` part)
+   * yields no operands and is therefore reported by the parser as an unsupported statement.
+   */
+  case object SET
+    extends SqlCommand(
+      "set",
+      "SET(\\s+(\\S+)\\s*=(.*))?",
+      {
+        case a if a.length < 3 => None
+        // Bare `SET`: the optional group did not take part in the match, so every group is
+        // null. Passing that null to `cleanUp` used to throw a NullPointerException.
+        case a if a.head == null => None
+        case a => Some(Array[String](cleanUp(a(1)), cleanUp(a(2))))
+      })
+
+  // ----RESET Statements--------------------------------------------------------------------------------------------------------------------------------
+
+  /** RESET ('key') */
+  case object RESET extends SqlCommand("reset", "RESET\\s+'(.*)'")
+
+  /** RESET */
+  case object RESET_ALL extends SqlCommand("reset all", "RESET", _ => Some(Array[String]("ALL")))
+
+  // ----INSERT SET Statements--------------------------------------------------------------------------------------------------------------------------------
+  /*
+   *
+   * SQL Client executes each INSERT INTO statement as a single Flink job. However,
+   * this is sometimes not optimal because some parts of the pipeline could be reused.
+   * SQL Client supports the STATEMENT SET syntax to execute a set of SQL statements.
+   * This feature is equivalent to StatementSet in the Table API.
+   * The STATEMENT SET syntax encloses one or more INSERT INTO statements.
+   * All statements in a STATEMENT SET block are holistically optimized and executed as a single Flink job.
+   * Joint optimization and execution allow common intermediate results to be reused and can therefore significantly
+   * improve the efficiency of executing multiple queries.
+   * 
+ */ + /** This is SQL Client's syntax, don't use in our platform. */ + @deprecated + case object BEGIN_STATEMENT_SET + extends SqlCommand("begin statement set", "BEGIN\\s+STATEMENT\\s+SET", Converters.NO_OPERANDS) + + /** This is SQL Client's syntax, don't use in our platform. */ + @deprecated + case object END_STATEMENT_SET + extends SqlCommand("end statement set", "END", Converters.NO_OPERANDS) + + // Since: 2.1.2 for flink 1.18 + case object DELETE extends SqlCommand("delete", "(DELETE\\s+FROM\\s+.+)") + + // Since: 2.1.2 for flink 1.18 + case object UPDATE extends SqlCommand("update", "(UPDATE\\s+.+)") + + private[this] def cleanUp(sql: String): String = + sql.trim.replaceAll("^(['\"])|(['\"])$", "") + +} + +/** Call of SQL command with operands and command type. */ +case class SqlCommandCall( + lineStart: Int, + lineEnd: Int, + command: SqlCommand, + operands: Array[String], + originSql: String) {} + +case class FlinkSqlValidationResult( + success: JavaBool = true, + failedType: FlinkSqlValidationFailedType = null, + lineStart: Int = 0, + lineEnd: Int = 0, + errorLine: Int = 0, + errorColumn: Int = 0, + sql: String = null, + exception: String = null) + +case class SqlSegment(start: Int, end: Int, sql: String) + +object SqlSplitter { + + private lazy val singleLineCommentPrefixList = Set[String](PARAM_PREFIX) + + /** + * Split whole text into multiple sql statements. Two Steps: Step 1, split the whole text into + * multiple sql statements. Step 2, refine the results. Replace the preceding sql statements with + * empty lines, so that we can get the correct line number in the parsing error message. 
e.g:
+   * select a from table_1; select a from table_2; select a from table_3; The above text will be
+   * split into: sql_1: select a from table_1 sql_2: \nselect a from table_2 sql_3: \n\nselect a
+   * from table_3
+   *
+   * @param sql the whole script text, possibly holding several ';'-terminated statements
+   * @return one SqlSegment (start line, end line, padded sql) per statement, sorted by start line
+   */
+  def splitSql(sql: String): List[SqlSegment] = {
+    val queries = ListBuffer[String]()
+    val lastIndex = if (StringUtils.isNotBlank(sql)) sql.length - 1 else 0
+    var query = new mutable.StringBuilder
+
+    var multiLineComment = false
+    var singleLineComment = false
+    var singleQuoteString = false
+    var doubleQuoteString = false
+    var lineNum: Int = 0
+    val lineNumMap = new collection.mutable.HashMap[Int, (Int, Int)]()
+
+    // Per 1-based line number: true when the line holds real SQL, false when it is blank, starts with PARAM_PREFIX or sits in a block comment
+    val lineDescriptor = {
+      val scanner = new Scanner(sql)
+      val descriptor = new collection.mutable.HashMap[Int, Boolean]
+      var lineNumber = 0
+      var startComment = false
+      var hasComment = false
+
+      while (scanner.hasNextLine) {
+        lineNumber += 1
+        val line = scanner.nextLine().trim
+        val nonEmpty =
+          StringUtils.isNotBlank(line) && !line.startsWith(PARAM_PREFIX)
+        if (line.startsWith("/*")) {
+          startComment = true
+          hasComment = true
+        }
+
+        descriptor += lineNumber -> (nonEmpty && !hasComment)
+
+        if (startComment && line.endsWith("*/")) {
+          startComment = false
+          hasComment = false
+        }
+      }
+      descriptor
+    }
+
+    @tailrec
+    def findStartLine(num: Int): Int =
+      if (num >= lineDescriptor.size || lineDescriptor(num)) num
+      else findStartLine(num + 1)
+
+    def markLineNumber(): Unit = {
+      val line = lineNum + 1
+      if (lineNumMap.isEmpty) {
+        lineNumMap += (0 -> (findStartLine(1) -> line))
+      } else {
+        val index = lineNumMap.size
+        val start = lineNumMap(lineNumMap.size - 1)._2 + 1
+        lineNumMap += (index -> (findStartLine(start) -> line))
+      }
+    }
+
+    for (idx <- 0 until sql.length) {
+
+      if (sql.charAt(idx) == '\n') lineNum += 1
+
+      breakable {
+        val ch = sql.charAt(idx)
+
+        // end of single line comment
+        if (singleLineComment && (ch == '\n')) {
+          singleLineComment = false
+          query += ch
+          if (idx == lastIndex && query.toString.trim.nonEmpty) {
+            // add query when it is the end of sql.
+            queries += query.toString
+          }
+          break()
+        }
+
+        // end of multiple line comment
+        if (multiLineComment && (idx - 1) >= 0 && sql.charAt(idx - 1) == '/'
+          && (idx - 2) >= 0 && sql.charAt(idx - 2) == '*') {
+          multiLineComment = false
+        }
+
+        // single quote start or end mark
+        if (ch == '\'' && !(singleLineComment || multiLineComment)) {
+          if (singleQuoteString) {
+            singleQuoteString = false
+          } else if (!doubleQuoteString) {
+            singleQuoteString = true
+          }
+        }
+
+        // double quote start or end mark
+        if (ch == '"' && !(singleLineComment || multiLineComment)) {
+          if (doubleQuoteString && idx > 0) {
+            doubleQuoteString = false
+          } else if (!singleQuoteString) {
+            doubleQuoteString = true
+          }
+        }
+
+        // single line comment or multiple line comment start mark
+        if (!singleQuoteString && !doubleQuoteString && !multiLineComment && !singleLineComment && idx < lastIndex) {
+          if (isSingleLineComment(sql.charAt(idx), sql.charAt(idx + 1))) {
+            singleLineComment = true
+          } else if (sql.charAt(idx) == '/' && sql.length > (idx + 2)
+            && sql.charAt(idx + 1) == '*' && sql.charAt(idx + 2) != '+') {
+            multiLineComment = true
+          }
+        }
+
+        if (ch == ';' && !singleQuoteString && !doubleQuoteString && !multiLineComment && !singleLineComment) {
+          markLineNumber()
+          // meet the end of semicolon
+          if (query.toString.trim.nonEmpty) {
+            queries += query.toString
+            query = new mutable.StringBuilder
+          }
+        } else if (idx == lastIndex) {
+          markLineNumber()
+
+          // meet the last character
+          if (!singleLineComment && !multiLineComment) {
+            query += ch
+          }
+
+          if (query.toString.trim.nonEmpty) {
+            queries += query.toString
+            query = new mutable.StringBuilder
+          }
+        } else if (!singleLineComment && !multiLineComment) {
+          // normal case, not in single line comment and not in multiple line comment
+          query += ch
+        } else if (ch == '\n') {
+          query += ch
+        }
+      }
+    }
+
+    val refinedQueries = new collection.mutable.HashMap[Int, String]()
+    for (i <- queries.indices) {
+      val currStatement = queries(i)
+      if (isSingleLineComment(currStatement) || isMultipleLineComment(currStatement)) {
+        // fold a comment-only statement into the previous one as blank lines (by key: HashMap.last is an arbitrary tuple)
+        if (refinedQueries.nonEmpty) {
+          val lastRefinedQuery = refinedQueries(refinedQueries.size - 1)
+          refinedQueries(refinedQueries.size - 1) =
+            lastRefinedQuery + extractLineBreaks(currStatement)
+        }
+      } else {
+        var linesPlaceholder = ""
+        if (refinedQueries.nonEmpty) {
+          linesPlaceholder = extractLineBreaks(refinedQueries(refinedQueries.size - 1))
+        }
+        // add some blank lines before the statement to keep the original line number
+        val refinedQuery = linesPlaceholder + currStatement
+        refinedQueries += refinedQueries.size -> refinedQuery
+      }
+    }
+
+    val set = new ListBuffer[SqlSegment]
+    refinedQueries.foreach(x => {
+      val line = lineNumMap(x._1)
+      set += SqlSegment(line._1, line._2, x._2)
+    })
+    set.toList.sortWith((a, b) => a.start < b.start)
+  }
+
+  /**
+   * Extract the line breaks of a text, dropping every other character.
+   *
+   * @param text the text to scan
+   * @return a string holding one '\n' for each '\n' found in text
+   */
+  private[this] def extractLineBreaks(text: String): String = {
+    val builder = new mutable.StringBuilder
+    for (i <- 0 until text.length) {
+      if (text.charAt(i) == '\n') {
+        builder.append('\n')
+      }
+    }
+    builder.toString
+  }
+
+  private[this] def isSingleLineComment(text: String) =
+    text.trim.startsWith(PARAM_PREFIX)
+
+  private[this] def isMultipleLineComment(text: String) =
+    text.trim.startsWith("/*") && text.trim.endsWith("*/")
+
+  /**
+   * Check whether two consecutive characters open a single-line comment.
+   *
+   * @param curChar the character at the current scan position
+   * @param nextChar the character right after curChar
+   * @return true when they match a one- or two-character prefix in singleLineCommentPrefixList
+   */
+  private[this] def isSingleLineComment(curChar: Char, nextChar: Char): Boolean = {
+    var flag = false
+    for (singleCommentPrefix <- singleLineCommentPrefixList) {
+      singleCommentPrefix.length match {
+        case 1 if curChar == singleCommentPrefix.charAt(0) => flag = true
+        case 2
+            if curChar == singleCommentPrefix.charAt(0) && nextChar == singleCommentPrefix.charAt(
+              1) =>
+          flag =
true + case _ => + } + } + flag + } + +} diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/StreamEnvConfig.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/StreamEnvConfig.scala new file mode 100644 index 0000000000..574945fa39 --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/StreamEnvConfig.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.streampark.flink.core + +class StreamEnvConfig(val args: Array[String], val conf: StreamEnvConfigFunction) + +class StreamTableEnvConfig( + val args: Array[String], + val streamConfig: StreamEnvConfigFunction, + val tableConfig: TableEnvConfigFunction) + +class TableEnvConfig(val args: Array[String], val conf: TableEnvConfigFunction) diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/StreamTableContext.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/StreamTableContext.scala new file mode 100644 index 0000000000..856a2ab7c3 --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/StreamTableContext.scala @@ -0,0 +1,239 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.streampark.flink.core + +import org.apache.streampark.common.util.Implicits.JavaList + +import org.apache.flink.api.common.typeinfo.TypeInformation +import org.apache.flink.api.java.tuple +import org.apache.flink.streaming.api.datastream.{DataStream => JavaDataStream} +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment +import org.apache.flink.table.api._ +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment +import org.apache.flink.table.catalog.CatalogDescriptor +import org.apache.flink.table.connector.ChangelogMode +import org.apache.flink.table.expressions.Expression +import org.apache.flink.table.module.ModuleEntry +import org.apache.flink.table.resource.ResourceUri +import org.apache.flink.table.types.AbstractDataType +import org.apache.flink.types.Row +import org.apache.flink.util.ParameterTool + +class StreamTableContext( + override val parameter: ParameterTool, + private val streamEnv: StreamExecutionEnvironment, + private val tableEnv: StreamTableEnvironment) + extends FlinkStreamTableTraitV2(parameter, streamEnv, tableEnv) { + + def this(args: (ParameterTool, StreamExecutionEnvironment, StreamTableEnvironment)) = + this(args._1, args._2, args._3) + + def this(args: StreamTableEnvConfig) = + this(FlinkTableInitializerV2.initialize(args)) + + override def fromDataStream[T](dataStream: JavaDataStream[T], schema: Schema): Table = + tableEnv.fromDataStream[T](dataStream, schema) + + /** @deprecated old API */ + override def fromDataStream[T](dataStream: JavaDataStream[T], expressions: Expression*): Table = + tableEnv.fromDataStream(dataStream, expressions: _*) + + override def fromChangelogStream(dataStream: JavaDataStream[Row]): Table = + tableEnv.fromChangelogStream(dataStream) + + override def fromChangelogStream(dataStream: JavaDataStream[Row], schema: Schema): Table = + tableEnv.fromChangelogStream(dataStream, schema) + + override def fromChangelogStream( + dataStream: 
JavaDataStream[Row], + schema: Schema, + changelogMode: ChangelogMode): Table = + tableEnv.fromChangelogStream(dataStream, schema, changelogMode) + + override def createTemporaryView[T]( + path: String, + dataStream: JavaDataStream[T], + schema: Schema): Unit = + tableEnv.createTemporaryView[T](path, dataStream, schema) + + /** @deprecated old API */ + @deprecated override def createTemporaryView[T]( + path: String, + dataStream: JavaDataStream[T], + expressions: Expression*): Unit = + tableEnv.createTemporaryView(path, dataStream, expressions: _*) + + override def toDataStream(table: Table): JavaDataStream[Row] = { + isConvertedToDataStream = true + tableEnv.toDataStream(table) + } + + override def toDataStream[T](table: Table, targetClass: Class[T]): JavaDataStream[T] = { + isConvertedToDataStream = true + tableEnv.toDataStream[T](table, targetClass) + } + + override def toDataStream[T](table: Table, targetDataType: AbstractDataType[_]): JavaDataStream[T] = { + isConvertedToDataStream = true + tableEnv.toDataStream[T](table, targetDataType) + } + + override def toChangelogStream(table: Table): JavaDataStream[Row] = { + isConvertedToDataStream = true + tableEnv.toChangelogStream(table) + } + + override def toChangelogStream(table: Table, targetSchema: Schema): JavaDataStream[Row] = { + isConvertedToDataStream = true + tableEnv.toChangelogStream(table, targetSchema) + } + + override def toChangelogStream( + table: Table, + targetSchema: Schema, + changelogMode: ChangelogMode): JavaDataStream[Row] = { + isConvertedToDataStream = true + tableEnv.toChangelogStream(table, targetSchema, changelogMode) + } + + override def createStatementSet(): org.apache.flink.table.api.bridge.java.StreamStatementSet = + tableEnv.createStatementSet() + + override def useModules(strings: String*): Unit = + tableEnv.useModules(strings: _*) + + override def createTemporaryTable(path: String, descriptor: TableDescriptor): Unit = + tableEnv.createTemporaryTable(path, descriptor) + + override 
def createTable(path: String, descriptor: TableDescriptor): Unit = + tableEnv.createTable(path, descriptor) + + override def from(descriptor: TableDescriptor): Table = + tableEnv.from(descriptor) + + override def listFullModules(): Array[ModuleEntry] = + tableEnv.listFullModules() + + /** @since 1.15 */ + override def listTables(s: String, s1: String): Array[String] = + tableEnv.listTables(s, s1) + + /** @since 1.15 */ + override def loadPlan(planReference: PlanReference): CompiledPlan = + tableEnv.loadPlan(planReference) + + /** @since 1.15 */ + override def compilePlanSql(s: String): CompiledPlan = + tableEnv.compilePlanSql(s) + + /** @since 1.17 */ + override def createFunction( + path: String, + className: String, + resourceUris: JavaList[ResourceUri]): Unit = + tableEnv.createFunction(path, className, resourceUris) + + /** @since 1.17 */ + override def createFunction( + path: String, + className: String, + resourceUris: JavaList[ResourceUri], + ignoreIfExists: Boolean): Unit = + tableEnv.createFunction(path, className, resourceUris, ignoreIfExists) + + /** @since 1.17 */ + override def createTemporaryFunction( + path: String, + className: String, + resourceUris: JavaList[ResourceUri]): Unit = + tableEnv.createTemporaryFunction(path, className, resourceUris) + + /** @since 1.17 */ + override def createTemporarySystemFunction( + name: String, + className: String, + resourceUris: JavaList[ResourceUri]): Unit = + tableEnv.createTemporarySystemFunction(name, className, resourceUris) + + /** @since 1.17 */ + override def explainSql( + statement: String, + format: ExplainFormat, + extraDetails: ExplainDetail*): String = + tableEnv.explainSql(statement, format, extraDetails: _*) + + /** @since 1.18 */ + override def createCatalog(catalog: String, catalogDescriptor: CatalogDescriptor): Unit = { + tableEnv.createCatalog(catalog, catalogDescriptor) + } + + /** @deprecated old API */ + @deprecated override def toAppendStream[T]( + table: Table, + typeInformation: 
TypeInformation[T]): JavaDataStream[T] = + tableEnv.toAppendStream(table, typeInformation) + + /** @deprecated old API */ + @deprecated override def toRetractStream[T]( + table: Table, + typeInformation: TypeInformation[T]): JavaDataStream[tuple.Tuple2[java.lang.Boolean, T]] = + tableEnv.toRetractStream(table, typeInformation) + + /** since Flink 2.0 */ + override def toAppendStream[T](table: Table, clazz: Class[T]): JavaDataStream[T] = + tableEnv.toAppendStream(table, clazz) + + /** since Flink 2.0 */ + override def toRetractStream[T](table: Table, clazz: Class[T]): JavaDataStream[tuple.Tuple2[java.lang.Boolean, T]] = + tableEnv.toRetractStream(table, clazz) + + /** since Flink 2.0 */ + override def createTable(path: String, descriptor: TableDescriptor, ignoreIfExists: Boolean): Boolean = + tableEnv.createTable(path, descriptor, ignoreIfExists) + + /** since Flink 2.0 */ + override def createTemporaryTable( + path: String, + descriptor: TableDescriptor, + ignoreIfExists: Boolean): Unit = + tableEnv.createTemporaryTable(path, descriptor, ignoreIfExists) + + /** since Flink 2.0 */ + override def createView(path: String, view: Table, ignoreIfExists: Boolean): Boolean = + tableEnv.createView(path, view, ignoreIfExists) + + /** since Flink 2.0 */ + override def createView(path: String, view: Table): Unit = + tableEnv.createView(path, view) + + /** since Flink 2.0 */ + override def dropTable(path: String, ignoreIfNotExists: Boolean): Boolean = + tableEnv.dropTable(path, ignoreIfNotExists) + + /** since Flink 2.0 */ + override def dropTable(path: String): Boolean = + tableEnv.dropTable(path) + + /** since Flink 2.0 */ + override def dropView(path: String, ignoreIfNotExists: Boolean): Boolean = + tableEnv.dropView(path, ignoreIfNotExists) + + /** since Flink 2.0 */ + override def dropView(path: String): Boolean = + tableEnv.dropView(path) +} diff --git 
a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/TableContext.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/TableContext.scala new file mode 100644 index 0000000000..cbe9af0f57 --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/TableContext.scala @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.streampark.flink.core + +import org.apache.streampark.common.util.Implicits.JavaList + +import org.apache.flink.table.api._ +import org.apache.flink.table.catalog.CatalogDescriptor +import org.apache.flink.table.module.ModuleEntry +import org.apache.flink.table.resource.ResourceUri +import org.apache.flink.util.ParameterTool + +class TableContext(override val parameter: ParameterTool, private val tableEnv: TableEnvironment) + extends FlinkTableTrait(parameter, tableEnv) { + + def this(args: (ParameterTool, TableEnvironment)) = this(args._1, args._2) + + def this(args: TableEnvConfig) = this(FlinkTableInitializerV2.initialize(args)) + + override def useModules(strings: String*): Unit = + tableEnv.useModules(strings: _*) + + override def createTemporaryTable(path: String, descriptor: TableDescriptor): Unit = { + tableEnv.createTemporaryTable(path, descriptor) + } + + override def createTable(path: String, descriptor: TableDescriptor): Unit = { + tableEnv.createTable(path, descriptor) + } + + override def from(tableDescriptor: TableDescriptor): Table = { + tableEnv.from(tableDescriptor) + } + + override def listFullModules(): Array[ModuleEntry] = + tableEnv.listFullModules() + + /** @since 1.15 */ + override def listTables(catalogName: String, databaseName: String): Array[String] = + tableEnv.listTables(catalogName, databaseName) + + /** @since 1.15 */ + override def loadPlan(planReference: PlanReference): CompiledPlan = + tableEnv.loadPlan(planReference) + + /** @since 1.15 */ + override def compilePlanSql(stmt: String): CompiledPlan = + tableEnv.compilePlanSql(stmt) + + /** @since 1.17 */ + override def createFunction( + path: String, + className: String, + resourceUris: JavaList[ResourceUri]): Unit = + tableEnv.createFunction(path, className, resourceUris) + + /** @since 1.17 */ + override def createFunction( + path: String, + className: String, + resourceUris: JavaList[ResourceUri], + ignoreIfExists: Boolean): Unit = + 
tableEnv.createFunction(path, className, resourceUris, ignoreIfExists) + + /** @since 1.17 */ + override def createTemporaryFunction( + path: String, + className: String, + resourceUris: JavaList[ResourceUri]): Unit = + tableEnv.createTemporaryFunction(path, className, resourceUris) + + /** @since 1.17 */ + override def createTemporarySystemFunction( + name: String, + className: String, + resourceUris: JavaList[ResourceUri]): Unit = + tableEnv.createTemporarySystemFunction(name, className, resourceUris) + + /** @since 1.17 */ + override def explainSql( + statement: String, + format: ExplainFormat, + extraDetails: ExplainDetail*): String = + tableEnv.explainSql(statement, format, extraDetails: _*) + + /** @since 1.18 */ + override def createCatalog(catalog: String, catalogDescriptor: CatalogDescriptor): Unit = { + tableEnv.createCatalog(catalog, catalogDescriptor) + } + + /** since Flink 2.0 */ + override def createTable( + path: String, + descriptor: TableDescriptor, + ignoreIfExists: Boolean): Boolean = + tableEnv.createTable(path, descriptor, ignoreIfExists) + + /** since Flink 2.0 */ + override def createTemporaryTable( + path: String, + descriptor: TableDescriptor, + ignoreIfExists: Boolean): Unit = + tableEnv.createTemporaryTable(path, descriptor, ignoreIfExists) + + /** since Flink 2.0 */ + override def createView(path: String, view: Table, ignoreIfExists: Boolean): Boolean = + tableEnv.createView(path, view, ignoreIfExists) + + /** since Flink 2.0 */ + override def createView(path: String, view: Table): Unit = + tableEnv.createView(path, view) + + /** since Flink 2.0 */ + override def dropTable(path: String, ignoreIfNotExists: Boolean): Boolean = + tableEnv.dropTable(path, ignoreIfNotExists) + + /** since Flink 2.0 */ + override def dropTable(path: String): Boolean = + tableEnv.dropTable(path) + + /** since Flink 2.0 */ + override def dropView(path: String, ignoreIfNotExists: Boolean): Boolean = + tableEnv.dropView(path, ignoreIfNotExists) + + /** since 
Flink 2.0 */ + override def dropView(path: String): Boolean = + tableEnv.dropView(path) + +} diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/TableExt.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/TableExt.scala new file mode 100644 index 0000000000..3c4ab76c95 --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/TableExt.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.streampark.flink.core + +import org.apache.flink.streaming.api.datastream.DataStream +import org.apache.flink.table.api.{Table => FlinkTable} +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment +import org.apache.flink.types.Row + +object TableExt { + + class Table(val table: FlinkTable) { + def ->(field: String, fields: String*): FlinkTable = + table.as(field, fields: _*) + } + + class TableConversions( + table: FlinkTable, + streamTableEnv: StreamTableEnvironment) { + + def \\ : DataStream[Row] = streamTableEnv.toDataStream(table) + } + +} diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/conf/FlinkConfiguration.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/conf/FlinkConfiguration.scala new file mode 100644 index 0000000000..b419eb8337 --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.0/src/main/scala/org/apache/streampark/flink/core/conf/FlinkConfiguration.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.streampark.flink.core.conf + +import org.apache.flink.configuration.Configuration +import org.apache.flink.util.ParameterTool + +case class FlinkConfiguration( + parameter: ParameterTool, + envConfig: Configuration, + tableConfig: Configuration) diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/pom.xml b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/pom.xml new file mode 100644 index 0000000000..1cd257589a --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/pom.xml @@ -0,0 +1,152 @@ + + + + 4.0.0 + + + org.apache.streampark + streampark-flink-shims + 2.2.0-SNAPSHOT + + + streampark-flink-shims_flink-2.1_${scala.binary.version} + StreamPark : Flink Shims 2.1 + + + 2.1.2 + + + + + org.apache.streampark + streampark-common_${scala.binary.version} + ${project.version} + + + + + org.apache.flink + flink-core + ${flink.version} + provided + + + + org.apache.flink + flink-streaming-java + ${flink.version} + provided + + + + org.apache.flink + flink-table-api-java + ${flink.version} + provided + + + + org.apache.flink + flink-table-api-java-bridge + ${flink.version} + provided + + + + org.apache.flink + flink-table-planner_${scala.binary.version} + ${flink.version} + provided + + + + org.apache.flink + flink-clients + ${flink.version} + provided + + + + org.apache.flink + flink-kubernetes + ${flink.version} + provided + + + + org.apache.flink + flink-yarn + ${flink.version} + provided + + + + org.apache.flink + flink-statebackend-rocksdb + ${flink.version} + provided + + + + org.apache.hadoop + hadoop-client-api + true + + + + org.apache.hadoop + hadoop-client-runtime + true + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + + shade + + package + + true + ${project.basedir}/target/dependency-reduced-pom.xml + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + + + + diff --git 
a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/java/org/apache/streampark/flink/core/StreamEnvConfigFunction.java b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/java/org/apache/streampark/flink/core/StreamEnvConfigFunction.java
new file mode 100644
index 0000000000..d1ed71bb32
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/java/org/apache/streampark/flink/core/StreamEnvConfigFunction.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core;
+
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.util.ParameterTool;
+
+@FunctionalInterface
+public interface StreamEnvConfigFunction {
+
+    /**
+     * Customization hook for a StreamExecutionEnvironment: implement this function to set
+     * additional parameters on the environment while it is being initialized.
+     *
+     * @param environment the StreamExecutionEnvironment to customize
+     * @param parameterTool the ParameterTool passed to the implementation together with the
+     *     environment
+     */
+    void configuration(StreamExecutionEnvironment environment, ParameterTool parameterTool);
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/java/org/apache/streampark/flink/core/TableEnvConfigFunction.java b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/java/org/apache/streampark/flink/core/TableEnvConfigFunction.java
new file mode 100644
index 0000000000..0e57b74b5c
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/java/org/apache/streampark/flink/core/TableEnvConfigFunction.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core;
+
+import org.apache.flink.table.api.TableConfig;
+import org.apache.flink.util.ParameterTool;
+
+@FunctionalInterface
+public interface TableEnvConfigFunction {
+
+    /**
+     * Customization hook for a TableEnvironment: implement this function to set additional
+     * parameters on the table configuration while the TableEnvironment is being initialized.
+     *
+     * @param tableConfig the TableConfig to customize
+     * @param parameterTool the ParameterTool passed to the implementation together with the
+     *     table configuration
+     */
+    void configuration(TableConfig tableConfig, ParameterTool parameterTool);
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/EnhancerImplicit.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/EnhancerImplicit.scala
new file mode 100644
index 0000000000..0bf7bc15ae
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/EnhancerImplicit.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.conf.ConfigKeys.{KEY_APP_NAME, KEY_FLINK_APP_NAME}
+import org.apache.streampark.common.util.DeflaterUtils
+
+import org.apache.flink.configuration.PipelineOptions
+import org.apache.flink.table.api.TableEnvironment
+import org.apache.flink.table.api.bridge.java.StreamTableEnvironment
+import org.apache.flink.util.ParameterTool
+
+import scala.util.Try
+
+/** Implicit enrichments for ParameterTool and the two table environment types. */
+object EnhancerImplicit {
+
+  implicit class EnhanceParameterTool(parameterTool: ParameterTool) {
+
+    /**
+     * Resolves the application name.
+     *
+     * A non-null `name` is returned as is. Otherwise the value stored under `KEY_APP_NAME()` is
+     * passed through `DeflaterUtils.unzipString`; if that attempt throws, the value stored under
+     * `KEY_FLINK_APP_NAME` is used instead (which may be null).
+     *
+     * @param name
+     *   explicit application name, or null to look it up in the parameters
+     * @param required
+     *   when true, a null result fails the `require` check
+     * @return
+     *   the resolved name, possibly null when `required` is false
+     */
+    private[flink] def getAppName(name: String = null, required: Boolean = false): String = {
+      val resolved =
+        if (name != null) name
+        else {
+          Try(DeflaterUtils.unzipString(parameterTool.get(KEY_APP_NAME(), null)))
+            .getOrElse(parameterTool.get(KEY_FLINK_APP_NAME, null))
+        }
+      if (required) {
+        require(resolved != null, "[StreamPark] Application name cannot be null")
+      }
+      resolved
+    }
+
+  }
+
+  implicit class EnhanceTableEnvironment(env: TableEnvironment) {
+
+    /** Writes the resolved application name, when there is one, to `PipelineOptions.NAME`. */
+    private[flink] def setAppName(implicit parameter: ParameterTool): TableEnvironment = {
+      Option(parameter.getAppName()).foreach {
+        jobName => env.getConfig.getConfiguration.setString(PipelineOptions.NAME.key, jobName)
+      }
+      env
+    }
+
+  }
+
+  implicit class EnhanceStreamTableEnvironment(env: StreamTableEnvironment) {
+
+    /** Same as the TableEnvironment variant, but keeps the StreamTableEnvironment return type. */
+    private[flink] def setAppName(implicit parameter: ParameterTool): StreamTableEnvironment = {
+      Option(parameter.getAppName()).foreach {
+        jobName => env.getConfig.getConfiguration.setString(PipelineOptions.NAME.key, jobName)
+      }
+      env
+    }
+  }
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkClientTrait.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkClientTrait.scala
new file mode 100644
index 0000000000..a0ebcb274c
--- /dev/null
+++
b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkClientTrait.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.flink.api.common.JobID
+import org.apache.flink.client.program.ClusterClient
+import org.apache.flink.core.execution.SavepointFormatType
+
+import java.util.concurrent.CompletableFuture
+
+/**
+ * Base class for the savepoint operations offered on top of a Flink ClusterClient.
+ *
+ * Every operation takes a `nativeFormat` flag. The flag is honoured here so that a subclass
+ * which does not override a method still gets the format the caller asked for.
+ *
+ * @param clusterClient
+ *   the Flink client every call is delegated to
+ */
+abstract class FlinkClientTrait[T](clusterClient: ClusterClient[T]) {
+
+  /**
+   * Maps the boolean flag of this API onto Flink's savepoint format. `false` selects CANONICAL,
+   * which is what `SavepointFormatType.DEFAULT` stands for in Flink, so callers relying on the
+   * default argument see no change.
+   */
+  private[this] def savepointFormat(nativeFormat: Boolean): SavepointFormatType =
+    if (nativeFormat) SavepointFormatType.NATIVE else SavepointFormatType.CANONICAL
+
+  /**
+   * Triggers a savepoint for a job.
+   *
+   * @param jobID
+   *   the job to snapshot
+   * @param savepointDir
+   *   target directory of the savepoint
+   * @param nativeFormat
+   *   true for the NATIVE format, false (default) for CANONICAL
+   * @return
+   *   the future returned by the cluster client
+   */
+  def triggerSavepoint(
+      jobID: JobID,
+      savepointDir: String,
+      nativeFormat: Boolean = false): CompletableFuture[String] = {
+    clusterClient.triggerSavepoint(jobID, savepointDir, savepointFormat(nativeFormat))
+  }
+
+  /**
+   * Cancels a job and takes a savepoint while doing so.
+   *
+   * @param jobID
+   *   the job to cancel
+   * @param savepointDir
+   *   target directory of the savepoint
+   * @param nativeFormat
+   *   true for the NATIVE format, false (default) for CANONICAL
+   * @return
+   *   the future returned by the cluster client
+   */
+  def cancelWithSavepoint(
+      jobID: JobID,
+      savepointDir: String,
+      nativeFormat: Boolean = false): CompletableFuture[String] = {
+    clusterClient.cancelWithSavepoint(jobID, savepointDir, savepointFormat(nativeFormat))
+  }
+
+  /**
+   * Stops a job with a savepoint.
+   *
+   * @param jobID
+   *   the job to stop
+   * @param advanceToEndOfEventTime
+   *   forwarded unchanged to the cluster client
+   * @param savepointDir
+   *   target directory of the savepoint
+   * @param nativeFormat
+   *   true for the NATIVE format, false (default) for CANONICAL
+   * @return
+   *   the future returned by the cluster client
+   */
+  def stopWithSavepoint(
+      jobID: JobID,
+      advanceToEndOfEventTime: Boolean,
+      savepointDir: String,
+      nativeFormat: Boolean = false): CompletableFuture[String] =
+    clusterClient.stopWithSavepoint(
+      jobID,
+      advanceToEndOfEventTime,
+      savepointDir,
+      savepointFormat(nativeFormat))
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkClusterClient.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkClusterClient.scala
new file mode 100644
index 0000000000..ecfc8dabb3
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkClusterClient.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.flink.api.common.JobID
+import org.apache.flink.client.program.ClusterClient
+import org.apache.flink.core.execution.SavepointFormatType
+
+import java.util.concurrent.CompletableFuture
+
+/**
+ * Savepoint client that turns the `nativeFormat` flag into an explicit savepoint format before
+ * delegating to the wrapped ClusterClient.
+ */
+class FlinkClusterClient[T](clusterClient: ClusterClient[T])
+  extends FlinkClientTrait[T](clusterClient) {
+
+  /** NATIVE when the flag is set, CANONICAL otherwise. */
+  private[this] def formatFor(nativeFormat: Boolean): SavepointFormatType =
+    nativeFormat match {
+      case true => SavepointFormatType.NATIVE
+      case false => SavepointFormatType.CANONICAL
+    }
+
+  /** Triggers a savepoint in the format selected by `nativeFormat`. */
+  override def triggerSavepoint(
+      jobID: JobID,
+      savepointDir: String,
+      nativeFormat: Boolean): CompletableFuture[String] = {
+    val format = formatFor(nativeFormat)
+    clusterClient.triggerSavepoint(jobID, savepointDir, format)
+  }
+
+  /** Cancels the job, taking a savepoint in the format selected by `nativeFormat`. */
+  override def cancelWithSavepoint(
+      jobID: JobID,
+      savepointDirectory: String,
+      nativeFormat: Boolean): CompletableFuture[String] = {
+    val format = formatFor(nativeFormat)
+    clusterClient.cancelWithSavepoint(jobID, savepointDirectory, format)
+  }
+
+  /** Stops the job with a savepoint in the format selected by `nativeFormat`. */
+  override def stopWithSavepoint(
+      jobID: JobID,
+      advanceToEndOfEventTime: Boolean,
+      savepointDirectory: String,
+      nativeFormat: Boolean): CompletableFuture[String] = {
+    val format = formatFor(nativeFormat)
+    clusterClient.stopWithSavepoint(jobID, advanceToEndOfEventTime, savepointDirectory, format)
+  }
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClient.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClient.scala
new file mode 100644
index 0000000000..707ba43f0b
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClient.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.flink.kubernetes.kubeclient.FlinkKubeClient
+import org.apache.flink.kubernetes.kubeclient.resources.KubernetesService
+
+import java.util.Optional
+
+/** Concrete Kubernetes client of this shim; service lookups go straight to the FlinkKubeClient. */
+class FlinkKubernetesClient(kubeClient: FlinkKubeClient)
+  extends FlinkKubernetesClientTrait(kubeClient) {
+
+  /** Returns whatever the wrapped FlinkKubeClient reports for `serviceName`. */
+  override def getService(serviceName: String): Optional[KubernetesService] =
+    kubeClient.getService(serviceName)
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClientTrait.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClientTrait.scala
new file mode 100644
index 0000000000..16155bf040
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkKubernetesClientTrait.scala
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.flink.kubernetes.kubeclient.FlinkKubeClient
+import org.apache.flink.kubernetes.kubeclient.resources.KubernetesService
+
+import java.util.Optional
+
+/** Base class for Kubernetes lookups performed through a FlinkKubeClient. */
+abstract class FlinkKubernetesClientTrait(kubeClient: FlinkKubeClient) {
+
+  /**
+   * Looks up a kubernetes service by name.
+   *
+   * @param serviceName
+   *   the name of the service
+   * @return
+   *   the optional kubernetes service of the specified name, as returned by the FlinkKubeClient
+   */
+  def getService(serviceName: String): Optional[KubernetesService] = {
+    val service = kubeClient.getService(serviceName)
+    service
+  }
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkSqlExecutor.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkSqlExecutor.scala
new file mode 100644
index 0000000000..1c0df899f5
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkSqlExecutor.scala
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.conf.ConfigKeys.KEY_FLINK_SQL
+import org.apache.streampark.common.util.{AssertUtils, Logger}
+import org.apache.streampark.flink.core.SqlCommand._
+
+import org.apache.commons.lang3.StringUtils
+import org.apache.flink.configuration.{Configuration, ExecutionOptions}
+import org.apache.flink.table.api.TableEnvironment
+import org.apache.flink.util.ParameterTool
+
+import java.util
+import java.util.concurrent.locks.ReentrantReadWriteLock
+
+import scala.collection.mutable
+import scala.util.Try
+
+/** Runs the Flink SQL script of a job, statement by statement, against a TableEnvironment. */
+object FlinkSqlExecutor extends Logger {
+
+  // Guards the catch-all branch below so that only one thread at a time calls executeSql there.
+  private[this] val lock = new ReentrantReadWriteLock().writeLock
+
+  /**
+   * Reads the SQL script from `parameter`, parses it with `SqlCommandParser` and handles every
+   * statement according to its command type. INSERT statements are collected in one statement
+   * set and executed together at the end.
+   *
+   * @param sql
+   *   the parameter KEY under which the script is stored (not the SQL text itself); when blank,
+   *   `KEY_FLINK_SQL()` is used
+   * @param parameter
+   *   the parameters holding the script and the runtime mode
+   * @param context
+   *   the TableEnvironment the statements are applied to
+   * @param callbackFunc
+   *   receives the textual result of SHOW/DESC/EXPLAIN statements; when null the result is logged
+   * @throws IllegalArgumentException
+   *   if the resolved script is blank
+   * @throws RuntimeException
+   *   if the script contains a SELECT statement or no INSERT statement at all
+   */
+  private[streampark] def executeSql(
+      sql: String,
+      parameter: ParameterTool,
+      context: TableEnvironment)(implicit callbackFunc: String => Unit = null): Unit = {
+
+    val flinkSql: String =
+      if (StringUtils.isBlank(sql)) parameter.get(KEY_FLINK_SQL())
+      else parameter.get(sql)
+    require(StringUtils.isNotBlank(flinkSql), "verify failed: flink sql cannot be empty")
+
+    def callback(r: String): Unit = {
+      callbackFunc match {
+        case null => logInfo(r)
+        case x => x(r)
+      }
+    }
+
+    val runMode = parameter.get(ExecutionOptions.RUNTIME_MODE.key())
+
+    var hasInsert = false
+    val statementSet = context.createStatementSet()
+    SqlCommandParser
+      .parseSQL(flinkSql)
+      .foreach(x => {
+        // Statements without operands yield a null `args`.
+        val args = if (x.operands.isEmpty) null else x.operands.head
+        val command = x.command.name
+        x.command match {
+          // For display sql statement result information
+          case SHOW_CATALOGS =>
+            val catalogs = context.listCatalogs
+            callback(s"$command: ${catalogs.mkString("\n")}")
+          case SHOW_CURRENT_CATALOG =>
+            val catalog = context.getCurrentCatalog
+            callback(s"$command: $catalog")
+          case SHOW_DATABASES =>
+            val databases = context.listDatabases
+            callback(s"$command: ${databases.mkString("\n")}")
+          case SHOW_CURRENT_DATABASE =>
+            val database = context.getCurrentDatabase
+            callback(s"$command: $database")
+          case SHOW_TABLES =>
+            val tables =
+              context.listTables().filter(!_.startsWith("UnnamedTable"))
+            callback(s"$command: ${tables.mkString("\n")}")
+          case SHOW_FUNCTIONS =>
+            val functions = context.listUserDefinedFunctions()
+            callback(s"$command: ${functions.mkString("\n")}")
+          case SHOW_MODULES =>
+            val modules = context.listModules()
+            callback(s"$command: ${modules.mkString("\n")}")
+          case DESC | DESCRIBE =>
+            val schema = context.scan(args).getSchema
+            val builder = new mutable.StringBuilder()
+            builder.append("Column\tType\n")
+            // Field indices run from 0 up to, but not including, getFieldCount. An inclusive
+            // range would ask for one field too many and fail on the empty Optional.
+            for (i <- 0 until schema.getFieldCount) {
+              builder.append(
+                schema.getFieldName(i).get() + "\t" + schema
+                  .getFieldDataType(i)
+                  .get() + "\n")
+            }
+            callback(builder.toString())
+          case EXPLAIN =>
+            val tableResult = context.executeSql(x.originSql)
+            val r = tableResult.collect().next().getField(0).toString
+            callback(r)
+          // For specific statement, such as: SET/RESET/INSERT/SELECT
+          case SET =>
+            val operand = x.operands(1)
+            logInfo(s"$command: $args --> $operand")
+            context.getConfig.getConfiguration.setString(args, operand)
+          case RESET | RESET_ALL =>
+            // The backing map of Configuration is reached through reflection so that single
+            // keys (RESET) or all keys (RESET_ALL) can be removed.
+            val confDataField =
+              classOf[Configuration].getDeclaredField("confData")
+            confDataField.setAccessible(true)
+            val confData = confDataField
+              .get(context.getConfig.getConfiguration)
+              .asInstanceOf[util.HashMap[String, AnyRef]]
+            confData.synchronized {
+              if (x.command == RESET) {
+                confData.remove(args)
+              } else {
+                confData.clear()
+              }
+            }
+            logInfo(s"$command: $args")
+          case BEGIN_STATEMENT_SET | END_STATEMENT_SET =>
+            logWarn(s"SQL Client Syntax: ${x.command.name} ")
+          case INSERT =>
+            statementSet.addInsertSql(x.originSql)
+            hasInsert = true
+          case SELECT =>
+            logError("StreamPark dose not support 'SELECT' statement now!")
+            throw new RuntimeException("StreamPark dose not support 'select' statement now!")
+          case DELETE | UPDATE =>
+            // NOTE(review): this branch only checks the runtime mode; the statement itself is
+            // never passed to the TableEnvironment here -- confirm that this is intended.
+            AssertUtils.required(
+              runMode != "STREAMING",
+              s"Currently, ${command.toUpperCase()} statement only supports in batch mode, " +
+                s"and it requires the target table connector implements the SupportsRowLevelDelete, " +
+                s"For more details please refer to: https://nightlies.apache.org/flink/flink-docs-release-1.18/docs/dev/table/sql/$command")
+          case _ =>
+            try {
+              lock.lock()
+              context.executeSql(x.originSql)
+              logInfo(s"$command:$args")
+            } finally {
+              if (lock.isHeldByCurrentThread) {
+                lock.unlock()
+              }
+            }
+        }
+      })
+
+    if (hasInsert) {
+      statementSet.execute() match {
+        case t if t != null =>
+          // The job id is logged on a best-effort basis; a missing job client is ignored.
+          Try(t.getJobClient.get.getJobID).getOrElse(null) match {
+            case x if x != null => logInfo(s"jobId:$x")
+            case _ =>
+          }
+        case _ =>
+      }
+    } else {
+      logError("No 'INSERT' statement to trigger the execution of the Flink job.")
+      throw new RuntimeException("No 'INSERT' statement to trigger the execution of the Flink job.")
+    }
+
+    logInfo(
+      s"\n\n\n==============flinkSql==============\n\n $flinkSql\n\n============================\n\n\n")
+  }
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkSqlValidator.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkSqlValidator.scala
new file mode 100644
index 0000000000..42344c9d7f
--- /dev/null
+++
b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkSqlValidator.scala
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.enums.FlinkSqlValidationFailedType
+import org.apache.streampark.common.util.{ExceptionUtils, Logger}
+import org.apache.streampark.flink.core.SqlCommand._
+
+import org.apache.calcite.config.Lex
+import org.apache.calcite.sql.parser.SqlParser
+import org.apache.calcite.sql.parser.SqlParser.Config
+import org.apache.flink.sql.parser.validate.FlinkSqlConformance
+import org.apache.flink.table.api.SqlDialect
+import org.apache.flink.table.api.SqlDialect.{DEFAULT, HIVE}
+import org.apache.flink.table.api.config.TableConfigOptions
+import org.apache.flink.table.planner.delegation.FlinkSqlParserFactories
+
+import scala.language.existentials
+import scala.util.{Failure, Try}
+
+/** Syntax check for a Flink SQL script, built on the Calcite parser shipped with the planner. */
+object FlinkSqlValidator extends Logger {
+
+  private[this] val FLINK112_CALCITE_PARSER_CLASS =
+    "org.apache.flink.table.planner.calcite.CalciteParser"
+
+  private[this] val FLINK113_PLUS_CALCITE_PARSER_CLASS =
+    "org.apache.flink.table.planner.parse.CalciteParser"
+
+  // Captures the line and column reported in a parser error message.
+  private[this] val SYNTAX_ERROR_REGEXP =
+    ".*at\\sline\\s(\\d+),\\scolumn\\s(\\d+).*".r
+
+  // One parser configuration per dialect name, built on first use.
+  private[this] lazy val sqlParserConfigMap: Map[String, SqlParser.Config] = {
+    def getConfig(sqlDialect: SqlDialect): Config = {
+      // Both dialects map to the same conformance, so no per-dialect fallback is needed.
+      val conformance = sqlDialect match {
+        case HIVE | DEFAULT => FlinkSqlConformance.DEFAULT
+        case _ =>
+          throw new UnsupportedOperationException(s"Unsupported sqlDialect: $sqlDialect")
+      }
+      SqlParser.config
+        .withParserFactory(FlinkSqlParserFactories.create(conformance))
+        .withConformance(conformance)
+        .withLex(Lex.JAVA)
+        .withIdentifierMaxLength(256)
+    }
+
+    Map(
+      SqlDialect.DEFAULT.name() -> getConfig(SqlDialect.DEFAULT),
+      SqlDialect.HIVE.name() -> getConfig(SqlDialect.HIVE))
+  }
+
+  /**
+   * Validates every statement of `sql` and reports the first problem found.
+   *
+   * SET of the SQL dialect option switches the dialect used for the statements that follow.
+   * Statements are parsed only while the dialect is DEFAULT; under HIVE they are accepted
+   * without parsing. A script without any INSERT statement is reported as failed.
+   *
+   * @param sql
+   *   the SQL script to validate
+   * @return
+   *   a successful result, or a failed result describing the first error
+   */
+  def verifySql(sql: String): FlinkSqlValidationResult = {
+    // A failure reported by the parser callback is returned to the caller right away.
+    val sqlCommands = SqlCommandParser.parseSQL(sql, r => return r)
+    var sqlDialect = SqlDialect.DEFAULT.name().toLowerCase()
+    var hasInsert = false
+    for (call <- sqlCommands) {
+      // A statement may carry no operands; use null then instead of failing on `head`,
+      // the same way FlinkSqlExecutor treats empty operands.
+      val args = call.operands.headOption.orNull
+      val command = call.command
+      command match {
+        case SET | RESET =>
+          if (command == SET && args == TableConfigOptions.TABLE_SQL_DIALECT.key()) {
+            sqlDialect = call.operands.last
+          }
+        case BEGIN_STATEMENT_SET | END_STATEMENT_SET =>
+          logWarn(s"SQL Client Syntax: ${call.command.name} ")
+        case _ =>
+          if (command == INSERT) {
+            hasInsert = true
+          }
+          Try {
+            // The parser class moved between Flink versions, so it is looked up by name.
+            val calciteClass = Try(Class.forName(FLINK112_CALCITE_PARSER_CLASS))
+              .getOrElse(Class.forName(FLINK113_PLUS_CALCITE_PARSER_CLASS))
+            sqlDialect.toUpperCase() match {
+              case "HIVE" => // no parsing is done for the HIVE dialect
+              case "DEFAULT" =>
+                val parser = calciteClass
+                  .getConstructor(Array(classOf[Config]): _*)
+                  .newInstance(sqlParserConfigMap(sqlDialect.toUpperCase()))
+                val method =
+                  parser.getClass.getDeclaredMethod("parse", classOf[String])
+                method.setAccessible(true)
+                method.invoke(parser, call.originSql)
+              case _ =>
+                throw new UnsupportedOperationException(s"unsupported dialect: $sqlDialect")
+            }
+          } match {
+            case Failure(e) =>
+              val exception = ExceptionUtils.stringifyException(e)
+              val causedBy = exception.drop(exception.indexOf("Caused by:"))
+              val cleanUpError = exception.replaceAll("[\r\n]", "")
+              cleanUpError match {
+                case SYNTAX_ERROR_REGEXP(line, column) =>
+                  // The parser counts lines within the statement; shift to script lines.
+                  val errorLine = call.lineStart + line.toInt - 1
+                  return FlinkSqlValidationResult(
+                    success = false,
+                    failedType = FlinkSqlValidationFailedType.SYNTAX_ERROR,
+                    lineStart = call.lineStart,
+                    lineEnd = call.lineEnd,
+                    errorLine = errorLine,
+                    errorColumn = column.toInt,
+                    sql = call.originSql,
+                    exception = causedBy.replaceAll(s"at\\sline\\s$line", s"at line $errorLine"))
+                case _ =>
+                  // No position in the message: report the statement range only.
+                  return FlinkSqlValidationResult(
+                    success = false,
+                    failedType = FlinkSqlValidationFailedType.SYNTAX_ERROR,
+                    lineStart = call.lineStart,
+                    lineEnd = call.lineEnd,
+                    sql = call.originSql,
+                    exception = causedBy)
+              }
+            case _ =>
+          }
+      }
+    }
+
+    if (hasInsert) {
+      FlinkSqlValidationResult()
+    } else if (sqlCommands.isEmpty) {
+      // Nothing was parsed, so there is no statement to take a line range from.
+      FlinkSqlValidationResult(
+        success = false,
+        failedType = FlinkSqlValidationFailedType.SYNTAX_ERROR,
+        exception = "No 'INSERT' statement to trigger the execution of the Flink job.")
+    } else {
+      FlinkSqlValidationResult(
+        success = false,
+        failedType = FlinkSqlValidationFailedType.SYNTAX_ERROR,
+        lineStart = sqlCommands.head.lineStart,
+        lineEnd = sqlCommands.last.lineEnd,
+        exception = "No 'INSERT' statement to trigger the execution of the Flink job.")
+    }
+  }
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkStreamTableTraitV2.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkStreamTableTraitV2.scala
new file mode 100644
index 0000000000..102e925f10
--- /dev/null
+++
b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkStreamTableTraitV2.scala @@ -0,0 +1,396 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.flink.core + +import org.apache.streampark.common.util.Implicits.JavaList +import org.apache.streampark.common.util.Utils +import org.apache.streampark.flink.core.EnhancerImplicit._ + +import org.apache.flink.api.common.{JobExecutionResult, RuntimeExecutionMode} +import org.apache.flink.api.common.cache.DistributedCache +import org.apache.flink.api.common.eventtime.WatermarkStrategy +import org.apache.flink.api.common.io.{FileInputFormat, FilePathFilter, InputFormat} +import org.apache.flink.api.connector.source.{Source, SourceSplit} +import org.apache.flink.api.java.tuple +import org.apache.flink.configuration.ReadableConfig +import org.apache.flink.core.execution.{JobClient, JobListener} +import org.apache.flink.streaming.api.CheckpointingMode +import org.apache.flink.streaming.api.datastream.DataStream +import org.apache.flink.streaming.api.environment.{CheckpointConfig, StreamExecutionEnvironment} +import 
org.apache.flink.streaming.api.functions.source.FileProcessingMode +import org.apache.flink.streaming.api.graph.StreamGraph +import org.apache.flink.table.api._ +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment +import org.apache.flink.table.catalog.Catalog +import org.apache.flink.table.functions._ +import org.apache.flink.table.module.Module +import org.apache.flink.types.Row +import org.apache.flink.util.{ParameterTool, SplittableIterator} + +import java.util.Optional + +/** + * Integration api of stream and table (Flink 2.0 - Java DataStream API) + * + * @param parameter + * parameter + * @param streamEnv + * streamEnv + * @param tableEnv + * tableEnv + */ +abstract class FlinkStreamTableTraitV2( + val parameter: ParameterTool, + private val streamEnv: StreamExecutionEnvironment, + private val tableEnv: StreamTableEnvironment) + extends StreamTableEnvironment { + + /** + * Once a Table has been converted to a DataStream, the DataStream job must be executed using the + * execute method of the StreamExecutionEnvironment. + */ + var isConvertedToDataStream: Boolean = false + + /** Recommended to use this Api to start tasks */ + def start(name: String = null): JobExecutionResult = { + val appName = parameter.getAppName(name, true) + execute(appName) + } + + @deprecated def execute(jobName: String): JobExecutionResult = { + Utils.printLogo(s"FlinkStreamTable $jobName Starting...") + if (isConvertedToDataStream) { + streamEnv.execute(jobName) + } else null + } + + def sql(sql: String = null)(implicit callback: String => Unit = null): Unit = + FlinkSqlExecutor.executeSql(sql, parameter, this) + + // ...streamEnv api start... 
+
+  def $getCachedFiles: JavaList[tuple.Tuple2[String, DistributedCache.DistributedCacheEntry]] =
+    this.streamEnv.getCachedFiles
+
+  def $getJobListeners: JavaList[JobListener] = this.streamEnv.getJobListeners
+
+  def $setParallelism(parallelism: Int): Unit =
+    this.streamEnv.setParallelism(parallelism)
+
+  def $setRuntimeMode(deployMode: RuntimeExecutionMode): StreamExecutionEnvironment =
+    this.streamEnv.setRuntimeMode(deployMode)
+
+  def $setMaxParallelism(maxParallelism: Int): Unit =
+    this.streamEnv.setMaxParallelism(maxParallelism)
+
+  def $getParallelism: Int = this.streamEnv.getParallelism
+
+  def $getMaxParallelism: Int = this.streamEnv.getMaxParallelism
+
+  def $setBufferTimeout(timeoutMillis: Long): StreamExecutionEnvironment =
+    this.streamEnv.setBufferTimeout(timeoutMillis)
+
+  def $getBufferTimeout: Long = this.streamEnv.getBufferTimeout
+
+  def $disableOperatorChaining(): StreamExecutionEnvironment =
+    this.streamEnv.disableOperatorChaining()
+
+  def $getCheckpointConfig: CheckpointConfig =
+    this.streamEnv.getCheckpointConfig
+
+  def $enableCheckpointing(interval: Long, mode: CheckpointingMode): StreamExecutionEnvironment =
+    this.streamEnv.enableCheckpointing(interval, mode)
+
+  def $enableCheckpointing(interval: Long): StreamExecutionEnvironment =
+    this.streamEnv.enableCheckpointing(interval)
+
+  def $getCheckpointingMode: CheckpointingMode =
+    this.streamEnv.getCheckpointingMode
+
+  def $configure(configuration: ReadableConfig, classLoader: ClassLoader): Unit =
+    this.streamEnv.configure(configuration, classLoader)
+
+  def $fromSequence(from: Long, to: Long): DataStream[java.lang.Long] =
+    this.streamEnv.fromSequence(from, to)
+
+  // fromData with varargs removed in Flink 2.0
+  def $fromData[T](data: T): DataStream[T] =
+    this.streamEnv.fromData(data)
+
+  def $fromCollection[T](data: java.util.Collection[T]): DataStream[T] =
+    this.streamEnv.fromCollection(data)
+
+  def $fromParallelCollection[T](data: SplittableIterator[T], clazz: Class[T]): DataStream[T] =
+    this.streamEnv.fromParallelCollection(data, clazz)
+
+  def $readFile[T](inputFormat: FileInputFormat[T], filePath: String): DataStream[T] =
+    this.streamEnv.readFile(inputFormat, filePath)
+
+  def $readFile[T](
+      inputFormat: FileInputFormat[T],
+      filePath: String,
+      watchType: FileProcessingMode,
+      interval: Long): DataStream[T] =
+    this.streamEnv.readFile(inputFormat, filePath, watchType, interval)
+
+  def $socketTextStream(
+      hostname: String,
+      port: Int,
+      delimiter: Char,
+      maxRetry: Long): DataStream[String] =
+    this.streamEnv.socketTextStream(hostname, port, delimiter, maxRetry)
+
+  def $createInput[T](inputFormat: InputFormat[T, _]): DataStream[T] =
+    this.streamEnv.createInput(inputFormat)
+
+  def $fromSource[T](
+      source: Source[T, _ <: SourceSplit, _],
+      watermarkStrategy: WatermarkStrategy[T],
+      sourceName: String): DataStream[T] =
+    this.streamEnv.fromSource(source, watermarkStrategy, sourceName)
+
+  def $registerJobListener(jobListener: JobListener): Unit =
+    this.streamEnv.registerJobListener(jobListener)
+
+  def $clearJobListeners(): Unit = this.streamEnv.clearJobListeners()
+
+  def $executeAsync(): JobClient = this.streamEnv.executeAsync()
+
+  def $executeAsync(jobName: String): JobClient =
+    this.streamEnv.executeAsync(jobName)
+
+  def $getExecutionPlan: String = this.streamEnv.getExecutionPlan
+
+  def $getStreamGraph: StreamGraph = this.streamEnv.getStreamGraph
+
+  def $registerCachedFile(filePath: String, name: String): Unit =
+    this.streamEnv.registerCachedFile(filePath, name)
+
+  def $registerCachedFile(filePath: String, name: String, executable: Boolean): Unit =
+    this.streamEnv.registerCachedFile(filePath, name, executable)
+
+  def $isUnalignedCheckpointsEnabled: Boolean =
+    this.streamEnv.isUnalignedCheckpointsEnabled
+
+  def $isForceUnalignedCheckpoints: Boolean =
+    this.streamEnv.isForceUnalignedCheckpoints
+
+  @deprecated def $readFile[T](
+      inputFormat: FileInputFormat[T],
+      filePath: String,
+      watchType: FileProcessingMode,
+      interval: Long,
+      filter: FilePathFilter): DataStream[T] =
+    this.streamEnv.readFile(inputFormat, filePath, watchType, interval, filter)
+
+  // ---- end of the $-prefixed streamEnv delegates; DataStream <-> Table bridge overrides follow ----
+
+  override def fromDataStream[T](dataStream: DataStream[T]): Table =
+    tableEnv.fromDataStream(dataStream)
+
+  override def fromDataStream[T](dataStream: DataStream[T], schema: Schema): Table =
+    tableEnv.fromDataStream(dataStream, schema)
+
+  override def fromChangelogStream(dataStream: DataStream[Row]): Table =
+    tableEnv.fromChangelogStream(dataStream)
+
+  override def fromChangelogStream(dataStream: DataStream[Row], schema: Schema): Table =
+    tableEnv.fromChangelogStream(dataStream, schema)
+
+  override def fromChangelogStream(
+      dataStream: DataStream[Row],
+      schema: Schema,
+      changelogMode: org.apache.flink.table.connector.ChangelogMode): Table =
+    tableEnv.fromChangelogStream(dataStream, schema, changelogMode)
+
+  override def createTemporaryView[T](path: String, dataStream: DataStream[T]): Unit =
+    tableEnv.createTemporaryView(path, dataStream)
+
+  override def createTemporaryView[T](
+      path: String,
+      dataStream: DataStream[T],
+      schema: Schema): Unit =
+    tableEnv.createTemporaryView(path, dataStream, schema)
+
+  override def toDataStream(table: Table): DataStream[Row] = {
+    isConvertedToDataStream = true // every Table -> DataStream conversion below raises this flag
+    tableEnv.toDataStream(table)
+  }
+
+  override def toDataStream[T](table: Table, targetClass: Class[T]): DataStream[T] = {
+    isConvertedToDataStream = true
+    tableEnv.toDataStream(table, targetClass)
+  }
+
+  override def toDataStream[T](
+      table: Table,
+      targetDataType: org.apache.flink.table.types.AbstractDataType[_]): DataStream[T] = {
+    isConvertedToDataStream = true
+    tableEnv.toDataStream(table, targetDataType)
+  }
+
+  override def toChangelogStream(table: Table): DataStream[Row] = {
+    isConvertedToDataStream = true
+    tableEnv.toChangelogStream(table)
+  }
+
+  override def toChangelogStream(table: Table, targetSchema: Schema): DataStream[Row] = {
+    isConvertedToDataStream = true
+    tableEnv.toChangelogStream(table, targetSchema)
+  }
+
+  override def toChangelogStream(
+      table: Table,
+      targetSchema: Schema,
+      changelogMode: org.apache.flink.table.connector.ChangelogMode): DataStream[Row] = {
+    isConvertedToDataStream = true
+    tableEnv.toChangelogStream(table, targetSchema, changelogMode)
+  }
+
+  override def createStatementSet(): org.apache.flink.table.api.bridge.java.StreamStatementSet =
+    tableEnv.createStatementSet()
+
+  // ---- plain TableEnvironment overrides: each one forwards to the wrapped tableEnv ----
+
+  override def fromValues(values: org.apache.flink.table.expressions.Expression*): Table =
+    tableEnv.fromValues(values: _*) // expand the varargs; a bare Seq would reach Flink as one object
+
+  override def fromValues(
+      rowType: org.apache.flink.table.types.AbstractDataType[_],
+      values: org.apache.flink.table.expressions.Expression*): Table =
+    tableEnv.fromValues(rowType, values: _*)
+
+  override def fromValues(values: java.lang.Iterable[_]): Table =
+    tableEnv.fromValues(values)
+
+  override def fromValues(
+      rowType: org.apache.flink.table.types.AbstractDataType[_],
+      values: java.lang.Iterable[_]): Table =
+    tableEnv.fromValues(rowType, values)
+
+  override def registerCatalog(catalogName: String, catalog: Catalog): Unit =
+    tableEnv.registerCatalog(catalogName, catalog)
+
+  override def getCatalog(catalogName: String): Optional[Catalog] =
+    tableEnv.getCatalog(catalogName)
+
+  override def loadModule(moduleName: String, module: Module): Unit =
+    tableEnv.loadModule(moduleName, module)
+
+  override def unloadModule(moduleName: String): Unit =
+    tableEnv.unloadModule(moduleName)
+
+  override def createTemporarySystemFunction(
+      name: String,
+      functionClass: Class[_ <: UserDefinedFunction]): Unit =
+    tableEnv.createTemporarySystemFunction(name, functionClass)
+
+  override def createTemporarySystemFunction(
+      name: String,
+      functionInstance: UserDefinedFunction): Unit =
+    tableEnv.createTemporarySystemFunction(name, functionInstance)
+
+  override def dropTemporarySystemFunction(name: String): Boolean =
+    tableEnv.dropTemporarySystemFunction(name)
+
+  override def createFunction(path: String, functionClass: Class[_ <: UserDefinedFunction]): Unit =
+    tableEnv.createFunction(path, functionClass)
+
+  override def createFunction(
+      path: String,
+      functionClass: Class[_ <: UserDefinedFunction],
+      ignoreIfExists: Boolean): Unit =
+    tableEnv.createFunction(path, functionClass, ignoreIfExists) // forward the flag; it was dropped before
+
+  override def dropFunction(path: String): Boolean = tableEnv.dropFunction(path)
+
+  override def createTemporaryFunction(
+      path: String,
+      functionClass: Class[_ <: UserDefinedFunction]): Unit =
+    tableEnv.createTemporaryFunction(path, functionClass)
+
+  override def createTemporaryFunction(path: String, functionInstance: UserDefinedFunction): Unit =
+    tableEnv.createTemporaryFunction(path, functionInstance)
+
+  override def dropTemporaryFunction(path: String): Boolean =
+    tableEnv.dropTemporaryFunction(path)
+
+  override def createTemporaryView(path: String, view: Table): Unit =
+    tableEnv.createTemporaryView(path, view)
+
+  override def from(path: String): Table = tableEnv.from(path)
+
+  override def listCatalogs(): Array[String] = tableEnv.listCatalogs()
+
+  override def listModules(): Array[String] = tableEnv.listModules()
+
+  override def listDatabases(): Array[String] = tableEnv.listDatabases()
+
+  override def listTables(): Array[String] = tableEnv.listTables()
+
+  override def listViews(): Array[String] = tableEnv.listViews()
+
+  override def listTemporaryTables(): Array[String] =
+    tableEnv.listTemporaryTables
+
+  override def listTemporaryViews(): Array[String] =
+    tableEnv.listTemporaryViews()
+
+  override def listUserDefinedFunctions(): Array[String] =
+    tableEnv.listUserDefinedFunctions()
+
+  override def listFunctions(): Array[String] = tableEnv.listFunctions()
+
+  override def dropTemporaryTable(path: String): Boolean =
+    tableEnv.dropTemporaryTable(path)
+
+  override def dropTemporaryView(path: String): Boolean =
+    tableEnv.dropTemporaryView(path)
+
+  override def explainSql(statement: String, extraDetails: ExplainDetail*): String =
+    tableEnv.explainSql(statement, extraDetails: _*)
+
+  override def sqlQuery(query: String): Table = tableEnv.sqlQuery(query)
+
+  override def executeSql(statement: String): TableResult =
+    tableEnv.executeSql(statement)
+
+  override def getCurrentCatalog: String = tableEnv.getCurrentCatalog
+
+  override def useCatalog(catalogName: String): Unit =
+    tableEnv.useCatalog(catalogName)
+
+  override def getCurrentDatabase: String = tableEnv.getCurrentDatabase
+
+  override def useDatabase(databaseName: String): Unit =
+    tableEnv.useDatabase(databaseName)
+
+  override def getConfig: TableConfig = tableEnv.getConfig
+
+  @deprecated override def registerFunction(name: String, function: ScalarFunction): Unit =
+    tableEnv.registerFunction(name, function)
+
+  @deprecated override def registerTable(name: String, table: Table): Unit =
+    tableEnv.registerTable(name, table)
+
+  @deprecated override def scan(tablePath: String*): Table =
+    tableEnv.scan(tablePath: _*)
+
+  @deprecated override def getCompletionHints(statement: String, position: Int): Array[String] =
+    tableEnv.getCompletionHints(statement, position)
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkStreamingInitializerV2.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkStreamingInitializerV2.scala
new file mode 100644
index 0000000000..69d1d19d8c
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkStreamingInitializerV2.scala
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.conf.ConfigKeys._
+import org.apache.streampark.common.enums.ApiType
+import org.apache.streampark.common.enums.ApiType.ApiType
+import org.apache.streampark.common.util._
+import org.apache.streampark.common.util.Implicits._
+import org.apache.streampark.flink.core.conf.FlinkConfiguration
+
+import collection.{mutable, Map}
+import org.apache.flink.configuration.Configuration
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
+import org.apache.flink.table.api.TableConfig
+import org.apache.flink.util.ParameterTool
+
+import java.io.File
+
+private[flink] object FlinkStreamingInitializerV2 { // factory: returns (merged parameters, stream environment)
+
+  def initialize( // Scala entry point: the hook is a plain function
+      args: Array[String],
+      config: (StreamExecutionEnvironment, ParameterTool) => Unit): (ParameterTool, StreamExecutionEnvironment) = {
+    val flinkInitializer = new FlinkStreamingInitializerV2(args, ApiType.SCALA)
+    flinkInitializer.streamEnvConfFunc = config // must be assigned before the lazy streamEnv is first read
+    (flinkInitializer.configuration.parameter, flinkInitializer.streamEnv)
+  }
+
+  def initialize(args: StreamEnvConfig): (ParameterTool, StreamExecutionEnvironment) = { // Java entry point
+    val flinkInitializer =
+      new FlinkStreamingInitializerV2(args.args, ApiType.JAVA)
+    flinkInitializer.javaStreamEnvConfFunc = args.conf
+    (flinkInitializer.configuration.parameter, flinkInitializer.streamEnv)
+  }
+}
+
+private[flink] class FlinkStreamingInitializerV2(args: Array[String], apiType: ApiType)
+  extends Logger {
+
+  var streamEnvConfFunc: (StreamExecutionEnvironment, ParameterTool) => Unit = _
+
+  var tableConfFunc: (TableConfig, ParameterTool) => Unit = _
+
+  var javaStreamEnvConfFunc: StreamEnvConfigFunction = _
+
+  var javaTableEnvConfFunc: TableEnvConfigFunction = _
+
+  implicit private[flink] val parameter: ParameterTool = configuration.parameter // strict val: forces the lazy `configuration` during construction
+
+  lazy val streamEnv: StreamExecutionEnvironment = {
+    val env = StreamExecutionEnvironment.getExecutionEnvironment(configuration.envConfig)
+
+    apiType match { // run only the hook that matches the API flavour, and only if one was set
+      case ApiType.JAVA if javaStreamEnvConfFunc != null =>
+        javaStreamEnvConfFunc.configuration(env, configuration.parameter)
+      case ApiType.SCALA if streamEnvConfFunc != null =>
+        streamEnvConfFunc(env, configuration.parameter)
+      case _ =>
+    }
+    env.getConfig.setGlobalJobParameters(configuration.parameter)
+    env
+  }
+
+  lazy val configuration: FlinkConfiguration = initParameter()
+
+  def initParameter(): FlinkConfiguration = { // requires the KEY_APP_CONF argument; throws when it is missing or empty
+    val argsMap = ParameterTool.fromArgs(args)
+    val config = argsMap.get(KEY_APP_CONF(), null) match {
+      case null | "" =>
+        throw new ExceptionInInitializerError(
+          "[StreamPark] Usage:can't find config,please set \"--conf $path \" in main arguments")
+      case file => file
+    }
+    val configMap = parseConfig(config)
+    val properConf = extractConfigByPrefix(configMap, KEY_FLINK_PROPERTY_PREFIX)
+    val appConf = extractConfigByPrefix(configMap, KEY_APP_PREFIX)
+
+    // Merge order below: system properties, then flink property conf, then app conf, then main arguments.
+    val parameter = ParameterTool
+      .fromSystemProperties()
+      .mergeWith(ParameterTool.fromMap(properConf))
+      .mergeWith(ParameterTool.fromMap(appConf))
+      .mergeWith(argsMap)
+
+    val envConfig = Configuration.fromMap(properConf)
+    FlinkConfiguration(parameter, envConfig, null) // third argument is passed as null here
+  }
+
+  def parseConfig(config: String): Map[String, String] = { // entries with an empty value are filtered out of the result
+
+    lazy val content = DeflaterUtils.unzipString(config.drop(7)) // text after the 7-char "yaml://" / "conf://" / "prop://" prefix
+
+    def readConfig(text: String): Map[String, String] = {
+      val format = config.split("\\.").last.toLowerCase // parser is chosen by the file extension of `config`
+      format match {
+        case "yml" | "yaml" => PropertiesUtils.fromYamlText(text)
+        case "conf" => PropertiesUtils.fromHoconText(text)
+        case "properties" => PropertiesUtils.fromPropertiesText(text)
+        case _ =>
+          throw new IllegalArgumentException(
+            "[StreamPark] Usage: application config file error,must be [yaml|conf|properties]")
+      }
+    }
+
+    val map = config match {
+      case x if x.startsWith("yaml://") => PropertiesUtils.fromYamlText(content)
+      case x if x.startsWith("conf://") =>
+        PropertiesUtils.fromHoconText(content)
+      case x if x.startsWith("prop://") =>
+        PropertiesUtils.fromPropertiesText(content)
+      case x if x.startsWith("hdfs://") =>
+        // When the config file lives on HDFS, the HDFS-related configuration files must be copied under the resources dir
+        val text = HdfsUtils.read(x)
+        readConfig(text)
+      case _ => // anything else is treated as a local file path
+        val configFile = new File(config)
+        require(
+          configFile.exists(),
+          s"[StreamPark] Usage: application config file: $configFile is not found!!!")
+        val text = FileUtils.readFile(configFile)
+        readConfig(text)
+    }
+    map.filter(_._2.nonEmpty)
+  }
+
+  def extractConfigByPrefix(configMap: Map[String, String], prefix: String): Map[String, String] = {
+    val map = mutable.Map[String, String]()
+    configMap.foreach(x =>
+      if (x._1.startsWith(prefix)) {
+        map += x._1.drop(prefix.length) -> x._2 // keep the entry with the prefix stripped from its key
+      })
+    map
+  }
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkTableInitializerV2.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkTableInitializerV2.scala
new file mode 100644
index 0000000000..9a441db545
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkTableInitializerV2.scala
@@ 
-0,0 +1,257 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.conf.ConfigKeys._
+import org.apache.streampark.common.enums.{ApiType, PlannerType}
+import org.apache.streampark.common.enums.ApiType.ApiType
+import org.apache.streampark.common.util.{DeflaterUtils, PropertiesUtils}
+import org.apache.streampark.common.util.Implicits._
+import org.apache.streampark.flink.core.EnhancerImplicit._
+import org.apache.streampark.flink.core.conf.FlinkConfiguration
+
+import org.apache.flink.configuration.Configuration
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
+import org.apache.flink.table.api.{EnvironmentSettings, TableConfig, TableEnvironment}
+import org.apache.flink.table.api.bridge.java.StreamTableEnvironment
+import org.apache.flink.util.ParameterTool
+
+import java.io.File
+
+import scala.collection.{mutable, Map}
+import scala.util.{Failure, Success, Try}
+
+private[flink] object FlinkTableInitializerV2 {
+
+  def initialize(
+      args: Array[String],
+      config: (TableConfig, ParameterTool) => Unit): (ParameterTool, TableEnvironment) = {
+    val flinkInitializer = new FlinkTableInitializerV2(args, ApiType.SCALA)
+    flinkInitializer.tableConfFunc = config
+    (flinkInitializer.configuration.parameter, flinkInitializer.tableEnv)
+  }
+
+  def initialize(args: TableEnvConfig): (ParameterTool, TableEnvironment) = {
+    val flinkInitializer = new FlinkTableInitializerV2(args.args, ApiType.JAVA)
+    flinkInitializer.javaTableEnvConfFunc = args.conf
+    (flinkInitializer.configuration.parameter, flinkInitializer.tableEnv)
+  }
+
+  def initialize(
+      args: Array[String],
+      configStream: (StreamExecutionEnvironment, ParameterTool) => Unit,
+      configTable: (TableConfig, ParameterTool) => Unit): (ParameterTool, StreamExecutionEnvironment, StreamTableEnvironment) = {
+
+    val flinkInitializer = new FlinkTableInitializerV2(args, ApiType.SCALA)
+    flinkInitializer.streamEnvConfFunc = configStream
+    flinkInitializer.tableConfFunc = configTable
+    (
+      flinkInitializer.configuration.parameter,
+      flinkInitializer.streamEnv,
+      flinkInitializer.streamTableEnv)
+  }
+
+  def initialize(
+      args: StreamTableEnvConfig): (ParameterTool, StreamExecutionEnvironment, StreamTableEnvironment) = {
+    val flinkInitializer = new FlinkTableInitializerV2(args.args, ApiType.JAVA)
+    flinkInitializer.javaStreamEnvConfFunc = args.streamConfig
+    flinkInitializer.javaTableEnvConfFunc = args.tableConfig
+    (
+      flinkInitializer.configuration.parameter,
+      flinkInitializer.streamEnv,
+      flinkInitializer.streamTableEnv)
+  }
+
+}
+
+/**
+ * Table-API initializer layered on [[FlinkStreamingInitializerV2]]. It exposes a batch
+ * `tableEnv` and a streaming `streamTableEnv`; both are built from the same lazily created
+ * `envSettings` builder.
+ */
+private[flink] class FlinkTableInitializerV2(args: Array[String], apiType: ApiType)
+  extends FlinkStreamingInitializerV2(args, apiType) {
+
+  private[this] lazy val envSettings = {
+
+    val builder = EnvironmentSettings.newInstance()
+
+    Try(PlannerType.withName(parameter.get(KEY_FLINK_TABLE_PLANNER)))
+      .getOrElse(PlannerType.BLINK) match { // the planner selector is looked up reflectively; a missing method only logs a warning
+      case PlannerType.BLINK =>
+        val useBlinkPlanner =
+          Try(builder.getClass.getDeclaredMethod("useBlinkPlanner"))
+            .getOrElse(null)
+        if (useBlinkPlanner == null) {
+          logWarn("useBlinkPlanner deprecated")
+        } else {
+          useBlinkPlanner.setAccessible(true)
+          useBlinkPlanner.invoke(builder)
+          logInfo("blinkPlanner will be used.")
+        }
+      case PlannerType.OLD =>
+        val useOldPlanner = Try(builder.getClass.getDeclaredMethod("useOldPlanner")).getOrElse(null)
+        if (useOldPlanner == null) {
+          logWarn("useOldPlanner deprecated")
+        } else {
+          useOldPlanner.setAccessible(true)
+          useOldPlanner.invoke(builder)
+          logInfo("useOldPlanner will be used.")
+        }
+      case PlannerType.ANY =>
+        val useAnyPlanner = Try(builder.getClass.getDeclaredMethod("useAnyPlanner")).getOrElse(null)
+        if (useAnyPlanner == null) {
+          logWarn("useAnyPlanner deprecated")
+        } else {
+          logInfo("useAnyPlanner will be used.")
+          useAnyPlanner.setAccessible(true)
+          useAnyPlanner.invoke(builder)
+        }
+    }
+
+    parameter.get(KEY_FLINK_CONF(), null) match { // the deflated YAML flink conf is mandatory on this path
+      case null | "" =>
+        throw new ExceptionInInitializerError(
+          "[StreamPark] Usage:can't find config,please set \"--flink.conf $conf \" in main arguments")
+      case conf => builder.withConfiguration(
+          Configuration.fromMap(PropertiesUtils.fromYamlText(DeflaterUtils.unzipString(conf))))
+    }
+    val buildWith =
+      (parameter.get(KEY_FLINK_TABLE_CATALOG), parameter.get(KEY_FLINK_TABLE_DATABASE))
+    buildWith match {
+      case (x: String, y: String) if x != null && y != null =>
+        logInfo(s"with built in catalog: $x")
+        logInfo(s"with built in database: $y")
+        builder.withBuiltInCatalogName(x)
+        builder.withBuiltInDatabaseName(y)
+      case (x: String, _) if x != null =>
+        logInfo(s"with built in catalog: $x")
+        builder.withBuiltInCatalogName(x)
+      case (_, y: String) if y != null =>
+        logInfo(s"with built in database: $y")
+        builder.withBuiltInDatabaseName(y)
+      case _ =>
+    }
+    builder
+  }
+
+  lazy val tableEnv: TableEnvironment = {
+    logInfo(s"job working in batch mode")
+    envSettings.inBatchMode()
+    val tableEnv = TableEnvironment.create(envSettings.build()).setAppName
+    apiType match {
+      case ApiType.JAVA if javaTableEnvConfFunc != null =>
+        javaTableEnvConfFunc.configuration(tableEnv.getConfig, parameter)
+      case ApiType.SCALA if tableConfFunc != null =>
+        tableConfFunc(tableEnv.getConfig, parameter)
+      case _ =>
+    }
+    tableEnv
+  }
+
+  lazy val streamTableEnv: StreamTableEnvironment = {
+    logInfo(s"components should work in streaming mode")
+    envSettings.inStreamingMode()
+    val setting = envSettings.build()
+
+    if (streamEnvConfFunc != null) {
+      streamEnvConfFunc(streamEnv, parameter)
+    }
+    if (javaStreamEnvConfFunc != null) {
+      javaStreamEnvConfFunc.configuration(streamEnv, parameter)
+    }
+    val streamTableEnv =
+      StreamTableEnvironment.create(streamEnv, setting).setAppName
+    apiType match {
+      case ApiType.JAVA if javaTableEnvConfFunc != null =>
+        javaTableEnvConfFunc.configuration(streamTableEnv.getConfig, parameter)
+      case ApiType.SCALA if tableConfFunc != null =>
+        tableConfFunc(streamTableEnv.getConfig, parameter)
+      case _ =>
+    }
+    streamTableEnv
+  }
+
+  /** In case of table SQL, the parameter conf is not required, it depends on the developer. */
+
+  override def initParameter(): FlinkConfiguration = {
+    val configuration = {
+      val argsMap = ParameterTool.fromArgs(args)
+      argsMap.get(KEY_APP_CONF(), null) match {
+        case null | "" =>
+          logWarn("Usage:can't find config,you can set \"--conf $path \" in main arguments")
+          val parameter =
+            ParameterTool.fromSystemProperties().mergeWith(argsMap)
+          FlinkConfiguration(parameter, new Configuration(), new Configuration())
+        case file =>
+          val configMap = parseConfig(file)
+          // collect the entries under KEY_SQL_PREFIX, with the prefix stripped from their keys
+          val sqlConf = mutable.Map[String, String]()
+          configMap.foreach(x => {
+            if (x._1.startsWith(KEY_SQL_PREFIX)) {
+              sqlConf += x._1.drop(KEY_SQL_PREFIX.length) -> x._2
+            }
+          })
+
+          // config priority: explicitly specified priority > project profiles > system profiles
+          val properConf =
+            extractConfigByPrefix(configMap, KEY_FLINK_PROPERTY_PREFIX)
+          val appConf = extractConfigByPrefix(configMap, KEY_APP_PREFIX)
+          val tableConf =
+            extractConfigByPrefix(configMap, KEY_FLINK_TABLE_PREFIX)
+
+          val tableConfig = Configuration.fromMap(tableConf)
+          val envConfig = Configuration.fromMap(properConf)
+
+          val parameter = ParameterTool
+            .fromSystemProperties()
+            .mergeWith(ParameterTool.fromMap(properConf))
+            .mergeWith(ParameterTool.fromMap(tableConf))
+            .mergeWith(ParameterTool.fromMap(appConf))
+            .mergeWith(ParameterTool.fromMap(sqlConf))
+            .mergeWith(argsMap)
+
+          FlinkConfiguration(parameter, envConfig, tableConfig)
+      }
+    }
+
+    configuration.parameter.get(KEY_FLINK_SQL()) match {
+      case null => configuration
+      case param =>
+        // for streampark-console
+        Try(DeflaterUtils.unzipString(param)) match {
+          case Success(value) =>
+            configuration.copy(parameter = configuration.parameter.mergeWith(
+              ParameterTool.fromMap(Map(KEY_FLINK_SQL() -> value))))
+          case Failure(_) => // not a deflated payload: try the value as a path to a YAML file
+            val sqlFile = new File(param)
+            Try(PropertiesUtils.fromYamlFile(sqlFile.getAbsolutePath)) match {
+              case Success(value) =>
+                configuration.copy(parameter =
+                  configuration.parameter.mergeWith(ParameterTool.fromMap(value)))
+              case Failure(e) =>
+                // Neither a deflated payload nor a readable YAML file: keep the value untouched,
+                // but log the cause (the original built an exception here and discarded it).
+                logWarn(s"[StreamPark] init sql error.$e")
+                configuration
+            }
+        }
+    }
+  }
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkTableTrait.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkTableTrait.scala
new file mode 100644
index 0000000000..32bef31194
--- /dev/null
+++ 
b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/FlinkTableTrait.scala
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.util.Utils
+import org.apache.streampark.flink.core.EnhancerImplicit._
+
+import org.apache.flink.api.common.JobExecutionResult
+import org.apache.flink.table.api._
+import org.apache.flink.table.catalog.Catalog
+import org.apache.flink.table.expressions.Expression
+import org.apache.flink.table.functions._
+import org.apache.flink.table.module.Module
+import org.apache.flink.table.types.AbstractDataType
+import org.apache.flink.util.ParameterTool
+
+import java.lang
+import java.util.Optional
+
+/**
+ * Base class for table jobs: it carries the job `parameter` and wraps a `TableEnvironment`,
+ * forwarding every overridden `TableEnvironment` method to that wrapped instance.
+ */
+abstract class FlinkTableTrait(val parameter: ParameterTool, private val tableEnv: TableEnvironment)
+  extends TableEnvironment {
+
+  def start(): JobExecutionResult = {
+    val appName = parameter.getAppName(required = true)
+    execute(appName)
+  }
+
+  def execute(jobName: String): JobExecutionResult = {
+    Utils.printLogo(s"FlinkTable $jobName Starting...")
+    null // only the logo is printed here; no JobExecutionResult is produced
+  }
+
+  def sql(sql: String = null): Unit =
+    FlinkSqlExecutor.executeSql(sql, parameter, this)
+
+  // Expand the Scala varargs with `: _*`; passing the Seq itself hands Flink one Seq object.
+  override def fromValues(values: Expression*): Table =
+    tableEnv.fromValues(values: _*)
+
+  override def fromValues(rowType: AbstractDataType[_], values: Expression*): Table =
+    tableEnv.fromValues(rowType, values: _*)
+
+  override def fromValues(values: lang.Iterable[_]): Table =
+    tableEnv.fromValues(values)
+
+  override def fromValues(rowType: AbstractDataType[_], values: lang.Iterable[_]): Table =
+    tableEnv.fromValues(rowType, values)
+
+  override def registerCatalog(catalogName: String, catalog: Catalog): Unit =
+    tableEnv.registerCatalog(catalogName, catalog)
+
+  override def getCatalog(catalogName: String): Optional[Catalog] =
+    tableEnv.getCatalog(catalogName)
+
+  override def loadModule(moduleName: String, module: Module): Unit =
+    tableEnv.loadModule(moduleName, module)
+
+  override def unloadModule(moduleName: String): Unit =
+    tableEnv.unloadModule(moduleName)
+
+  override def createTemporarySystemFunction(
+      name: String,
+      functionClass: Class[_ <: UserDefinedFunction]): Unit =
+    tableEnv.createTemporarySystemFunction(name, functionClass)
+
+  override def createTemporarySystemFunction(
+      name: String,
+      functionInstance: UserDefinedFunction): Unit =
+    tableEnv.createTemporarySystemFunction(name, functionInstance)
+
+  override def dropTemporarySystemFunction(name: String): Boolean =
+    tableEnv.dropTemporarySystemFunction(name)
+
+  override def createFunction(path: String, functionClass: Class[_ <: UserDefinedFunction]): Unit =
+    tableEnv.createFunction(path, functionClass)
+
+  // ignoreIfExists must be forwarded, otherwise this overload behaves like the two-argument one.
+  override def createFunction(
+      path: String,
+      functionClass: Class[_ <: UserDefinedFunction],
+      ignoreIfExists: Boolean): Unit =
+    tableEnv.createFunction(path, functionClass, ignoreIfExists)
+
+  override def dropFunction(path: String): Boolean = tableEnv.dropFunction(path)
+
+  override def createTemporaryFunction(
+      path: String,
+      functionClass: Class[_ <: UserDefinedFunction]): Unit =
+    tableEnv.createTemporaryFunction(path, functionClass)
+
+  override def createTemporaryFunction(path: String, functionInstance: UserDefinedFunction): Unit =
+    tableEnv.createTemporaryFunction(path, functionInstance)
+
+  override def dropTemporaryFunction(path: String): Boolean =
+    tableEnv.dropTemporaryFunction(path)
+
+  override def createTemporaryView(path: String, view: Table): Unit =
+    tableEnv.createTemporaryView(path, view)
+
+  override def from(path: String): Table = tableEnv.from(path)
+
+  override def listCatalogs(): Array[String] = tableEnv.listCatalogs()
+
+  override def listModules(): Array[String] = tableEnv.listModules()
+
+  override def listDatabases(): Array[String] = tableEnv.listDatabases()
+
+  override def listTables(): Array[String] = tableEnv.listTables()
+
+  override def listViews(): Array[String] = tableEnv.listViews()
+
+  override def listTemporaryTables(): Array[String] =
+    tableEnv.listTemporaryTables
+
+  override def listTemporaryViews(): Array[String] =
+    tableEnv.listTemporaryViews()
+
+  override def listUserDefinedFunctions(): Array[String] =
+    tableEnv.listUserDefinedFunctions()
+
+  override def listFunctions(): Array[String] = tableEnv.listFunctions()
+
+  override def dropTemporaryTable(path: String): Boolean =
+    tableEnv.dropTemporaryTable(path)
+
+  override def dropTemporaryView(path: String): Boolean =
+    tableEnv.dropTemporaryView(path)
+
+  override def explainSql(statement: String, extraDetails: ExplainDetail*): String =
+    tableEnv.explainSql(statement, extraDetails: _*)
+
+  override def sqlQuery(query: String): Table = tableEnv.sqlQuery(query)
+
+  override def executeSql(statement: String): TableResult =
+    tableEnv.executeSql(statement)
+
+  override def getCurrentCatalog: String = tableEnv.getCurrentCatalog
+
+  override def useCatalog(catalogName: String): Unit =
+    tableEnv.useCatalog(catalogName)
+
+  override def getCurrentDatabase: String = tableEnv.getCurrentDatabase
+
+  override def useDatabase(databaseName: String): Unit =
+    tableEnv.useDatabase(databaseName)
+
+  override def getConfig: TableConfig = tableEnv.getConfig
+
+  override def createStatementSet(): StatementSet =
+    tableEnv.createStatementSet()
+
+  // NOTE(review): the four methods below forward to TableEnvironment APIs marked deprecated;
+  // confirm they are still declared by the Flink 2.1 TableEnvironment this shim compiles against.
+  @deprecated override def registerFunction(name: String, function: ScalarFunction): Unit =
+    tableEnv.registerFunction(name, function)
+
+  @deprecated override def registerTable(name: String, table: Table): Unit =
+    tableEnv.registerTable(name, table)
+
+  @deprecated override def scan(tablePath: String*): Table =
+    tableEnv.scan(tablePath: _*)
+
+  @deprecated override def getCompletionHints(statement: String, position: Int): Array[String] =
+    tableEnv.getCompletionHints(statement, position)
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/SqlCommandParser.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/SqlCommandParser.scala
new file mode 100644
index 0000000000..037509af2a
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/SqlCommandParser.scala
@@ -0,0 +1,651 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.conf.ConfigKeys.PARAM_PREFIX
+import org.apache.streampark.common.enums.FlinkSqlValidationFailedType
+import org.apache.streampark.common.util.Logger
+
+import enumeratum.EnumEntry
+import org.apache.commons.lang3.StringUtils
+
+import java.lang.{Boolean => JavaBool}
+import java.util.Scanner
+import java.util.regex.{Matcher, Pattern}
+
+import scala.annotation.tailrec
+import scala.collection.{immutable, mutable}
+import scala.collection.mutable.ListBuffer
+import scala.util.control.Breaks.{break, breakable}
+
+object SqlCommandParser extends Logger {
+
+  def parseSQL( // splits `sql` into segments and maps each one to a SqlCommandCall
+      sql: String,
+      validationCallback: FlinkSqlValidationResult => Unit = null): List[SqlCommandCall] = { // callback set: failures are reported to it; unset: they throw
+    val sqlEmptyError = "verify failed: flink sql cannot be empty."
+    require(StringUtils.isNotBlank(sql), sqlEmptyError)
+    val sqlSegments = SqlSplitter.splitSql(sql)
+    sqlSegments match {
+      case s if s.isEmpty =>
+        if (validationCallback != null) {
+          validationCallback(
+            FlinkSqlValidationResult(
+              success = false,
+              failedType = FlinkSqlValidationFailedType.VERIFY_FAILED,
+              exception = sqlEmptyError))
+          null // callback was notified; the caller receives null, not an empty list
+        } else {
+          throw new IllegalArgumentException(sqlEmptyError)
+        }
+      case segments =>
+        val calls = new ListBuffer[SqlCommandCall]
+        for (segment <- segments) {
+          parseLine(segment) match {
+            case Some(x) => calls += x
+            case _ =>
+              if (validationCallback != null) { // report the segment and keep iterating over the remaining ones
+                validationCallback(
+                  FlinkSqlValidationResult(
+                    success = false,
+                    failedType = FlinkSqlValidationFailedType.UNSUPPORTED_SQL,
+                    lineStart = segment.start,
+                    lineEnd = segment.end,
+                    exception = s"unsupported sql",
+                    sql = segment.sql))
+              } else {
+                throw new UnsupportedOperationException(s"unsupported sql: ${segment.sql}")
+              }
+          }
+        }
+
+        calls.toList match {
+          case c if c.isEmpty =>
+            if (validationCallback != null) {
+              validationCallback(
+                FlinkSqlValidationResult(
+                  success = false,
+                  failedType = FlinkSqlValidationFailedType.VERIFY_FAILED,
+                  exception = "flink sql syntax error, no executable sql"))
+              null // same convention as above: null after the callback has been notified
+            } else {
+              throw new UnsupportedOperationException("flink sql syntax error, no executable sql")
+            }
+          case r => r
+        }
+    }
+  }
+
+  private[this] def parseLine(sqlSegment: SqlSegment): Option[SqlCommandCall] = {
+    val sqlCommand = SqlCommand.get(sqlSegment.sql.trim)
+    if (sqlCommand == null) None
+    else {
+      val matcher = sqlCommand.matcher // Matcher stored on the command by the matches() call made inside SqlCommand.get
+      val groups = new Array[String](matcher.groupCount)
+      for (i <- groups.indices) {
+        groups(i) = matcher.group(i + 1) // regex capture groups are 1-based
+      }
+      sqlCommand
+        .converter(groups)
+        .map(x =>
+          SqlCommandCall(sqlSegment.start, sqlSegment.end, sqlCommand, x, sqlSegment.sql.trim))
+    }
+  }
+
+}
+
+object Converters {
+  val NO_OPERANDS = (_: Array[String]) => Some(Array.empty[String]) // ignores the groups and yields no operands
+}
+
+sealed abstract class SqlCommand(
+    val name: String,
+    private val regex: String,
+    val converter: Array[String] => Option[Array[String]] = (x: Array[String]) =>
+      Some(Array[String](x.head))) // default converter keeps only the first captured group
+  extends EnumEntry {
+  var matcher: Matcher = _ // overwritten by every matches() call that has a non-blank regex
+
+  def matches(input: String): Boolean = {
+    if (StringUtils.isBlank(regex)) false
+    else {
+      val pattern =
+        Pattern.compile(regex, Pattern.CASE_INSENSITIVE | Pattern.DOTALL)
+      matcher = pattern.matcher(input)
+      matcher.matches() // whole-input match, not a partial find
+    }
+  }
+}
+
+object SqlCommand extends enumeratum.Enum[SqlCommand] {
+
+  def get(stmt: String): SqlCommand = {
+    var cmd: SqlCommand = null
+    breakable {
+      this.values.foreach(x => {
+        if (x.matches(stmt)) {
+          cmd = x
+          break()
+        }
+      })
+    }
+    cmd
+  }
+
+  val values: immutable.IndexedSeq[SqlCommand] = findValues
+
+  // ---- SELECT Statements--------------------------------------------------------------------------------------------------------------------------------
+  case object SELECT extends SqlCommand("select", "(SELECT\\s+.+)")
+
+  // ----CREATE Statements--------------------------------------------------------------------------------------------------------------------------------
+
+  /**
+ *
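 CREATE [TEMPORARY] TABLE [IF NOT EXISTS] [catalog_name.][db_name.]table_name ( {
+   * <physical_column_definition> | <metadata_column_definition> | <computed_column_definition> }[ ,
+   * ...n] [ <watermark_definition> ] [ <table_constraint> ][ , ...n] ) [COMMENT table_comment]
+   * [PARTITIONED BY (partition_column_name1, partition_column_name2, ...)] WITH (key1=val1,
+   * key2=val2, ...) [ LIKE source_table [( <like_options> )] ] 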
CREATE CATALOG catalog_name WITH (key1=val1, key2=val2, ...) */ + case object CREATE_CATALOG extends SqlCommand("create catalog", "(CREATE\\s+CATALOG\\s+.+)") + + /** + *
 CREATE DATABASE [IF NOT EXISTS] [catalog_name.]db_name
[COMMENT database_comment]
+ * WITH (key1=val1, key2=val2, ...)
+ */ + case object CREATE_DATABASE extends SqlCommand("create database", "(CREATE\\s+DATABASE\\s+.+)") + + /** + *
 CREATE [TEMPORARY] VIEW [IF NOT EXISTS] [catalog_name.][db_name.]view_name [( columnName
+   * [, columnName ]* )] [COMMENT view_comment] AS query_expression< 
CREATE [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF NOT EXISTS] + * [catalog_name.][db_name.]function_name AS identifier [LANGUAGE JAVA|SCALA|PYTHON] DROP statements are used to remove a catalog with the given catalog name or to remove a + * registered table/view/function from the current or specified Catalog. + * + * Flink SQL supports the following DROP statements for now: * DROP CATALOG * DROP TABLE * DROP + * DATABASE * DROP VIEW * DROP FUNCTION + */ + + /** DROP CATALOG [IF EXISTS] catalog_name */ + case object DROP_CATALOG extends SqlCommand("drop catalog", "(DROP\\s+CATALOG\\s+.+)") + + /** DROP [TEMPORARY] TABLE [IF EXISTS] [catalog_name.][db_name.]table_name */ + case object DROP_TABLE extends SqlCommand("drop table", "(DROP\\s+(TEMPORARY\\s+|)TABLE\\s+.+)") + + /** DROP DATABASE [IF EXISTS] [catalog_name.]db_name [ (RESTRICT | CASCADE) ] */ + case object DROP_DATABASE extends SqlCommand("drop database", "(DROP\\s+DATABASE\\s+.+)") + + /** DROP [TEMPORARY] VIEW [IF EXISTS] [catalog_name.][db_name.]view_name */ + case object DROP_VIEW extends SqlCommand("drop view", "(DROP\\s+(TEMPORARY\\s+|)VIEW\\s+.+)") + + /** + * DROP [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF EXISTS] + * [catalog_name.][db_name.]function_name + */ + case object DROP_FUNCTION + extends SqlCommand( + "drop function", + "(DROP\\s+(TEMPORARY\\s+|TEMPORARY\\s+SYSTEM\\s+|)FUNCTION\\s+.+)") + + // ----ALTER Statements-------------------------------------------------------------------------------------------------------------------------------- + + /** + * ALTER TABLE [catalog_name.][db_name.]table_name RENAME TO new_table_name + * + * ALTER TABLE [catalog_name.][db_name.]table_name SET (key1=val1, key2=val2, + * ...) 
+ */ + case object ALTER_TABLE extends SqlCommand("alter table", "(ALTER\\s+TABLE\\s+.+)") + + /** + * ALTER VIEW [catalog_name.][db_name.]view_name RENAME TO new_view_name + * + * ALTER VIEW [catalog_name.][db_name.]view_name AS new_query_expression + */ + case object ALTER_VIEW extends SqlCommand("alter view", "(ALTER\\s+VIEW\\s+.+)") + + /** ALTER DATABASE [catalog_name.]db_name SET (key1=val1, key2=val2, ...) */ + case object ALTER_DATABASE extends SqlCommand("alter database", "(ALTER\\s+DATABASE\\s+.+)") + + /** + * ALTER [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF EXISTS] + * [catalog_name.][db_name.]function_name AS identifier [LANGUAGE JAVA|SCALA|PYTHON] + */ + case object ALTER_FUNCTION + extends SqlCommand( + "alter function", + "(ALTER\\s+(TEMPORARY\\s+|TEMPORARY\\s+SYSTEM\\s+|)FUNCTION\\s+.+)") + + // ---- INSERT Statement-------------------------------------------------------------------------------------------------------------------------------- + + /** + * INSERT { INTO | OVERWRITE } [catalog_name.][db_name.]table_name [PARTITION part_spec] + * [column_list] select_statement INSERT { INTO | OVERWRITE } [catalog_name.][db_name.]table_name + * VALUES values_row [, values_row ...] + */ + case object INSERT extends SqlCommand("insert", "(INSERT\\s+(INTO|OVERWRITE)\\s+.+)") + + // ---- DESCRIBE Statement-------------------------------------------------------------------------------------------------------------------------------- + + /** { DESCRIBE | DESC } [catalog_name.][db_name.]table_name */ + case object DESC extends SqlCommand("desc", "(DESC\\s+.+)") + + /** { DESCRIBE | DESC } [catalog_name.][db_name.]table_name */ + case object DESCRIBE extends SqlCommand("describe", "(DESCRIBE\\s+.+)") + + // ---- EXPLAIN Statement-------------------------------------------------------------------------------------------------------------------------------- + + /** + * For flink-1.13.x: EXPLAIN PLAN FOR ``
For + * flink-1.14.x: EXPLAIN ESTIMATED_COST, CHANGELOG_MODE, JSON_EXECUTION_PLAN + * ``
For flink-1.15.x:
 EXPLAIN
+   * [([ExplainDetail[, ExplainDetail]*]) | PLAN FOR]
+   * <query_statement_or_insert_statement_or_statement_set>
+   *
+   * statement_set: EXECUTE STATEMENT SET BEGIN insert_statement; ... insert_statement; END; 
+ * Recommended not to use the form of flink-1.15.x + */ + case object EXPLAIN extends SqlCommand("explain", "(EXPLAIN\\s+.+)") + + // ---- USE Statements-------------------------------------------------------------------------------------------------------------------------------- + + /** USE CATALOG catalog_name */ + case object USE_CATALOG extends SqlCommand("use catalog", "(USE\\s+CATALOG\\s+.+)") + + /** USE MODULES module_name1[, module_name2, ...] */ + case object USE_MODULES extends SqlCommand("use modules", "(USE\\s+MODULES\\s+.+)") + + /** USE [catalog_name.]database_name */ + case object USE_DATABASE extends SqlCommand("use database", "(USE\\s+(?!(CATALOG|MODULES)).+)") + + // ----SHOW Statements-------------------------------------------------------------------------------------------------------------------------------- + + /** SHOW CATALOGS */ + case object SHOW_CATALOGS extends SqlCommand("show catalogs", "(SHOW\\s+CATALOGS\\s*)") + + /** SHOW CURRENT CATALOG */ + case object SHOW_CURRENT_CATALOG + extends SqlCommand("show current catalog", "(SHOW\\s+CURRENT\\s+CATALOG\\s*)") + + /** SHOW DATABASES */ + case object SHOW_DATABASES extends SqlCommand("show databases", "(SHOW\\s+DATABASES\\s*)") + + /** SHOW CURRENT DATABASE */ + case object SHOW_CURRENT_DATABASE + extends SqlCommand("show current database", "(SHOW\\s+CURRENT\\s+DATABASE\\s*)") + + /** + * SHOW TABLES,support from flink-1.13.x
SHOW TABLES [ ( FROM | IN ) [catalog_name.]database_name ] [ [NOT] LIKE <sql_like_pattern> ],
   * support from flink-1.15.x
   */
  case object SHOW_TABLES extends SqlCommand("show tables", "(SHOW\\s+TABLES.*)")

  /** SHOW CREATE TABLE, flink-1.14.x support. */
  case object SHOW_CREATE_TABLE
    extends SqlCommand("show create table", "(SHOW\\s+CREATE\\s+TABLE\\s+.+)")

  /**
   * SHOW COLUMNS ( FROM | IN ) [[catalog_name.]database.]<table_name> [ [NOT] LIKE
   * <sql_like_pattern>], flink-1.15.x support.
   */
  case object SHOW_COLUMNS extends SqlCommand("show columns", "(SHOW\\s+COLUMNS\\s+.+)")

  /** SHOW VIEWS */
  case object SHOW_VIEWS extends SqlCommand("show views", "(SHOW\\s+VIEWS\\s*)")

  /** SHOW CREATE VIEW */
  case object SHOW_CREATE_VIEW
    extends SqlCommand("show create view", "(SHOW\\s+CREATE\\s+VIEW\\s+.+)")

  /** SHOW [USER] FUNCTIONS */
  case object SHOW_FUNCTIONS
    extends SqlCommand("show functions", "(SHOW\\s+(USER\\s+|)FUNCTIONS\\s*)")

  /** SHOW [FULL] MODULES */
  case object SHOW_MODULES extends SqlCommand("show modules", "(SHOW\\s+(FULL\\s+|)MODULES\\s*)")

  // ----LOAD Statements--------------------------------------------------------------------------------------------------------------------------------

  /** LOAD MODULE module_name [WITH ('key1' = 'val1', 'key2' = 'val2', ...)] */
  case object LOAD_MODULE extends SqlCommand("load module", "(LOAD\\s+MODULE\\s+.+)")

  // ----UNLOAD Statements--------------------------------------------------------------------------------------------------------------------------------

  /** UNLOAD MODULE module_name */
  case object UNLOAD_MODULE extends SqlCommand("unload module", "(UNLOAD\\s+MODULE\\s+.+)")

  // ----SET Statements--------------------------------------------------------------------------------------------------------------------------------

  /**
   * SET ('key' = 'value')
   *
   * The regex captures three groups: the optional ` key=value` tail, the key and the value. The
   * operands are the key and the value, each trimmed and stripped of one surrounding quote.
   */
  case object SET
    extends SqlCommand(
      "set",
      "SET(\\s+(\\S+)\\s*=(.*))?",
      {
        case a if a.length < 3 => None
        // A bare `SET` matches with the optional group absent, so every captured group is null.
        // There is no key/value to hand over; reject the statement (it is then reported as
        // unsupported sql) instead of dereferencing the null group.
        case a if a.head == null => None
        case a => Some(Array[String](cleanUp(a(1)), cleanUp(a(2))))
      })

  // ----RESET Statements--------------------------------------------------------------------------------------------------------------------------------

  /** RESET ('key') */
  case object RESET extends SqlCommand("reset", "RESET\\s+'(.*)'")

  /** RESET */
  case object RESET_ALL extends SqlCommand("reset all", "RESET", _ => Some(Array[String]("ALL")))

  // ----INSERT SET Statements--------------------------------------------------------------------------------------------------------------------------------
  /*
   *
+   * SQL Client execute each INSERT INTO statement as a single Flink job. However,
+   * this is sometimes not optimal because some part of the pipeline can be reused.
+   * SQL Client supports STATEMENT SET syntax to execute a set of SQL statements.
+   * This is an equivalent feature with StatementSet in Table API.
+   * The STATEMENT SET syntax encloses one or more INSERT INTO statements.
+   * All statements in a STATEMENT SET block are holistically optimized and executed as a single Flink job.
+   * Joint optimization and execution allows for reusing common intermediate results and can therefore significantly
+   * improve the efficiency of executing multiple queries.
+   * 
+ */ + /** This is SQL Client's syntax, don't use in our platform. */ + @deprecated + case object BEGIN_STATEMENT_SET + extends SqlCommand("begin statement set", "BEGIN\\s+STATEMENT\\s+SET", Converters.NO_OPERANDS) + + /** This is SQL Client's syntax, don't use in our platform. */ + @deprecated + case object END_STATEMENT_SET + extends SqlCommand("end statement set", "END", Converters.NO_OPERANDS) + + // Since: 2.1.2 for flink 1.18 + case object DELETE extends SqlCommand("delete", "(DELETE\\s+FROM\\s+.+)") + + // Since: 2.1.2 for flink 1.18 + case object UPDATE extends SqlCommand("update", "(UPDATE\\s+.+)") + + private[this] def cleanUp(sql: String): String = + sql.trim.replaceAll("^(['\"])|(['\"])$", "") + +} + +/** Call of SQL command with operands and command type. */ +case class SqlCommandCall( + lineStart: Int, + lineEnd: Int, + command: SqlCommand, + operands: Array[String], + originSql: String) {} + +case class FlinkSqlValidationResult( + success: JavaBool = true, + failedType: FlinkSqlValidationFailedType = null, + lineStart: Int = 0, + lineEnd: Int = 0, + errorLine: Int = 0, + errorColumn: Int = 0, + sql: String = null, + exception: String = null) + +case class SqlSegment(start: Int, end: Int, sql: String) + +object SqlSplitter { + + private lazy val singleLineCommentPrefixList = Set[String](PARAM_PREFIX) + + /** + * Split whole text into multiple sql statements. Two Steps: Step 1, split the whole text into + * multiple sql statements. Step 2, refine the results. Replace the preceding sql statements with + * empty lines, so that we can get the correct line number in the parsing error message. 
e.g: + * select a from table_1; select a from table_2; select a from table_3; The above text will be + * splitted into: sql_1: select a from table_1 sql_2: \nselect a from table_2 sql_3: \n\nselect a + * from table_3 + * + * @param sql + * @return + */ + def splitSql(sql: String): List[SqlSegment] = { + val queries = ListBuffer[String]() + val lastIndex = if (StringUtils.isNotBlank(sql)) sql.length - 1 else 0 + var query = new mutable.StringBuilder + + var multiLineComment = false + var singleLineComment = false + var singleQuoteString = false + var doubleQuoteString = false + var lineNum: Int = 0 + val lineNumMap = new collection.mutable.HashMap[Int, (Int, Int)]() + + // Whether each line of the record is empty. If it is empty, it is false. If it is not empty, it is true + val lineDescriptor = { + val scanner = new Scanner(sql) + val descriptor = new collection.mutable.HashMap[Int, Boolean] + var lineNumber = 0 + var startComment = false + var hasComment = false + + while (scanner.hasNextLine) { + lineNumber += 1 + val line = scanner.nextLine().trim + val nonEmpty = + StringUtils.isNotBlank(line) && !line.startsWith(PARAM_PREFIX) + if (line.startsWith("/*")) { + startComment = true + hasComment = true + } + + descriptor += lineNumber -> (nonEmpty && !hasComment) + + if (startComment && line.endsWith("*/")) { + startComment = false + hasComment = false + } + } + descriptor + } + + @tailrec + def findStartLine(num: Int): Int = + if (num >= lineDescriptor.size || lineDescriptor(num)) num + else findStartLine(num + 1) + + def markLineNumber(): Unit = { + val line = lineNum + 1 + if (lineNumMap.isEmpty) { + lineNumMap += (0 -> (findStartLine(1) -> line)) + } else { + val index = lineNumMap.size + val start = lineNumMap(lineNumMap.size - 1)._2 + 1 + lineNumMap += (index -> (findStartLine(start) -> line)) + } + } + + for (idx <- 0 until sql.length) { + + if (sql.charAt(idx) == '\n') lineNum += 1 + + breakable { + val ch = sql.charAt(idx) + + // end of single line comment 
+ if (singleLineComment && (ch == '\n')) { + singleLineComment = false + query += ch + if (idx == lastIndex && query.toString.trim.nonEmpty) { + // add query when it is the end of sql. + queries += query.toString + } + break() + } + + // end of multiple line comment + if (multiLineComment && (idx - 1) >= 0 && sql.charAt(idx - 1) == '/' + && (idx - 2) >= 0 && sql.charAt(idx - 2) == '*') { + multiLineComment = false + } + + // single quote start or end mark + if (ch == '\'' && !(singleLineComment || multiLineComment)) { + if (singleQuoteString) { + singleQuoteString = false + } else if (!doubleQuoteString) { + singleQuoteString = true + } + } + + // double quote start or end mark + if (ch == '"' && !(singleLineComment || multiLineComment)) { + if (doubleQuoteString && idx > 0) { + doubleQuoteString = false + } else if (!singleQuoteString) { + doubleQuoteString = true + } + } + + // single line comment or multiple line comment start mark + if (!singleQuoteString && !doubleQuoteString && !multiLineComment && !singleLineComment && idx < lastIndex) { + if (isSingleLineComment(sql.charAt(idx), sql.charAt(idx + 1))) { + singleLineComment = true + } else if (sql.charAt(idx) == '/' && sql.length > (idx + 2) + && sql.charAt(idx + 1) == '*' && sql.charAt(idx + 2) != '+') { + multiLineComment = true + } + } + + if (ch == ';' && !singleQuoteString && !doubleQuoteString && !multiLineComment && !singleLineComment) { + markLineNumber() + // meet the end of semicolon + if (query.toString.trim.nonEmpty) { + queries += query.toString + query = new mutable.StringBuilder + } + } else if (idx == lastIndex) { + markLineNumber() + + // meet the last character + if (!singleLineComment && !multiLineComment) { + query += ch + } + + if (query.toString.trim.nonEmpty) { + queries += query.toString + query = new mutable.StringBuilder + } + } else if (!singleLineComment && !multiLineComment) { + // normal case, not in single line comment and not in multiple line comment + query += ch + } else if 
(ch == '\n') { + query += ch + } + } + } + + val refinedQueries = new collection.mutable.HashMap[Int, String]() + for (i <- queries.indices) { + val currStatement = queries(i) + if (isSingleLineComment(currStatement) || isMultipleLineComment(currStatement)) { + // transform comment line as blank lines + if (refinedQueries.nonEmpty) { + val lastRefinedQuery = refinedQueries.last + refinedQueries(refinedQueries.size - 1) = + lastRefinedQuery + extractLineBreaks(currStatement) + } + } else { + var linesPlaceholder = "" + if (i > 0) { + linesPlaceholder = extractLineBreaks(refinedQueries(i - 1)) + } + // add some blank lines before the statement to keep the original line number + val refinedQuery = linesPlaceholder + currStatement + refinedQueries += refinedQueries.size -> refinedQuery + } + } + + val set = new ListBuffer[SqlSegment] + refinedQueries.foreach(x => { + val line = lineNumMap(x._1) + set += SqlSegment(line._1, line._2, x._2) + }) + set.toList.sortWith((a, b) => a.start < b.start) + } + + /** + * extract line breaks + * + * @param text + * @return + */ + private[this] def extractLineBreaks(text: String): String = { + val builder = new mutable.StringBuilder + for (i <- 0 until text.length) { + if (text.charAt(i) == '\n') { + builder.append('\n') + } + } + builder.toString + } + + private[this] def isSingleLineComment(text: String) = + text.trim.startsWith(PARAM_PREFIX) + + private[this] def isMultipleLineComment(text: String) = + text.trim.startsWith("/*") && text.trim.endsWith("*/") + + /** + * check single-line comment + * + * @param curChar + * @param nextChar + * @return + */ + private[this] def isSingleLineComment(curChar: Char, nextChar: Char): Boolean = { + var flag = false + for (singleCommentPrefix <- singleLineCommentPrefixList) { + singleCommentPrefix.length match { + case 1 if curChar == singleCommentPrefix.charAt(0) => flag = true + case 2 + if curChar == singleCommentPrefix.charAt(0) && nextChar == singleCommentPrefix.charAt( + 1) => + flag = 
true + case _ => + } + } + flag + } + +} diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/StreamEnvConfig.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/StreamEnvConfig.scala new file mode 100644 index 0000000000..574945fa39 --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/StreamEnvConfig.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

package org.apache.streampark.flink.core

/**
 * Start-up input for a stream job: the program arguments plus a [[StreamEnvConfigFunction]].
 *
 * NOTE(review): presumably `conf` is invoked while the stream environment is being initialised -
 * confirm against the initializer.
 */
class StreamEnvConfig(val args: Array[String], val conf: StreamEnvConfigFunction)

/**
 * Start-up input for a stream + table job: the program arguments plus one config function for
 * the stream side and one for the table side.
 */
class StreamTableEnvConfig(
    val args: Array[String],
    val streamConfig: StreamEnvConfigFunction,
    val tableConfig: TableEnvConfigFunction)

/** Start-up input for a table job: the program arguments plus a [[TableEnvConfigFunction]]. */
class TableEnvConfig(val args: Array[String], val conf: TableEnvConfigFunction)
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/StreamTableContext.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/StreamTableContext.scala
new file mode 100644
index 0000000000..990e8fdd4f
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/StreamTableContext.scala
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
 */

package org.apache.streampark.flink.core

import org.apache.streampark.common.util.Implicits.JavaList

import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.tuple
import org.apache.flink.streaming.api.datastream.{DataStream => JavaDataStream}
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.table.api._
import org.apache.flink.table.api.ModelDescriptor
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment
import org.apache.flink.table.catalog.CatalogDescriptor
import org.apache.flink.table.connector.ChangelogMode
import org.apache.flink.table.expressions.Expression
import org.apache.flink.table.functions.UserDefinedFunction
import org.apache.flink.table.module.ModuleEntry
import org.apache.flink.table.resource.ResourceUri
import org.apache.flink.table.types.AbstractDataType
import org.apache.flink.types.Row
import org.apache.flink.util.ParameterTool

/**
 * Stream + table context of the Flink 2.1 shim. Every member forwards to the wrapped
 * [[StreamTableEnvironment]]; the `toDataStream` / `toChangelogStream` overloads additionally set
 * `isConvertedToDataStream` before forwarding.
 *
 * @param parameter
 *   job parameters, passed on to [[FlinkStreamTableTraitV2]]
 * @param streamEnv
 *   the stream execution environment, passed on to [[FlinkStreamTableTraitV2]]
 * @param tableEnv
 *   the table environment all calls are delegated to
 */
class StreamTableContext(
    override val parameter: ParameterTool,
    private val streamEnv: StreamExecutionEnvironment,
    private val tableEnv: StreamTableEnvironment)
  extends FlinkStreamTableTraitV2(parameter, streamEnv, tableEnv) {

  /** Builds the context from a (parameter, stream environment, table environment) triple. */
  def this(args: (ParameterTool, StreamExecutionEnvironment, StreamTableEnvironment)) =
    this(args._1, args._2, args._3)

  /** Builds the context from whatever `FlinkTableInitializerV2.initialize` returns for `args`. */
  def this(args: StreamTableEnvConfig) =
    this(FlinkTableInitializerV2.initialize(args))

  override def fromDataStream[T](dataStream: JavaDataStream[T], schema: Schema): Table =
    tableEnv.fromDataStream[T](dataStream, schema)

  /** @deprecated old API */
  override def fromDataStream[T](dataStream: JavaDataStream[T], expressions: Expression*): Table =
    tableEnv.fromDataStream(dataStream, expressions: _*)

  override def fromChangelogStream(dataStream: JavaDataStream[Row]): Table =
    tableEnv.fromChangelogStream(dataStream)

  override def fromChangelogStream(dataStream: JavaDataStream[Row], schema: Schema): Table =
    tableEnv.fromChangelogStream(dataStream, schema)

  override def fromChangelogStream(
      dataStream: JavaDataStream[Row],
      schema: Schema,
      changelogMode: ChangelogMode): Table =
    tableEnv.fromChangelogStream(dataStream, schema, changelogMode)

  override def createTemporaryView[T](
      path: String,
      dataStream: JavaDataStream[T],
      schema: Schema): Unit =
    tableEnv.createTemporaryView[T](path, dataStream, schema)

  /** @deprecated old API */
  @deprecated override def createTemporaryView[T](
      path: String,
      dataStream: JavaDataStream[T],
      expressions: Expression*): Unit =
    tableEnv.createTemporaryView(path, dataStream, expressions: _*)

  // The conversions below set isConvertedToDataStream before delegating.
  // NOTE(review): the flag is not declared in this file; presumably the parent trait reads it
  // when the job is started - confirm.
  override def toDataStream(table: Table): JavaDataStream[Row] = {
    isConvertedToDataStream = true
    tableEnv.toDataStream(table)
  }

  override def toDataStream[T](table: Table, targetClass: Class[T]): JavaDataStream[T] = {
    isConvertedToDataStream = true
    tableEnv.toDataStream[T](table, targetClass)
  }

  override def toDataStream[T](table: Table, targetDataType: AbstractDataType[_]): JavaDataStream[T] = {
    isConvertedToDataStream = true
    tableEnv.toDataStream[T](table, targetDataType)
  }

  override def toChangelogStream(table: Table): JavaDataStream[Row] = {
    isConvertedToDataStream = true
    tableEnv.toChangelogStream(table)
  }

  override def toChangelogStream(table: Table, targetSchema: Schema): JavaDataStream[Row] = {
    isConvertedToDataStream = true
    tableEnv.toChangelogStream(table, targetSchema)
  }

  override def toChangelogStream(
      table: Table,
      targetSchema: Schema,
      changelogMode: ChangelogMode): JavaDataStream[Row] = {
    isConvertedToDataStream = true
    tableEnv.toChangelogStream(table, targetSchema, changelogMode)
  }

  override def createStatementSet(): org.apache.flink.table.api.bridge.java.StreamStatementSet =
    tableEnv.createStatementSet()

  override def useModules(strings: String*): Unit =
    tableEnv.useModules(strings: _*)

  override def createTemporaryTable(path: String, descriptor: TableDescriptor): Unit =
    tableEnv.createTemporaryTable(path, descriptor)

  override def createTable(path: String, descriptor: TableDescriptor): Unit =
    tableEnv.createTable(path, descriptor)

  override def from(descriptor: TableDescriptor): Table =
    tableEnv.from(descriptor)

  override def listFullModules(): Array[ModuleEntry] =
    tableEnv.listFullModules()

  /** @since 1.15 */
  override def listTables(s: String, s1: String): Array[String] =
    tableEnv.listTables(s, s1)

  /** @since 1.15 */
  override def loadPlan(planReference: PlanReference): CompiledPlan =
    tableEnv.loadPlan(planReference)

  /** @since 1.15 */
  override def compilePlanSql(s: String): CompiledPlan =
    tableEnv.compilePlanSql(s)

  /** @since 1.17 */
  override def createFunction(
      path: String,
      className: String,
      resourceUris: JavaList[ResourceUri]): Unit =
    tableEnv.createFunction(path, className, resourceUris)

  /** @since 1.17 */
  override def createFunction(
      path: String,
      className: String,
      resourceUris: JavaList[ResourceUri],
      ignoreIfExists: Boolean): Unit =
    tableEnv.createFunction(path, className, resourceUris, ignoreIfExists)

  /** @since 1.17 */
  override def createTemporaryFunction(
      path: String,
      className: String,
      resourceUris: JavaList[ResourceUri]): Unit =
    tableEnv.createTemporaryFunction(path, className, resourceUris)

  /** @since 1.17 */
  override def createTemporarySystemFunction(
      name: String,
      className: String,
      resourceUris: JavaList[ResourceUri]): Unit =
    tableEnv.createTemporarySystemFunction(name, className, resourceUris)

  /** @since 1.17 */
  override def explainSql(
      statement: String,
      format: ExplainFormat,
      extraDetails: ExplainDetail*): String =
    tableEnv.explainSql(statement, format, extraDetails: _*)

  /** @since 1.18 */
  override def createCatalog(catalog: String, catalogDescriptor: CatalogDescriptor): Unit = {
    tableEnv.createCatalog(catalog, catalogDescriptor)
  }

  /** @deprecated old API */
  @deprecated override def toAppendStream[T](
      table: Table,
      typeInformation: TypeInformation[T]): JavaDataStream[T] =
    tableEnv.toAppendStream(table, typeInformation)

  /** @deprecated old API */
  @deprecated override def toRetractStream[T](
      table: Table,
      typeInformation: TypeInformation[T]): JavaDataStream[tuple.Tuple2[java.lang.Boolean, T]] =
    tableEnv.toRetractStream(table, typeInformation)

  /** since Flink 2.0 */
  override def toAppendStream[T](table: Table, clazz: Class[T]): JavaDataStream[T] =
    tableEnv.toAppendStream(table, clazz)

  /** since Flink 2.0 */
  override def toRetractStream[T](table: Table, clazz: Class[T]): JavaDataStream[tuple.Tuple2[java.lang.Boolean, T]] =
    tableEnv.toRetractStream(table, clazz)

  /** since Flink 2.0 */
  override def createTable(path: String, descriptor: TableDescriptor, ignoreIfExists: Boolean): Boolean =
    tableEnv.createTable(path, descriptor, ignoreIfExists)

  /** since Flink 2.0 */
  override def createTemporaryTable(
      path: String,
      descriptor: TableDescriptor,
      ignoreIfExists: Boolean): Unit =
    tableEnv.createTemporaryTable(path, descriptor, ignoreIfExists)

  /** since Flink 2.0 */
  override def createView(path: String, view: Table, ignoreIfExists: Boolean): Boolean =
    tableEnv.createView(path, view, ignoreIfExists)

  /** since Flink 2.0 */
  override def createView(path: String, view: Table): Unit =
    tableEnv.createView(path, view)

  /** since Flink 2.0 */
  override def dropTable(path: String, ignoreIfNotExists: Boolean): Boolean =
    tableEnv.dropTable(path, ignoreIfNotExists)

  /** since Flink 2.0 */
  override def dropTable(path: String): Boolean =
    tableEnv.dropTable(path)

  /** since Flink 2.0 */
  override def dropView(path: String, ignoreIfNotExists: Boolean): Boolean =
    tableEnv.dropView(path, ignoreIfNotExists)

  /** since Flink 2.0 */
  override def dropView(path: String): Boolean =
    tableEnv.dropView(path)

  /** since Flink 2.1 */
  override def createModel(path: String, descriptor: ModelDescriptor, ignoreIfExists: Boolean): Unit =
    tableEnv.createModel(path, descriptor, ignoreIfExists)

  /** since Flink 2.1 */
  override def createModel(path: String, descriptor: ModelDescriptor): Unit =
    tableEnv.createModel(path, descriptor)

  /** since Flink 2.1 */
  override def createTemporaryModel(path: String, descriptor: ModelDescriptor, ignoreIfExists: Boolean): Unit =
    tableEnv.createTemporaryModel(path, descriptor, ignoreIfExists)

  /** since Flink 2.1 */
  override def createTemporaryModel(path: String, descriptor: ModelDescriptor): Unit =
    tableEnv.createTemporaryModel(path, descriptor)

  /** since Flink 2.1 */
  override def dropModel(path: String, ignoreIfNotExists: Boolean): Boolean =
    tableEnv.dropModel(path, ignoreIfNotExists)

  /** since Flink 2.1 */
  override def dropModel(path: String): Boolean =
    tableEnv.dropModel(path)

  /** since Flink 2.1 */
  override def dropTemporaryModel(path: String): Boolean =
    tableEnv.dropTemporaryModel(path)

  /** since Flink 2.1 */
  override def fromCall(functionClass: Class[_ <: UserDefinedFunction], arguments: Object*): Table =
    tableEnv.fromCall(functionClass, arguments: _*)

  /** since Flink 2.1 */
  override def fromCall(functionName: String, arguments: Object*): Table =
    tableEnv.fromCall(functionName, arguments: _*)

  /** since Flink 2.1 */
  override def listModels(): Array[String] =
    tableEnv.listModels()

  /** since Flink 2.1 */
  override def listTemporaryModels(): Array[String] =
    tableEnv.listTemporaryModels()

}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/TableContext.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.1/src/main/scala/org/apache/streampark/flink/core/TableContext.scala
new file mode 100644
index 0000000000..890d1bc1f6
--- /dev/null
+++ 
/**
 * Table context of the Flink 2.1 shim.
 *
 * Apart from the constructors, every member is a one-to-one forwarder to the wrapped
 * [[TableEnvironment]]; no argument is inspected or altered on the way.
 *
 * @param parameter
 *   the parameter tool, also handed to [[FlinkTableTrait]]
 * @param tableEnv
 *   the table environment all calls are forwarded to
 */
class TableContext(override val parameter: ParameterTool, private val tableEnv: TableEnvironment)
  extends FlinkTableTrait(parameter, tableEnv) {

  /** Builds the context from a `(parameter, environment)` pair. */
  def this(args: (ParameterTool, TableEnvironment)) = this(args._1, args._2)

  /** Builds the context from the result of `FlinkTableInitializerV2.initialize(args)`. */
  def this(args: TableEnvConfig) = this(FlinkTableInitializerV2.initialize(args))

  // ---- modules, tables and plans ------------------------------------------------------------

  override def useModules(strings: String*): Unit = tableEnv.useModules(strings: _*)

  override def createTemporaryTable(path: String, descriptor: TableDescriptor): Unit =
    tableEnv.createTemporaryTable(path, descriptor)

  override def createTable(path: String, descriptor: TableDescriptor): Unit =
    tableEnv.createTable(path, descriptor)

  override def from(tableDescriptor: TableDescriptor): Table = tableEnv.from(tableDescriptor)

  override def listFullModules(): Array[ModuleEntry] = tableEnv.listFullModules()

  /** Available since Flink 1.15. */
  override def listTables(catalogName: String, databaseName: String): Array[String] =
    tableEnv.listTables(catalogName, databaseName)

  /** Available since Flink 1.15. */
  override def loadPlan(planReference: PlanReference): CompiledPlan =
    tableEnv.loadPlan(planReference)

  /** Available since Flink 1.15. */
  override def compilePlanSql(stmt: String): CompiledPlan = tableEnv.compilePlanSql(stmt)

  // ---- functions, explain and catalogs ------------------------------------------------------

  /** Available since Flink 1.17. */
  override def createFunction(
      path: String,
      className: String,
      resourceUris: JavaList[ResourceUri]): Unit =
    tableEnv.createFunction(path, className, resourceUris)

  /** Available since Flink 1.17. */
  override def createFunction(
      path: String,
      className: String,
      resourceUris: JavaList[ResourceUri],
      ignoreIfExists: Boolean): Unit =
    tableEnv.createFunction(path, className, resourceUris, ignoreIfExists)

  /** Available since Flink 1.17. */
  override def createTemporaryFunction(
      path: String,
      className: String,
      resourceUris: JavaList[ResourceUri]): Unit =
    tableEnv.createTemporaryFunction(path, className, resourceUris)

  /** Available since Flink 1.17. */
  override def createTemporarySystemFunction(
      name: String,
      className: String,
      resourceUris: JavaList[ResourceUri]): Unit =
    tableEnv.createTemporarySystemFunction(name, className, resourceUris)

  /** Available since Flink 1.17. */
  override def explainSql(
      statement: String,
      format: ExplainFormat,
      extraDetails: ExplainDetail*): String =
    tableEnv.explainSql(statement, format, extraDetails: _*)

  /** Available since Flink 1.18. */
  override def createCatalog(catalog: String, catalogDescriptor: CatalogDescriptor): Unit =
    tableEnv.createCatalog(catalog, catalogDescriptor)

  // ---- Flink 2.0 additions ------------------------------------------------------------------

  /** Available since Flink 2.0. */
  override def createTable(
      path: String,
      descriptor: TableDescriptor,
      ignoreIfExists: Boolean): Boolean =
    tableEnv.createTable(path, descriptor, ignoreIfExists)

  /** Available since Flink 2.0. */
  override def createTemporaryTable(
      path: String,
      descriptor: TableDescriptor,
      ignoreIfExists: Boolean): Unit =
    tableEnv.createTemporaryTable(path, descriptor, ignoreIfExists)

  /** Available since Flink 2.0. */
  override def createView(path: String, view: Table, ignoreIfExists: Boolean): Boolean =
    tableEnv.createView(path, view, ignoreIfExists)

  /** Available since Flink 2.0. */
  override def createView(path: String, view: Table): Unit = tableEnv.createView(path, view)

  /** Available since Flink 2.0. */
  override def dropTable(path: String, ignoreIfNotExists: Boolean): Boolean =
    tableEnv.dropTable(path, ignoreIfNotExists)

  /** Available since Flink 2.0. */
  override def dropTable(path: String): Boolean = tableEnv.dropTable(path)

  /** Available since Flink 2.0. */
  override def dropView(path: String, ignoreIfNotExists: Boolean): Boolean =
    tableEnv.dropView(path, ignoreIfNotExists)

  /** Available since Flink 2.0. */
  override def dropView(path: String): Boolean = tableEnv.dropView(path)

  // ---- Flink 2.1 additions (models and table-function calls) --------------------------------

  /** Available since Flink 2.1. */
  override def createModel(
      path: String,
      descriptor: ModelDescriptor,
      ignoreIfExists: Boolean): Unit =
    tableEnv.createModel(path, descriptor, ignoreIfExists)

  /** Available since Flink 2.1. */
  override def createModel(path: String, descriptor: ModelDescriptor): Unit =
    tableEnv.createModel(path, descriptor)

  /** Available since Flink 2.1. */
  override def createTemporaryModel(
      path: String,
      descriptor: ModelDescriptor,
      ignoreIfExists: Boolean): Unit =
    tableEnv.createTemporaryModel(path, descriptor, ignoreIfExists)

  /** Available since Flink 2.1. */
  override def createTemporaryModel(path: String, descriptor: ModelDescriptor): Unit =
    tableEnv.createTemporaryModel(path, descriptor)

  /** Available since Flink 2.1. */
  override def dropModel(path: String, ignoreIfNotExists: Boolean): Boolean =
    tableEnv.dropModel(path, ignoreIfNotExists)

  /** Available since Flink 2.1. */
  override def dropModel(path: String): Boolean = tableEnv.dropModel(path)

  /** Available since Flink 2.1. */
  override def dropTemporaryModel(path: String): Boolean = tableEnv.dropTemporaryModel(path)

  /** Available since Flink 2.1. */
  override def fromCall(
      functionClass: Class[_ <: UserDefinedFunction],
      arguments: Object*): Table =
    tableEnv.fromCall(functionClass, arguments: _*)

  /** Available since Flink 2.1. */
  override def fromCall(functionName: String, arguments: Object*): Table =
    tableEnv.fromCall(functionName, arguments: _*)

  /** Available since Flink 2.1. */
  override def listModels(): Array[String] = tableEnv.listModels()

  /** Available since Flink 2.1. */
  override def listTemporaryModels(): Array[String] = tableEnv.listTemporaryModels()

}
/** Operator-style shorthands around Flink's `Table` for the Flink 2.1 shim. */
object TableExt {

  /**
   * Wrapper exposing `->` on a Flink table.
   *
   * @param table
   *   the wrapped Flink table
   */
  class Table(val table: FlinkTable) {

    /** Forwards to `table.as(field, fields: _*)` and returns the resulting table. */
    def ->(field: String, fields: String*): FlinkTable = {
      table.as(field, fields: _*)
    }
  }

  /**
   * Wrapper exposing `\\` to turn a table into a data stream.
   *
   * @param table
   *   the table to convert
   * @param streamTableEnv
   *   the environment that performs the conversion
   */
  class TableConversions(table: FlinkTable, streamTableEnv: StreamTableEnvironment) {

    /** Forwards to `streamTableEnv.toDataStream(table)`. */
    def \\ : DataStream[Row] = {
      streamTableEnv.toDataStream(table)
    }
  }

}
/**
 * Immutable bundle of the parameter tool and two Flink `Configuration` objects.
 *
 * @param parameter
 *   the parameter tool
 * @param envConfig
 *   presumably the settings for the execution environment; confirm against the initializer
 *   that builds this value
 * @param tableConfig
 *   presumably the settings for the table environment; confirm against the initializer that
 *   builds this value
 */
case class FlinkConfiguration(
    parameter: ParameterTool,
    envConfig: Configuration,
    tableConfig: Configuration)
META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + + + + diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/java/org/apache/streampark/flink/core/StreamEnvConfigFunction.java b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/java/org/apache/streampark/flink/core/StreamEnvConfigFunction.java new file mode 100644 index 0000000000..d1ed71bb32 --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/java/org/apache/streampark/flink/core/StreamEnvConfigFunction.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.flink.core; + +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.util.ParameterTool; + +@FunctionalInterface +public interface StreamEnvConfigFunction { + + /** + * When used to initialize StreamExecutionEnvironment, it can be used to implement this function + * and customize the parameters to be set... 
+ * + * @param environment + * @param parameterTool + */ + void configuration(StreamExecutionEnvironment environment, ParameterTool parameterTool); +} diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/java/org/apache/streampark/flink/core/TableEnvConfigFunction.java b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/java/org/apache/streampark/flink/core/TableEnvConfigFunction.java new file mode 100644 index 0000000000..0e57b74b5c --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/java/org/apache/streampark/flink/core/TableEnvConfigFunction.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.flink.core; + +import org.apache.flink.table.api.TableConfig; +import org.apache.flink.util.ParameterTool; + +@FunctionalInterface +public interface TableEnvConfigFunction { + + /** + * When used to initialize the TableEnvironment, it can be used to implement this function and + * customize the parameters to be set... 
+ * + * @param tableConfig + * @param parameterTool + */ + void configuration(TableConfig tableConfig, ParameterTool parameterTool); +} diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/EnhancerImplicit.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/EnhancerImplicit.scala new file mode 100644 index 0000000000..0bf7bc15ae --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/EnhancerImplicit.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/** Implicit enrichments used by the Flink 2.2 shim to resolve and apply the application name. */
object EnhancerImplicit {

  implicit class EnhanceParameterTool(parameterTool: ParameterTool) {

    /**
     * Resolves the application name.
     *
     * An explicitly passed `name` is returned as is. Otherwise the value stored under
     * `KEY_APP_NAME()` is run through `DeflaterUtils.unzipString`; if that throws, the value
     * stored under `KEY_FLINK_APP_NAME` is used instead.
     *
     * @param name
     *   explicit application name, or `null` to look it up in the parameters
     * @param required
     *   when `true`, a `null` result fails the `require` check
     * @return
     *   the resolved name, possibly `null` when `required` is `false`
     */
    private[flink] def getAppName(name: String = null, required: Boolean = false): String = {
      val resolved =
        if (name != null) {
          name
        } else {
          val unzipped = Try(DeflaterUtils.unzipString(parameterTool.get(KEY_APP_NAME(), null)))
          // The fallback lookup only happens when unzipping failed.
          unzipped.getOrElse(parameterTool.get(KEY_FLINK_APP_NAME, null))
        }
      if (required) require(resolved != null, "[StreamPark] Application name cannot be null")
      resolved
    }

  }

  implicit class EnhanceTableEnvironment(env: TableEnvironment) {

    /**
     * Stores the resolved application name under the key of `PipelineOptions.NAME`; does nothing
     * when no name could be resolved. Returns the same environment.
     */
    private[flink] def setAppName(implicit parameter: ParameterTool): TableEnvironment = {
      Option(parameter.getAppName()).foreach { jobName =>
        env.getConfig.getConfiguration.setString(PipelineOptions.NAME.key, jobName)
      }
      env
    }

  }

  implicit class EnhanceStreamTableEnvironment(env: StreamTableEnvironment) {

    /**
     * Stores the resolved application name under the key of `PipelineOptions.NAME`; does nothing
     * when no name could be resolved. Returns the same environment.
     */
    private[flink] def setAppName(implicit parameter: ParameterTool): StreamTableEnvironment = {
      Option(parameter.getAppName()).foreach { jobName =>
        env.getConfig.getConfiguration.setString(PipelineOptions.NAME.key, jobName)
      }
      env
    }
  }

}
/**
 * Base class for the savepoint operations of the Flink 2.2 shim.
 *
 * All three operations forward to the wrapped `ClusterClient` and honour the `nativeFormat`
 * flag. Earlier the flag was accepted but ignored here, so a subclass that did not override a
 * method always produced savepoints in `SavepointFormatType.DEFAULT`.
 *
 * @param clusterClient
 *   the Flink cluster client the calls are forwarded to
 */
abstract class FlinkClientTrait[T](clusterClient: ClusterClient[T]) {

  /**
   * Translates the boolean flag into Flink's savepoint format. `false` keeps the previous
   * behaviour of this class (`SavepointFormatType.DEFAULT`).
   */
  private[this] def savepointFormat(nativeFormat: Boolean): SavepointFormatType =
    if (nativeFormat) SavepointFormatType.NATIVE else SavepointFormatType.DEFAULT

  /**
   * Triggers a savepoint for the given job.
   *
   * @param jobID
   *   the job to snapshot
   * @param savepointDir
   *   the savepoint target directory passed on to the cluster client
   * @param nativeFormat
   *   `true` requests `SavepointFormatType.NATIVE`, `false` the default format
   * @return
   *   the future returned by `clusterClient.triggerSavepoint`
   */
  def triggerSavepoint(
      jobID: JobID,
      savepointDir: String,
      nativeFormat: Boolean = false): CompletableFuture[String] =
    clusterClient.triggerSavepoint(jobID, savepointDir, savepointFormat(nativeFormat))

  /**
   * Cancels the given job with a savepoint.
   *
   * @param jobID
   *   the job to cancel
   * @param savepointDir
   *   the savepoint target directory passed on to the cluster client
   * @param nativeFormat
   *   `true` requests `SavepointFormatType.NATIVE`, `false` the default format
   * @return
   *   the future returned by `clusterClient.cancelWithSavepoint`
   */
  def cancelWithSavepoint(
      jobID: JobID,
      savepointDir: String,
      nativeFormat: Boolean = false): CompletableFuture[String] =
    clusterClient.cancelWithSavepoint(jobID, savepointDir, savepointFormat(nativeFormat))

  /**
   * Stops the given job with a savepoint.
   *
   * @param jobID
   *   the job to stop
   * @param advanceToEndOfEventTime
   *   passed on unchanged to `clusterClient.stopWithSavepoint`
   * @param savepointDir
   *   the savepoint target directory passed on to the cluster client
   * @param nativeFormat
   *   `true` requests `SavepointFormatType.NATIVE`, `false` the default format
   * @return
   *   the future returned by `clusterClient.stopWithSavepoint`
   */
  def stopWithSavepoint(
      jobID: JobID,
      advanceToEndOfEventTime: Boolean,
      savepointDir: String,
      nativeFormat: Boolean = false): CompletableFuture[String] =
    clusterClient.stopWithSavepoint(
      jobID,
      advanceToEndOfEventTime,
      savepointDir,
      savepointFormat(nativeFormat))

}
/**
 * Concrete savepoint client of the Flink 2.2 shim.
 *
 * Each operation forwards to the wrapped `ClusterClient`, choosing
 * `SavepointFormatType.NATIVE` when `nativeFormat` is `true` and
 * `SavepointFormatType.CANONICAL` otherwise.
 *
 * @param clusterClient
 *   the Flink cluster client the calls are forwarded to
 */
class FlinkClusterClient[T](clusterClient: ClusterClient[T])
  extends FlinkClientTrait[T](clusterClient) {

  /** Single place where the boolean flag is turned into a savepoint format. */
  private[this] def formatType(nativeFormat: Boolean): SavepointFormatType =
    if (nativeFormat) SavepointFormatType.NATIVE else SavepointFormatType.CANONICAL

  /** Triggers a savepoint for `jobID` into `savepointDir` in the selected format. */
  override def triggerSavepoint(
      jobID: JobID,
      savepointDir: String,
      nativeFormat: Boolean): CompletableFuture[String] =
    clusterClient.triggerSavepoint(jobID, savepointDir, formatType(nativeFormat))

  /** Cancels `jobID`, writing a savepoint into `savepointDirectory` in the selected format. */
  override def cancelWithSavepoint(
      jobID: JobID,
      savepointDirectory: String,
      nativeFormat: Boolean): CompletableFuture[String] =
    clusterClient.cancelWithSavepoint(jobID, savepointDirectory, formatType(nativeFormat))

  /** Stops `jobID`, writing a savepoint into `savepointDirectory` in the selected format. */
  override def stopWithSavepoint(
      jobID: JobID,
      advanceToEndOfEventTime: Boolean,
      savepointDirectory: String,
      nativeFormat: Boolean): CompletableFuture[String] =
    clusterClient.stopWithSavepoint(
      jobID,
      advanceToEndOfEventTime,
      savepointDirectory,
      formatType(nativeFormat))

}
/**
 * Kubernetes client of the Flink 2.2 shim.
 *
 * @param kubeClient
 *   the Flink kube client the lookup is forwarded to
 */
class FlinkKubernetesClient(kubeClient: FlinkKubeClient)
  extends FlinkKubernetesClientTrait(kubeClient) {

  /** Returns whatever `kubeClient.getService(serviceName)` yields. */
  override def getService(serviceName: String): Optional[KubernetesService] =
    kubeClient.getService(serviceName)

}
/**
 * Base class for the Kubernetes service lookup of the Flink 2.2 shim.
 *
 * @param kubeClient
 *   the Flink kube client the lookup is forwarded to
 */
abstract class FlinkKubernetesClientTrait(kubeClient: FlinkKubeClient) {

  /**
   * Looks up a Kubernetes service by its name via `kubeClient.getService`.
   *
   * @param serviceName
   *   the name of the service
   * @return
   *   the optional returned by the kube client for that name
   */
  def getService(serviceName: String): Optional[KubernetesService] = {
    kubeClient.getService(serviceName)
  }

}
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.conf.ConfigKeys.KEY_FLINK_SQL
+import org.apache.streampark.common.util.{AssertUtils, Logger}
+import org.apache.streampark.flink.core.SqlCommand._
+
+import org.apache.commons.lang3.StringUtils
+import org.apache.flink.configuration.{Configuration, ExecutionOptions}
+import org.apache.flink.table.api.TableEnvironment
+import org.apache.flink.util.ParameterTool
+
+import java.util
+import java.util.concurrent.locks.ReentrantReadWriteLock
+
+import scala.collection.mutable
+import scala.util.Try
+
+object FlinkSqlExecutor extends Logger {
+
+  private[this] val lock = new ReentrantReadWriteLock().writeLock
+  /** Runs the SQL stored under key `sql` (KEY_FLINK_SQL if blank); INSERTs are submitted as one set. */
+  private[streampark] def executeSql(
+      sql: String,
+      parameter: ParameterTool,
+      context: TableEnvironment)(implicit callbackFunc: String => Unit = null): Unit = {
+
+    val flinkSql: String =
+      if (StringUtils.isBlank(sql)) parameter.get(KEY_FLINK_SQL())
+      else parameter.get(sql)
+    require(StringUtils.isNotBlank(flinkSql), "verify failed: flink sql cannot be empty")
+
+    def callback(r: String): Unit = {
+      callbackFunc match {
+        case null => logInfo(r)
+        case x => x(r)
+      }
+    }
+
+    val runMode = parameter.get(ExecutionOptions.RUNTIME_MODE.key())
+
+    var hasInsert = false
+    val statementSet = context.createStatementSet()
+    SqlCommandParser
+      .parseSQL(flinkSql)
+      .foreach(x => {
+        val args = if (x.operands.isEmpty) null else x.operands.head
+        val command = x.command.name
+        x.command match {
+          // For display sql statement result information
+          case SHOW_CATALOGS =>
+            val catalogs = context.listCatalogs
+            callback(s"$command: ${catalogs.mkString("\n")}")
+          case SHOW_CURRENT_CATALOG =>
+            val catalog = context.getCurrentCatalog
+            callback(s"$command: $catalog")
+          case SHOW_DATABASES =>
+            val databases = context.listDatabases
+            callback(s"$command: ${databases.mkString("\n")}")
+          case SHOW_CURRENT_DATABASE =>
+            val database = context.getCurrentDatabase
+            callback(s"$command: $database")
+          case SHOW_TABLES =>
+            val tables =
+              context.listTables().filter(!_.startsWith("UnnamedTable"))
+            callback(s"$command: ${tables.mkString("\n")}")
+          case SHOW_FUNCTIONS =>
+            val functions = context.listUserDefinedFunctions()
+            callback(s"$command: ${functions.mkString("\n")}")
+          case SHOW_MODULES =>
+            val modules = context.listModules()
+            callback(s"$command: ${modules.mkString("\n")}")
+          case DESC | DESCRIBE =>
+            val schema = context.scan(args).getSchema
+            val builder = new mutable.StringBuilder()
+            builder.append("Column\tType\n")
+            for (i <- 0 until schema.getFieldCount) { // `until`: index == getFieldCount is past the end
+              builder.append(
+                schema.getFieldName(i).get() + "\t" + schema
+                  .getFieldDataType(i)
+                  .get() + "\n")
+            }
+            callback(builder.toString())
+          case EXPLAIN =>
+            val tableResult = context.executeSql(x.originSql)
+            val r = tableResult.collect().next().getField(0).toString
+            callback(r)
+          // For specific statement, such as: SET/RESET/INSERT/SELECT
+          case SET =>
+            val operand = x.operands(1)
+            logInfo(s"$command: $args --> $operand")
+            context.getConfig.getConfiguration.setString(args, operand)
+          case RESET | RESET_ALL =>
+            val confDataField =
+              classOf[Configuration].getDeclaredField("confData")
+            confDataField.setAccessible(true)
+            val confData = confDataField
+              .get(context.getConfig.getConfiguration)
+              .asInstanceOf[util.HashMap[String, AnyRef]]
+            confData.synchronized {
+              if (x.command == RESET) {
+                confData.remove(args)
+              } else {
+                confData.clear()
+              }
+            }
+            logInfo(s"$command: $args")
+          case BEGIN_STATEMENT_SET | END_STATEMENT_SET =>
+            logWarn(s"SQL Client Syntax: ${x.command.name} ")
+          case INSERT =>
+            statementSet.addInsertSql(x.originSql)
+            hasInsert = true
+          case SELECT =>
+            logError("StreamPark dose not support 'SELECT' statement now!")
+            throw new RuntimeException("StreamPark dose not support 'select' statement now!")
+          case DELETE | UPDATE =>
+            AssertUtils.required(
+              runMode != "STREAMING",
+              s"Currently, ${command.toUpperCase()} statement only supports in batch mode, " +
+                s"and it requires the target table connector implements the SupportsRowLevelDelete, " +
+                s"For more details please refer to: https://nightlies.apache.org/flink/flink-docs-release-1.18/docs/dev/table/sql/$command")
+          case _ =>
+            try {
+              lock.lock()
+              val result = context.executeSql(x.originSql)
+              logInfo(s"$command:$args")
+            } finally {
+              if (lock.isHeldByCurrentThread) {
+                lock.unlock()
+              }
+            }
+        }
+      })
+
+    if (hasInsert) {
+      statementSet.execute() match {
+        case t if t != null =>
+          Try(t.getJobClient.get.getJobID).getOrElse(null) match {
+            case x if x != null => logInfo(s"jobId:$x")
+            case _ =>
+          }
+        case _ =>
+      }
+    } else {
+      logError("No 'INSERT' statement to trigger the execution of the Flink job.")
+      throw new RuntimeException("No 'INSERT' statement to trigger the execution of the Flink job.")
+    }
+
+    logInfo(
+      s"\n\n\n==============flinkSql==============\n\n $flinkSql\n\n============================\n\n\n")
+  }
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkSqlValidator.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkSqlValidator.scala
new file mode 100644
index 0000000000..42344c9d7f
--- /dev/null
+++
b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkSqlValidator.scala
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.enums.FlinkSqlValidationFailedType
+import org.apache.streampark.common.util.{ExceptionUtils, Logger}
+import org.apache.streampark.flink.core.SqlCommand._
+
+import org.apache.calcite.config.Lex
+import org.apache.calcite.sql.parser.SqlParser
+import org.apache.calcite.sql.parser.SqlParser.Config
+import org.apache.flink.sql.parser.validate.FlinkSqlConformance
+import org.apache.flink.table.api.SqlDialect
+import org.apache.flink.table.api.SqlDialect.{DEFAULT, HIVE}
+import org.apache.flink.table.api.config.TableConfigOptions
+import org.apache.flink.table.planner.delegation.FlinkSqlParserFactories
+
+import scala.language.existentials
+import scala.util.{Failure, Try}
+
+object FlinkSqlValidator extends Logger {
+
+  private[this] val FLINK112_CALCITE_PARSER_CLASS =
+    "org.apache.flink.table.planner.calcite.CalciteParser"
+
+  private[this] val FLINK113_PLUS_CALCITE_PARSER_CLASS =
+    "org.apache.flink.table.planner.parse.CalciteParser"
+
+  private[this] val SYNTAX_ERROR_REGEXP =
+    ".*at\\sline\\s(\\d+),\\scolumn\\s(\\d+).*".r
+
+  private[this] lazy val sqlParserConfigMap: Map[String, SqlParser.Config] = {
+    def getConfig(sqlDialect: SqlDialect): Config = {
+      val conformance = sqlDialect match {
+        case HIVE =>
+          try {
+            FlinkSqlConformance.DEFAULT
+          } catch {
+            // for flink 1.18+
+            case _: NoSuchFieldError => FlinkSqlConformance.DEFAULT
+            case e: Throwable =>
+              throw new IllegalArgumentException("Init Flink sql Dialect error: ", e)
+          }
+        case DEFAULT => FlinkSqlConformance.DEFAULT
+        case _ =>
+          throw new UnsupportedOperationException(s"Unsupported sqlDialect: $sqlDialect")
+      }
+      SqlParser.config
+        .withParserFactory(FlinkSqlParserFactories.create(conformance))
+        .withConformance(conformance)
+        .withLex(Lex.JAVA)
+        .withIdentifierMaxLength(256)
+    }
+
+    Map(
+      SqlDialect.DEFAULT.name() -> getConfig(SqlDialect.DEFAULT),
+      SqlDialect.HIVE.name() -> getConfig(SqlDialect.HIVE))
+  }
+  /** Parses each statement of `sql`; fails on the first parse error or when no INSERT is present. */
+  def verifySql(sql: String): FlinkSqlValidationResult = {
+    val sqlCommands = SqlCommandParser.parseSQL(sql, r => return r)
+    var sqlDialect = SqlDialect.DEFAULT.name().toLowerCase()
+    var hasInsert = false
+    for (call <- sqlCommands) {
+      val args = call.operands.headOption.orNull // operand-less statements must not fail here
+      val command = call.command
+      command match {
+        case SET | RESET =>
+          if (command == SET && args == TableConfigOptions.TABLE_SQL_DIALECT.key()) {
+            sqlDialect = call.operands.last
+          }
+        case BEGIN_STATEMENT_SET | END_STATEMENT_SET =>
+          logWarn(s"SQL Client Syntax: ${call.command.name} ")
+        case _ =>
+          if (command == INSERT) {
+            hasInsert = true
+          }
+          Try {
+            val calciteClass = Try(Class.forName(FLINK112_CALCITE_PARSER_CLASS))
+              .getOrElse(Class.forName(FLINK113_PLUS_CALCITE_PARSER_CLASS))
+            sqlDialect.toUpperCase() match {
+              case "HIVE" =>
+              case "DEFAULT" =>
+                val parser = calciteClass
+                  .getConstructor(Array(classOf[Config]): _*)
+                  .newInstance(sqlParserConfigMap(sqlDialect.toUpperCase()))
+                val method =
+                  parser.getClass.getDeclaredMethod("parse", classOf[String])
+                method.setAccessible(true)
+                method.invoke(parser, call.originSql)
+              case _ =>
+                throw new UnsupportedOperationException(s"unsupported dialect: $sqlDialect")
+            }
+          } match {
+            case Failure(e) =>
+              val exception = ExceptionUtils.stringifyException(e)
+              val causedBy = exception.drop(exception.indexOf("Caused by:"))
+              val cleanUpError = exception.replaceAll("[\r\n]", "")
+              if (SYNTAX_ERROR_REGEXP.findAllMatchIn(cleanUpError).nonEmpty) {
+                val SYNTAX_ERROR_REGEXP(line, column) = cleanUpError
+                val errorLine = call.lineStart + line.toInt - 1
+                return FlinkSqlValidationResult(
+                  success = false,
+                  failedType = FlinkSqlValidationFailedType.SYNTAX_ERROR,
+                  lineStart = call.lineStart,
+                  lineEnd = call.lineEnd,
+                  errorLine = errorLine,
+                  errorColumn = column.toInt,
+                  sql = call.originSql,
+                  exception = causedBy.replaceAll(s"at\\sline\\s$line", s"at line $errorLine"))
+              } else {
+                return FlinkSqlValidationResult(
+                  success = false,
+                  failedType = FlinkSqlValidationFailedType.SYNTAX_ERROR,
+                  lineStart = call.lineStart,
+                  lineEnd = call.lineEnd,
+                  sql = call.originSql,
+                  exception = causedBy)
+              }
+            case _ =>
+          }
+      }
+    }
+
+    if (hasInsert) {
+      FlinkSqlValidationResult()
+    } else {
+      FlinkSqlValidationResult(
+        success = false,
+        failedType = FlinkSqlValidationFailedType.SYNTAX_ERROR,
+        lineStart = sqlCommands.head.lineStart,
+        lineEnd = sqlCommands.last.lineEnd,
+        exception = "No 'INSERT' statement to trigger the execution of the Flink job.")
+    }
+  }
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkStreamTableTraitV2.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkStreamTableTraitV2.scala
new file mode 100644
index 0000000000..102e925f10
--- /dev/null
+++
b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkStreamTableTraitV2.scala
@@ -0,0 +1,396 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.util.Implicits.JavaList
+import org.apache.streampark.common.util.Utils
+import org.apache.streampark.flink.core.EnhancerImplicit._
+
+import org.apache.flink.api.common.{JobExecutionResult, RuntimeExecutionMode}
+import org.apache.flink.api.common.cache.DistributedCache
+import org.apache.flink.api.common.eventtime.WatermarkStrategy
+import org.apache.flink.api.common.io.{FileInputFormat, FilePathFilter, InputFormat}
+import org.apache.flink.api.connector.source.{Source, SourceSplit}
+import org.apache.flink.api.java.tuple
+import org.apache.flink.configuration.ReadableConfig
+import org.apache.flink.core.execution.{JobClient, JobListener}
+import org.apache.flink.streaming.api.CheckpointingMode
+import org.apache.flink.streaming.api.datastream.DataStream
+import org.apache.flink.streaming.api.environment.{CheckpointConfig, StreamExecutionEnvironment}
+import org.apache.flink.streaming.api.functions.source.FileProcessingMode
+import org.apache.flink.streaming.api.graph.StreamGraph
+import org.apache.flink.table.api._
+import org.apache.flink.table.api.bridge.java.StreamTableEnvironment
+import org.apache.flink.table.catalog.Catalog
+import org.apache.flink.table.functions._
+import org.apache.flink.table.module.Module
+import org.apache.flink.types.Row
+import org.apache.flink.util.{ParameterTool, SplittableIterator}
+
+import java.util.Optional
+
+/**
+ * Integration api of stream and table (Flink 2.x shim - Java DataStream API)
+ *
+ * @param parameter
+ *   job parameters; used to resolve the app name and handed to FlinkSqlExecutor
+ * @param streamEnv
+ *   stream environment behind `execute` and the `$`-prefixed delegate methods
+ * @param tableEnv
+ *   table environment that every overridden StreamTableEnvironment method delegates to
+ */
+abstract class FlinkStreamTableTraitV2(
+    val parameter: ParameterTool,
+    private val streamEnv: StreamExecutionEnvironment,
+    private val tableEnv: StreamTableEnvironment)
+  extends StreamTableEnvironment {
+
+  /**
+   * Once a Table has been converted to a DataStream, the DataStream job must be executed using the
+   * execute method of the StreamExecutionEnvironment.
+   */
+  var isConvertedToDataStream: Boolean = false
+
+  /** Recommended to use this Api to start tasks */
+  def start(name: String = null): JobExecutionResult = {
+    val appName = parameter.getAppName(name, true)
+    execute(appName)
+  }
+
+  @deprecated def execute(jobName: String): JobExecutionResult = {
+    Utils.printLogo(s"FlinkStreamTable $jobName Starting...")
+    if (isConvertedToDataStream) {
+      streamEnv.execute(jobName)
+    } else null
+  }
+
+  def sql(sql: String = null)(implicit callback: String => Unit = null): Unit =
+    FlinkSqlExecutor.executeSql(sql, parameter, this)
+
+  // ...streamEnv api start...
+
+  def $getCachedFiles: JavaList[tuple.Tuple2[String, DistributedCache.DistributedCacheEntry]] =
+    this.streamEnv.getCachedFiles
+
+  def $getJobListeners: JavaList[JobListener] = this.streamEnv.getJobListeners
+
+  def $setParallelism(parallelism: Int): Unit =
+    this.streamEnv.setParallelism(parallelism)
+
+  def $setRuntimeMode(deployMode: RuntimeExecutionMode): StreamExecutionEnvironment =
+    this.streamEnv.setRuntimeMode(deployMode)
+
+  def $setMaxParallelism(maxParallelism: Int): Unit =
+    this.streamEnv.setMaxParallelism(maxParallelism)
+
+  def $getParallelism: Int = this.streamEnv.getParallelism
+
+  def $getMaxParallelism: Int = this.streamEnv.getMaxParallelism
+
+  def $setBufferTimeout(timeoutMillis: Long): StreamExecutionEnvironment =
+    this.streamEnv.setBufferTimeout(timeoutMillis)
+
+  def $getBufferTimeout: Long = this.streamEnv.getBufferTimeout
+
+  def $disableOperatorChaining(): StreamExecutionEnvironment =
+    this.streamEnv.disableOperatorChaining()
+
+  def $getCheckpointConfig: CheckpointConfig =
+    this.streamEnv.getCheckpointConfig
+
+  def $enableCheckpointing(interval: Long, mode: CheckpointingMode): StreamExecutionEnvironment =
+    this.streamEnv.enableCheckpointing(interval, mode)
+
+  def $enableCheckpointing(interval: Long): StreamExecutionEnvironment =
+    this.streamEnv.enableCheckpointing(interval)
+
+  def $getCheckpointingMode: CheckpointingMode =
+    this.streamEnv.getCheckpointingMode
+
+  def $configure(configuration: ReadableConfig, classLoader: ClassLoader): Unit =
+    this.streamEnv.configure(configuration, classLoader)
+
+  def $fromSequence(from: Long, to: Long): DataStream[java.lang.Long] =
+    this.streamEnv.fromSequence(from, to)
+
+  // fromData with varargs removed in Flink 2.0
+  def $fromData[T](data: T): DataStream[T] =
+    this.streamEnv.fromData(data)
+
+  def $fromCollection[T](data: java.util.Collection[T]): DataStream[T] =
+    this.streamEnv.fromCollection(data)
+
+  def $fromParallelCollection[T](data: SplittableIterator[T], clazz: Class[T]): DataStream[T] =
+    this.streamEnv.fromParallelCollection(data, clazz)
+
+  def $readFile[T](inputFormat: FileInputFormat[T], filePath: String): DataStream[T] =
+    this.streamEnv.readFile(inputFormat, filePath)
+
+  def $readFile[T](
+      inputFormat: FileInputFormat[T],
+      filePath: String,
+      watchType: FileProcessingMode,
+      interval: Long): DataStream[T] =
+    this.streamEnv.readFile(inputFormat, filePath, watchType, interval)
+
+  def $socketTextStream(
+      hostname: String,
+      port: Int,
+      delimiter: Char,
+      maxRetry: Long): DataStream[String] =
+    this.streamEnv.socketTextStream(hostname, port, delimiter, maxRetry)
+
+  def $createInput[T](inputFormat: InputFormat[T, _]): DataStream[T] =
+    this.streamEnv.createInput(inputFormat)
+
+  def $fromSource[T](
+      source: Source[T, _ <: SourceSplit, _],
+      watermarkStrategy: WatermarkStrategy[T],
+      sourceName: String): DataStream[T] =
+    this.streamEnv.fromSource(source, watermarkStrategy, sourceName)
+
+  def $registerJobListener(jobListener: JobListener): Unit =
+    this.streamEnv.registerJobListener(jobListener)
+
+  def $clearJobListeners(): Unit = this.streamEnv.clearJobListeners()
+
+  def $executeAsync(): JobClient = this.streamEnv.executeAsync()
+
+  def $executeAsync(jobName: String): JobClient =
+    this.streamEnv.executeAsync(jobName)
+
+  def $getExecutionPlan: String = this.streamEnv.getExecutionPlan
+
+  def $getStreamGraph: StreamGraph = this.streamEnv.getStreamGraph
+
+  def $registerCachedFile(filePath: String, name: String): Unit =
+    this.streamEnv.registerCachedFile(filePath, name)
+
+  def $registerCachedFile(filePath: String, name: String, executable: Boolean): Unit =
+    this.streamEnv.registerCachedFile(filePath, name, executable)
+
+  def $isUnalignedCheckpointsEnabled: Boolean =
+    this.streamEnv.isUnalignedCheckpointsEnabled
+
+  def $isForceUnalignedCheckpoints: Boolean =
+    this.streamEnv.isForceUnalignedCheckpoints
+
+  @deprecated def $readFile[T](
+      inputFormat: FileInputFormat[T],
+      filePath: String,
+      watchType: FileProcessingMode,
+      interval: Long,
+      filter: FilePathFilter): DataStream[T] =
+    this.streamEnv.readFile(inputFormat, filePath, watchType, interval, filter)
+
+  // ...streamEnv api end...
+
+  override def fromDataStream[T](dataStream: DataStream[T]): Table =
+    tableEnv.fromDataStream(dataStream)
+
+  override def fromDataStream[T](dataStream: DataStream[T], schema: Schema): Table =
+    tableEnv.fromDataStream(dataStream, schema)
+
+  override def fromChangelogStream(dataStream: DataStream[Row]): Table =
+    tableEnv.fromChangelogStream(dataStream)
+
+  override def fromChangelogStream(dataStream: DataStream[Row], schema: Schema): Table =
+    tableEnv.fromChangelogStream(dataStream, schema)
+
+  override def fromChangelogStream(
+      dataStream: DataStream[Row],
+      schema: Schema,
+      changelogMode: org.apache.flink.table.connector.ChangelogMode): Table =
+    tableEnv.fromChangelogStream(dataStream, schema, changelogMode)
+
+  override def createTemporaryView[T](path: String, dataStream: DataStream[T]): Unit =
+    tableEnv.createTemporaryView(path, dataStream)
+
+  override def createTemporaryView[T](
+      path: String,
+      dataStream: DataStream[T],
+      schema: Schema): Unit =
+    tableEnv.createTemporaryView(path, dataStream, schema)
+
+  override def toDataStream(table: Table): DataStream[Row] = {
+    isConvertedToDataStream = true
+    tableEnv.toDataStream(table)
+  }
+
+  override def toDataStream[T](table: Table, targetClass: Class[T]): DataStream[T] = {
+    isConvertedToDataStream = true
+    tableEnv.toDataStream(table, targetClass)
+  }
+
+  override def toDataStream[T](
+      table: Table,
+      targetDataType: org.apache.flink.table.types.AbstractDataType[_]): DataStream[T] = {
+    isConvertedToDataStream = true
+    tableEnv.toDataStream(table, targetDataType)
+  }
+
+  override def toChangelogStream(table: Table): DataStream[Row] = {
+    isConvertedToDataStream = true
+    tableEnv.toChangelogStream(table)
+  }
+
+  override def toChangelogStream(table: Table, targetSchema: Schema): DataStream[Row] = {
+    isConvertedToDataStream = true
+    tableEnv.toChangelogStream(table, targetSchema)
+  }
+
+  override def toChangelogStream(
+      table: Table,
+      targetSchema: Schema,
+      changelogMode: org.apache.flink.table.connector.ChangelogMode): DataStream[Row] = {
+    isConvertedToDataStream = true
+    tableEnv.toChangelogStream(table, targetSchema, changelogMode)
+  }
+
+  override def createStatementSet(): org.apache.flink.table.api.bridge.java.StreamStatementSet =
+    tableEnv.createStatementSet()
+
+  // ...table env delegation...
+
+  override def fromValues(values: org.apache.flink.table.expressions.Expression*): Table =
+    tableEnv.fromValues(values: _*) // expand the Seq; unexpanded it is passed as one argument
+
+  override def fromValues(
+      rowType: org.apache.flink.table.types.AbstractDataType[_],
+      values: org.apache.flink.table.expressions.Expression*): Table =
+    tableEnv.fromValues(rowType, values: _*)
+
+  override def fromValues(values: java.lang.Iterable[_]): Table =
+    tableEnv.fromValues(values)
+
+  override def fromValues(
+      rowType: org.apache.flink.table.types.AbstractDataType[_],
+      values: java.lang.Iterable[_]): Table =
+    tableEnv.fromValues(rowType, values)
+
+  override def registerCatalog(catalogName: String, catalog: Catalog): Unit =
+    tableEnv.registerCatalog(catalogName, catalog)
+
+  override def getCatalog(catalogName: String): Optional[Catalog] =
+    tableEnv.getCatalog(catalogName)
+
+  override def loadModule(moduleName: String, module: Module): Unit =
+    tableEnv.loadModule(moduleName, module)
+
+  override def unloadModule(moduleName: String): Unit =
+    tableEnv.unloadModule(moduleName)
+
+  override def createTemporarySystemFunction(
+      name: String,
+      functionClass: Class[_ <: UserDefinedFunction]): Unit =
+    tableEnv.createTemporarySystemFunction(name, functionClass)
+
+  override def createTemporarySystemFunction(
+      name: String,
+      functionInstance: UserDefinedFunction): Unit =
+    tableEnv.createTemporarySystemFunction(name, functionInstance)
+
+  override def dropTemporarySystemFunction(name: String): Boolean =
+    tableEnv.dropTemporarySystemFunction(name)
+
+  override def createFunction(path: String, functionClass: Class[_ <: UserDefinedFunction]): Unit =
+    tableEnv.createFunction(path, functionClass)
+
+  override def createFunction(
+      path: String,
+      functionClass: Class[_ <: UserDefinedFunction],
+      ignoreIfExists: Boolean): Unit =
+    tableEnv.createFunction(path, functionClass, ignoreIfExists) // forward the caller's flag
+
+  override def dropFunction(path: String): Boolean = tableEnv.dropFunction(path)
+
+  override def createTemporaryFunction(
+      path: String,
+      functionClass: Class[_ <: UserDefinedFunction]): Unit =
+    tableEnv.createTemporaryFunction(path, functionClass)
+
+  override def createTemporaryFunction(path: String, functionInstance: UserDefinedFunction): Unit =
+    tableEnv.createTemporaryFunction(path, functionInstance)
+
+  override def dropTemporaryFunction(path: String): Boolean =
+    tableEnv.dropTemporaryFunction(path)
+
+  override def createTemporaryView(path: String, view: Table): Unit =
+    tableEnv.createTemporaryView(path, view)
+
+  override def from(path: String): Table = tableEnv.from(path)
+
+  override def listCatalogs(): Array[String] = tableEnv.listCatalogs()
+
+  override def listModules(): Array[String] = tableEnv.listModules()
+
+  override def listDatabases(): Array[String] = tableEnv.listDatabases()
+
+  override def listTables(): Array[String] = tableEnv.listTables()
+
+  override def listViews(): Array[String] = tableEnv.listViews()
+
+  override def listTemporaryTables(): Array[String] =
+    tableEnv.listTemporaryTables
+
+  override def listTemporaryViews(): Array[String] =
+    tableEnv.listTemporaryViews()
+
+  override def listUserDefinedFunctions(): Array[String] =
+    tableEnv.listUserDefinedFunctions()
+
+  override def listFunctions(): Array[String] = tableEnv.listFunctions()
+
+  override def dropTemporaryTable(path: String): Boolean =
+    tableEnv.dropTemporaryTable(path)
+
+  override def dropTemporaryView(path: String): Boolean =
+    tableEnv.dropTemporaryView(path)
+
+  override def explainSql(statement: String, extraDetails: ExplainDetail*): String =
+    tableEnv.explainSql(statement, extraDetails: _*)
+
+  override def sqlQuery(query: String): Table = tableEnv.sqlQuery(query)
+
+  override def executeSql(statement: String): TableResult =
+    tableEnv.executeSql(statement)
+
+  override def getCurrentCatalog: String = tableEnv.getCurrentCatalog
+
+  override def useCatalog(catalogName: String): Unit =
+    tableEnv.useCatalog(catalogName)
+
+  override def getCurrentDatabase: String = tableEnv.getCurrentDatabase
+
+  override def useDatabase(databaseName: String): Unit =
+    tableEnv.useDatabase(databaseName)
+
+  override def getConfig: TableConfig = tableEnv.getConfig
+
+  @deprecated override def registerFunction(name: String, function: ScalarFunction): Unit =
+    tableEnv.registerFunction(name, function)
+
+  @deprecated override def registerTable(name: String, table: Table): Unit =
+    tableEnv.registerTable(name, table)
+
+  @deprecated override def scan(tablePath: String*): Table =
+    tableEnv.scan(tablePath: _*)
+
+  @deprecated override def getCompletionHints(statement: String, position: Int): Array[String] =
+    tableEnv.getCompletionHints(statement, position)
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkStreamingInitializerV2.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkStreamingInitializerV2.scala
new file mode 100644
index 0000000000..69d1d19d8c
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkStreamingInitializerV2.scala
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.flink.core
+
+import org.apache.streampark.common.conf.ConfigKeys._
+import org.apache.streampark.common.enums.ApiType
+import org.apache.streampark.common.enums.ApiType.ApiType
+import org.apache.streampark.common.util._
+import org.apache.streampark.common.util.Implicits._
+import org.apache.streampark.flink.core.conf.FlinkConfiguration
+
+import collection.{mutable, Map}
+import org.apache.flink.configuration.Configuration
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
+import org.apache.flink.table.api.TableConfig
+import org.apache.flink.util.ParameterTool
+
+import java.io.File
+
+private[flink] object FlinkStreamingInitializerV2 {
+
+  def initialize(
+      args: Array[String],
+      config: (StreamExecutionEnvironment, ParameterTool) => Unit): (ParameterTool, StreamExecutionEnvironment) = {
+    val initializer = new FlinkStreamingInitializerV2(args, ApiType.SCALA)
+    initializer.streamEnvConfFunc = config
+    (initializer.configuration.parameter, initializer.streamEnv)
+  }
+
+  def initialize(args: StreamEnvConfig): (ParameterTool, StreamExecutionEnvironment) = {
+    val initializer =
+      new FlinkStreamingInitializerV2(args.args, ApiType.JAVA)
+    initializer.javaStreamEnvConfFunc = args.conf
+    (initializer.configuration.parameter, initializer.streamEnv)
+  }
+}
+
+private[flink] class FlinkStreamingInitializerV2(args: Array[String], apiType: ApiType)
+  extends Logger {
+
+  var streamEnvConfFunc: (StreamExecutionEnvironment, ParameterTool) => Unit = _
+
+  var tableConfFunc: (TableConfig, ParameterTool) => Unit = _
+
+  var javaStreamEnvConfFunc: StreamEnvConfigFunction = _
+
+  var javaTableEnvConfFunc: TableEnvConfigFunction = _
+
+  implicit private[flink] val parameter: ParameterTool = configuration.parameter
+
+  lazy val streamEnv: StreamExecutionEnvironment = {
+    val environment = StreamExecutionEnvironment.getExecutionEnvironment(configuration.envConfig)
+
+    apiType match {
+      case ApiType.JAVA if javaStreamEnvConfFunc != null =>
+        javaStreamEnvConfFunc.configuration(environment, configuration.parameter)
+      case ApiType.SCALA if streamEnvConfFunc != null =>
+        streamEnvConfFunc.apply(environment, configuration.parameter)
+      case _ =>
+    }
+    environment.getConfig.setGlobalJobParameters(configuration.parameter)
+    environment
+  }
+
+  lazy val configuration: FlinkConfiguration = initParameter()
+
+  def initParameter(): FlinkConfiguration = {
+    val cliArgs = ParameterTool.fromArgs(args)
+    // the application config location is mandatory
+    val confLocation = cliArgs.get(KEY_APP_CONF(), null)
+    if (confLocation == null || confLocation == "") {
+      throw new ExceptionInInitializerError(
+        "[StreamPark] Usage:can't find config,please set \"--conf $path \" in main arguments")
+    }
+    val settings = parseConfig(confLocation)
+    val flinkProps = extractConfigByPrefix(settings, KEY_FLINK_PROPERTY_PREFIX)
+    val appProps = extractConfigByPrefix(settings, KEY_APP_PREFIX)
+
+    // config priority: explicitly specified priority > project profiles > system profiles
+    val parameter = ParameterTool
+      .fromSystemProperties()
+      .mergeWith(ParameterTool.fromMap(flinkProps))
+      .mergeWith(ParameterTool.fromMap(appProps))
+      .mergeWith(cliArgs)
+
+    val envConfig = Configuration.fromMap(flinkProps)
+    FlinkConfiguration(parameter, envConfig, null)
+  }
+
+  def parseConfig(config: String): Map[String, String] = {
+
+    lazy val inlineText = DeflaterUtils.unzipString(config.drop(7))
+
+    def parseByExtension(text: String): Map[String, String] = {
+      // the parser is chosen from the lower-cased text after the last '.' in `config`
+      config.split("\\.").last.toLowerCase match {
+        case "yml" | "yaml" => PropertiesUtils.fromYamlText(text)
+        case "conf" => PropertiesUtils.fromHoconText(text)
+        case "properties" => PropertiesUtils.fromPropertiesText(text)
+        case _ =>
+          throw new IllegalArgumentException(
+            "[StreamPark] Usage: application config file error,must be [yaml|conf|properties]")
+      }
+    }
+
+    val parsed =
+      if (config.startsWith("yaml://")) {
+        PropertiesUtils.fromYamlText(inlineText)
+      } else if (config.startsWith("conf://")) {
+        PropertiesUtils.fromHoconText(inlineText)
+      } else if (config.startsWith("prop://")) {
+        PropertiesUtils.fromPropertiesText(inlineText)
+      } else if (config.startsWith("hdfs://")) {
+        // For a config file on hdfs, the user needs to copy the hdfs-related configuration files under the resources dir
+        parseByExtension(HdfsUtils.read(config))
+      } else {
+        val localFile = new File(config)
+        require(
+          localFile.exists(),
+          s"[StreamPark] Usage: application config file: $localFile is not found!!!")
+        parseByExtension(FileUtils.readFile(localFile))
+      }
+    parsed.filter { case (_, value) => value.nonEmpty }
+  }
+
+  def extractConfigByPrefix(configMap: Map[String, String], prefix: String): Map[String, String] = {
+    val extracted = mutable.Map[String, String]()
+    for ((key, value) <- configMap if key.startsWith(prefix)) {
+      // keep the entry under its key with the prefix removed
+      extracted += key.drop(prefix.length) -> value
+    }
+    extracted
+  }
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkTableInitializerV2.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkTableInitializerV2.scala
new file mode 100644
index 0000000000..9a441db545
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkTableInitializerV2.scala
@@
-0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.flink.core + +import org.apache.streampark.common.conf.ConfigKeys._ +import org.apache.streampark.common.enums.{ApiType, PlannerType} +import org.apache.streampark.common.enums.ApiType.ApiType +import org.apache.streampark.common.util.{DeflaterUtils, PropertiesUtils} +import org.apache.streampark.common.util.Implicits._ +import org.apache.streampark.flink.core.EnhancerImplicit._ +import org.apache.streampark.flink.core.conf.FlinkConfiguration + +import org.apache.flink.configuration.Configuration +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment +import org.apache.flink.table.api.{EnvironmentSettings, TableConfig, TableEnvironment} +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment +import org.apache.flink.util.ParameterTool + +import java.io.File + +import scala.collection.{mutable, Map} +import scala.util.{Failure, Success, Try} + +/** Factory entry points: each `initialize` overload creates a FlinkTableInitializerV2 (ApiType.SCALA for function arguments, ApiType.JAVA for config objects), stores the supplied config callbacks on it and returns its parameters together with the requested environment(s). */ +private[flink] object FlinkTableInitializerV2 { + + def initialize( + args: Array[String], + config: (TableConfig, ParameterTool) => Unit): (ParameterTool, TableEnvironment) = { + val flinkInitializer = new FlinkTableInitializerV2(args, 
ApiType.SCALA) + flinkInitializer.tableConfFunc = config + (flinkInitializer.configuration.parameter, flinkInitializer.tableEnv) + } + + def initialize(args: TableEnvConfig): (ParameterTool, TableEnvironment) = { + val flinkInitializer = new FlinkTableInitializerV2(args.args, ApiType.JAVA) + flinkInitializer.javaTableEnvConfFunc = args.conf + (flinkInitializer.configuration.parameter, flinkInitializer.tableEnv) + } + + def initialize( + args: Array[String], + configStream: (StreamExecutionEnvironment, ParameterTool) => Unit, + configTable: (TableConfig, ParameterTool) => Unit): (ParameterTool, StreamExecutionEnvironment, StreamTableEnvironment) = { + + val flinkInitializer = new FlinkTableInitializerV2(args, ApiType.SCALA) + flinkInitializer.streamEnvConfFunc = configStream + flinkInitializer.tableConfFunc = configTable + ( + flinkInitializer.configuration.parameter, + flinkInitializer.streamEnv, + flinkInitializer.streamTableEnv) + } + + def initialize( + args: StreamTableEnvConfig): (ParameterTool, StreamExecutionEnvironment, StreamTableEnvironment) = { + val flinkInitializer = new FlinkTableInitializerV2(args.args, ApiType.JAVA) + flinkInitializer.javaStreamEnvConfFunc = args.streamConfig + flinkInitializer.javaTableEnvConfFunc = args.tableConfig + ( + flinkInitializer.configuration.parameter, + flinkInitializer.streamEnv, + flinkInitializer.streamTableEnv) + } + +} + +/** Extends the streaming initializer with lazily created table environments (batch `tableEnv`, streaming `streamTableEnv`) and a table-aware `initParameter`. */ +private[flink] class FlinkTableInitializerV2(args: Array[String], apiType: ApiType) + extends FlinkStreamingInitializerV2(args, apiType) { + + /** Environment settings builder: selects the planner named by KEY_FLINK_TABLE_PLANNER (BLINK when the lookup fails) by reflectively calling useBlinkPlanner/useOldPlanner/useAnyPlanner when the builder still declares it, applies the configuration decoded from the mandatory KEY_FLINK_CONF value, and sets the built-in catalog/database names when they are present. */ + private[this] lazy val envSettings = { + + val builder = EnvironmentSettings.newInstance() + + Try(PlannerType.withName(parameter.get(KEY_FLINK_TABLE_PLANNER))) + .getOrElse(PlannerType.BLINK) match { + case PlannerType.BLINK => + val useBlinkPlanner = + Try(builder.getClass.getDeclaredMethod("useBlinkPlanner")) + .getOrElse(null) + if (useBlinkPlanner == null) { + logWarn("useBlinkPlanner deprecated") + } else { + useBlinkPlanner.setAccessible(true) + 
useBlinkPlanner.invoke(builder) + logInfo("blinkPlanner will be used.") + } + case PlannerType.OLD => + val useOldPlanner = Try(builder.getClass.getDeclaredMethod("useOldPlanner")).getOrElse(null) + if (useOldPlanner == null) { + logWarn("useOldPlanner deprecated") + } else { + useOldPlanner.setAccessible(true) + useOldPlanner.invoke(builder) + logInfo("useOldPlanner will be used.") + } + case PlannerType.ANY => + val useAnyPlanner = Try(builder.getClass.getDeclaredMethod("useAnyPlanner")).getOrElse(null) + if (useAnyPlanner == null) { + logWarn("useAnyPlanner deprecated") + } else { + logInfo("useAnyPlanner will be used.") + useAnyPlanner.setAccessible(true) + useAnyPlanner.invoke(builder) + } + } + + parameter.get(KEY_FLINK_CONF(), null) match { + case null | "" => + throw new ExceptionInInitializerError( + "[StreamPark] Usage:can't find config,please set \"--flink.conf $conf \" in main arguments") + case conf => builder.withConfiguration( + Configuration.fromMap(PropertiesUtils.fromYamlText(DeflaterUtils.unzipString(conf)))) + } + val buildWith = + (parameter.get(KEY_FLINK_TABLE_CATALOG), parameter.get(KEY_FLINK_TABLE_DATABASE)) + buildWith match { + case (x: String, y: String) if x != null && y != null => + logInfo(s"with built in catalog: $x") + logInfo(s"with built in database: $y") + builder.withBuiltInCatalogName(x) + builder.withBuiltInDatabaseName(y) + case (x: String, _) if x != null => + logInfo(s"with built in catalog: $x") + builder.withBuiltInCatalogName(x) + case (_, y: String) if y != null => + logInfo(s"with built in database: $y") + builder.withBuiltInDatabaseName(y) + case _ => + } + builder + } + + /** Batch-mode TableEnvironment built from `envSettings`; afterwards the table config callback matching `apiType` is applied when it has been set. */ + lazy val tableEnv: TableEnvironment = { + logInfo(s"job working in batch mode") + envSettings.inBatchMode() + val tableEnv = TableEnvironment.create(envSettings.build()).setAppName + apiType match { + case ApiType.JAVA if javaTableEnvConfFunc != null => + javaTableEnvConfFunc.configuration(tableEnv.getConfig, parameter) + case ApiType.SCALA if 
tableConfFunc != null => + tableConfFunc(tableEnv.getConfig, parameter) + case _ => + } + tableEnv + } + + /** Streaming-mode StreamTableEnvironment created on top of `streamEnv`; any stream callback that is set is invoked on `streamEnv` first, then the table config callback matching `apiType` is applied. */ + lazy val streamTableEnv: StreamTableEnvironment = { + logInfo(s"components should work in streaming mode") + envSettings.inStreamingMode() + val setting = envSettings.build() + + if (streamEnvConfFunc != null) { + streamEnvConfFunc(streamEnv, parameter) + } + if (javaStreamEnvConfFunc != null) { + javaStreamEnvConfFunc.configuration(streamEnv, parameter) + } + val streamTableEnv = + StreamTableEnvironment.create(streamEnv, setting).setAppName + apiType match { + case ApiType.JAVA if javaTableEnvConfFunc != null => + javaTableEnvConfFunc.configuration(streamTableEnv.getConfig, parameter) + case ApiType.SCALA if tableConfFunc != null => + tableConfFunc(streamTableEnv.getConfig, parameter) + case _ => + } + streamTableEnv + } + + /** In case of table SQL, the parameter conf is not required, it depends on the developer. Without a KEY_APP_CONF argument only system properties and program arguments are merged; otherwise the config file is split by prefix into flink property, table, app and sql settings. A KEY_FLINK_SQL value is then decoded: first as deflated text, then as a YAML file path; if both fail the configuration is returned unchanged and the failure is logged. */ + + override def initParameter(): FlinkConfiguration = { + val configuration = { + val argsMap = ParameterTool.fromArgs(args) + argsMap.get(KEY_APP_CONF(), null) match { + case null | "" => + logWarn("Usage:can't find config,you can set \"--conf $path \" in main arguments") + val parameter = + ParameterTool.fromSystemProperties().mergeWith(argsMap) + FlinkConfiguration(parameter, new Configuration(), new Configuration()) + case file => + val configMap = parseConfig(file) + // set sql.. 
+ val sqlConf = mutable.Map[String, String]() + configMap.foreach(x => { + if (x._1.startsWith(KEY_SQL_PREFIX)) { + sqlConf += x._1.drop(KEY_SQL_PREFIX.length) -> x._2 + } + }) + + // config priority: explicitly specified priority > project profiles > system profiles + val properConf = + extractConfigByPrefix(configMap, KEY_FLINK_PROPERTY_PREFIX) + val appConf = extractConfigByPrefix(configMap, KEY_APP_PREFIX) + val tableConf = + extractConfigByPrefix(configMap, KEY_FLINK_TABLE_PREFIX) + + val tableConfig = Configuration.fromMap(tableConf) + val envConfig = Configuration.fromMap(properConf) + + val parameter = ParameterTool + .fromSystemProperties() + .mergeWith(ParameterTool.fromMap(properConf)) + .mergeWith(ParameterTool.fromMap(tableConf)) + .mergeWith(ParameterTool.fromMap(appConf)) + .mergeWith(ParameterTool.fromMap(sqlConf)) + .mergeWith(argsMap) + + FlinkConfiguration(parameter, envConfig, tableConfig) + } + } + + configuration.parameter.get(KEY_FLINK_SQL()) match { + case null => configuration + case param => + // for streampark-console + Try(DeflaterUtils.unzipString(param)) match { + case Success(value) => + configuration.copy(parameter = configuration.parameter.mergeWith( + ParameterTool.fromMap(Map(KEY_FLINK_SQL() -> value)))) + case Failure(_) => + val sqlFile = new File(param) + Try(PropertiesUtils.fromYamlFile(sqlFile.getAbsolutePath)) match { + case Success(value) => + configuration.copy(parameter = + configuration.parameter.mergeWith(ParameterTool.fromMap(value))) + case Failure(e) => + // Best effort: keep the value as given, but report the failure instead of building an exception that is never thrown. + logWarn(s"[StreamPark] init sql error.$e") + configuration + } + } + } + } + +} diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkTableTrait.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkTableTrait.scala new file mode 100644 index 0000000000..32bef31194 --- /dev/null +++ 
b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/FlinkTableTrait.scala @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.flink.core + +import org.apache.streampark.common.util.Utils +import org.apache.streampark.flink.core.EnhancerImplicit._ + +import org.apache.flink.api.common.JobExecutionResult +import org.apache.flink.table.api._ +import org.apache.flink.table.catalog.Catalog +import org.apache.flink.table.expressions.Expression +import org.apache.flink.table.functions._ +import org.apache.flink.table.module.Module +import org.apache.flink.table.types.AbstractDataType +import org.apache.flink.util.ParameterTool + +import java.lang +import java.util.Optional + +/** TableEnvironment that forwards every overridden call to the wrapped `tableEnv` and adds the job helpers `start`, `execute` and `sql`, which use `parameter`. */ +abstract class FlinkTableTrait(val parameter: ParameterTool, private val tableEnv: TableEnvironment) + extends TableEnvironment { + + /** Reads the application name from `parameter` (required) and passes it to `execute`. */ + def start(): JobExecutionResult = { + val appName = parameter.getAppName(required = true) + execute(appName) + } + + /** Prints the start logo for `jobName` and returns null. NOTE(review): presumably overridden by concrete environments that submit a job - confirm. */ + def execute(jobName: String): JobExecutionResult = { + Utils.printLogo(s"FlinkTable $jobName Starting...") + null + } + + /** Hands `sql` (may be null), `parameter` and this environment to FlinkSqlExecutor.executeSql. */ + def sql(sql: String = null): Unit = + 
FlinkSqlExecutor.executeSql(sql, parameter, this) + + // Expand the varargs with `: _*`; passing the Seq itself would hand the delegate one single value instead of the expressions. + override def fromValues(values: Expression*): Table = + tableEnv.fromValues(values: _*) + + override def fromValues(rowType: AbstractDataType[_], values: Expression*): Table = + tableEnv.fromValues(rowType, values: _*) + + override def fromValues(values: lang.Iterable[_]): Table = + tableEnv.fromValues(values) + + override def fromValues(rowType: AbstractDataType[_], values: lang.Iterable[_]): Table = + tableEnv.fromValues(rowType, values) + + override def registerCatalog(catalogName: String, catalog: Catalog): Unit = + tableEnv.registerCatalog(catalogName, catalog) + + override def getCatalog(catalogName: String): Optional[Catalog] = + tableEnv.getCatalog(catalogName) + + override def loadModule(moduleName: String, module: Module): Unit = + tableEnv.loadModule(moduleName, module) + + override def unloadModule(moduleName: String): Unit = + tableEnv.unloadModule(moduleName) + + override def createTemporarySystemFunction( + name: String, + functionClass: Class[_ <: UserDefinedFunction]): Unit = + tableEnv.createTemporarySystemFunction(name, functionClass) + + override def createTemporarySystemFunction( + name: String, + functionInstance: UserDefinedFunction): Unit = + tableEnv.createTemporarySystemFunction(name, functionInstance) + + override def dropTemporarySystemFunction(name: String): Boolean = + tableEnv.dropTemporarySystemFunction(name) + + override def createFunction(path: String, functionClass: Class[_ <: UserDefinedFunction]): Unit = + tableEnv.createFunction(path, functionClass) + + // Forward ignoreIfExists as well; the two-argument delegate call silently discarded the caller's choice. + override def createFunction( + path: String, + functionClass: Class[_ <: UserDefinedFunction], + ignoreIfExists: Boolean): Unit = + tableEnv.createFunction(path, functionClass, ignoreIfExists) + + override def dropFunction(path: String): Boolean = tableEnv.dropFunction(path) + + override def createTemporaryFunction( + path: String, + functionClass: Class[_ <: UserDefinedFunction]): Unit = + tableEnv.createTemporaryFunction(path, functionClass) + 
+ override def createTemporaryFunction(path: String, functionInstance: UserDefinedFunction): Unit = + tableEnv.createTemporaryFunction(path, functionInstance) + + override def dropTemporaryFunction(path: String): Boolean = + tableEnv.dropTemporaryFunction(path) + + override def createTemporaryView(path: String, view: Table): Unit = + tableEnv.createTemporaryView(path, view) + + override def from(path: String): Table = tableEnv.from(path) + + override def listCatalogs(): Array[String] = tableEnv.listCatalogs() + + override def listModules(): Array[String] = tableEnv.listModules() + + override def listDatabases(): Array[String] = tableEnv.listDatabases() + + override def listTables(): Array[String] = tableEnv.listTables() + + override def listViews(): Array[String] = tableEnv.listViews() + + override def listTemporaryTables(): Array[String] = + tableEnv.listTemporaryTables + + override def listTemporaryViews(): Array[String] = + tableEnv.listTemporaryViews() + + override def listUserDefinedFunctions(): Array[String] = + tableEnv.listUserDefinedFunctions() + + override def listFunctions(): Array[String] = tableEnv.listFunctions() + + override def dropTemporaryTable(path: String): Boolean = + tableEnv.dropTemporaryTable(path) + + override def dropTemporaryView(path: String): Boolean = + tableEnv.dropTemporaryView(path) + + override def explainSql(statement: String, extraDetails: ExplainDetail*): String = + tableEnv.explainSql(statement, extraDetails: _*) + + override def sqlQuery(query: String): Table = tableEnv.sqlQuery(query) + + override def executeSql(statement: String): TableResult = + tableEnv.executeSql(statement) + + override def getCurrentCatalog: String = tableEnv.getCurrentCatalog + + override def useCatalog(catalogName: String): Unit = + tableEnv.useCatalog(catalogName) + + override def getCurrentDatabase: String = tableEnv.getCurrentDatabase + + override def useDatabase(databaseName: String): Unit = + tableEnv.useDatabase(databaseName) + + override def 
getConfig: TableConfig = tableEnv.getConfig + + override def createStatementSet(): StatementSet = + tableEnv.createStatementSet() + + @deprecated override def registerFunction(name: String, function: ScalarFunction): Unit = + tableEnv.registerFunction(name, function) + + @deprecated override def registerTable(name: String, table: Table): Unit = + tableEnv.registerTable(name, table) + + @deprecated override def scan(tablePath: String*): Table = + tableEnv.scan(tablePath: _*) + + @deprecated override def getCompletionHints(statement: String, position: Int): Array[String] = + tableEnv.getCompletionHints(statement, position) +} diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/SqlCommandParser.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/SqlCommandParser.scala new file mode 100644 index 0000000000..037509af2a --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/SqlCommandParser.scala @@ -0,0 +1,651 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.streampark.flink.core + +import org.apache.streampark.common.conf.ConfigKeys.PARAM_PREFIX +import org.apache.streampark.common.enums.FlinkSqlValidationFailedType +import org.apache.streampark.common.util.Logger + +import enumeratum.EnumEntry +import org.apache.commons.lang3.StringUtils + +import java.lang.{Boolean => JavaBool} +import java.util.Scanner +import java.util.regex.{Matcher, Pattern} + +import scala.annotation.tailrec +import scala.collection.{immutable, mutable} +import scala.collection.mutable.ListBuffer +import scala.util.control.Breaks.{break, breakable} + +object SqlCommandParser extends Logger { + + def parseSQL( + sql: String, + validationCallback: FlinkSqlValidationResult => Unit = null): List[SqlCommandCall] = { + val sqlEmptyError = "verify failed: flink sql cannot be empty." + require(StringUtils.isNotBlank(sql), sqlEmptyError) + val sqlSegments = SqlSplitter.splitSql(sql) + sqlSegments match { + case s if s.isEmpty => + if (validationCallback != null) { + validationCallback( + FlinkSqlValidationResult( + success = false, + failedType = FlinkSqlValidationFailedType.VERIFY_FAILED, + exception = sqlEmptyError)) + null + } else { + throw new IllegalArgumentException(sqlEmptyError) + } + case segments => + val calls = new ListBuffer[SqlCommandCall] + for (segment <- segments) { + parseLine(segment) match { + case Some(x) => calls += x + case _ => + if (validationCallback != null) { + validationCallback( + FlinkSqlValidationResult( + success = false, + failedType = FlinkSqlValidationFailedType.UNSUPPORTED_SQL, + lineStart = segment.start, + lineEnd = segment.end, + exception = s"unsupported sql", + sql = segment.sql)) + } else { + throw new UnsupportedOperationException(s"unsupported sql: ${segment.sql}") + } + } + } + + calls.toList match { + case c if c.isEmpty => + if (validationCallback != null) { + validationCallback( + FlinkSqlValidationResult( + success = false, + failedType = 
FlinkSqlValidationFailedType.VERIFY_FAILED, + exception = "flink sql syntax error, no executable sql")) + null + } else { + throw new UnsupportedOperationException("flink sql syntax error, no executable sql") + } + case r => r + } + } + } + + private[this] def parseLine(sqlSegment: SqlSegment): Option[SqlCommandCall] = { + val sqlCommand = SqlCommand.get(sqlSegment.sql.trim) + if (sqlCommand == null) None + else { + val matcher = sqlCommand.matcher + val groups = new Array[String](matcher.groupCount) + for (i <- groups.indices) { + groups(i) = matcher.group(i + 1) + } + sqlCommand + .converter(groups) + .map(x => + SqlCommandCall(sqlSegment.start, sqlSegment.end, sqlCommand, x, sqlSegment.sql.trim)) + } + } + +} + +object Converters { + val NO_OPERANDS = (_: Array[String]) => Some(Array.empty[String]) +} + +sealed abstract class SqlCommand( + val name: String, + private val regex: String, + val converter: Array[String] => Option[Array[String]] = (x: Array[String]) => + Some(Array[String](x.head))) + extends EnumEntry { + var matcher: Matcher = _ + + def matches(input: String): Boolean = { + if (StringUtils.isBlank(regex)) false + else { + val pattern = + Pattern.compile(regex, Pattern.CASE_INSENSITIVE | Pattern.DOTALL) + matcher = pattern.matcher(input) + matcher.matches() + } + } +} + +object SqlCommand extends enumeratum.Enum[SqlCommand] { + + def get(stmt: String): SqlCommand = { + var cmd: SqlCommand = null + breakable { + this.values.foreach(x => { + if (x.matches(stmt)) { + cmd = x + break() + } + }) + } + cmd + } + + val values: immutable.IndexedSeq[SqlCommand] = findValues + + // ---- SELECT Statements-------------------------------------------------------------------------------------------------------------------------------- + case object SELECT extends SqlCommand("select", "(SELECT\\s+.+)") + + // ----CREATE Statements-------------------------------------------------------------------------------------------------------------------------------- + + /** 
+ *
 CREATE [TEMPORARY] TABLE [IF NOT EXISTS] [catalog_name.][db_name.]table_name ( {
+   *  |  |  }[ ,
+   * ...n] [  ] [  ][ , ...n] ) [COMMENT table_comment]
+   * [PARTITIONED BY (partition_column_name1, partition_column_name2, ...)] WITH (key1=val1,
+   * key2=val2, ...) [ LIKE source_table [(  )] ] 
CREATE CATALOG catalog_name WITH (key1=val1, key2=val2, ...) */ + case object CREATE_CATALOG extends SqlCommand("create catalog", "(CREATE\\s+CATALOG\\s+.+)") + + /** + *
 CREATE DATABASE [IF NOT EXISTS] [catalog_name.]db_name
[COMMENT database_comment]
+ * WITH (key1=val1, key2=val2, ...)
+ */ + case object CREATE_DATABASE extends SqlCommand("create database", "(CREATE\\s+DATABASE\\s+.+)") + + /** + *
 CREATE [TEMPORARY] VIEW [IF NOT EXISTS] [catalog_name.][db_name.]view_name [( columnName
+   * [, columnName ]* )] [COMMENT view_comment] AS query_expression< 
CREATE [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF NOT EXISTS] + * [catalog_name.][db_name.]function_name AS identifier [LANGUAGE JAVA|SCALA|PYTHON] DROP statements are used to remove a catalog with the given catalog name or to remove a + * registered table/view/function from the current or specified Catalog. + * + * Flink SQL supports the following DROP statements for now: * DROP CATALOG * DROP TABLE * DROP + * DATABASE * DROP VIEW * DROP FUNCTION + */ + + /** DROP CATALOG [IF EXISTS] catalog_name */ + case object DROP_CATALOG extends SqlCommand("drop catalog", "(DROP\\s+CATALOG\\s+.+)") + + /** DROP [TEMPORARY] TABLE [IF EXISTS] [catalog_name.][db_name.]table_name */ + case object DROP_TABLE extends SqlCommand("drop table", "(DROP\\s+(TEMPORARY\\s+|)TABLE\\s+.+)") + + /** DROP DATABASE [IF EXISTS] [catalog_name.]db_name [ (RESTRICT | CASCADE) ] */ + case object DROP_DATABASE extends SqlCommand("drop database", "(DROP\\s+DATABASE\\s+.+)") + + /** DROP [TEMPORARY] VIEW [IF EXISTS] [catalog_name.][db_name.]view_name */ + case object DROP_VIEW extends SqlCommand("drop view", "(DROP\\s+(TEMPORARY\\s+|)VIEW\\s+.+)") + + /** + * DROP [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF EXISTS] + * [catalog_name.][db_name.]function_name + */ + case object DROP_FUNCTION + extends SqlCommand( + "drop function", + "(DROP\\s+(TEMPORARY\\s+|TEMPORARY\\s+SYSTEM\\s+|)FUNCTION\\s+.+)") + + // ----ALTER Statements-------------------------------------------------------------------------------------------------------------------------------- + + /** + * ALTER TABLE [catalog_name.][db_name.]table_name RENAME TO new_table_name + * + * ALTER TABLE [catalog_name.][db_name.]table_name SET (key1=val1, key2=val2, + * ...) 
+ */ + case object ALTER_TABLE extends SqlCommand("alter table", "(ALTER\\s+TABLE\\s+.+)") + + /** + * ALTER VIEW [catalog_name.][db_name.]view_name RENAME TO new_view_name + * + * ALTER VIEW [catalog_name.][db_name.]view_name AS new_query_expression + */ + case object ALTER_VIEW extends SqlCommand("alter view", "(ALTER\\s+VIEW\\s+.+)") + + /** ALTER DATABASE [catalog_name.]db_name SET (key1=val1, key2=val2, ...) */ + case object ALTER_DATABASE extends SqlCommand("alter database", "(ALTER\\s+DATABASE\\s+.+)") + + /** + * ALTER [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF EXISTS] + * [catalog_name.][db_name.]function_name AS identifier [LANGUAGE JAVA|SCALA|PYTHON] + */ + case object ALTER_FUNCTION + extends SqlCommand( + "alter function", + "(ALTER\\s+(TEMPORARY\\s+|TEMPORARY\\s+SYSTEM\\s+|)FUNCTION\\s+.+)") + + // ---- INSERT Statement-------------------------------------------------------------------------------------------------------------------------------- + + /** + * INSERT { INTO | OVERWRITE } [catalog_name.][db_name.]table_name [PARTITION part_spec] + * [column_list] select_statement INSERT { INTO | OVERWRITE } [catalog_name.][db_name.]table_name + * VALUES values_row [, values_row ...] + */ + case object INSERT extends SqlCommand("insert", "(INSERT\\s+(INTO|OVERWRITE)\\s+.+)") + + // ---- DESCRIBE Statement-------------------------------------------------------------------------------------------------------------------------------- + + /** { DESCRIBE | DESC } [catalog_name.][db_name.]table_name */ + case object DESC extends SqlCommand("desc", "(DESC\\s+.+)") + + /** { DESCRIBE | DESC } [catalog_name.][db_name.]table_name */ + case object DESCRIBE extends SqlCommand("describe", "(DESCRIBE\\s+.+)") + + // ---- EXPLAIN Statement-------------------------------------------------------------------------------------------------------------------------------- + + /** + * For flink-1.13.x: EXPLAIN PLAN FOR ``
For + * flink-1.14.x: EXPLAIN ESTIMATED_COST, CHANGELOG_MODE, JSON_EXECUTION_PLAN + * ``
For flink-1.15.x:
 EXPLAIN
+   * [([ExplainDetail[, ExplainDetail]*]) | PLAN FOR]
+   * 
+   *
+   * statement_set: EXECUTE STATEMENT SET BEGIN insert_statement; ... insert_statement; END; 
+ * Recommended not to use the form of flink-1.15.x + */ + case object EXPLAIN extends SqlCommand("explain", "(EXPLAIN\\s+.+)") + + // ---- USE Statements-------------------------------------------------------------------------------------------------------------------------------- + + /** USE CATALOG catalog_name */ + case object USE_CATALOG extends SqlCommand("use catalog", "(USE\\s+CATALOG\\s+.+)") + + /** USE MODULES module_name1[, module_name2, ...] */ + case object USE_MODULES extends SqlCommand("use modules", "(USE\\s+MODULES\\s+.+)") + + /** USE [catalog_name.]database_name */ + case object USE_DATABASE extends SqlCommand("use database", "(USE\\s+(?!(CATALOG|MODULES)).+)") + + // ----SHOW Statements-------------------------------------------------------------------------------------------------------------------------------- + + /** SHOW CATALOGS */ + case object SHOW_CATALOGS extends SqlCommand("show catalogs", "(SHOW\\s+CATALOGS\\s*)") + + /** SHOW CURRENT CATALOG */ + case object SHOW_CURRENT_CATALOG + extends SqlCommand("show current catalog", "(SHOW\\s+CURRENT\\s+CATALOG\\s*)") + + /** SHOW DATABASES */ + case object SHOW_DATABASES extends SqlCommand("show databases", "(SHOW\\s+DATABASES\\s*)") + + /** SHOW CURRENT DATABASE */ + case object SHOW_CURRENT_DATABASE + extends SqlCommand("show current database", "(SHOW\\s+CURRENT\\s+DATABASE\\s*)") + + /** + * SHOW TABLES,support from flink-1.13.x
SHOW TABLES [ ( FROM | IN ) + * [catalog_name.]database_name ] [ [NOT] LIKE ` ], support from flink-1.15.x + */ + case object SHOW_TABLES extends SqlCommand("show tables", "(SHOW\\s+TABLES.*)") + + /** SHOW CREATE TABLE, flink-1.14.x support. */ + case object SHOW_CREATE_TABLE + extends SqlCommand("show create table", "(SHOW\\s+CREATE\\s+TABLE\\s+.+)") + + /** + * SHOW COLUMNS ( FROM | IN ) [`[`catalog_name.]database.]`` [ [NOT] LIKE + * ``],flink-1.15.x support. + */ + case object SHOW_COLUMNS extends SqlCommand("show columns", "(SHOW\\s+COLUMNS\\s+.+)") + + /** SHOW VIEWS */ + case object SHOW_VIEWS extends SqlCommand("show views", "(SHOW\\s+VIEWS\\s*)") + + /** SHOW CREATE VIEW */ + case object SHOW_CREATE_VIEW + extends SqlCommand("show create view", "(SHOW\\s+CREATE\\s+VIEW\\s+.+)") + + /** SHOW [USER] FUNCTIONS */ + case object SHOW_FUNCTIONS + extends SqlCommand("show functions", "(SHOW\\s+(USER\\s+|)FUNCTIONS\\s*)") + + /** SHOW [FULL] MODULES */ + case object SHOW_MODULES extends SqlCommand("show modules", "(SHOW\\s+(FULL\\s+|)MODULES\\s*)") + + // ----LOAD Statements-------------------------------------------------------------------------------------------------------------------------------- + + /** LOAD MODULE module_name [WITH ('key1' = 'val1', 'key2' = 'val2', ...)] */ + case object LOAD_MODULE extends SqlCommand("load module", "(LOAD\\s+MODULE\\s+.+)") + + // ----UNLOAD Statements-------------------------------------------------------------------------------------------------------------------------------- + + /** UNLOAD MODULE module_name */ + case object UNLOAD_MODULE extends SqlCommand("unload module", "(UNLOAD\\s+MODULE\\s+.+)") + + // ----SET Statements-------------------------------------------------------------------------------------------------------------------------------- + + /** SET ('key' = 'value'). Operands are the cleaned key and value; a bare SET (optional group 1 unmatched, so the first group is null) yields no operands instead of dereferencing the null group. */ + case object SET + extends SqlCommand( + "set", + "SET(\\s+(\\S+)\\s*=(.*))?", + { + case a if a.length < 3 => None + case a if a.head 
== null => Some(Array.empty[String]) + case a => Some(Array[String](cleanUp(a(1)), cleanUp(a(2)))) + }) + + // ----RESET Statements-------------------------------------------------------------------------------------------------------------------------------- + + /** RESET ('key') */ + case object RESET extends SqlCommand("reset", "RESET\\s+'(.*)'") + + /** RESET */ + case object RESET_ALL extends SqlCommand("reset all", "RESET", _ => Some(Array[String]("ALL"))) + + // ----INSERT SET Statements-------------------------------------------------------------------------------------------------------------------------------- + /* + *
+   * SQL Client execute each INSERT INTO statement as a single Flink job. However,
+   * this is sometimes not optimal because some part of the pipeline can be reused.
+   * SQL Client supports STATEMENT SET syntax to execute a set of SQL statements.
+   * This is an equivalent feature with StatementSet in Table API.
+   * The STATEMENT SET syntax encloses one or more INSERT INTO statements.
+   * All statements in a STATEMENT SET block are holistically optimized and executed as a single Flink job.
+   * Joint optimization and execution allows for reusing common intermediate results and can therefore significantly
+   * improve the efficiency of executing multiple queries.
+   * 
+   */
+  /** This is SQL Client's syntax, don't use in our platform. */
+  @deprecated
+  case object BEGIN_STATEMENT_SET
+    extends SqlCommand("begin statement set", "BEGIN\\s+STATEMENT\\s+SET", Converters.NO_OPERANDS)
+
+  /** This is SQL Client's syntax, don't use in our platform. */
+  @deprecated
+  case object END_STATEMENT_SET
+    extends SqlCommand("end statement set", "END", Converters.NO_OPERANDS)
+
+  // Since: 2.1.2 for flink 1.18
+  case object DELETE extends SqlCommand("delete", "(DELETE\\s+FROM\\s+.+)")
+
+  // Since: 2.1.2 for flink 1.18
+  case object UPDATE extends SqlCommand("update", "(UPDATE\\s+.+)")
+
+  private[this] def cleanUp(sql: String): String =
+    sql.trim.replaceAll("^(['\"])|(['\"])$", "")
+
+}
+
+/** Call of SQL command with operands and command type. */
+case class SqlCommandCall(
+    lineStart: Int,
+    lineEnd: Int,
+    command: SqlCommand,
+    operands: Array[String],
+    originSql: String) {}
+
+case class FlinkSqlValidationResult(
+    success: JavaBool = true,
+    failedType: FlinkSqlValidationFailedType = null,
+    lineStart: Int = 0,
+    lineEnd: Int = 0,
+    errorLine: Int = 0,
+    errorColumn: Int = 0,
+    sql: String = null,
+    exception: String = null)
+
+/** One statement produced by [[SqlSplitter.splitSql]] together with its first and last line number. */
+case class SqlSegment(start: Int, end: Int, sql: String)
+
+/** Splits a multi-statement SQL script into single statements while keeping track of line numbers. */
+object SqlSplitter {
+
+  private lazy val singleLineCommentPrefixList = Set[String](PARAM_PREFIX)
+
+  /**
+   * Split the whole text into multiple sql statements, in two steps.
+   *
+   * Step 1: cut the text at every `;` that is outside of quotes and comments. Comment text is
+   * dropped, except that line breaks inside comments and `/*+ ... */` hint blocks are kept.
+   *
+   * Step 2: refine the results by prefixing every statement with the line breaks of the
+   * statement before it, so that a parsing error reports a line number close to the one in the
+   * original text. e.g. `select a from table_1; select a from table_2; select a from table_3;`
+   * written on three lines is split into: sql_1: `select a from table_1`, sql_2:
+   * `\nselect a from table_2`, sql_3: `\n\nselect a from table_3`.
+   *
+   * @param sql
+   *   the script to split; `null`, empty and blank input yield an empty list
+   * @return
+   *   the statements in ascending order of their start line
+   */
+  def splitSql(sql: String): List[SqlSegment] =
+    if (StringUtils.isNotBlank(sql)) splitNonBlankSql(sql) else Nil
+
+  /** Implementation of [[splitSql]] for input that is known to be neither null nor blank. */
+  private[this] def splitNonBlankSql(sql: String): List[SqlSegment] = {
+    val queries = ListBuffer[String]()
+    val lastIndex = sql.length - 1
+    var query = new mutable.StringBuilder
+
+    var multiLineComment = false
+    var singleLineComment = false
+    var singleQuoteString = false
+    var doubleQuoteString = false
+    var lineNum: Int = 0
+    // statement index -> (start line, end line); one entry is appended per markLineNumber() call
+    val lineNumMap = new collection.mutable.HashMap[Int, (Int, Int)]()
+
+    // 1-based line number -> true when the line carries statement text, false when it is blank,
+    // starts with PARAM_PREFIX, or belongs to a block comment that starts at the beginning of a line
+    val lineDescriptor = {
+      val scanner = new Scanner(sql)
+      val descriptor = new collection.mutable.HashMap[Int, Boolean]
+      var lineNumber = 0
+      var startComment = false
+      var hasComment = false
+
+      while (scanner.hasNextLine) {
+        lineNumber += 1
+        val line = scanner.nextLine().trim
+        val nonEmpty =
+          StringUtils.isNotBlank(line) && !line.startsWith(PARAM_PREFIX)
+        if (line.startsWith("/*")) {
+          startComment = true
+          hasComment = true
+        }
+
+        descriptor += lineNumber -> (nonEmpty && !hasComment)
+
+        if (startComment && line.endsWith("*/")) {
+          startComment = false
+          hasComment = false
+        }
+      }
+      descriptor
+    }
+
+    // first line at or after `num` that carries statement text
+    @tailrec
+    def findStartLine(num: Int): Int =
+      if (num >= lineDescriptor.size || lineDescriptor(num)) num
+      else findStartLine(num + 1)
+
+    // records (start, end) for the statement that ends on the current line
+    def markLineNumber(): Unit = {
+      val line = lineNum + 1
+      if (lineNumMap.isEmpty) {
+        lineNumMap += (0 -> (findStartLine(1) -> line))
+      } else {
+        val index = lineNumMap.size
+        val start = lineNumMap(lineNumMap.size - 1)._2 + 1
+        lineNumMap += (index -> (findStartLine(start) -> line))
+      }
+    }
+
+    for (idx <- 0 until sql.length) {
+
+      if (sql.charAt(idx) == '\n') lineNum += 1
+
+      breakable {
+        val ch = sql.charAt(idx)
+
+        // end of single line comment
+        if (singleLineComment && (ch == '\n')) {
+          singleLineComment = false
+          query += ch
+          if (idx == lastIndex && query.toString.trim.nonEmpty) {
+            // add query when it is the end of sql. The line range has to be recorded here as
+            // well: every emitted query needs an entry in lineNumMap, otherwise the lookup at
+            // the end fails for a script such as "select 1\n-- trailing comment\n".
+            markLineNumber()
+            queries += query.toString
+          }
+          break()
+        }
+
+        // end of multiple line comment
+        if (multiLineComment && (idx - 1) >= 0 && sql.charAt(idx - 1) == '/'
+          && (idx - 2) >= 0 && sql.charAt(idx - 2) == '*') {
+          multiLineComment = false
+        }
+
+        // single quote start or end mark
+        if (ch == '\'' && !(singleLineComment || multiLineComment)) {
+          if (singleQuoteString) {
+            singleQuoteString = false
+          } else if (!doubleQuoteString) {
+            singleQuoteString = true
+          }
+        }
+
+        // double quote start or end mark
+        if (ch == '"' && !(singleLineComment || multiLineComment)) {
+          if (doubleQuoteString && idx > 0) {
+            doubleQuoteString = false
+          } else if (!singleQuoteString) {
+            doubleQuoteString = true
+          }
+        }
+
+        // single line comment or multiple line comment start mark
+        // ("/*+" is an optimizer hint and is deliberately not treated as a comment)
+        if (!singleQuoteString && !doubleQuoteString && !multiLineComment && !singleLineComment && idx < lastIndex) {
+          if (isSingleLineComment(sql.charAt(idx), sql.charAt(idx + 1))) {
+            singleLineComment = true
+          } else if (sql.charAt(idx) == '/' && sql.length > (idx + 2)
+            && sql.charAt(idx + 1) == '*' && sql.charAt(idx + 2) != '+') {
+            multiLineComment = true
+          }
+        }
+
+        if (ch == ';' && !singleQuoteString && !doubleQuoteString && !multiLineComment && !singleLineComment) {
+          markLineNumber()
+          // meet the end of semicolon
+          if (query.toString.trim.nonEmpty) {
+            queries += query.toString
+            query = new mutable.StringBuilder
+          }
+        } else if (idx == lastIndex) {
+          markLineNumber()
+
+          // meet the last character
+          if (!singleLineComment && !multiLineComment) {
+            query += ch
+          }
+
+          if (query.toString.trim.nonEmpty) {
+            queries += query.toString
+            query = new mutable.StringBuilder
+          }
+        } else if (!singleLineComment && !multiLineComment) {
+          // normal case, not in single line comment and not in multiple line comment
+          query += ch
+        } else if (ch == '\n') {
+          // inside a comment only the line breaks are kept
+          query += ch
+        }
+      }
+    }
+
+    // An ordered buffer instead of a HashMap: "the previous refined statement" must be
+    // well-defined, and it must be a String (HashMap#last is an unordered (key, value) tuple,
+    // which was concatenated into the statement text as "(0,select ...)").
+    val refinedQueries = ListBuffer[String]()
+    for (currStatement <- queries) {
+      if (isSingleLineComment(currStatement) || isMultipleLineComment(currStatement)) {
+        // transform comment line as blank lines appended to the previous statement
+        if (refinedQueries.nonEmpty) {
+          refinedQueries(refinedQueries.size - 1) =
+            refinedQueries.last + extractLineBreaks(currStatement)
+        }
+      } else {
+        // add some blank lines before the statement to keep the original line number
+        val linesPlaceholder = refinedQueries.lastOption.map(extractLineBreaks).getOrElse("")
+        refinedQueries += linesPlaceholder + currStatement
+      }
+    }
+
+    refinedQueries.zipWithIndex
+      .map {
+        case (statement, index) =>
+          val (start, end) = lineNumMap(index)
+          SqlSegment(start, end, statement)
+      }
+      .toList
+      .sortWith((a, b) => a.start < b.start)
+  }
+
+  /**
+   * extract line breaks
+   *
+   * @param text
+   *   any text
+   * @return
+   *   a string made of one `\n` per `\n` found in `text`
+   */
+  private[this] def extractLineBreaks(text: String): String = {
+    val builder = new mutable.StringBuilder
+    for (i <- 0 until text.length) {
+      if (text.charAt(i) == '\n') {
+        builder.append('\n')
+      }
+    }
+    builder.toString
+  }
+
+  /** True when the trimmed text starts with PARAM_PREFIX. */
+  private[this] def isSingleLineComment(text: String) =
+    text.trim.startsWith(PARAM_PREFIX)
+
+  /** True when the trimmed text starts with the block-comment opener and ends with its closer. */
+  private[this] def isMultipleLineComment(text: String) =
+    text.trim.startsWith("/*") && text.trim.endsWith("*/")
+
+  /**
+   * check single-line comment
+   *
+   * @param curChar
+   *   character at the current position
+   * @param nextChar
+   *   character right after it
+   * @return
+   *   true when a one- or two-character prefix from singleLineCommentPrefixList starts here
+   */
+  private[this] def isSingleLineComment(curChar: Char, nextChar: Char): Boolean = {
+    var flag = false
+    for (singleCommentPrefix <- singleLineCommentPrefixList) {
+      singleCommentPrefix.length match {
+        case 1 if curChar == singleCommentPrefix.charAt(0) => flag = true
+        case 2
+            if curChar == singleCommentPrefix.charAt(0) && nextChar == singleCommentPrefix.charAt(
+              1) =>
+          flag = true
+        case _ =>
+      }
+    }
+    flag
+  }
+
+}
diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/StreamEnvConfig.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/StreamEnvConfig.scala
new file mode 100644
index 0000000000..574945fa39
--- /dev/null
+++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/StreamEnvConfig.scala
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.streampark.flink.core + +/** Pairs the `args` array with a [[StreamEnvConfigFunction]] callback (presumably the job's main-method arguments -- confirm against callers). */ +class StreamEnvConfig(val args: Array[String], val conf: StreamEnvConfigFunction) + +/** Pairs the `args` array with both a [[StreamEnvConfigFunction]] and a [[TableEnvConfigFunction]] callback. */ +class StreamTableEnvConfig( + val args: Array[String], + val streamConfig: StreamEnvConfigFunction, + val tableConfig: TableEnvConfigFunction) + +/** Pairs the `args` array with a [[TableEnvConfigFunction]] callback. */ +class TableEnvConfig(val args: Array[String], val conf: TableEnvConfigFunction) diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/StreamTableContext.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/StreamTableContext.scala new file mode 100644 index 0000000000..41c4a48f7d --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/StreamTableContext.scala @@ -0,0 +1,317 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.streampark.flink.core + +import org.apache.streampark.common.util.Implicits.JavaList + +import org.apache.flink.api.common.typeinfo.TypeInformation +import org.apache.flink.api.java.tuple +import org.apache.flink.streaming.api.datastream.{DataStream => JavaDataStream} +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment +import org.apache.flink.table.api._ +import org.apache.flink.table.api.ModelDescriptor +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment +import org.apache.flink.table.catalog.CatalogDescriptor +import org.apache.flink.table.connector.ChangelogMode +import org.apache.flink.table.expressions.Expression +import org.apache.flink.table.functions.UserDefinedFunction +import org.apache.flink.table.module.ModuleEntry +import org.apache.flink.table.resource.ResourceUri +import org.apache.flink.table.types.AbstractDataType +import org.apache.flink.types.Row +import org.apache.flink.util.ParameterTool + +/** + * Flink 2.2 shim of the stream + table context: every override below forwards its arguments + * unchanged to the wrapped `StreamTableEnvironment`. + * + * The `toDataStream` and `toChangelogStream` overloads additionally set + * `isConvertedToDataStream = true` (a member not declared in this file) before delegating. + * + * @param parameter parameters handed on to [[FlinkStreamTableTraitV2]] + * @param streamEnv stream environment handed on to [[FlinkStreamTableTraitV2]] + * @param tableEnv table environment that receives every delegated call + */ +class StreamTableContext( + override val parameter: ParameterTool, + private val streamEnv: StreamExecutionEnvironment, + private val tableEnv: StreamTableEnvironment) + extends FlinkStreamTableTraitV2(parameter, streamEnv, tableEnv) { + + /** Builds the context from an already initialized (parameter, streamEnv, tableEnv) triple. */ + def this(args: (ParameterTool, StreamExecutionEnvironment, StreamTableEnvironment)) = + this(args._1, args._2, args._3) + + /** Builds the context from whatever `FlinkTableInitializerV2.initialize` returns for `args`. */ + def this(args: StreamTableEnvConfig) = + this(FlinkTableInitializerV2.initialize(args)) + + override def fromDataStream[T](dataStream: JavaDataStream[T], schema: Schema): Table = + tableEnv.fromDataStream[T](dataStream, schema) + + /** @deprecated old API */ + override def fromDataStream[T](dataStream: JavaDataStream[T], expressions: Expression*): Table = + tableEnv.fromDataStream(dataStream, expressions: _*) + + override def fromChangelogStream(dataStream: JavaDataStream[Row]): Table = + tableEnv.fromChangelogStream(dataStream) + + override def fromChangelogStream(dataStream: JavaDataStream[Row], schema: Schema): Table = +
tableEnv.fromChangelogStream(dataStream, schema) + + override def fromChangelogStream( + dataStream: JavaDataStream[Row], + schema: Schema, + changelogMode: ChangelogMode): Table = + tableEnv.fromChangelogStream(dataStream, schema, changelogMode) + + override def createTemporaryView[T]( + path: String, + dataStream: JavaDataStream[T], + schema: Schema): Unit = + tableEnv.createTemporaryView[T](path, dataStream, schema) + + /** @deprecated old API */ + @deprecated override def createTemporaryView[T]( + path: String, + dataStream: JavaDataStream[T], + expressions: Expression*): Unit = + tableEnv.createTemporaryView(path, dataStream, expressions: _*) + + override def toDataStream(table: Table): JavaDataStream[Row] = { + isConvertedToDataStream = true + tableEnv.toDataStream(table) + } + + override def toDataStream[T](table: Table, targetClass: Class[T]): JavaDataStream[T] = { + isConvertedToDataStream = true + tableEnv.toDataStream[T](table, targetClass) + } + + override def toDataStream[T](table: Table, targetDataType: AbstractDataType[_]): JavaDataStream[T] = { + isConvertedToDataStream = true + tableEnv.toDataStream[T](table, targetDataType) + } + + override def toChangelogStream(table: Table): JavaDataStream[Row] = { + isConvertedToDataStream = true + tableEnv.toChangelogStream(table) + } + + override def toChangelogStream(table: Table, targetSchema: Schema): JavaDataStream[Row] = { + isConvertedToDataStream = true + tableEnv.toChangelogStream(table, targetSchema) + } + + override def toChangelogStream( + table: Table, + targetSchema: Schema, + changelogMode: ChangelogMode): JavaDataStream[Row] = { + isConvertedToDataStream = true + tableEnv.toChangelogStream(table, targetSchema, changelogMode) + } + + override def createStatementSet(): org.apache.flink.table.api.bridge.java.StreamStatementSet = + tableEnv.createStatementSet() + + override def useModules(strings: String*): Unit = + tableEnv.useModules(strings: _*) + + override def createTemporaryTable(path: 
String, descriptor: TableDescriptor): Unit = + tableEnv.createTemporaryTable(path, descriptor) + + override def createTable(path: String, descriptor: TableDescriptor): Unit = + tableEnv.createTable(path, descriptor) + + override def from(descriptor: TableDescriptor): Table = + tableEnv.from(descriptor) + + override def listFullModules(): Array[ModuleEntry] = + tableEnv.listFullModules() + + /** @since 1.15 */ + override def listTables(s: String, s1: String): Array[String] = + tableEnv.listTables(s, s1) + + /** @since 1.15 */ + override def loadPlan(planReference: PlanReference): CompiledPlan = + tableEnv.loadPlan(planReference) + + /** @since 1.15 */ + override def compilePlanSql(s: String): CompiledPlan = + tableEnv.compilePlanSql(s) + + /** @since 1.17 */ + override def createFunction( + path: String, + className: String, + resourceUris: JavaList[ResourceUri]): Unit = + tableEnv.createFunction(path, className, resourceUris) + + /** @since 1.17 */ + override def createFunction( + path: String, + className: String, + resourceUris: JavaList[ResourceUri], + ignoreIfExists: Boolean): Unit = + tableEnv.createFunction(path, className, resourceUris, ignoreIfExists) + + /** @since 1.17 */ + override def createTemporaryFunction( + path: String, + className: String, + resourceUris: JavaList[ResourceUri]): Unit = + tableEnv.createTemporaryFunction(path, className, resourceUris) + + /** @since 1.17 */ + override def createTemporarySystemFunction( + name: String, + className: String, + resourceUris: JavaList[ResourceUri]): Unit = + tableEnv.createTemporarySystemFunction(name, className, resourceUris) + + /** @since 1.17 */ + override def explainSql( + statement: String, + format: ExplainFormat, + extraDetails: ExplainDetail*): String = + tableEnv.explainSql(statement, format, extraDetails: _*) + + /** @since 1.18 */ + override def createCatalog(catalog: String, catalogDescriptor: CatalogDescriptor): Unit = { + tableEnv.createCatalog(catalog, catalogDescriptor) + } + + // NOTE(review): unlike toDataStream / toChangelogStream above, the four toAppendStream / + // toRetractStream overrides below do not set isConvertedToDataStream -- confirm this is intended. + /** 
@deprecated old API */ + @deprecated override def toAppendStream[T]( + table: Table, + typeInformation: TypeInformation[T]): JavaDataStream[T] = + tableEnv.toAppendStream(table, typeInformation) + + /** @deprecated old API */ + @deprecated override def toRetractStream[T]( + table: Table, + typeInformation: TypeInformation[T]): JavaDataStream[tuple.Tuple2[java.lang.Boolean, T]] = + tableEnv.toRetractStream(table, typeInformation) + + /** since Flink 2.0 */ + override def toAppendStream[T](table: Table, clazz: Class[T]): JavaDataStream[T] = + tableEnv.toAppendStream(table, clazz) + + /** since Flink 2.0 */ + override def toRetractStream[T](table: Table, clazz: Class[T]): JavaDataStream[tuple.Tuple2[java.lang.Boolean, T]] = + tableEnv.toRetractStream(table, clazz) + + /** since Flink 2.0 */ + override def createTable(path: String, descriptor: TableDescriptor, ignoreIfExists: Boolean): Boolean = + tableEnv.createTable(path, descriptor, ignoreIfExists) + + /** since Flink 2.0 */ + override def createTemporaryTable( + path: String, + descriptor: TableDescriptor, + ignoreIfExists: Boolean): Unit = + tableEnv.createTemporaryTable(path, descriptor, ignoreIfExists) + + /** since Flink 2.0 */ + override def createView(path: String, view: Table, ignoreIfExists: Boolean): Boolean = + tableEnv.createView(path, view, ignoreIfExists) + + /** since Flink 2.0 */ + override def createView(path: String, view: Table): Unit = + tableEnv.createView(path, view) + + /** since Flink 2.0 */ + override def dropTable(path: String, ignoreIfNotExists: Boolean): Boolean = + tableEnv.dropTable(path, ignoreIfNotExists) + + /** since Flink 2.0 */ + override def dropTable(path: String): Boolean = + tableEnv.dropTable(path) + + /** since Flink 2.0 */ + override def dropView(path: String, ignoreIfNotExists: Boolean): Boolean = + tableEnv.dropView(path, ignoreIfNotExists) + + /** since Flink 2.0 */ + override def dropView(path: String): Boolean = + tableEnv.dropView(path) + + /** since Flink 2.1 */ + 
override def createModel(path: String, descriptor: ModelDescriptor, ignoreIfExists: Boolean): Unit = + tableEnv.createModel(path, descriptor, ignoreIfExists) + + /** since Flink 2.1 */ + override def createModel(path: String, descriptor: ModelDescriptor): Unit = + tableEnv.createModel(path, descriptor) + + /** since Flink 2.1 */ + override def createTemporaryModel(path: String, descriptor: ModelDescriptor, ignoreIfExists: Boolean): Unit = + tableEnv.createTemporaryModel(path, descriptor, ignoreIfExists) + + /** since Flink 2.1 */ + override def createTemporaryModel(path: String, descriptor: ModelDescriptor): Unit = + tableEnv.createTemporaryModel(path, descriptor) + + /** since Flink 2.1 */ + override def dropModel(path: String, ignoreIfNotExists: Boolean): Boolean = + tableEnv.dropModel(path, ignoreIfNotExists) + + /** since Flink 2.1 */ + override def dropModel(path: String): Boolean = + tableEnv.dropModel(path) + + /** since Flink 2.1 */ + override def dropTemporaryModel(path: String): Boolean = + tableEnv.dropTemporaryModel(path) + + /** since Flink 2.1 */ + override def fromCall(functionClass: Class[_ <: UserDefinedFunction], arguments: Object*): Table = + tableEnv.fromCall(functionClass, arguments: _*) + + /** since Flink 2.1 */ + override def fromCall(functionName: String, arguments: Object*): Table = + tableEnv.fromCall(functionName, arguments: _*) + + /** since Flink 2.1 */ + override def listModels(): Array[String] = + tableEnv.listModels() + + /** since Flink 2.1 */ + override def listTemporaryModels(): Array[String] = + tableEnv.listTemporaryModels() + + /** since Flink 2.2 */ + override def createFunction( + path: String, + descriptor: FunctionDescriptor, + ignoreIfExists: Boolean): Unit = + tableEnv.createFunction(path, descriptor, ignoreIfExists) + + /** since Flink 2.2 */ + override def createFunction(path: String, descriptor: FunctionDescriptor): Unit = + tableEnv.createFunction(path, descriptor) + + /** since Flink 2.2 */ + override def 
createTemporaryFunction(path: String, descriptor: FunctionDescriptor): Unit = + tableEnv.createTemporaryFunction(path, descriptor) + + /** since Flink 2.2 */ + override def createTemporarySystemFunction(name: String, descriptor: FunctionDescriptor): Unit = + tableEnv.createTemporarySystemFunction(name, descriptor) + + /** since Flink 2.2 */ + override def fromModel(descriptor: ModelDescriptor): Model = + tableEnv.fromModel(descriptor) + + /** since Flink 2.2 */ + override def fromModel(path: String): Model = + tableEnv.fromModel(path) + + /** since Flink 2.2 */ + override def listMaterializedTables(): Array[String] = + tableEnv.listMaterializedTables() + +} diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/TableContext.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/TableContext.scala new file mode 100644 index 0000000000..fbbdc8f78a --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/TableContext.scala @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.streampark.flink.core + +import org.apache.streampark.common.util.Implicits.JavaList + +import org.apache.flink.table.api._ +import org.apache.flink.table.api.ModelDescriptor +import org.apache.flink.table.catalog.CatalogDescriptor +import org.apache.flink.table.functions.UserDefinedFunction +import org.apache.flink.table.module.ModuleEntry +import org.apache.flink.table.resource.ResourceUri +import org.apache.flink.util.ParameterTool + +/** + * Flink 2.2 shim of the table-only context: every override below forwards its arguments + * unchanged to the wrapped `TableEnvironment`. + * + * @param parameter parameters handed on to [[FlinkTableTrait]] + * @param tableEnv table environment that receives every delegated call + */ +class TableContext(override val parameter: ParameterTool, private val tableEnv: TableEnvironment) + extends FlinkTableTrait(parameter, tableEnv) { + + /** Builds the context from an already initialized (parameter, tableEnv) pair. */ + def this(args: (ParameterTool, TableEnvironment)) = this(args._1, args._2) + + /** Builds the context from whatever `FlinkTableInitializerV2.initialize` returns for `args`. */ + def this(args: TableEnvConfig) = this(FlinkTableInitializerV2.initialize(args)) + + override def useModules(strings: String*): Unit = + tableEnv.useModules(strings: _*) + + override def createTemporaryTable(path: String, descriptor: TableDescriptor): Unit = { + tableEnv.createTemporaryTable(path, descriptor) + } + + override def createTable(path: String, descriptor: TableDescriptor): Unit = { + tableEnv.createTable(path, descriptor) + } + + override def from(tableDescriptor: TableDescriptor): Table = { + tableEnv.from(tableDescriptor) + } + + override def listFullModules(): Array[ModuleEntry] = + tableEnv.listFullModules() + + /** @since 1.15 */ + override def listTables(catalogName: String, databaseName: String): Array[String] = + tableEnv.listTables(catalogName, databaseName) + + /** @since 1.15 */ + override def loadPlan(planReference: PlanReference): CompiledPlan = + tableEnv.loadPlan(planReference) + + /** @since 1.15 */ + override def compilePlanSql(stmt: String): CompiledPlan = + tableEnv.compilePlanSql(stmt) + + /** @since 1.17 */ + override def createFunction( + path: String, + className: String, + resourceUris: JavaList[ResourceUri]): Unit = + tableEnv.createFunction(path, className, resourceUris) + + /** @since 1.17 */ + override def createFunction( + path: String, + 
className: String, + resourceUris: JavaList[ResourceUri], + ignoreIfExists: Boolean): Unit = + tableEnv.createFunction(path, className, resourceUris, ignoreIfExists) + + /** @since 1.17 */ + override def createTemporaryFunction( + path: String, + className: String, + resourceUris: JavaList[ResourceUri]): Unit = + tableEnv.createTemporaryFunction(path, className, resourceUris) + + /** @since 1.17 */ + override def createTemporarySystemFunction( + name: String, + className: String, + resourceUris: JavaList[ResourceUri]): Unit = + tableEnv.createTemporarySystemFunction(name, className, resourceUris) + + /** @since 1.17 */ + override def explainSql( + statement: String, + format: ExplainFormat, + extraDetails: ExplainDetail*): String = + tableEnv.explainSql(statement, format, extraDetails: _*) + + /** @since 1.18 */ + override def createCatalog(catalog: String, catalogDescriptor: CatalogDescriptor): Unit = { + tableEnv.createCatalog(catalog, catalogDescriptor) + } + + /** since Flink 2.0 */ + override def createTable( + path: String, + descriptor: TableDescriptor, + ignoreIfExists: Boolean): Boolean = + tableEnv.createTable(path, descriptor, ignoreIfExists) + + /** since Flink 2.0 */ + override def createTemporaryTable( + path: String, + descriptor: TableDescriptor, + ignoreIfExists: Boolean): Unit = + tableEnv.createTemporaryTable(path, descriptor, ignoreIfExists) + + /** since Flink 2.0 */ + override def createView(path: String, view: Table, ignoreIfExists: Boolean): Boolean = + tableEnv.createView(path, view, ignoreIfExists) + + /** since Flink 2.0 */ + override def createView(path: String, view: Table): Unit = + tableEnv.createView(path, view) + + /** since Flink 2.0 */ + override def dropTable(path: String, ignoreIfNotExists: Boolean): Boolean = + tableEnv.dropTable(path, ignoreIfNotExists) + + /** since Flink 2.0 */ + override def dropTable(path: String): Boolean = + tableEnv.dropTable(path) + + /** since Flink 2.0 */ + override def dropView(path: String, 
ignoreIfNotExists: Boolean): Boolean = + tableEnv.dropView(path, ignoreIfNotExists) + + /** since Flink 2.0 */ + override def dropView(path: String): Boolean = + tableEnv.dropView(path) + + /** since Flink 2.1 */ + override def createModel(path: String, descriptor: ModelDescriptor, ignoreIfExists: Boolean): Unit = + tableEnv.createModel(path, descriptor, ignoreIfExists) + + /** since Flink 2.1 */ + override def createModel(path: String, descriptor: ModelDescriptor): Unit = + tableEnv.createModel(path, descriptor) + + /** since Flink 2.1 */ + override def createTemporaryModel(path: String, descriptor: ModelDescriptor, ignoreIfExists: Boolean): Unit = + tableEnv.createTemporaryModel(path, descriptor, ignoreIfExists) + + /** since Flink 2.1 */ + override def createTemporaryModel(path: String, descriptor: ModelDescriptor): Unit = + tableEnv.createTemporaryModel(path, descriptor) + + /** since Flink 2.1 */ + override def dropModel(path: String, ignoreIfNotExists: Boolean): Boolean = + tableEnv.dropModel(path, ignoreIfNotExists) + + /** since Flink 2.1 */ + override def dropModel(path: String): Boolean = + tableEnv.dropModel(path) + + /** since Flink 2.1 */ + override def dropTemporaryModel(path: String): Boolean = + tableEnv.dropTemporaryModel(path) + + /** since Flink 2.1 */ + override def fromCall(functionClass: Class[_ <: UserDefinedFunction], arguments: Object*): Table = + tableEnv.fromCall(functionClass, arguments: _*) + + /** since Flink 2.1 */ + override def fromCall(functionName: String, arguments: Object*): Table = + tableEnv.fromCall(functionName, arguments: _*) + + /** since Flink 2.1 */ + override def listModels(): Array[String] = + tableEnv.listModels() + + /** since Flink 2.1 */ + override def listTemporaryModels(): Array[String] = + tableEnv.listTemporaryModels() + + /** since Flink 2.2 */ + override def createFunction( + path: String, + descriptor: FunctionDescriptor, + ignoreIfExists: Boolean): Unit = + tableEnv.createFunction(path, descriptor, 
ignoreIfExists) + + /** since Flink 2.2 */ + override def createFunction(path: String, descriptor: FunctionDescriptor): Unit = + tableEnv.createFunction(path, descriptor) + + /** since Flink 2.2 */ + override def createTemporaryFunction(path: String, descriptor: FunctionDescriptor): Unit = + tableEnv.createTemporaryFunction(path, descriptor) + + /** since Flink 2.2 */ + override def createTemporarySystemFunction(name: String, descriptor: FunctionDescriptor): Unit = + tableEnv.createTemporarySystemFunction(name, descriptor) + + /** since Flink 2.2 */ + override def fromModel(descriptor: ModelDescriptor): Model = + tableEnv.fromModel(descriptor) + + /** since Flink 2.2 */ + override def fromModel(path: String): Model = + tableEnv.fromModel(path) + + /** since Flink 2.2 */ + override def listMaterializedTables(): Array[String] = + tableEnv.listMaterializedTables() + +} diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/TableExt.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/TableExt.scala new file mode 100644 index 0000000000..3c4ab76c95 --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/TableExt.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.flink.core + +import org.apache.flink.streaming.api.datastream.DataStream +import org.apache.flink.table.api.{Table => FlinkTable} +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment +import org.apache.flink.types.Row + +/** Symbolic-operator wrappers around a Flink `Table` (how they are brought into scope is not visible in this file). */ +object TableExt { + + /** Wraps a Flink table; `->` forwards to `table.as(field, fields: _*)`. */ + class Table(val table: FlinkTable) { + def ->(field: String, fields: String*): FlinkTable = + table.as(field, fields: _*) + } + + /** Wraps a Flink table with its environment; `\\` forwards to `streamTableEnv.toDataStream(table)`. */ + class TableConversions( + table: FlinkTable, + streamTableEnv: StreamTableEnvironment) { + + def \\ : DataStream[Row] = streamTableEnv.toDataStream(table) + } + +} diff --git a/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/conf/FlinkConfiguration.scala b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/conf/FlinkConfiguration.scala new file mode 100644 index 0000000000..b419eb8337 --- /dev/null +++ b/streampark-flink/streampark-flink-shims/streampark-flink-shims_flink-2.2/src/main/scala/org/apache/streampark/flink/core/conf/FlinkConfiguration.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.flink.core.conf + +import org.apache.flink.configuration.Configuration +import org.apache.flink.util.ParameterTool + +case class FlinkConfiguration( + parameter: ParameterTool, + envConfig: Configuration, + tableConfig: Configuration) From 8f9967190baa6879eed5fe461ddc6e63ecf3ffe3 Mon Sep 17 00:00:00 2001 From: shangeyao Date: Wed, 24 Jun 2026 23:33:11 +0800 Subject: [PATCH 2/5] fix: Flink 2.x version detection fails on Java 8 - checkVersion: support Flink 2.x (2.0, 2.1, 2.2) versions - version detection: fallback to parsing flink-dist JAR filename when CliFrontend fails (Flink 2.x requires Java 11+, but server may run Java 8) - EnvInitializer: restrict the shims JAR pattern's 2.x branch to 2.0-2.2, keeping every literal dot escaped Fixes: java.lang.UnsupportedClassVersionError when adding Flink 2.2 --- .../streampark/common/conf/FlinkVersion.scala | 26 ++++++++++++++----- .../console/core/runner/EnvInitializer.java | 2 +- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/streampark-common/src/main/scala/org/apache/streampark/common/conf/FlinkVersion.scala b/streampark-common/src/main/scala/org/apache/streampark/common/conf/FlinkVersion.scala index 722b085e44..278a587b4a 100644 --- a/streampark-common/src/main/scala/org/apache/streampark/common/conf/FlinkVersion.scala +++ b/streampark-common/src/main/scala/org/apache/streampark/common/conf/FlinkVersion.scala @@ -34,19 +34,23 @@ class FlinkVersion(val flinkHome: String) extends Serializable with Logger { private[this] lazy val FLINK_VERSION_PATTERN = Pattern.compile("^Version: (.*), Commit ID: 
(.*)$") - private[this] lazy val FLINK_SCALA_VERSION_PATTERN = - Pattern.compile("^flink-dist_(\\d\\.\\d+).*.jar$") + private[this] lazy val FLINK_DIST_VERSION_PATTERN = Pattern.compile( + "^flink-dist_(\\d+\\.\\d+).*\\.jar$|^flink-dist-(\\d+\\.\\d+\\.\\d+)\\.jar$") private[this] lazy val APACHE_FLINK_VERSION_PATTERN = Pattern.compile("(^\\d+\\.\\d+\\.\\d+)") private[this] lazy val OTHER_FLINK_VERSION_PATTERN = Pattern.compile("(\\d+\\.\\d+)(-*)") lazy val scalaVersion: String = { - val matcher = FLINK_SCALA_VERSION_PATTERN.matcher(flinkDistJar.getName) + val matcher = FLINK_DIST_VERSION_PATTERN.matcher(flinkDistJar.getName) if (matcher.matches()) { - matcher.group(1) + if (matcher.group(1) != null) { + matcher.group(1) + } else { + // flink 2.x doesn't have scala version in jar name, default to 2.12 + "2.12" + } } else { - // flink 1.15 + on support scala 2.12 "2.12" } } @@ -93,6 +97,14 @@ class FlinkVersion(val flinkHome: String) extends Serializable with Logger { }) logInfo(buffer.toString()) + if (flinkVersion == null) { + // Fallback: parse version from flink-dist JAR filename (e.g. flink-dist-2.2.1.jar) + val distMatcher = FLINK_DIST_VERSION_PATTERN.matcher(flinkDistJar.getName) + if (distMatcher.matches() && distMatcher.group(2) != null) { + flinkVersion = distMatcher.group(2) + logInfo(s"Flink version parsed from dist jar name: $flinkVersion") + } + } if (flinkVersion == null) { throw new IllegalStateException(s"[StreamPark] parse flink version failed. 
$buffer") } @@ -127,7 +139,7 @@ class FlinkVersion(val flinkHome: String) extends Serializable with Logger { def checkVersion(throwException: Boolean = true): Boolean = { version.split("\\.").map(_.trim.toInt) match { case Array(1, v, _) if v >= 12 && v <= 20 => true - case Array(2, v, _) if v >= 0 => true + case Array(2, v, _) if v >= 0 && v <= 2 => true case _ => if (throwException) { throw new UnsupportedOperationException(s"Unsupported flink version: $version") @@ -140,7 +152,7 @@ class FlinkVersion(val flinkHome: String) extends Serializable with Logger { def checkVersion(sinceVersion: Int): Boolean = { version.split("\\.").map(_.trim.toInt) match { case Array(1, v, _) if v >= sinceVersion => true - case Array(2, v, _) if v >= sinceVersion => true + case Array(2, _, _) => true case _ => false } } diff --git a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/runner/EnvInitializer.java b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/runner/EnvInitializer.java index 6ae9f86e26..ad7e84bc06 100644 --- a/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/runner/EnvInitializer.java +++ b/streampark-console/streampark-console-service/src/main/java/org/apache/streampark/console/core/runner/EnvInitializer.java @@ -71,7 +71,7 @@ public class EnvInitializer implements ApplicationRunner { private final FileFilter fileFilter = p -> !".gitkeep".equals(p.getName()); private static final Pattern PATTERN_FLINK_SHIMS_JAR = Pattern.compile( - "^streampark-flink-shims_flink-(1\\.1[2-9]|1\\.2[0-9]|2\\.[0-9])_(2\\.12)-(.*).jar$", + "^streampark-flink-shims_flink-(1\\.1[2-9]|1\\.2[0-9]|2\\.[0-2])_(2\\.12)-(.*)\\.jar$", Pattern.CASE_INSENSITIVE | Pattern.DOTALL); @SneakyThrows From 2430c17ef2d3956b50f8d171ab55526fe94a1cb3 Mon Sep 17 00:00:00 2001 From: shangeyao Date: Thu, 25 Jun 2026 00:22:13 +0800 Subject: [PATCH 3/5] [Build] Skip Flink 2.x shims on
JDK 8 builds Flink 2.x dependencies require JDK 11+ to compile. Gate the 2.0/2.1/2.2 shims modules and console packaging behind a JDK 11+ Maven profile so Java 8 CI matrices keep passing. Generated-by: Cursor Co-authored-by: Cursor --- .../streampark-console-service/pom.xml | 68 +++++++++++++------ .../streampark-flink-shims/pom.xml | 18 ++++- 2 files changed, 62 insertions(+), 24 deletions(-) diff --git a/streampark-console/streampark-console-service/pom.xml b/streampark-console/streampark-console-service/pom.xml index 52f1347574..a03b66008d 100644 --- a/streampark-console/streampark-console-service/pom.xml +++ b/streampark-console/streampark-console-service/pom.xml @@ -587,27 +587,6 @@ ${project.version} ${project.build.directory}/shims
- - - org.apache.streampark - streampark-flink-shims_flink-2.0_${scala.binary.version} - ${project.version} - ${project.build.directory}/shims - - - - org.apache.streampark - streampark-flink-shims_flink-2.1_${scala.binary.version} - ${project.version} - ${project.build.directory}/shims - - - - org.apache.streampark - streampark-flink-shims_flink-2.2_${scala.binary.version} - ${project.version} - ${project.build.directory}/shims - org.apache.streampark @@ -803,6 +782,53 @@ + + + flink-2.x-shims + + [11,) + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-flink-2.x-shims + + copy + + package + + + + org.apache.streampark + streampark-flink-shims_flink-2.0_${scala.binary.version} + ${project.version} + ${project.build.directory}/shims + + + org.apache.streampark + streampark-flink-shims_flink-2.1_${scala.binary.version} + ${project.version} + ${project.build.directory}/shims + + + org.apache.streampark + streampark-flink-shims_flink-2.2_${scala.binary.version} + ${project.version} + ${project.build.directory}/shims + + + + + + + + + + diff --git a/streampark-flink/streampark-flink-shims/pom.xml b/streampark-flink/streampark-flink-shims/pom.xml index a951978853..09b14e509e 100644 --- a/streampark-flink/streampark-flink-shims/pom.xml +++ b/streampark-flink/streampark-flink-shims/pom.xml @@ -41,9 +41,21 @@ streampark-flink-shims_flink-1.18 streampark-flink-shims_flink-1.19 streampark-flink-shims_flink-1.20 - streampark-flink-shims_flink-2.0 - streampark-flink-shims_flink-2.1 - streampark-flink-shims_flink-2.2 + + + + flink-2.x-shims + + [11,) + + + streampark-flink-shims_flink-2.0 + streampark-flink-shims_flink-2.1 + streampark-flink-shims_flink-2.2 + + + + From 1b95c66ab0801962d741460a2cdec11b8943a447 Mon Sep 17 00:00:00 2001 From: shangeyao Date: Thu, 25 Jun 2026 00:30:13 +0800 Subject: [PATCH 4/5] [Docs] Add Flink 2.x JDK and build guide Document JDK 11+ requirements for Flink 2.x runtime, shims packaging on JDK 11+ builds, and manual 
configuration scenarios. Generated-by: Cursor Co-authored-by: Cursor --- .../src/main/assembly/conf/streampark-env.sh | 3 + .../main/assembly/script/FLINK_JDK_GUIDE.md | 98 +++++++++++++++++++ .../assembly/script/FLINK_JDK_GUIDE.zh.md | 94 ++++++++++++++++++ .../src/main/assembly/script/README.md | 7 ++ .../src/locales/lang/en/setting/flinkHome.ts | 3 +- .../locales/lang/zh-CN/setting/flinkHome.ts | 3 +- 6 files changed, 206 insertions(+), 2 deletions(-) create mode 100644 streampark-console/streampark-console-service/src/main/assembly/script/FLINK_JDK_GUIDE.md create mode 100644 streampark-console/streampark-console-service/src/main/assembly/script/FLINK_JDK_GUIDE.zh.md diff --git a/streampark-console/streampark-console-service/src/main/assembly/conf/streampark-env.sh b/streampark-console/streampark-console-service/src/main/assembly/conf/streampark-env.sh index 3fa607db0d..bf766aaa11 100644 --- a/streampark-console/streampark-console-service/src/main/assembly/conf/streampark-env.sh +++ b/streampark-console/streampark-console-service/src/main/assembly/conf/streampark-env.sh @@ -38,3 +38,6 @@ # The java implementation to use. By default, this environment # variable is REQUIRED on ALL platforms except OS X! # export JAVA_HOME= + +# Flink 2.x shims are packaged only when StreamPark is built with JDK 11+. +# StreamPark Console can still run on JDK 8. See script/FLINK_JDK_GUIDE.md for details. diff --git a/streampark-console/streampark-console-service/src/main/assembly/script/FLINK_JDK_GUIDE.md b/streampark-console/streampark-console-service/src/main/assembly/script/FLINK_JDK_GUIDE.md new file mode 100644 index 0000000000..d6510c5076 --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/assembly/script/FLINK_JDK_GUIDE.md @@ -0,0 +1,98 @@ +# Flink JDK and Build Guide + +StreamPark supports multiple Flink major versions through version-specific shims modules. 
+Flink 2.x introduces a higher JDK requirement for both **building** and **running** Flink workloads. + +## JDK Requirements by Flink Version + +| Flink Version | Minimum JDK (Flink runtime) | StreamPark Console JDK | +|---------------|----------------------------|-------------------------| +| Flink 1.12–1.20 | JDK 8 | JDK 8 / JDK 11 | +| Flink 2.0–2.2 | JDK 11 | JDK 8 / JDK 11 | + +> **Note:** StreamPark Console can continue to run on JDK 8. Using Flink 2.x does **not** +> require upgrading the StreamPark service JDK. + +## What StreamPark Does Automatically + +1. **Flink environment registration** + - Parses Flink version from `$FLINK_HOME/lib/flink-dist*.jar` or falls back to + `flink-dist --version`. + - Registration does not require StreamPark to be built with Flink 2.x shims. +2. **Flink 2.x shims packaging** + - Flink 2.0/2.1/2.2 shims are packaged only when StreamPark is **built with JDK 11+** + (Maven profile `flink-2.x-shims`). + +## Build Output vs JDK Version + +| Build JDK | Flink 1.12–1.20 shims | Flink 2.0–2.2 shims | +|-----------|------------------------|----------------------| +| JDK 8 | Included | **Not included** | +| JDK 11+ | Included | Included | + +If you build StreamPark with JDK 8 (for example via `./build.sh` on a Java 8 host), the +distribution will **not** contain Flink 2.x shims JARs under `lib/`. Flink 2.x management +and job submission will not work until you rebuild with JDK 11+. + +### Recommended build command for full Flink 2.x support + +```bash +export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64 +./build.sh +``` + +Or with Maven directly: + +```bash +export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64 +./mvnw -Pshaded,webapp,dist -DskipTests clean install +``` + +The `flink-2.x-shims` profile activates automatically when the build JDK is 11 or higher. + +## When Manual JDK Configuration Is Required + +### 1. 
Building a release with Flink 2.x support + +Required when the build host defaults to JDK 8 but you need Flink 2.x shims in the +distribution package. + +Set `JAVA_HOME` to JDK 11+ before running `./build.sh` or Maven (see above). + +### 2. Running Flink 2.x jobs + +Required when Flink 2.x clusters or client commands need a compatible JDK at runtime. + +Configure JDK in the Flink installation used by StreamPark: + +```bash +# $FLINK_HOME/conf/flink-env.sh +export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64 +``` + +Then update or re-register the Flink environment in StreamPark if needed. + +## Verification + +Verify Flink CLI with the same `FLINK_HOME`: + +```bash +export FLINK_HOME=/path/to/flink +source $FLINK_HOME/conf/flink-env.sh +$FLINK_HOME/bin/flink --version +``` + +Verify Flink 2.x shims are present in a StreamPark build: + +```bash +ls $STREAMPARK_HOME/lib/streampark-flink-shims_flink-2.* +``` + +If this command returns no files, the distribution was built with JDK 8 and must be rebuilt +with JDK 11+ for Flink 2.x support. 
+ +## Related Files + +- StreamPark service JDK: `$STREAMPARK_HOME/conf/streampark-env.sh` +- Flink installation JDK: `$FLINK_HOME/conf/flink-env.sh` +- Maven profile: `flink-2.x-shims` in `streampark-flink-shims/pom.xml` diff --git a/streampark-console/streampark-console-service/src/main/assembly/script/FLINK_JDK_GUIDE.zh.md b/streampark-console/streampark-console-service/src/main/assembly/script/FLINK_JDK_GUIDE.zh.md new file mode 100644 index 0000000000..3242984922 --- /dev/null +++ b/streampark-console/streampark-console-service/src/main/assembly/script/FLINK_JDK_GUIDE.zh.md @@ -0,0 +1,94 @@ +# Flink JDK 与构建说明 + +StreamPark 通过版本 shims 模块支持多个 Flink 大版本。Flink 2.x 对 **构建** 和 **运行** +Flink 作业提出了更高的 JDK 要求。 + +## Flink 版本与 JDK 要求 + +| Flink 版本 | 最低 JDK(Flink 运行时) | StreamPark Console JDK | +|-----------------|--------------------------|-------------------------| +| Flink 1.12–1.20 | JDK 8 | JDK 8 / JDK 11 | +| Flink 2.0–2.2 | JDK 11 | JDK 8 / JDK 11 | + +> **说明:** StreamPark Console 可以继续运行在 JDK 8 上。使用 Flink 2.x **不需要**升级 +> StreamPark 服务的 JDK。 + +## StreamPark 自动处理的内容 + +1. **注册 Flink 环境** + - 从 `$FLINK_HOME/lib/flink-dist*.jar` 解析版本,或 fallback 到 + `flink-dist --version`。 + - 注册 Flink 环境不要求 StreamPark 发行包中已包含 Flink 2.x shims。 +2. 
**Flink 2.x shims 打包** + - 仅当 StreamPark 使用 **JDK 11+** 构建时,才会打包 Flink 2.0/2.1/2.2 shims + (Maven profile:`flink-2.x-shims`)。 + +## 构建 JDK 与产物差异 + +| 构建 JDK | Flink 1.12–1.20 shims | Flink 2.0–2.2 shims | +|----------|------------------------|----------------------| +| JDK 8 | 包含 | **不包含** | +| JDK 11+ | 包含 | 包含 | + +若在 JDK 8 环境下执行 `./build.sh` 构建,发行包的 `lib/` 目录中 **不会** 包含 Flink 2.x +shims JAR。此时无法管理和提交 Flink 2.x 作业,需使用 JDK 11+ 重新构建。 + +### 推荐:完整支持 Flink 2.x 的构建方式 + +```bash +export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64 +./build.sh +``` + +或直接使用 Maven: + +```bash +export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64 +./mvnw -Pshaded,webapp,dist -DskipTests clean install +``` + +当构建 JDK 为 11 及以上时,`flink-2.x-shims` profile 会自动激活。 + +## 何时需要手动配置 JDK + +### 1. 构建包含 Flink 2.x 支持的发行包 + +当构建主机默认使用 JDK 8,但需要在发行包中包含 Flink 2.x shims 时,必须在执行 +`./build.sh` 或 Maven 前将 `JAVA_HOME` 设置为 JDK 11+(见上文)。 + +### 2. 运行 Flink 2.x 作业 + +当 Flink 2.x 集群或客户端命令在运行时需要兼容的 JDK 时,请在 StreamPark 使用的 Flink +安装目录中配置: + +```bash +# $FLINK_HOME/conf/flink-env.sh +export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64 +``` + +如有需要,在 StreamPark 中更新或重新注册对应 Flink 环境。 + +## 验证方式 + +使用相同的 `FLINK_HOME` 验证 Flink CLI: + +```bash +export FLINK_HOME=/path/to/flink +source $FLINK_HOME/conf/flink-env.sh +$FLINK_HOME/bin/flink --version +``` + +验证 StreamPark 构建产物是否包含 Flink 2.x shims: + +```bash +ls $STREAMPARK_HOME/lib/streampark-flink-shims_flink-2.* +``` + +若上述命令无输出,说明该发行包由 JDK 8 构建,需使用 JDK 11+ 重新构建才能获得 Flink 2.x +支持。 + +## 相关配置文件 + +- StreamPark 服务 JDK:`$STREAMPARK_HOME/conf/streampark-env.sh` +- Flink 安装 JDK:`$FLINK_HOME/conf/flink-env.sh` +- Maven profile:`streampark-flink-shims/pom.xml` 中的 `flink-2.x-shims` diff --git a/streampark-console/streampark-console-service/src/main/assembly/script/README.md b/streampark-console/streampark-console-service/src/main/assembly/script/README.md index 0191fe1411..e40e20e02f 100644 --- a/streampark-console/streampark-console-service/src/main/assembly/script/README.md +++ 
b/streampark-console/streampark-console-service/src/main/assembly/script/README.md @@ -21,3 +21,10 @@ For example: - `1.2.3.sql` needs to be executed when StreamPark is upgraded from `1.2.2` to `1.2.3`. - `1.2.3.sql` and `2.0.0.sql` needs to be executed when StreamPark is upgraded from `1.2.2` to `2.0.0`. + +## Flink JDK and Build Guide + +If you use Flink 2.x or need to build/package Flink 2.x shims, see: + +- English: [FLINK_JDK_GUIDE.md](./FLINK_JDK_GUIDE.md) +- 中文: [FLINK_JDK_GUIDE.zh.md](./FLINK_JDK_GUIDE.zh.md) diff --git a/streampark-console/streampark-console-webapp/src/locales/lang/en/setting/flinkHome.ts b/streampark-console/streampark-console-webapp/src/locales/lang/en/setting/flinkHome.ts index 75543d1d67..b4a6036b86 100644 --- a/streampark-console/streampark-console-webapp/src/locales/lang/en/setting/flinkHome.ts +++ b/streampark-console/streampark-console-webapp/src/locales/lang/en/setting/flinkHome.ts @@ -32,7 +32,8 @@ export default { flinkNameTips: 'The flink name, e.g: flink-1.12', flinkNameIsRepeated: 'Flink name already exists', flinkNameIsRequired: 'Flink name is required', - flinkHomeTips: 'The absolute path of the FLINK_HOME', + flinkHomeTips: + 'The absolute path of FLINK_HOME. Flink 2.x requires JDK 11+ at runtime; Flink 2.x shims are packaged only when StreamPark is built with JDK 11+. 
See script/FLINK_JDK_GUIDE.md in the installation package.', flinkHomeIsRequired: 'Flink home is required', flinkHomePathIsInvalid: 'Flink home path is invalid', flinkDistNotFound: 'Can not find flink-dist in FLINK_HOME/lib', diff --git a/streampark-console/streampark-console-webapp/src/locales/lang/zh-CN/setting/flinkHome.ts b/streampark-console/streampark-console-webapp/src/locales/lang/zh-CN/setting/flinkHome.ts index 3e1ce02236..df02e45b9c 100644 --- a/streampark-console/streampark-console-webapp/src/locales/lang/zh-CN/setting/flinkHome.ts +++ b/streampark-console/streampark-console-webapp/src/locales/lang/zh-CN/setting/flinkHome.ts @@ -32,7 +32,8 @@ export default { flinkNameTips: 'Flink别名,举例: Flink-1.12', flinkNameIsRepeated: 'Flink名称已存在', flinkNameIsRequired: 'Flink名称必填', - flinkHomeTips: 'Flink所在服务器的绝对路径,举例: /usr/local/flink', + flinkHomeTips: + 'Flink所在服务器的绝对路径,举例: /usr/local/flink。Flink 2.x 运行需 JDK 11+;Flink 2.x shims 需 JDK 11+ 构建 StreamPark 才会打包,详见 script/FLINK_JDK_GUIDE.zh.md', flinkHomeIsRequired: 'Flink Home必填', flinkHomePathIsInvalid: 'Flink Home路径无效', flinkDistNotFound: 'flink/lib 路径下未找到 flink-dist jar文件', From 3d8d45de1538ec10103a7357e9e86d528acf67f5 Mon Sep 17 00:00:00 2001 From: shangeyao Date: Thu, 25 Jun 2026 16:24:24 +0800 Subject: [PATCH 5/5] [Common][Flink] Register Flink 2.x env on JDK 8 Console without service JDK upgrade Parse Flink version from flink-dist JAR names first and resolve Flink JAVA_HOME from flink-env.sh for CLI fallback and job submission, so Console can stay on JDK 8. 
Generated-by: Cursor Co-authored-by: Cursor --- .../streampark/common/conf/FlinkVersion.scala | 174 ++++++++++-------- .../common/util/ClassLoaderUtils.scala | 45 ++++- .../common/util/FlinkEnvUtils.scala | 100 ++++++++++ .../common/conf/FlinkVersionTest.scala | 55 ++++++ .../common/util/FlinkEnvUtilsTest.scala | 48 +++++ .../main/assembly/script/FLINK_JDK_GUIDE.md | 8 +- .../assembly/script/FLINK_JDK_GUIDE.zh.md | 8 +- .../src/locales/lang/en/setting/flinkHome.ts | 2 +- .../locales/lang/zh-CN/setting/flinkHome.ts | 2 +- .../flink/client/trait/FlinkClientTrait.scala | 4 + 10 files changed, 357 insertions(+), 89 deletions(-) create mode 100644 streampark-common/src/main/scala/org/apache/streampark/common/util/FlinkEnvUtils.scala create mode 100644 streampark-common/src/test/scala/org/apache/streampark/common/conf/FlinkVersionTest.scala create mode 100644 streampark-common/src/test/scala/org/apache/streampark/common/util/FlinkEnvUtilsTest.scala diff --git a/streampark-common/src/main/scala/org/apache/streampark/common/conf/FlinkVersion.scala b/streampark-common/src/main/scala/org/apache/streampark/common/conf/FlinkVersion.scala index 278a587b4a..feb5113ac1 100644 --- a/streampark-common/src/main/scala/org/apache/streampark/common/conf/FlinkVersion.scala +++ b/streampark-common/src/main/scala/org/apache/streampark/common/conf/FlinkVersion.scala @@ -17,7 +17,7 @@ package org.apache.streampark.common.conf -import org.apache.streampark.common.util.{CommandUtils, Logger} +import org.apache.streampark.common.util.{CommandUtils, FlinkEnvUtils, Logger} import org.apache.streampark.common.util.Implicits._ import java.io.File @@ -34,28 +34,15 @@ class FlinkVersion(val flinkHome: String) extends Serializable with Logger { private[this] lazy val FLINK_VERSION_PATTERN = Pattern.compile("^Version: (.*), Commit ID: (.*)$") - private[this] lazy val FLINK_DIST_VERSION_PATTERN = Pattern.compile( - "^flink-dist_(\\d+\\.\\d+).*\\.jar$|^flink-dist-(\\d+\\.\\d+\\.\\d+)\\.jar$") - 
private[this] lazy val APACHE_FLINK_VERSION_PATTERN = Pattern.compile("(^\\d+\\.\\d+\\.\\d+)") private[this] lazy val OTHER_FLINK_VERSION_PATTERN = Pattern.compile("(\\d+\\.\\d+)(-*)") - lazy val scalaVersion: String = { - val matcher = FLINK_DIST_VERSION_PATTERN.matcher(flinkDistJar.getName) - if (matcher.matches()) { - if (matcher.group(1) != null) { - matcher.group(1) - } else { - // flink 2.x doesn't have scala version in jar name, default to 2.12 - "2.12" - } - } else { - "2.12" - } - } + private[this] lazy val FLINK_DIST_UNDERSCORE_PATTERN = + Pattern.compile("^flink-dist_(\\d+\\.\\d+)-(\\d+\\.\\d+(?:\\.\\d+)?(?:-SNAPSHOT)?)\\.jar$") - lazy val fullVersion: String = s"${version}_$scalaVersion" + private[this] lazy val FLINK_DIST_DASH_PATTERN = + Pattern.compile("^flink-dist-(\\d+\\.\\d+(?:\\.\\d+)?(?:-SNAPSHOT)?)\\.jar$") lazy val flinkLib: File = { require(flinkHome != null, "[StreamPark] flinkHome must not be null.") @@ -67,52 +54,40 @@ class FlinkVersion(val flinkHome: String) extends Serializable with Logger { lib } - lazy val flinkLibs: List[URL] = flinkLib.listFiles().map(_.toURI.toURL).toList + lazy val flinkDistJar: File = { + val distJar = flinkLib.listFiles().filter(_.getName.matches("flink-dist.*\\.jar")) + distJar match { + case x if x.isEmpty => + throw new IllegalArgumentException(s"[StreamPark] can no found flink-dist jar in $flinkLib") + case x if x.length > 1 => + throw new IllegalArgumentException( + s"[StreamPark] found multiple flink-dist jar in $flinkLib") + case _ => + } + distJar.head + } - lazy val version: String = { - val cmd = List( - s"java -classpath ${flinkDistJar.getName} org.apache.flink.client.cli.CliFrontend --version") - var flinkVersion: String = null - val buffer = new mutable.StringBuilder - CommandUtils.execute( - flinkLib.getAbsolutePath, - cmd, - new Consumer[String]() { - override def accept(out: String): Unit = { - buffer.append(out).append("\n") - val matcher = FLINK_VERSION_PATTERN.matcher(out) - if (matcher.find) 
{ - val version = matcher.group(1) - val matcher1 = APACHE_FLINK_VERSION_PATTERN.matcher(version) - if (matcher1.find) { - flinkVersion = version - } else { - val matcher2 = OTHER_FLINK_VERSION_PATTERN.matcher(version) - if (matcher2.find) { - flinkVersion = version - } - } - } - } - }) + lazy val flinkLibs: List[URL] = flinkLib.listFiles().map(_.toURI.toURL).toList - logInfo(buffer.toString()) - if (flinkVersion == null) { - // Fallback: parse version from flink-dist JAR filename (e.g. flink-dist-2.2.1.jar) - val distMatcher = FLINK_DIST_VERSION_PATTERN.matcher(flinkDistJar.getName) - if (distMatcher.matches() && distMatcher.group(2) != null) { - flinkVersion = distMatcher.group(2) - logInfo(s"Flink version parsed from dist jar name: $flinkVersion") - } - } - if (flinkVersion == null) { - throw new IllegalStateException(s"[StreamPark] parse flink version failed. $buffer") - } - buffer.clear() - flinkVersion + private lazy val parsedVersion: (String, String) = { + parseFromDistJar() + .orElse(parseFromCliFrontend()) + .getOrElse( + throw new IllegalStateException( + s"[StreamPark] parse flink version failed for flinkHome: $flinkHome. " + + "Please check whether $FLINK_HOME/lib/flink-dist*.jar exists.")) } - // flink major version, like "1.13", "1.14" + lazy val version: String = parsedVersion._1 + + lazy val scalaVersion: String = parsedVersion._2 + + lazy val fullVersion: String = s"${version}_$scalaVersion" + + /** Resolved JAVA_HOME for Flink CLI and cluster-side JVM options. 
*/ + lazy val javaHome: Option[String] = FlinkEnvUtils.resolveJavaHome(flinkHome, version) + + // flink major version, like "1.13", "2.2" lazy val majorVersion: String = { if (version == null) { null @@ -123,19 +98,6 @@ class FlinkVersion(val flinkHome: String) extends Serializable with Logger { } } - lazy val flinkDistJar: File = { - val distJar = flinkLib.listFiles().filter(_.getName.matches("flink-dist.*\\.jar")) - distJar match { - case x if x.isEmpty => - throw new IllegalArgumentException(s"[StreamPark] can no found flink-dist jar in $flinkLib") - case x if x.length > 1 => - throw new IllegalArgumentException( - s"[StreamPark] found multiple flink-dist jar in $flinkLib") - case _ => - } - distJar.head - } - def checkVersion(throwException: Boolean = true): Boolean = { version.split("\\.").map(_.trim.toInt) match { case Array(1, v, _) if v >= 12 && v <= 20 => true @@ -160,6 +122,72 @@ class FlinkVersion(val flinkHome: String) extends Serializable with Logger { // StreamPark flink shims version, like "streampark-flink-shims_flink-1.13" private lazy val shimsVersion: String = s"streampark-flink-shims_flink-$majorVersion" + private def parseFromDistJar(): Option[(String, String)] = { + val jarName = flinkDistJar.getName + val underscoreMatcher = FLINK_DIST_UNDERSCORE_PATTERN.matcher(jarName) + if (underscoreMatcher.matches()) { + val parsed = underscoreMatcher.group(2) -> underscoreMatcher.group(1) + logInfo(s"Flink version parsed from dist jar name: ${parsed._1}, scala: ${parsed._2}") + Some(parsed) + } else { + val dashMatcher = FLINK_DIST_DASH_PATTERN.matcher(jarName) + if (dashMatcher.matches()) { + val parsed = dashMatcher.group(1) -> "2.12" + logInfo(s"Flink version parsed from dist jar name: ${parsed._1}, scala: ${parsed._2}") + Some(parsed) + } else { + None + } + } + } + + private def hintFlinkVersion(): String = parseFromDistJar().map(_._1).getOrElse("1.20.0") + + private def parseFromCliFrontend(): Option[(String, String)] = { + var flinkVersion: 
String = null + val buffer = new mutable.StringBuilder + val javaHomeExport = FlinkEnvUtils + .resolveJavaHome(flinkHome, hintFlinkVersion()) + .map(javaHome => s"export JAVA_HOME=$javaHome&&") + .getOrElse("") + val javaCmd = FlinkEnvUtils + .resolveJavaHome(flinkHome, hintFlinkVersion()) + .map(_ + "/bin/java") + .getOrElse("java") + val cmd = List( + s"${javaHomeExport}$javaCmd -classpath ${flinkDistJar.getName} org.apache.flink.client.cli.CliFrontend --version") + CommandUtils.execute( + flinkLib.getAbsolutePath, + cmd, + new Consumer[String]() { + override def accept(out: String): Unit = { + buffer.append(out).append("\n") + val matcher = FLINK_VERSION_PATTERN.matcher(out) + if (matcher.find) { + val parsedVersion = matcher.group(1) + val matcher1 = APACHE_FLINK_VERSION_PATTERN.matcher(parsedVersion) + if (matcher1.find) { + flinkVersion = parsedVersion + } else { + val matcher2 = OTHER_FLINK_VERSION_PATTERN.matcher(parsedVersion) + if (matcher2.find) { + flinkVersion = parsedVersion + } + } + } + } + }) + + logInfo(buffer.toString()) + if (flinkVersion == null) { + None + } else { + val scalaVer = parseFromDistJar().map(_._2).getOrElse("2.12") + logInfo(s"Flink version parsed from CliFrontend: $flinkVersion, scala: $scalaVer") + Some(flinkVersion -> scalaVer) + } + } + override def toString: String = s""" |----------------------------------------- flink version ----------------------------------- diff --git a/streampark-common/src/main/scala/org/apache/streampark/common/util/ClassLoaderUtils.scala b/streampark-common/src/main/scala/org/apache/streampark/common/util/ClassLoaderUtils.scala index e052de96c3..8f31eec97e 100644 --- a/streampark-common/src/main/scala/org/apache/streampark/common/util/ClassLoaderUtils.scala +++ b/streampark-common/src/main/scala/org/apache/streampark/common/util/ClassLoaderUtils.scala @@ -139,20 +139,49 @@ object ClassLoaderUtils extends Logger { @throws[Exception] private[this] def addURL(file: File): Unit = { - val classLoader = 
ClassLoader.getSystemClassLoader + val url = file.toURI.toURL + val classLoaders = Seq( + Option(Thread.currentThread().getContextClassLoader), + Option(ClassLoader.getSystemClassLoader)).flatten.distinct + + var lastError: Exception = null + classLoaders.foreach { classLoader => + try { + addURLToClasspath(classLoader, url) + return + } catch { + case e: Exception => lastError = e + } + } + throw lastError + } + + private[this] def addURLToClasspath(classLoader: ClassLoader, url: URL): Unit = { classLoader match { - case c if c.isInstanceOf[URLClassLoader] => - val addURL = classOf[URLClassLoader].getDeclaredMethod("addURL", Array(classOf[URL]): _*) + case urlClassLoader: URLClassLoader => + val addURL = + classOf[URLClassLoader].getDeclaredMethod("addURL", Array(classOf[URL]): _*) addURL.setAccessible(true) - addURL.invoke(c, file.toURI.toURL) + addURL.invoke(urlClassLoader, url) case _ => - val field = classLoader.getClass.getDeclaredField("ucp") - field.setAccessible(true) - val ucp = field.get(classLoader) + var clazz: Class[_] = classLoader.getClass + var ucpField: java.lang.reflect.Field = null + while (clazz != null && ucpField == null) { + try { + ucpField = clazz.getDeclaredField("ucp") + } catch { + case _: NoSuchFieldException => clazz = clazz.getSuperclass + } + } + if (ucpField == null) { + throw new NoSuchFieldException("ucp") + } + ucpField.setAccessible(true) + val ucp = ucpField.get(classLoader) val addURL = ucp.getClass.getDeclaredMethod("addURL", Array(classOf[URL]): _*) addURL.setAccessible(true) - addURL.invoke(ucp, file.toURI.toURL) + addURL.invoke(ucp, url) } } } diff --git a/streampark-common/src/main/scala/org/apache/streampark/common/util/FlinkEnvUtils.scala b/streampark-common/src/main/scala/org/apache/streampark/common/util/FlinkEnvUtils.scala new file mode 100644 index 0000000000..4f19e56200 --- /dev/null +++ b/streampark-common/src/main/scala/org/apache/streampark/common/util/FlinkEnvUtils.scala @@ -0,0 +1,100 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.streampark.common.util + +import java.io.File +import java.nio.charset.StandardCharsets +import java.util.regex.Pattern + +import scala.util.Try + +object FlinkEnvUtils extends Logger { + + private[this] lazy val JAVA_HOME_PATTERN = + Pattern.compile("""(?:^|\n)\s*(?:export\s+)?JAVA_HOME\s*=\s*(?:["']([^"']+)["']|(\S+))""") + + /** Minimum Java major version required by the given Flink version string. */ + def requiredJavaMajorVersion(flinkVersion: String): Int = { + flinkVersion.split("\\.").headOption.flatMap(v => Try(v.trim.toInt).toOption) match { + case Some(major) if major >= 2 => 11 + case _ => 8 + } + } + + /** + * Resolve JAVA_HOME for Flink CLI and cluster-side JVM options. + * + * Resolution order: + * 1. `$FLINK_HOME/conf/flink-env.sh` + * 2. process environment `JAVA_HOME` + * 3. 
system auto-detection (macOS `/usr/libexec/java_home`, common Linux paths) + */ + def resolveJavaHome(flinkHome: String, flinkVersion: String): Option[String] = { + val minVersion = requiredJavaMajorVersion(flinkVersion) + parseJavaHomeFromFlinkEnv(flinkHome) + .filter(isValidJavaHome) + .orElse(Option(System.getenv("JAVA_HOME")).filter(isValidJavaHome)) + .orElse(detectSystemJavaHome(minVersion).filter(isValidJavaHome)) + } + + def parseJavaHomeFromFlinkEnv(flinkHome: String): Option[String] = { + val flinkEnvFile = new File(flinkHome, "conf/flink-env.sh") + if (!flinkEnvFile.exists()) { + None + } else { + val content = org.apache.commons.io.FileUtils.readFileToString(flinkEnvFile, StandardCharsets.UTF_8) + extractJavaHome(content) + } + } + + private[util] def extractJavaHome(content: String): Option[String] = { + val matcher = JAVA_HOME_PATTERN.matcher(content) + var result: Option[String] = None + while (matcher.find() && result.isEmpty) { + val value = Option(matcher.group(1)).getOrElse(matcher.group(2)) + if (value != null && value.nonEmpty && !value.startsWith("#")) { + result = Some(value.trim) + } + } + result + } + + private def detectSystemJavaHome(minMajor: Int): Option[String] = { + val os = System.getProperty("os.name", "").toLowerCase + if (os.contains("mac")) { + Try { + val (code, output) = CommandUtils.execute(s"/usr/libexec/java_home -v $minMajor 2>/dev/null") + if (code == 0 && output.trim.nonEmpty) Some(output.trim) else None + }.getOrElse(None) + } else { + val candidates = List( + Option(System.getenv(s"JAVA${minMajor}_HOME")), + Option(s"/usr/lib/jvm/java-$minMajor-openjdk"), + Option(s"/usr/lib/jvm/java-$minMajor-openjdk-amd64"), + Option(s"/usr/lib/jvm/java-$minMajor")) + .flatten + .filter(isValidJavaHome) + candidates.headOption + } + } + + private def isValidJavaHome(javaHome: String): Boolean = { + javaHome != null && javaHome.nonEmpty && new File(javaHome, "bin/java").exists() + } + +} diff --git 
a/streampark-common/src/test/scala/org/apache/streampark/common/conf/FlinkVersionTest.scala b/streampark-common/src/test/scala/org/apache/streampark/common/conf/FlinkVersionTest.scala
new file mode 100644
index 0000000000..35f6b17fc5
--- /dev/null
+++ b/streampark-common/src/test/scala/org/apache/streampark/common/conf/FlinkVersionTest.scala
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.common.conf
+
+import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
+import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.io.TempDir
+
+import java.io.File
+import java.nio.file.Path
+
+class FlinkVersionTest {
+
+  /** Creates `lib/<distJarName>` under the temp dir and wraps that dir as a Flink home. */
+  private def flinkVersionWithDistJar(tempDir: Path, distJarName: String): FlinkVersion = {
+    val libDir = new File(tempDir.toFile, "lib")
+    libDir.mkdirs()
+    new File(libDir, distJarName).createNewFile()
+    new FlinkVersion(tempDir.toFile.getAbsolutePath)
+  }
+
+  /** `flink-dist-2.2.1.jar` (no Scala suffix) yields version 2.2.1 and Scala 2.12. */
+  @Test
+  def testParseFlink2FromDistJar(@TempDir tempDir: Path): Unit = {
+    val parsed = flinkVersionWithDistJar(tempDir, "flink-dist-2.2.1.jar")
+    assertEquals("2.2.1", parsed.version)
+    assertEquals("2.12", parsed.scalaVersion)
+    assertTrue(parsed.checkVersion(false))
+  }
+
+  /** `flink-dist_2.12-1.20.0.jar` yields version 1.20.0 and Scala 2.12. */
+  @Test
+  def testParseFlink1FromDistJar(@TempDir tempDir: Path): Unit = {
+    val parsed = flinkVersionWithDistJar(tempDir, "flink-dist_2.12-1.20.0.jar")
+    assertEquals("1.20.0", parsed.version)
+    assertEquals("2.12", parsed.scalaVersion)
+    assertTrue(parsed.checkVersion(false))
+  }
+
+}
diff --git a/streampark-common/src/test/scala/org/apache/streampark/common/util/FlinkEnvUtilsTest.scala b/streampark-common/src/test/scala/org/apache/streampark/common/util/FlinkEnvUtilsTest.scala
new file mode 100644
index 0000000000..12c4da2118
--- /dev/null
+++ b/streampark-common/src/test/scala/org/apache/streampark/common/util/FlinkEnvUtilsTest.scala
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.streampark.common.util
+
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.Test
+
+class FlinkEnvUtilsTest {
+
+  /** Flink 2.x versions require Java 11, Flink 1.x versions Java 8. */
+  @Test
+  def testRequiredJavaMajorVersion(): Unit = {
+    val expectedByVersion = Seq("2.2.1" -> 11, "2.0.0" -> 11, "1.20.0" -> 8)
+    expectedByVersion.foreach { case (flinkVersion, javaMajor) =>
+      assertEquals(javaMajor, FlinkEnvUtils.requiredJavaMajorVersion(flinkVersion))
+    }
+  }
+
+  /** A quoted, exported assignment below a comment line is returned without its quotes. */
+  @Test
+  def testExtractJavaHome(): Unit = {
+    val flinkEnv =
+      """
+        |# comment
+        |export JAVA_HOME="/opt/java/jdk-11"
+        |""".stripMargin
+    val resolved = FlinkEnvUtils.extractJavaHome(flinkEnv)
+    assertEquals("/opt/java/jdk-11", resolved.orNull)
+  }
+
+  /** A bare assignment without `export` or quotes is extracted as well. */
+  @Test
+  def testExtractJavaHomeWithoutExport(): Unit = {
+    val flinkEnv = "JAVA_HOME=/usr/lib/jvm/java-11-openjdk\n"
+    val resolved = FlinkEnvUtils.extractJavaHome(flinkEnv)
+    assertEquals("/usr/lib/jvm/java-11-openjdk", resolved.orNull)
+  }
+
+}
diff --git a/streampark-console/streampark-console-service/src/main/assembly/script/FLINK_JDK_GUIDE.md b/streampark-console/streampark-console-service/src/main/assembly/script/FLINK_JDK_GUIDE.md
index d6510c5076..eb8452f0ae 100644
--- a/streampark-console/streampark-console-service/src/main/assembly/script/FLINK_JDK_GUIDE.md
+++ b/streampark-console/streampark-console-service/src/main/assembly/script/FLINK_JDK_GUIDE.md
@@ -16,10 +16,12 @@ Flink 2.x introduces a higher JDK requirement for both **building** and **runnin
 ## What StreamPark Does Automatically
 
 1. 
**Flink environment registration** - - Parses Flink version from `$FLINK_HOME/lib/flink-dist*.jar` or falls back to - `flink-dist --version`. + - Parses Flink version from `$FLINK_HOME/lib/flink-dist*.jar` first (no Flink CLI required when Console runs on JDK 8). + - Falls back to `CliFrontend --version` only when the JAR name cannot be parsed (uses `JAVA_HOME` from `$FLINK_HOME/conf/flink-env.sh`). - Registration does not require StreamPark to be built with Flink 2.x shims. -2. **Flink 2.x shims packaging** +2. **Flink 2.x job submission** + - Resolves `JAVA_HOME` from `$FLINK_HOME/conf/flink-env.sh` via `FlinkEnvUtils` and applies it as Flink `env.java.home` for cluster-side JVMs. +3. **Flink 2.x shims packaging** - Flink 2.0/2.1/2.2 shims are packaged only when StreamPark is **built with JDK 11+** (Maven profile `flink-2.x-shims`). diff --git a/streampark-console/streampark-console-service/src/main/assembly/script/FLINK_JDK_GUIDE.zh.md b/streampark-console/streampark-console-service/src/main/assembly/script/FLINK_JDK_GUIDE.zh.md index 3242984922..bf71dbfeaa 100644 --- a/streampark-console/streampark-console-service/src/main/assembly/script/FLINK_JDK_GUIDE.zh.md +++ b/streampark-console/streampark-console-service/src/main/assembly/script/FLINK_JDK_GUIDE.zh.md @@ -16,10 +16,12 @@ Flink 作业提出了更高的 JDK 要求。 ## StreamPark 自动处理的内容 1. **注册 Flink 环境** - - 从 `$FLINK_HOME/lib/flink-dist*.jar` 解析版本,或 fallback 到 - `flink-dist --version`。 + - 优先从 `$FLINK_HOME/lib/flink-dist*.jar` 文件名解析版本(Console 运行在 JDK 8 时无需执行 Flink CLI)。 + - 仅在无法从 JAR 名解析时,才 fallback 到 `CliFrontend --version`(此时会使用 `$FLINK_HOME/conf/flink-env.sh` 中的 `JAVA_HOME`)。 - 注册 Flink 环境不要求 StreamPark 发行包中已包含 Flink 2.x shims。 -2. **Flink 2.x shims 打包** +2. **提交 Flink 2.x 作业** + - 通过 `FlinkEnvUtils` 解析 `$FLINK_HOME/conf/flink-env.sh` 中的 `JAVA_HOME`,并写入 Flink 配置项 `env.java.home`,供集群侧 JVM 使用。 +3. 
**Flink 2.x shims 打包** - 仅当 StreamPark 使用 **JDK 11+** 构建时,才会打包 Flink 2.0/2.1/2.2 shims (Maven profile:`flink-2.x-shims`)。 diff --git a/streampark-console/streampark-console-webapp/src/locales/lang/en/setting/flinkHome.ts b/streampark-console/streampark-console-webapp/src/locales/lang/en/setting/flinkHome.ts index b4a6036b86..b004417a2f 100644 --- a/streampark-console/streampark-console-webapp/src/locales/lang/en/setting/flinkHome.ts +++ b/streampark-console/streampark-console-webapp/src/locales/lang/en/setting/flinkHome.ts @@ -33,7 +33,7 @@ export default { flinkNameIsRepeated: 'Flink name already exists', flinkNameIsRequired: 'Flink name is required', flinkHomeTips: - 'The absolute path of FLINK_HOME. Flink 2.x requires JDK 11+ at runtime; Flink 2.x shims are packaged only when StreamPark is built with JDK 11+. See script/FLINK_JDK_GUIDE.md in the installation package.', + 'The absolute path of FLINK_HOME. StreamPark Console can keep running on JDK 8; configure JAVA_HOME (JDK 11+) in $FLINK_HOME/conf/flink-env.sh for Flink 2.x runtime. 
See script/FLINK_JDK_GUIDE.md.', flinkHomeIsRequired: 'Flink home is required', flinkHomePathIsInvalid: 'Flink home path is invalid', flinkDistNotFound: 'Can not find flink-dist in FLINK_HOME/lib', diff --git a/streampark-console/streampark-console-webapp/src/locales/lang/zh-CN/setting/flinkHome.ts b/streampark-console/streampark-console-webapp/src/locales/lang/zh-CN/setting/flinkHome.ts index df02e45b9c..c55f09a9fe 100644 --- a/streampark-console/streampark-console-webapp/src/locales/lang/zh-CN/setting/flinkHome.ts +++ b/streampark-console/streampark-console-webapp/src/locales/lang/zh-CN/setting/flinkHome.ts @@ -33,7 +33,7 @@ export default { flinkNameIsRepeated: 'Flink名称已存在', flinkNameIsRequired: 'Flink名称必填', flinkHomeTips: - 'Flink所在服务器的绝对路径,举例: /usr/local/flink。Flink 2.x 运行需 JDK 11+;Flink 2.x shims 需 JDK 11+ 构建 StreamPark 才会打包,详见 script/FLINK_JDK_GUIDE.zh.md', + 'Flink所在服务器的绝对路径,举例: /usr/local/flink。StreamPark Console 可继续使用 JDK 8;Flink 2.x 运行时在 $FLINK_HOME/conf/flink-env.sh 配置 JAVA_HOME(JDK 11+)即可。详见 script/FLINK_JDK_GUIDE.zh.md', flinkHomeIsRequired: 'Flink Home必填', flinkHomePathIsInvalid: 'Flink Home路径无效', flinkDistNotFound: 'flink/lib 路径下未找到 flink-dist jar文件', diff --git a/streampark-flink/streampark-flink-client/streampark-flink-client-core/src/main/scala/org/apache/streampark/flink/client/trait/FlinkClientTrait.scala b/streampark-flink/streampark-flink-client/streampark-flink-client-core/src/main/scala/org/apache/streampark/flink/client/trait/FlinkClientTrait.scala index 0c0b7c84cd..ebfcf25a7b 100644 --- a/streampark-flink/streampark-flink-client/streampark-flink-client-core/src/main/scala/org/apache/streampark/flink/client/trait/FlinkClientTrait.scala +++ b/streampark-flink/streampark-flink-client/streampark-flink-client-core/src/main/scala/org/apache/streampark/flink/client/trait/FlinkClientTrait.scala @@ -141,6 +141,10 @@ trait FlinkClientTrait extends Logger { .safeSet(ApplicationConfiguration.APPLICATION_ARGS, extractProgramArgs(submitRequest)) 
.safeSet(PipelineOptionsInternal.PIPELINE_FIXED_JOB_ID, submitRequest.jobId) + submitRequest.flinkVersion.javaHome.foreach { javaHome => + flinkConfig.setString("env.java.home", javaHome) + } + if (!submitRequest.hasProp(CheckpointingOptions.MAX_RETAINED_CHECKPOINTS.key())) { val flinkDefaultConfiguration = getFlinkDefaultConfiguration( submitRequest.flinkVersion.flinkHome)