diff --git a/include/paimon/catalog/catalog.h b/include/paimon/catalog/catalog.h index 523ee0e8..47bd66ce 100644 --- a/include/paimon/catalog/catalog.h +++ b/include/paimon/catalog/catalog.h @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -23,7 +24,6 @@ #include "paimon/catalog/identifier.h" #include "paimon/result.h" -#include "paimon/schema/schema.h" #include "paimon/status.h" #include "paimon/type_fwd.h" #include "paimon/visibility.h" @@ -31,8 +31,14 @@ struct ArrowSchema; namespace paimon { +class Database; +class Table; +class View; +class Schema; +class Snapshot; +class PartitionStatistics; +class Tag; class Identifier; - /// This interface is responsible for reading and writing metadata such as database/table from a /// paimon catalog. class PAIMON_EXPORT Catalog { @@ -99,6 +105,380 @@ class PAIMON_EXPORT Catalog { /// status. virtual Result> ListTables(const std::string& db_name) const = 0; + /// Drops a database. + /// + /// @param name Name of the database to be dropped. + /// @param ignore_if_not_exists If true, no action is taken if the database does not exist. + /// @param cascade If true, drops all tables and functions in the database before dropping the + /// database. + /// @return A status indicating success or failure. + virtual Status DropDatabase(const std::string& name, bool ignore_if_not_exists, bool cascade) { + return Status::NotImplemented("DropDatabase not implemented"); + } + + /// Alters a database. + /// + /// @param name Name of the database to alter. + /// @param changes Properties to be changed. + /// @param ignore_if_not_exists If true, no action is taken if the database does not exist. + /// @return A status indicating success or failure. + virtual Status AlterDatabase(const std::string& name, + const std::map& changes, + bool ignore_if_not_exists) { + return Status::NotImplemented("AlterDatabase not implemented"); + } + + /// Gets a database. + /// + /// @param name Name of the database to get. 
+ /// @return A result containing the database information, or an error status. + virtual Result> GetDatabase(const std::string& name) const { + return Status::NotImplemented("GetDatabase not implemented"); + } + + /// Gets a table. + /// + /// @param identifier Identifier of the table to get. + /// @return A result containing the table, or an error status. + virtual Result> GetTable(const Identifier& identifier) const { + return Status::NotImplemented("GetTable not implemented"); + } + + /// Gets a table by ID. + /// + /// @param table_id ID of the table to get. + /// @return A result containing the table, or an error status. + virtual Result> GetTableById(const std::string& table_id) const { + return Status::NotImplemented("GetTableById not implemented"); + } + + /// Drops a table. + /// + /// @param identifier Identifier of the table to drop. + /// @param ignore_if_not_exists If true, no action is taken if the table does not exist. + /// @return A status indicating success or failure. + virtual Status DropTable(const Identifier& identifier, bool ignore_if_not_exists) { + return Status::NotImplemented("DropTable not implemented"); + } + + /// Renames a table. + /// + /// @param from_table Current identifier of the table. + /// @param to_table New identifier for the table. + /// @param ignore_if_not_exists If true, no action is taken if the table does not exist. + /// @return A status indicating success or failure. + virtual Status RenameTable(const Identifier& from_table, const Identifier& to_table, + bool ignore_if_not_exists) { + return Status::NotImplemented("RenameTable not implemented"); + } + + /// TODO(liangzi): Support Alter table + + /// Invalidates cached table metadata. + /// + /// @param identifier Identifier of the table to invalidate. + virtual void InvalidateTable(const Identifier& identifier) {} + + /// Marks partitions as done. + /// + /// @param identifier Identifier of the table. + /// @param partitions List of partition specifications. 
+ /// @return A status indicating success or failure. + virtual Status MarkDonePartitions( + const Identifier& identifier, + const std::vector>& partitions) { + return Status::NotImplemented("MarkDonePartitions not implemented"); + } + + /// Lists all partitions of a table. + /// + /// @param identifier Identifier of the table. + /// @return A result containing a list of partitions, or an error status. + virtual Result>> ListPartitions( + const Identifier& identifier) const { + return Status::NotImplemented("ListPartitions not implemented"); + } + + /// Creates partitions. + /// + /// @param identifier Identifier of the table. + /// @param partitions List of partition specifications to create. + /// @return A status indicating success or failure. + virtual Status CreatePartitions( + const Identifier& identifier, + const std::vector>& partitions) { + return Status::NotImplemented("CreatePartitions not implemented"); + } + + /// Drops partitions. + /// + /// @param identifier Identifier of the table. + /// @param partitions List of partition specifications to drop. + /// @return A status indicating success or failure. + virtual Status DropPartitions( + const Identifier& identifier, + const std::vector>& partitions) { + return Status::NotImplemented("DropPartitions not implemented"); + } + + /// Alters partitions. + /// + /// @param identifier Identifier of the table. + /// @param partitions List of partition statistics to alter. + /// @return A status indicating success or failure. + virtual Status AlterPartitions(const Identifier& identifier, + const std::vector& partitions) { + return Status::NotImplemented("AlterPartitions not implemented"); + } + + /// Gets a view. + /// + /// @param identifier Identifier of the view to get. + /// @return A result containing the view, or an error status. + virtual Result> GetView(const Identifier& identifier) const { + return Status::NotImplemented("GetView not implemented"); + } + + /// Drops a view. 
+ /// + /// @param identifier Identifier of the view to drop. + /// @param ignore_if_not_exists If true, no action is taken if the view does not exist. + /// @return A status indicating success or failure. + virtual Status DropView(const Identifier& identifier, bool ignore_if_not_exists) { + return Status::NotImplemented("DropView not implemented"); + } + + /// Creates a view. + /// + /// @param identifier Identifier of the view to create. + /// @param view The view definition. + /// @param ignore_if_exists If true, no action is taken if the view already exists. + /// @return A status indicating success or failure. + virtual Status CreateView(const Identifier& identifier, const View& view, + bool ignore_if_exists) { + return Status::NotImplemented("CreateView not implemented"); + } + + /// Lists all views in a database. + /// + /// @param database_name Name of the database. + /// @return A result containing a list of view names, or an error status. + virtual Result> ListViews(const std::string& database_name) const { + return Status::NotImplemented("ListViews not implemented"); + } + + /// Renames a view. + /// + /// @param from_view Current identifier of the view. + /// @param to_view New identifier for the view. + /// @param ignore_if_not_exists If true, no action is taken if the view does not exist. + /// @return A status indicating success or failure. + virtual Status RenameView(const Identifier& from_view, const Identifier& to_view, + bool ignore_if_not_exists) { + return Status::NotImplemented("RenameView not implemented"); + } + + /// TODO(liangzi): Support Function/Snapshot/Tag/Branch/Authorizes api + + /// Repairs the entire catalog. + /// + /// @return A status indicating success or failure. + virtual Status RepairCatalog() { + return Status::NotImplemented("RepairCatalog not implemented"); + } + + /// Repairs a database. + /// + /// @param database_name Name of the database to repair. + /// @return A status indicating success or failure. 
+ virtual Status RepairDatabase(const std::string& database_name) { + return Status::NotImplemented("RepairDatabase not implemented"); + } + + /// Repairs a table. + /// + /// @param identifier Identifier of the table to repair. + /// @return A status indicating success or failure. + virtual Status RepairTable(const Identifier& identifier) { + return Status::NotImplemented("RepairTable not implemented"); + } + + /// Registers a table. + /// + /// @param identifier Identifier of the table to register. + /// @param path Path of the table. + /// @return A status indicating success or failure. + virtual Status RegisterTable(const Identifier& identifier, const std::string& path) { + return Status::NotImplemented("RegisterTable not implemented"); + } + + /// Checks if list objects paged is supported. + /// + /// @return True if supported, false otherwise. + virtual bool SupportsListObjectsPaged() const { + return false; + } + + /// Checks if list by pattern is supported. + /// + /// @return True if supported, false otherwise. + virtual bool SupportsListByPattern() const { + return false; + } + + /// Checks if list table by type is supported. + /// + /// @return True if supported, false otherwise. + virtual bool SupportsListTableByType() const { + return false; + } + + // ==================== Version management methods ========================== + + /// Whether this catalog supports version management for tables. + /// + /// If not supported, corresponding methods will return NotImplemented status. + /// Affected methods: + /// - CommitSnapshot() + /// - LoadSnapshot() + /// - RollbackTo() + /// - CreateBranch() + /// - DropBranch() + /// - ListBranches() + /// - GetTag() + /// - CreateTag() + /// - DeleteTag() + virtual bool SupportsVersionManagement() const { + return false; + } + + /// Commit the Snapshot for table identified by the given Identifier. 
+ /// + /// @param identifier Path of the table + /// @param table_uuid Uuid of the table to avoid wrong commit + /// @param snapshot Snapshot to be committed + /// @param statistics Statistics information of this change + /// @return A result containing true if commit succeeded, or an error status. + virtual Result CommitSnapshot(const Identifier& identifier, const std::string& table_uuid, + const std::shared_ptr& snapshot, + const std::vector& statistics) { + return Status::NotImplemented("CommitSnapshot not implemented"); + } + + /// Return the snapshot of table for given version. + /// + /// Version parsing order: + /// 1. If it is 'EARLIEST', get the earliest snapshot + /// 2. If it is 'LATEST', get the latest snapshot + /// 3. If it is a number, get snapshot by snapshot id + /// 4. Else try to get snapshot from Tag name + /// + /// @param identifier Path of the table + /// @param version Version to snapshot + /// @return A result containing the requested snapshot, or an error status. + virtual Result> LoadSnapshot(const Identifier& identifier, + const std::string& version) const { + return Status::NotImplemented("LoadSnapshot not implemented"); + } + + /// Rollback table by the given Identifier and instant. + /// + /// @param identifier Path of the table + /// @param instant Target of the rollback. NOTE(review): was documented as "like snapshotId or tagName", but the declared type is a std::chrono time_point; confirm which is intended. + /// @return A status indicating success or failure. + virtual Status RollbackTo(const Identifier& identifier, + const std::chrono::system_clock::time_point& instant) { + return RollbackTo(identifier, instant, std::nullopt); + } + + /// Rollback table by the given Identifier and instant. + /// + /// @param identifier Path of the table + /// @param instant Target of the rollback. NOTE(review): was documented as "like snapshotId or tagName", but the declared type is a std::chrono time_point; confirm which is intended. + /// @param from_snapshot Snapshot from, success only occurs when the latest snapshot is this + /// snapshot + /// @return A status indicating success or failure. 
+ virtual Status RollbackTo(const Identifier& identifier, + const std::chrono::system_clock::time_point& instant, + const std::optional& from_snapshot) { + return Status::NotImplemented("RollbackTo not implemented"); + } + + /// Create a new branch for this table. + /// + /// By default, an empty branch will be created using the latest schema. + /// If from_tag is provided, a branch will be created from the tag and the + /// data files will be inherited from it. + /// + /// @param identifier Path of the table, cannot be system or branch name + /// @param branch The branch name + /// @param from_tag From the tag + /// @return A status indicating success or failure. + virtual Status CreateBranch(const Identifier& identifier, const std::string& branch, + const std::optional& from_tag = std::nullopt) { + return Status::NotImplemented("CreateBranch not implemented"); + } + + /// Drop the branch for this table. + /// + /// @param identifier Path of the table, cannot be system or branch name + /// @param branch The branch name + /// @return A status indicating success or failure. + virtual Status DropBranch(const Identifier& identifier, const std::string& branch) { + return Status::NotImplemented("DropBranch not implemented"); + } + + /// Fast-forward a branch to main branch. + /// + /// @param identifier Path of the table, cannot be system or branch name + /// @param branch The branch name + /// @return A status indicating success or failure. + virtual Status FastForward(const Identifier& identifier, const std::string& branch) { + return Status::NotImplemented("FastForward not implemented"); + } + + /// List all branches of the table. + /// + /// @param identifier Path of the table, cannot be system or branch name + /// @return A result containing a list of branch names, or an error status. + virtual Result> ListBranches(const Identifier& identifier) const { + return Status::NotImplemented("ListBranches not implemented"); + } + + /// Get tag for table. 
+ /// + /// @param identifier Path of the table, cannot be system name. + /// @param tag_name Tag name + /// @return A result containing the tag information, or an error status. + virtual Result> GetTag(const Identifier& identifier, + const std::string& tag_name) const { + return Status::NotImplemented("GetTag not implemented"); + } + + /// Create tag for table. + /// + /// @param identifier Path of the table, cannot be system name. + /// @param tag_name Tag name + /// @param snapshot_id Optional snapshot id, if not provided uses latest snapshot + /// @param time_retained Optional time retained as string (e.g., "1d", "12h", "30m") + /// @param ignore_if_exists If true, ignore if tag already exists + /// @return A status indicating success or failure. + virtual Status CreateTag(const Identifier& identifier, const std::string& tag_name, + const std::optional& snapshot_id, + const std::optional& time_retained, + bool ignore_if_exists) { + return Status::NotImplemented("CreateTag not implemented"); + } + + /// Delete tag for table. + /// + /// @param identifier Path of the table, cannot be system name. + /// @param tag_name Tag name + /// @return A status indicating success or failure. + virtual Status DeleteTag(const Identifier& identifier, const std::string& tag_name) { + return Status::NotImplemented("DeleteTag not implemented"); + } + /// Checks whether a database with the specified name exists in the catalog. /// /// @param db_name The name of the database to check for existence. diff --git a/include/paimon/catalog/database.h b/include/paimon/catalog/database.h new file mode 100644 index 00000000..e242a9d5 --- /dev/null +++ b/include/paimon/catalog/database.h @@ -0,0 +1,50 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include "paimon/result.h" +#include "paimon/status.h" +#include "paimon/type_fwd.h" +#include "paimon/visibility.h" + +struct ArrowSchema; + +namespace paimon { + +/// Interface of a database in a catalog. +class PAIMON_EXPORT Database { + public: + /// ================== Database Metadata ===================== + + /// A name to identify this database. + virtual std::string Name() = 0; + + /// Get the options associated with this database. + /// @return Options + virtual const std::map& Options() const = 0; + + /// Get an optional comment describing the database. + /// @return The database comment if set, or std::nullopt otherwise. 
+ virtual std::optional Comment() const = 0; +}; + +} // namespace paimon diff --git a/src/paimon/core/catalog/file_system_catalog.cpp b/src/paimon/core/catalog/file_system_catalog.cpp index bfb4bdb2..921e37a8 100644 --- a/src/paimon/core/catalog/file_system_catalog.cpp +++ b/src/paimon/core/catalog/file_system_catalog.cpp @@ -27,7 +27,6 @@ #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/path_util.h" #include "paimon/common/utils/string_utils.h" -#include "paimon/core/schema/schema_manager.h" #include "paimon/fs/file_system.h" #include "paimon/logging.h" #include "paimon/result.h" @@ -240,4 +239,20 @@ Result> FileSystemCatalog::LoadTableSchema( return std::static_pointer_cast(*latest_schema); } +Result> FileSystemCatalog::GetTable(const Identifier& identifier) const { + std::string table_path = GetTableLocation(identifier); + PAIMON_ASSIGN_OR_RAISE(bool exist, fs_->Exists(table_path)); + if (!exist) { + return Status::NotExist(fmt::format("{} not exist", identifier.ToString())); + } + PAIMON_ASSIGN_OR_RAISE(std::optional> latest_schema, + TableSchemaExists(identifier)); + if (!latest_schema) { + return Status::NotExist( + fmt::format("load table schema for {} failed", identifier.ToString())); + } + auto schema = std::static_pointer_cast(*latest_schema); + return std::make_shared(schema, identifier.GetDatabaseName(), identifier.GetTableName()); +} + } // namespace paimon diff --git a/src/paimon/core/catalog/file_system_catalog.h b/src/paimon/core/catalog/file_system_catalog.h index 4cece2f5..408f6769 100644 --- a/src/paimon/core/catalog/file_system_catalog.h +++ b/src/paimon/core/catalog/file_system_catalog.h @@ -22,6 +22,8 @@ #include #include "paimon/catalog/catalog.h" +#include "paimon/core/schema/schema_manager.h" +#include "paimon/core/table/table.h" #include "paimon/logging.h" #include "paimon/result.h" #include "paimon/status.h" @@ -56,6 +58,7 @@ class FileSystemCatalog : public Catalog { Result> LoadTableSchema(const 
Identifier& identifier) const override; std::string GetRootPath() const override; std::shared_ptr GetFileSystem() const override; + Result> GetTable(const Identifier& identifier) const override; private: static std::string NewDatabasePath(const std::string& warehouse, const std::string& db_name); diff --git a/src/paimon/core/catalog/file_system_catalog_test.cpp b/src/paimon/core/catalog/file_system_catalog_test.cpp index ede6918f..fa57dbc4 100644 --- a/src/paimon/core/catalog/file_system_catalog_test.cpp +++ b/src/paimon/core/catalog/file_system_catalog_test.cpp @@ -190,6 +190,17 @@ TEST(FileSystemCatalogTest, TestCreateTableWithBlob) { ASSERT_OK_AND_ASSIGN(auto arrow_schema, table_schema->GetArrowSchema()); auto loaded_schema = arrow::ImportSchema(arrow_schema.get()).ValueOrDie(); ASSERT_TRUE(typed_schema.Equals(loaded_schema)); + + ASSERT_OK_AND_ASSIGN(std::shared_ptr
table, catalog.GetTable(Identifier("db1", "tbl1"))); + ASSERT_OK_AND_ASSIGN(auto arrow_schema_from_get_table, table->LatestSchema()->GetArrowSchema()); + auto schema_from_get_table = + arrow::ImportSchema(arrow_schema_from_get_table.get()).ValueOrDie(); + ASSERT_TRUE(typed_schema.Equals(schema_from_get_table)); + ASSERT_EQ(table->FullName(), "db1.tbl1"); + + ASSERT_NOK_WITH_MSG(catalog.GetTable(Identifier("db1", "table_xaxa")), + "Identifier{database='db1', table='table_xaxa'} not exist"); + ArrowSchemaRelease(&schema); } diff --git a/src/paimon/core/table/table.h b/src/paimon/core/table/table.h new file mode 100644 index 00000000..46df411c --- /dev/null +++ b/src/paimon/core/table/table.h @@ -0,0 +1,68 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include "paimon/result.h" +#include "paimon/schema/schema.h" +#include "paimon/status.h" +#include "paimon/type_fwd.h" +#include "paimon/visibility.h" + +namespace paimon { + +/// A table provides basic abstraction for table type. +class PAIMON_EXPORT Table { + public: + Table(const std::shared_ptr& schema, const std::string& database, + const std::string& table_name) + : schema_(schema), database_(database), table_name_(table_name) {} + + virtual ~Table() = default; + + /// A name to identify this table. + virtual std::string Name() const { + return database_ + "." 
+ table_name_; + } + + /// Full name of the table, default is database.tableName. + virtual std::string FullName() const { + return Name(); + } + + /// UUID of the table, metastore can provide the true UUID of this table, default is the full + /// name. + virtual std::string Uuid() const { + return FullName(); + } + + /// Loads the latest schema of table. + virtual std::shared_ptr LatestSchema() { + return schema_; + } + + private: + std::shared_ptr schema_; + std::string database_; + std::string table_name_; +}; + +} // namespace paimon diff --git a/src/paimon/core/view/view.h b/src/paimon/core/view/view.h new file mode 100644 index 00000000..b1d70cc3 --- /dev/null +++ b/src/paimon/core/view/view.h @@ -0,0 +1,55 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include "paimon/result.h" +#include "paimon/schema/schema.h" +#include "paimon/status.h" +#include "paimon/type_fwd.h" +#include "paimon/visibility.h" + +struct ArrowSchema; + +namespace paimon { + +/// Interface for view definition. +class PAIMON_EXPORT View { + public: + View() = default; + virtual ~View() = default; + + /// A name to identify this view. + virtual std::string Name() = 0; + + /// Full name of the view, default is database.tableName. + virtual std::string FullName() { + return Name(); + } + + /// Returns the view representation. 
+ virtual std::string Query() = 0; + + /// Loads the schema of view. + virtual std::shared_ptr GetSchema() = 0; +}; + +} // namespace paimon