From 41dcace9ed6716ce3e7da91553ef499c6e787958 Mon Sep 17 00:00:00 2001 From: Kevin Zheng Date: Wed, 7 Jan 2026 19:14:21 +0000 Subject: [PATCH 1/5] chore: Rerouted legacy client row filters to data client row filters. --- google/cloud/bigtable/row.py | 4 +- google/cloud/bigtable/row_filters.py | 803 ++++------------------- google/cloud/bigtable/table.py | 2 +- tests/unit/v2_client/test_row.py | 2 +- tests/unit/v2_client/test_row_data.py | 16 +- tests/unit/v2_client/test_row_filters.py | 102 +-- tests/unit/v2_client/test_table.py | 2 +- 7 files changed, 188 insertions(+), 743 deletions(-) diff --git a/google/cloud/bigtable/row.py b/google/cloud/bigtable/row.py index 752458a08..6c54ac0e5 100644 --- a/google/cloud/bigtable/row.py +++ b/google/cloud/bigtable/row.py @@ -230,7 +230,7 @@ def _delete_cells(self, column_family_id, columns, time_range=None, state=None): else: delete_kwargs = {} if time_range is not None: - delete_kwargs["time_range"] = time_range.to_pb() + delete_kwargs["time_range"] = time_range._to_pb() to_append = [] for column in columns: @@ -601,7 +601,7 @@ def commit(self): resp = data_client.check_and_mutate_row( table_name=self._table.name, row_key=self._row_key, - predicate_filter=self._filter.to_pb(), + predicate_filter=self._filter._to_pb(), app_profile_id=self._table._app_profile_id, true_mutations=true_mutations, false_mutations=false_mutations, diff --git a/google/cloud/bigtable/row_filters.py b/google/cloud/bigtable/row_filters.py index 53192acc8..8aa1a358c 100644 --- a/google/cloud/bigtable/row_filters.py +++ b/google/cloud/bigtable/row_filters.py @@ -17,100 +17,56 @@ import struct -from google.cloud._helpers import _microseconds_from_datetime # type: ignore -from google.cloud._helpers import _to_bytes # type: ignore -from google.cloud.bigtable_v2.types import data as data_v2_pb2 +from google.cloud.bigtable.data.row_filters import ( + RowFilter, + SinkFilter, + _BoolFilter as _BaseBoolFilter, + PassAllFilter, + BlockAllFilter, + _RegexFilter as _BaseRegexFilter, + RowKeyRegexFilter, + RowSampleFilter, + FamilyNameRegexFilter, + ColumnQualifierRegexFilter, + TimestampRange, + TimestampRangeFilter as BaseTimestampRangeFilter, + ColumnRangeFilter as BaseColumnRangeFilter, + ValueRegexFilter, + ValueRangeFilter, + _CellCountFilter as _BaseCellCountFilter, + CellsRowOffsetFilter, + CellsRowLimitFilter, + CellsColumnLimitFilter, + StripValueTransformerFilter, + ApplyLabelFilter, + _FilterCombination as _BaseFilterCombination, + RowFilterChain, + RowFilterUnion, + ConditionalRowFilter as BaseConditionalRowFilter, +) _PACK_I64 = struct.Struct(">q").pack +# The classes defined below are to provide constructors and members +# that have an interface that does not match the one used by the data +# client, for backwards compatibility purposes. -class RowFilter(object): - """Basic filter to apply to cells in a row. +# Each underscored class is an ABC. Make them into classes that can be +# instantiated with a placeholder to_dict method for consistency. - These values can be combined via :class:`RowFilterChain`, - :class:`RowFilterUnion` and :class:`ConditionalRowFilter`. - .. note:: - - This class is a do-nothing base class for all row filters. - """ - - -class _BoolFilter(RowFilter): +class _BoolFilter(_BaseBoolFilter): """Row filter that uses a boolean flag. :type flag: bool :param flag: An indicator if a setting is turned on or off. """ - def __init__(self, flag): - self.flag = flag - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return other.flag == self.flag - - def __ne__(self, other): - return not self == other - - -class SinkFilter(_BoolFilter): - """Advanced row filter to skip parent filters. - - :type flag: bool - :param flag: ADVANCED USE ONLY. Hook for introspection into the row filter. - Outputs all cells directly to the output of the read rather - than to any parent filter. Cannot be used within the - ``predicate_filter``, ``true_filter``, or ``false_filter`` - of a :class:`ConditionalRowFilter`. - """ - - def to_pb(self): - """Converts the row filter to a protobuf. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - return data_v2_pb2.RowFilter(sink=self.flag) - - -class PassAllFilter(_BoolFilter): - """Row filter equivalent to not filtering at all. - - :type flag: bool - :param flag: Matches all cells, regardless of input. Functionally - equivalent to leaving ``filter`` unset, but included for - completeness. - """ - - def to_pb(self): - """Converts the row filter to a protobuf. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - return data_v2_pb2.RowFilter(pass_all_filter=self.flag) + def _to_dict(self): + pass -class BlockAllFilter(_BoolFilter): - """Row filter that doesn't match any cells. - - :type flag: bool - :param flag: Does not match any cells, regardless of input. Useful for - temporarily disabling just part of a filter. - """ - - def to_pb(self): - """Converts the row filter to a protobuf. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - return data_v2_pb2.RowFilter(block_all_filter=self.flag) - - -class _RegexFilter(RowFilter): +class _RegexFilter(_BaseRegexFilter): """Row filter that uses a regular expression. The ``regex`` must be valid RE2 patterns. See Google's @@ -124,206 +80,52 @@ class _RegexFilter(RowFilter): will be encoded as ASCII. """ - def __init__(self, regex): - self.regex = _to_bytes(regex) - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return other.regex == self.regex - - def __ne__(self, other): - return not self == other + def _to_dict(self): + pass -class RowKeyRegexFilter(_RegexFilter): - """Row filter for a row key regular expression. - - The ``regex`` must be valid RE2 patterns. See Google's - `RE2 reference`_ for the accepted syntax. - - .. _RE2 reference: https://github.com/google/re2/wiki/Syntax - - .. note:: - - Special care need be used with the expression used. Since - each of these properties can contain arbitrary bytes, the ``\\C`` - escape sequence must be used if a true wildcard is desired. The ``.`` - character will not match the new line character ``\\n``, which may be - present in a binary value. - - :type regex: bytes - :param regex: A regular expression (RE2) to match cells from rows with row - keys that satisfy this regex. For a - ``CheckAndMutateRowRequest``, this filter is unnecessary - since the row key is already specified. - """ - - def to_pb(self): - """Converts the row filter to a protobuf. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - return data_v2_pb2.RowFilter(row_key_regex_filter=self.regex) - - -class RowSampleFilter(RowFilter): - """Matches all cells from a row with probability p. - - :type sample: float - :param sample: The probability of matching a cell (must be in the - interval ``(0, 1)`` The end points are excluded). - """ - - def __init__(self, sample): - self.sample = sample - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return other.sample == self.sample - - def __ne__(self, other): - return not self == other - - def to_pb(self): - """Converts the row filter to a protobuf. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - return data_v2_pb2.RowFilter(row_sample_filter=self.sample) - - -class FamilyNameRegexFilter(_RegexFilter): - """Row filter for a family name regular expression. - - The ``regex`` must be valid RE2 patterns. See Google's - `RE2 reference`_ for the accepted syntax. - - .. _RE2 reference: https://github.com/google/re2/wiki/Syntax +class TimestampRangeFilter(BaseTimestampRangeFilter): + """Row filter that limits cells to a range of time. - :type regex: str - :param regex: A regular expression (RE2) to match cells from columns in a - given column family. For technical reasons, the regex must - not contain the ``':'`` character, even if it is not being - used as a literal. + :type range_: :class:`TimestampRange` + :param range_: Range of time that cells should match against. """ - def to_pb(self): - """Converts the row filter to a protobuf. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - return data_v2_pb2.RowFilter(family_name_regex_filter=self.regex) - - -class ColumnQualifierRegexFilter(_RegexFilter): - """Row filter for a column qualifier regular expression. - - The ``regex`` must be valid RE2 patterns. See Google's - `RE2 reference`_ for the accepted syntax. + def __init__(self, range_): + self.range_ = range_ - .. _RE2 reference: https://github.com/google/re2/wiki/Syntax - .. note:: +class ExactValueFilter(ValueRegexFilter): + """Row filter for an exact value. - Special care need be used with the expression used. Since - each of these properties can contain arbitrary bytes, the ``\\C`` - escape sequence must be used if a true wildcard is desired. The ``.`` - character will not match the new line character ``\\n``, which may be - present in a binary value. - :type regex: bytes - :param regex: A regular expression (RE2) to match cells from column that - match this regex (irrespective of column family). + :type value: bytes or str or int + :param value: + a literal string encodable as ASCII, or the + equivalent bytes, or an integer (which will be packed into 8-bytes). """ - def to_pb(self): - """Converts the row filter to a protobuf. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - return data_v2_pb2.RowFilter(column_qualifier_regex_filter=self.regex) - - -class TimestampRange(object): - """Range of time with inclusive lower and exclusive upper bounds. + def __init__(self, value): + if isinstance(value, int): + value = _PACK_I64(value) + super(ExactValueFilter, self).__init__(value) - :type start: :class:`datetime.datetime` - :param start: (Optional) The (inclusive) lower bound of the timestamp - range. If omitted, defaults to Unix epoch. - :type end: :class:`datetime.datetime` - :param end: (Optional) The (exclusive) upper bound of the timestamp - range. If omitted, no upper bound is used. - """ +class _CellCountFilter(_BaseCellCountFilter): + """Row filter that uses an integer count of cells. - def __init__(self, start=None, end=None): - self.start = start - self.end = end - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return other.start == self.start and other.end == self.end - - def __ne__(self, other): - return not self == other - - def to_pb(self): - """Converts the :class:`TimestampRange` to a protobuf. - - :rtype: :class:`.data_v2_pb2.TimestampRange` - :returns: The converted current object. - """ - timestamp_range_kwargs = {} - if self.start is not None: - timestamp_range_kwargs["start_timestamp_micros"] = ( - _microseconds_from_datetime(self.start) // 1000 * 1000 - ) - if self.end is not None: - end_time = _microseconds_from_datetime(self.end) - if end_time % 1000 != 0: - end_time = end_time // 1000 * 1000 + 1000 - timestamp_range_kwargs["end_timestamp_micros"] = end_time - return data_v2_pb2.TimestampRange(**timestamp_range_kwargs) - - -class TimestampRangeFilter(RowFilter): - """Row filter that limits cells to a range of time. + The cell count is used as an offset or a limit for the number + of results returned. - :type range_: :class:`TimestampRange` - :param range_: Range of time that cells should match against. + :type num_cells: int + :param num_cells: An integer count / offset / limit. """ - def __init__(self, range_): - self.range_ = range_ - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return other.range_ == self.range_ - - def __ne__(self, other): - return not self == other + def _to_dict(self): + pass - def to_pb(self): - """Converts the row filter to a protobuf. - First converts the ``range_`` on the current object to a protobuf and - then uses it in the ``timestamp_range_filter`` field. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - return data_v2_pb2.RowFilter(timestamp_range_filter=self.range_.to_pb()) - - -class ColumnRangeFilter(RowFilter): +class ColumnRangeFilter(BaseColumnRangeFilter): """A row filter to restrict to a range of columns. Both the start and end column can be included or excluded in the range. @@ -368,342 +170,40 @@ def __init__( inclusive_start=None, inclusive_end=None, ): - self.column_family_id = column_family_id - - if inclusive_start is None: - inclusive_start = True - elif start_column is None: - raise ValueError( - "Inclusive start was specified but no " "start column was given." - ) - self.start_column = start_column - self.inclusive_start = inclusive_start - - if inclusive_end is None: - inclusive_end = True - elif end_column is None: - raise ValueError( - "Inclusive end was specified but no " "end column was given." - ) - self.end_column = end_column - self.inclusive_end = inclusive_end - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return ( - other.column_family_id == self.column_family_id - and other.start_column == self.start_column - and other.end_column == self.end_column - and other.inclusive_start == self.inclusive_start - and other.inclusive_end == self.inclusive_end - ) - - def __ne__(self, other): - return not self == other - - def to_pb(self): - """Converts the row filter to a protobuf. - - First converts to a :class:`.data_v2_pb2.ColumnRange` and then uses it - in the ``column_range_filter`` field. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - column_range_kwargs = {"family_name": self.column_family_id} - if self.start_column is not None: - if self.inclusive_start: - key = "start_qualifier_closed" - else: - key = "start_qualifier_open" - column_range_kwargs[key] = _to_bytes(self.start_column) - if self.end_column is not None: - if self.inclusive_end: - key = "end_qualifier_closed" - else: - key = "end_qualifier_open" - column_range_kwargs[key] = _to_bytes(self.end_column) - - column_range = data_v2_pb2.ColumnRange(**column_range_kwargs) - return data_v2_pb2.RowFilter(column_range_filter=column_range) - - -class ValueRegexFilter(_RegexFilter): - """Row filter for a value regular expression. - - The ``regex`` must be valid RE2 patterns. See Google's - `RE2 reference`_ for the accepted syntax. - - .. _RE2 reference: https://github.com/google/re2/wiki/Syntax - - .. note:: - - Special care need be used with the expression used. Since - each of these properties can contain arbitrary bytes, the ``\\C`` - escape sequence must be used if a true wildcard is desired. The ``.`` - character will not match the new line character ``\\n``, which may be - present in a binary value. - - :type regex: bytes or str - :param regex: A regular expression (RE2) to match cells with values that - match this regex. String values will be encoded as ASCII. - """ - - def to_pb(self): - """Converts the row filter to a protobuf. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - return data_v2_pb2.RowFilter(value_regex_filter=self.regex) - - -class ExactValueFilter(ValueRegexFilter): - """Row filter for an exact value. - - - :type value: bytes or str or int - :param value: - a literal string encodable as ASCII, or the - equivalent bytes, or an integer (which will be packed into 8-bytes). - """ - - def __init__(self, value): - if isinstance(value, int): - value = _PACK_I64(value) - super(ExactValueFilter, self).__init__(value) - - -class ValueRangeFilter(RowFilter): - """A range of values to restrict to in a row filter. - - Will only match cells that have values in this range. - - Both the start and end value can be included or excluded in the range. - By default, we include them both, but this can be changed with optional - flags. - - :type start_value: bytes - :param start_value: The start of the range of values. If no value is used, - the backend applies no lower bound to the values. - - :type end_value: bytes - :param end_value: The end of the range of values. If no value is used, - the backend applies no upper bound to the values. - - :type inclusive_start: bool - :param inclusive_start: Boolean indicating if the start value should be - included in the range (or excluded). Defaults - to :data:`True` if ``start_value`` is passed and - no ``inclusive_start`` was given. - - :type inclusive_end: bool - :param inclusive_end: Boolean indicating if the end value should be - included in the range (or excluded). Defaults - to :data:`True` if ``end_value`` is passed and - no ``inclusive_end`` was given. - - :raises: :class:`ValueError ` if ``inclusive_start`` - is set but no ``start_value`` is given or if ``inclusive_end`` - is set but no ``end_value`` is given - """ - - def __init__( - self, start_value=None, end_value=None, inclusive_start=None, inclusive_end=None - ): - if inclusive_start is None: - inclusive_start = True - elif start_value is None: - raise ValueError( - "Inclusive start was specified but no " "start value was given." - ) - if isinstance(start_value, int): - start_value = _PACK_I64(start_value) - self.start_value = start_value - self.inclusive_start = inclusive_start - - if inclusive_end is None: - inclusive_end = True - elif end_value is None: - raise ValueError( - "Inclusive end was specified but no " "end value was given." - ) - if isinstance(end_value, int): - end_value = _PACK_I64(end_value) - self.end_value = end_value - self.inclusive_end = inclusive_end - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return ( - other.start_value == self.start_value - and other.end_value == self.end_value - and other.inclusive_start == self.inclusive_start - and other.inclusive_end == self.inclusive_end + super(ColumnRangeFilter, self).__init__( + family_id=column_family_id, + start_qualifier=start_column, + end_qualifier=end_column, + inclusive_start=inclusive_start, + inclusive_end=inclusive_end, ) - def __ne__(self, other): - return not self == other - - def to_pb(self): - """Converts the row filter to a protobuf. - - First converts to a :class:`.data_v2_pb2.ValueRange` and then uses - it to create a row filter protobuf. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - value_range_kwargs = {} - if self.start_value is not None: - if self.inclusive_start: - key = "start_value_closed" - else: - key = "start_value_open" - value_range_kwargs[key] = _to_bytes(self.start_value) - if self.end_value is not None: - if self.inclusive_end: - key = "end_value_closed" - else: - key = "end_value_open" - value_range_kwargs[key] = _to_bytes(self.end_value) - - value_range = data_v2_pb2.ValueRange(**value_range_kwargs) - return data_v2_pb2.RowFilter(value_range_filter=value_range) - - -class _CellCountFilter(RowFilter): - """Row filter that uses an integer count of cells. + @property + def column_family_id(self): + return self.family_id - The cell count is used as an offset or a limit for the number - of results returned. + @column_family_id.setter + def column_family_id(self, column_family_id): + self.family_id = column_family_id - :type num_cells: int - :param num_cells: An integer count / offset / limit. - """ - - def __init__(self, num_cells): - self.num_cells = num_cells - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return other.num_cells == self.num_cells - - def __ne__(self, other): - return not self == other - - -class CellsRowOffsetFilter(_CellCountFilter): - """Row filter to skip cells in a row. + @property + def start_column(self): + return self.start_qualifier - :type num_cells: int - :param num_cells: Skips the first N cells of the row. - """ - - def to_pb(self): - """Converts the row filter to a protobuf. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - return data_v2_pb2.RowFilter(cells_per_row_offset_filter=self.num_cells) + @start_column.setter + def start_column(self, start_column): + self.start_qualifier = start_column + @property + def end_column(self): + return self.end_qualifier -class CellsRowLimitFilter(_CellCountFilter): - """Row filter to limit cells in a row. - - :type num_cells: int - :param num_cells: Matches only the first N cells of the row. - """ + @end_column.setter + def end_column(self, end_column): + self.end_qualifier = end_column - def to_pb(self): - """Converts the row filter to a protobuf. - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - return data_v2_pb2.RowFilter(cells_per_row_limit_filter=self.num_cells) - - -class CellsColumnLimitFilter(_CellCountFilter): - """Row filter to limit cells in a column. - - :type num_cells: int - :param num_cells: Matches only the most recent N cells within each column. - This filters a (family name, column) pair, based on - timestamps of each cell. - """ - - def to_pb(self): - """Converts the row filter to a protobuf. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - return data_v2_pb2.RowFilter(cells_per_column_limit_filter=self.num_cells) - - -class StripValueTransformerFilter(_BoolFilter): - """Row filter that transforms cells into empty string (0 bytes). - - :type flag: bool - :param flag: If :data:`True`, replaces each cell's value with the empty - string. As the name indicates, this is more useful as a - transformer than a generic query / filter. - """ - - def to_pb(self): - """Converts the row filter to a protobuf. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - return data_v2_pb2.RowFilter(strip_value_transformer=self.flag) - - -class ApplyLabelFilter(RowFilter): - """Filter to apply labels to cells. - - Intended to be used as an intermediate filter on a pre-existing filtered - result set. This way if two sets are combined, the label can tell where - the cell(s) originated.This allows the client to determine which results - were produced from which part of the filter. - - .. note:: - - Due to a technical limitation of the backend, it is not currently - possible to apply multiple labels to a cell. - - :type label: str - :param label: Label to apply to cells in the output row. Values must be - at most 15 characters long, and match the pattern - ``[a-z0-9\\-]+``. - """ - - def __init__(self, label): - self.label = label - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return other.label == self.label - - def __ne__(self, other): - return not self == other - - def to_pb(self): - """Converts the row filter to a protobuf. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - return data_v2_pb2.RowFilter(apply_label_transformer=self.label) - - -class _FilterCombination(RowFilter): +class _FilterCombination(_BaseFilterCombination): """Chain of row filters. Sends rows through several filters in sequence. The filters are "chained" @@ -714,69 +214,11 @@ class _FilterCombination(RowFilter): :param filters: List of :class:`RowFilter` """ - def __init__(self, filters=None): - if filters is None: - filters = [] - self.filters = filters + def _to_dict(self): + pass - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return other.filters == self.filters - def __ne__(self, other): - return not self == other - - -class RowFilterChain(_FilterCombination): - """Chain of row filters. - - Sends rows through several filters in sequence. The filters are "chained" - together to process a row. After the first filter is applied, the second - is applied to the filtered output and so on for subsequent filters. - - :type filters: list - :param filters: List of :class:`RowFilter` - """ - - def to_pb(self): - """Converts the row filter to a protobuf. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - chain = data_v2_pb2.RowFilter.Chain( - filters=[row_filter.to_pb() for row_filter in self.filters] - ) - return data_v2_pb2.RowFilter(chain=chain) - - -class RowFilterUnion(_FilterCombination): - """Union of row filters. - - Sends rows through several filters simultaneously, then - merges / interleaves all the filtered results together. - - If multiple cells are produced with the same column and timestamp, - they will all appear in the output row in an unspecified mutual order. - - :type filters: list - :param filters: List of :class:`RowFilter` - """ - - def to_pb(self): - """Converts the row filter to a protobuf. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - interleave = data_v2_pb2.RowFilter.Interleave( - filters=[row_filter.to_pb() for row_filter in self.filters] - ) - return data_v2_pb2.RowFilter(interleave=interleave) - - -class ConditionalRowFilter(RowFilter): +class ConditionalRowFilter(BaseConditionalRowFilter): """Conditional row filter which exhibits ternary behavior. Executes one of two filters based on another filter. If the ``base_filter`` @@ -806,33 +248,36 @@ class ConditionalRowFilter(RowFilter): will be returned in the false case. """ - def __init__(self, base_filter, true_filter=None, false_filter=None): - self.base_filter = base_filter - self.true_filter = true_filter - self.false_filter = false_filter - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return ( - other.base_filter == self.base_filter - and other.true_filter == self.true_filter - and other.false_filter == self.false_filter - ) - - def __ne__(self, other): - return not self == other - - def to_pb(self): - """Converts the row filter to a protobuf. - - :rtype: :class:`.data_v2_pb2.RowFilter` - :returns: The converted current object. - """ - condition_kwargs = {"predicate_filter": self.base_filter.to_pb()} - if self.true_filter is not None: - condition_kwargs["true_filter"] = self.true_filter.to_pb() - if self.false_filter is not None: - condition_kwargs["false_filter"] = self.false_filter.to_pb() - condition = data_v2_pb2.RowFilter.Condition(**condition_kwargs) - return data_v2_pb2.RowFilter(condition=condition) + @property + def base_filter(self): + return self.predicate_filter + + @base_filter.setter + def base_filter(self, value: RowFilter): + self.predicate_filter = value + + +__all__ = ( + RowFilter, + SinkFilter, + PassAllFilter, + BlockAllFilter, + RowKeyRegexFilter, + RowSampleFilter, + FamilyNameRegexFilter, + ColumnQualifierRegexFilter, + TimestampRange, + TimestampRangeFilter, + ColumnRangeFilter, + ValueRegexFilter, + ExactValueFilter, + ValueRangeFilter, + CellsRowOffsetFilter, + CellsRowLimitFilter, + CellsColumnLimitFilter, + StripValueTransformerFilter, + ApplyLabelFilter, + RowFilterChain, + RowFilterUnion, + ConditionalRowFilter, +) diff --git a/google/cloud/bigtable/table.py b/google/cloud/bigtable/table.py index 9ce7c312a..9d2897daa 100644 --- a/google/cloud/bigtable/table.py +++ b/google/cloud/bigtable/table.py @@ -1327,7 +1327,7 @@ def _create_row_request( raise ValueError("Row range and row set cannot be " "set simultaneously") if filter_ is not None: - request_kwargs["filter"] = filter_.to_pb() + request_kwargs["filter"] = filter_._to_pb() if limit is not None: request_kwargs["rows_limit"] = limit if app_profile_id is not None: diff --git a/tests/unit/v2_client/test_row.py b/tests/unit/v2_client/test_row.py index 894b4d036..b8a1917a9 100644 --- a/tests/unit/v2_client/test_row.py +++ b/tests/unit/v2_client/test_row.py @@ -282,7 +282,7 @@ def _delete_cells_helper(time_range=None): ) ) if time_range is not None: - expected_pb.delete_from_column.time_range._pb.CopyFrom(time_range.to_pb()._pb) + expected_pb.delete_from_column.time_range._pb.CopyFrom(time_range._to_pb()._pb) assert row._pb_mutations == [expected_pb] diff --git a/tests/unit/v2_client/test_row_data.py b/tests/unit/v2_client/test_row_data.py index 7c2987b56..e2f13be5e 100644 --- a/tests/unit/v2_client/test_row_data.py +++ b/tests/unit/v2_client/test_row_data.py @@ -928,7 +928,7 @@ def test_RRRM_build_updated_request(rrrm_data): row_filter = RowSampleFilter(0.33) last_scanned_key = b"row_key25" request = _ReadRowsRequestPB( - filter=row_filter.to_pb(), + filter=row_filter._to_pb(), rows_limit=8, table_name=TABLE_NAME, app_profile_id="app-profile-id-1", @@ -941,7 +941,7 @@ def test_RRRM_build_updated_request(rrrm_data): expected_result = _ReadRowsRequestPB( table_name=TABLE_NAME, - filter=row_filter.to_pb(), + filter=row_filter._to_pb(), rows_limit=6, app_profile_id="app-profile-id-1", ) @@ -976,7 +976,7 @@ def test_RRRM_build_updated_request_no_start_key(): row_filter = RowSampleFilter(0.33) last_scanned_key = b"row_key25" request = _ReadRowsRequestPB( - filter=row_filter.to_pb(), rows_limit=8, table_name=TABLE_NAME + filter=row_filter._to_pb(), rows_limit=8, table_name=TABLE_NAME ) row_range1 = types.RowRange(end_key_open=b"row_key29") request.rows.row_ranges.append(row_range1) @@ -986,7 +986,7 @@ def test_RRRM_build_updated_request_no_start_key(): result = request_manager.build_updated_request() expected_result = _ReadRowsRequestPB( - table_name=TABLE_NAME, filter=row_filter.to_pb(), rows_limit=6 + table_name=TABLE_NAME, filter=row_filter._to_pb(), rows_limit=6 ) row_range2 = types.RowRange( @@ -1004,7 +1004,7 @@ def test_RRRM_build_updated_request_no_end_key(): row_filter = RowSampleFilter(0.33) last_scanned_key = b"row_key25" request = _ReadRowsRequestPB( - filter=row_filter.to_pb(), rows_limit=8, table_name=TABLE_NAME + filter=row_filter._to_pb(), rows_limit=8, table_name=TABLE_NAME ) row_range1 = types.RowRange(start_key_closed=b"row_key20") @@ -1015,7 +1015,7 @@ def test_RRRM_build_updated_request_no_end_key(): result = request_manager.build_updated_request() expected_result = _ReadRowsRequestPB( - table_name=TABLE_NAME, filter=row_filter.to_pb(), rows_limit=6 + table_name=TABLE_NAME, filter=row_filter._to_pb(), rows_limit=6 ) row_range2 = types.RowRange(start_key_open=last_scanned_key) expected_result.rows.row_ranges.append(row_range2) @@ -1029,7 +1029,7 @@ def test_RRRM_build_updated_request_rows(): row_filter = RowSampleFilter(0.33) last_scanned_key = b"row_key4" request = _ReadRowsRequestPB( - filter=row_filter.to_pb(), rows_limit=5, table_name=TABLE_NAME + filter=row_filter._to_pb(), rows_limit=5, table_name=TABLE_NAME ) request.rows.row_keys.extend( [b"row_key1", b"row_key2", b"row_key4", b"row_key5", b"row_key7", b"row_key9"] @@ -1040,7 +1040,7 @@ def test_RRRM_build_updated_request_rows(): result = request_manager.build_updated_request() expected_result = _ReadRowsRequestPB( - table_name=TABLE_NAME, filter=row_filter.to_pb(), rows_limit=2 + table_name=TABLE_NAME, filter=row_filter._to_pb(), rows_limit=2 ) expected_result.rows.row_keys.extend([b"row_key5", b"row_key7", b"row_key9"]) diff --git a/tests/unit/v2_client/test_row_filters.py b/tests/unit/v2_client/test_row_filters.py index b312cb942..479447b5c 100644 --- a/tests/unit/v2_client/test_row_filters.py +++ b/tests/unit/v2_client/test_row_filters.py @@ -56,7 +56,7 @@ def test_sink_filter_to_pb(): flag = True row_filter = SinkFilter(flag) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(sink=flag) assert pb_val == expected_pb @@ -66,7 +66,7 @@ def test_pass_all_filter_to_pb(): flag = True row_filter = PassAllFilter(flag) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(pass_all_filter=flag) assert pb_val == expected_pb @@ -76,7 +76,7 @@ def test_block_all_filter_to_pb(): flag = True row_filter = BlockAllFilter(flag) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(block_all_filter=flag) assert pb_val == expected_pb @@ -129,7 +129,7 @@ def test_row_key_regex_filter_to_pb(): regex = b"row-key-regex" row_filter = RowKeyRegexFilter(regex) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(row_key_regex_filter=regex) assert pb_val == expected_pb @@ -175,7 +175,7 @@ def test_row_sample_filter_to_pb(): sample = 0.25 row_filter = RowSampleFilter(sample) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(row_sample_filter=sample) assert pb_val == expected_pb @@ -185,7 +185,7 @@ def test_family_name_regex_filter_to_pb(): regex = "family-regex" row_filter = FamilyNameRegexFilter(regex) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(family_name_regex_filter=regex) assert pb_val == expected_pb @@ -195,7 +195,7 @@ def test_column_qualifier_regext_filter_to_pb(): regex = b"column-regex" row_filter = ColumnQualifierRegexFilter(regex) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(column_qualifier_regex_filter=regex) assert pb_val == expected_pb @@ -251,7 +251,7 @@ def _timestamp_range_to_pb_helper(pb_kwargs, start=None, end=None): end = _EPOCH + datetime.timedelta(microseconds=end) time_range = TimestampRange(start=start, end=end) expected_pb = _TimestampRangePB(**pb_kwargs) - time_pb = time_range.to_pb() + time_pb = time_range._to_pb() assert time_pb.start_timestamp_micros == expected_pb.start_timestamp_micros assert time_pb.end_timestamp_micros == expected_pb.end_timestamp_micros assert time_pb == expected_pb @@ -332,7 +332,7 @@ def test_timestamp_range_filter_to_pb(): range_ = TimestampRange() row_filter = TimestampRangeFilter(range_) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(timestamp_range_filter=_TimestampRangePB()) assert pb_val == expected_pb @@ -454,7 +454,7 @@ def test_column_range_filter_to_pb(): row_filter = ColumnRangeFilter(column_family_id) col_range_pb = _ColumnRangePB(family_name=column_family_id) expected_pb = _RowFilterPB(column_range_filter=col_range_pb) - assert row_filter.to_pb() == expected_pb + assert row_filter._to_pb() == expected_pb def test_column_range_filter_to_pb_inclusive_start(): @@ -467,7 +467,7 @@ def test_column_range_filter_to_pb_inclusive_start(): family_name=column_family_id, start_qualifier_closed=column ) expected_pb = _RowFilterPB(column_range_filter=col_range_pb) - assert row_filter.to_pb() == expected_pb + assert row_filter._to_pb() == expected_pb def test_column_range_filter_to_pb_exclusive_start(): @@ -482,7 +482,7 @@ def test_column_range_filter_to_pb_exclusive_start(): family_name=column_family_id, start_qualifier_open=column ) expected_pb = _RowFilterPB(column_range_filter=col_range_pb) - assert row_filter.to_pb() == expected_pb + assert row_filter._to_pb() == expected_pb def test_column_range_filter_to_pb_inclusive_end(): @@ -495,7 +495,7 @@ def test_column_range_filter_to_pb_inclusive_end(): family_name=column_family_id, end_qualifier_closed=column ) expected_pb = _RowFilterPB(column_range_filter=col_range_pb) - assert row_filter.to_pb() == expected_pb + assert row_filter._to_pb() == expected_pb def test_column_range_filter_to_pb_exclusive_end(): @@ -510,7 +510,7 @@ def test_column_range_filter_to_pb_exclusive_end(): family_name=column_family_id, end_qualifier_open=column ) expected_pb = _RowFilterPB(column_range_filter=col_range_pb) - assert row_filter.to_pb() == expected_pb + assert row_filter._to_pb() == expected_pb def test_value_regex_filter_to_pb_w_bytes(): @@ -518,7 +518,7 @@ def test_value_regex_filter_to_pb_w_bytes(): value = regex = b"value-regex" row_filter = ValueRegexFilter(value) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(value_regex_filter=regex) assert pb_val == expected_pb @@ -529,7 +529,7 @@ def test_value_regex_filter_to_pb_w_str(): value = "value-regex" regex = value.encode("ascii") row_filter = ValueRegexFilter(value) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(value_regex_filter=regex) assert pb_val == expected_pb @@ -539,7 +539,7 @@ def test_exact_value_filter_to_pb_w_bytes(): value = regex = b"value-regex" row_filter = ExactValueFilter(value) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(value_regex_filter=regex) assert pb_val == expected_pb @@ -550,7 +550,7 @@ def test_exact_value_filter_to_pb_w_str(): value = "value-regex" regex = value.encode("ascii") row_filter = ExactValueFilter(value) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(value_regex_filter=regex) assert pb_val == expected_pb @@ -562,7 +562,7 @@ def test_exact_value_filter_to_pb_w_int(): value = 1 regex = struct.Struct(">q").pack(value) row_filter = ExactValueFilter(value) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(value_regex_filter=regex) assert pb_val == expected_pb @@ -689,7 +689,7 @@ def test_value_range_filter_to_pb(): row_filter = ValueRangeFilter() expected_pb = _RowFilterPB(value_range_filter=_ValueRangePB()) - assert row_filter.to_pb() == expected_pb + assert row_filter._to_pb() == expected_pb def test_value_range_filter_to_pb_inclusive_start(): @@ -699,7 +699,7 @@ def test_value_range_filter_to_pb_inclusive_start(): row_filter = ValueRangeFilter(start_value=value) val_range_pb = _ValueRangePB(start_value_closed=value) expected_pb = _RowFilterPB(value_range_filter=val_range_pb) - assert row_filter.to_pb() == expected_pb + assert row_filter._to_pb() == expected_pb def test_value_range_filter_to_pb_exclusive_start(): @@ -709,7 +709,7 @@ def test_value_range_filter_to_pb_exclusive_start(): row_filter = ValueRangeFilter(start_value=value, inclusive_start=False) val_range_pb = _ValueRangePB(start_value_open=value) expected_pb = _RowFilterPB(value_range_filter=val_range_pb) - assert row_filter.to_pb() == expected_pb + assert row_filter._to_pb() == expected_pb def test_value_range_filter_to_pb_inclusive_end(): @@ -719,7 +719,7 @@ def test_value_range_filter_to_pb_inclusive_end(): row_filter = ValueRangeFilter(end_value=value) val_range_pb = _ValueRangePB(end_value_closed=value) expected_pb = _RowFilterPB(value_range_filter=val_range_pb) - assert row_filter.to_pb() == expected_pb + assert row_filter._to_pb() == expected_pb def test_value_range_filter_to_pb_exclusive_end(): @@ -729,7 +729,7 @@ def test_value_range_filter_to_pb_exclusive_end(): row_filter = ValueRangeFilter(end_value=value, inclusive_end=False) val_range_pb = _ValueRangePB(end_value_open=value) expected_pb = _RowFilterPB(value_range_filter=val_range_pb) - assert row_filter.to_pb() == expected_pb + assert row_filter._to_pb() == expected_pb def test_cell_count_constructor(): @@ -772,7 +772,7 @@ def test_cells_row_offset_filter_to_pb(): num_cells = 76 row_filter = CellsRowOffsetFilter(num_cells) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(cells_per_row_offset_filter=num_cells) assert pb_val == expected_pb @@ -782,7 +782,7 @@ def test_cells_row_limit_filter_to_pb(): num_cells = 189 row_filter = CellsRowLimitFilter(num_cells) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(cells_per_row_limit_filter=num_cells) assert pb_val == expected_pb @@ -792,7 +792,7 @@ def test_cells_column_limit_filter_to_pb(): num_cells = 10 row_filter = CellsColumnLimitFilter(num_cells) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(cells_per_column_limit_filter=num_cells) assert pb_val == expected_pb @@ -802,7 +802,7 @@ def test_strip_value_transformer_filter_to_pb(): flag = True row_filter = StripValueTransformerFilter(flag) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(strip_value_transformer=flag) assert pb_val == expected_pb @@ -848,7 +848,7 @@ def test_apply_label_filter_to_pb(): label = "label" row_filter = ApplyLabelFilter(label) - pb_val = row_filter.to_pb() + pb_val = row_filter._to_pb() expected_pb = _RowFilterPB(apply_label_transformer=label) assert pb_val == expected_pb @@ -902,13 +902,13 @@ def test_row_filter_chain_to_pb(): from google.cloud.bigtable.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) - row_filter1_pb = row_filter1.to_pb() + row_filter1_pb = row_filter1._to_pb() row_filter2 = RowSampleFilter(0.25) - row_filter2_pb = row_filter2.to_pb() + row_filter2_pb = row_filter2._to_pb() row_filter3 = RowFilterChain(filters=[row_filter1, row_filter2]) - filter_pb = row_filter3.to_pb() + filter_pb = row_filter3._to_pb() expected_pb = _RowFilterPB( chain=_RowFilterChainPB(filters=[row_filter1_pb, row_filter2_pb]) @@ -926,13 +926,13 @@ def test_row_filter_chain_to_pb_nested(): row_filter2 = RowSampleFilter(0.25) row_filter3 = RowFilterChain(filters=[row_filter1, row_filter2]) - row_filter3_pb = row_filter3.to_pb() + row_filter3_pb = row_filter3._to_pb() row_filter4 = CellsRowLimitFilter(11) - row_filter4_pb = row_filter4.to_pb() + row_filter4_pb = row_filter4._to_pb() row_filter5 = RowFilterChain(filters=[row_filter3, row_filter4]) - filter_pb = row_filter5.to_pb() + filter_pb = row_filter5._to_pb() expected_pb = _RowFilterPB( chain=_RowFilterChainPB(filters=[row_filter3_pb, row_filter4_pb]) @@ -946,13 +946,13 @@ def test_row_filter_union_to_pb(): from google.cloud.bigtable.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) - row_filter1_pb = row_filter1.to_pb() + row_filter1_pb = row_filter1._to_pb() row_filter2 = RowSampleFilter(0.25) - row_filter2_pb = row_filter2.to_pb() + row_filter2_pb = row_filter2._to_pb() row_filter3 = RowFilterUnion(filters=[row_filter1, row_filter2]) - filter_pb = row_filter3.to_pb() + filter_pb = row_filter3._to_pb() expected_pb = _RowFilterPB( interleave=_RowFilterInterleavePB(filters=[row_filter1_pb, row_filter2_pb]) @@ -970,13 +970,13 @@ def test_row_filter_union_to_pb_nested(): row_filter2 = RowSampleFilter(0.25) row_filter3 = RowFilterUnion(filters=[row_filter1, row_filter2]) - row_filter3_pb = row_filter3.to_pb() + row_filter3_pb = row_filter3._to_pb() row_filter4 = CellsRowLimitFilter(11) - row_filter4_pb = row_filter4.to_pb() + row_filter4_pb = row_filter4._to_pb() row_filter5 = RowFilterUnion(filters=[row_filter3, row_filter4]) - filter_pb = row_filter5.to_pb() + filter_pb = row_filter5._to_pb() expected_pb = _RowFilterPB( interleave=_RowFilterInterleavePB(filters=[row_filter3_pb, row_filter4_pb]) @@ -1049,18 +1049,18 @@ def test_conditional_row_filter_to_pb(): from google.cloud.bigtable.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) - row_filter1_pb = row_filter1.to_pb() + row_filter1_pb = row_filter1._to_pb() row_filter2 = RowSampleFilter(0.25) - row_filter2_pb = row_filter2.to_pb() + row_filter2_pb = row_filter2._to_pb() row_filter3 = CellsRowOffsetFilter(11) - row_filter3_pb = row_filter3.to_pb() + row_filter3_pb = row_filter3._to_pb() row_filter4 = ConditionalRowFilter( row_filter1, true_filter=row_filter2, false_filter=row_filter3 ) - filter_pb = row_filter4.to_pb() + filter_pb = row_filter4._to_pb() expected_pb = _RowFilterPB( condition=_RowFilterConditionPB( @@ -1078,13 +1078,13 @@ def test_conditional_row_filter_to_pb_true_only(): from google.cloud.bigtable.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) - row_filter1_pb = row_filter1.to_pb() + row_filter1_pb = row_filter1._to_pb() row_filter2 = RowSampleFilter(0.25) - row_filter2_pb = row_filter2.to_pb() + row_filter2_pb = row_filter2._to_pb() row_filter3 = ConditionalRowFilter(row_filter1, true_filter=row_filter2) - filter_pb = row_filter3.to_pb() + filter_pb = row_filter3._to_pb() expected_pb = _RowFilterPB( condition=_RowFilterConditionPB( @@ -1100,13 +1100,13 @@ def test_conditional_row_filter_to_pb_false_only(): from google.cloud.bigtable.row_filters import StripValueTransformerFilter row_filter1 = StripValueTransformerFilter(True) - row_filter1_pb = row_filter1.to_pb() + row_filter1_pb = row_filter1._to_pb() row_filter2 = RowSampleFilter(0.25) - row_filter2_pb = row_filter2.to_pb() + row_filter2_pb = row_filter2._to_pb() row_filter3 = ConditionalRowFilter(row_filter1, false_filter=row_filter2) - filter_pb = row_filter3.to_pb() + filter_pb = row_filter3._to_pb() expected_pb = _RowFilterPB( condition=_RowFilterConditionPB( diff --git a/tests/unit/v2_client/test_table.py b/tests/unit/v2_client/test_table.py index 57eb707c4..5601f6b5e 100644 --- a/tests/unit/v2_client/test_table.py +++ b/tests/unit/v2_client/test_table.py @@ -2133,7 +2133,7 @@ def test__create_row_request_with_filter(): row_filter = RowSampleFilter(0.33) result = _create_row_request(table_name, filter_=row_filter) expected_result = _ReadRowsRequestPB( - table_name=table_name, filter=row_filter.to_pb() + table_name=table_name, filter=row_filter._to_pb() ) assert result == expected_result From f6b90f73067ffadec45a76b465def26b0635408a Mon Sep 17 00:00:00 2001 From: Kevin Zheng Date: Wed, 7 Jan 2026 21:08:32 +0000 Subject: [PATCH 2/5] Changed attribute mapping + moved ABC classes to test file + changed __all__ --- google/cloud/bigtable/row_filters.py | 188 +++++++---------------- tests/unit/v2_client/test_row_filters.py | 63 ++++---- 2 files changed, 86 insertions(+), 165 deletions(-) diff --git a/google/cloud/bigtable/row_filters.py b/google/cloud/bigtable/row_filters.py index 8aa1a358c..a7581e339 100644 --- a/google/cloud/bigtable/row_filters.py +++ b/google/cloud/bigtable/row_filters.py @@ -17,13 +17,13 @@ import struct -from google.cloud.bigtable.data.row_filters import ( +from google.cloud.bigtable.data.row_filters import ( # noqa: F401 RowFilter, SinkFilter, - _BoolFilter as _BaseBoolFilter, + _BoolFilter, PassAllFilter, BlockAllFilter, - _RegexFilter as _BaseRegexFilter, + _RegexFilter, RowKeyRegexFilter, RowSampleFilter, FamilyNameRegexFilter, @@ -33,13 +33,13 @@ ColumnRangeFilter as BaseColumnRangeFilter, ValueRegexFilter, ValueRangeFilter, - _CellCountFilter as _BaseCellCountFilter, + _CellCountFilter, CellsRowOffsetFilter, CellsRowLimitFilter, CellsColumnLimitFilter, StripValueTransformerFilter, ApplyLabelFilter, - _FilterCombination as _BaseFilterCombination, + _FilterCombination, RowFilterChain, RowFilterUnion, ConditionalRowFilter as BaseConditionalRowFilter, @@ -47,41 +47,40 @@ _PACK_I64 = struct.Struct(">q").pack -# The classes defined below are to provide constructors and members -# that have an interface that does not match the one used by the data -# client, for backwards compatibility purposes. - -# Each underscored class is an ABC. Make them into classes that can be -# instantiated with a placeholder to_dict method for consistency. +class _MappableAttributesMixin: + """ + Mixin for classes that need some of their attribute names remapped. -class _BoolFilter(_BaseBoolFilter): - """Row filter that uses a boolean flag. + This is for taking some of the classes from the data client row filters + that are 1:1 with their legacy client counterparts but with some of their + attributes renamed. To use in a class, override the base class with this mixin + class and define a map _attribute_map from legacy client attributes to data client + attributes. - :type flag: bool - :param flag: An indicator if a setting is turned on or off. + Attributes are remapped and redefined in __init__ as well as getattr/setattr. """ - def _to_dict(self): - pass - + def __init__(self, *args, **kwargs): + new_kwargs = {self._attribute_map.get(k, k): v for (k, v) in kwargs.items()} + super(_MappableAttributesMixin, self).__init__(*args, **new_kwargs) -class _RegexFilter(_BaseRegexFilter): - """Row filter that uses a regular expression. + def __getattr__(self, name): + if name not in self._attribute_map: + raise AttributeError + return getattr(self, self._attribute_map[name]) - The ``regex`` must be valid RE2 patterns. See Google's - `RE2 reference`_ for the accepted syntax. + def __setattr__(self, name, value): + attribute = self._attribute_map.get(name, name) + super(_MappableAttributesMixin, self).__setattr__(attribute, value) - .. _RE2 reference: https://github.com/google/re2/wiki/Syntax - :type regex: bytes or str - :param regex: - A regular expression (RE2) for some row filter. String values - will be encoded as ASCII. - """ +# The classes defined below are to provide constructors and members +# that have an interface that does not match the one used by the data +# client, for backwards compatibility purposes. - def _to_dict(self): - pass +# Each underscored class is an ABC. Make them into classes that can be +# instantiated with a placeholder to_dict method for consistency. class TimestampRangeFilter(BaseTimestampRangeFilter): @@ -111,21 +110,7 @@ def __init__(self, value): super(ExactValueFilter, self).__init__(value) -class _CellCountFilter(_BaseCellCountFilter): - """Row filter that uses an integer count of cells. - - The cell count is used as an offset or a limit for the number - of results returned. - - :type num_cells: int - :param num_cells: An integer count / offset / limit. - """ - - def _to_dict(self): - pass - - -class ColumnRangeFilter(BaseColumnRangeFilter): +class ColumnRangeFilter(_MappableAttributesMixin, BaseColumnRangeFilter): """A row filter to restrict to a range of columns. Both the start and end column can be included or excluded in the range. @@ -162,63 +147,14 @@ class ColumnRangeFilter(BaseColumnRangeFilter): is set but no ``end_column`` is given """ - def __init__( - self, - column_family_id, - start_column=None, - end_column=None, - inclusive_start=None, - inclusive_end=None, - ): - super(ColumnRangeFilter, self).__init__( - family_id=column_family_id, - start_qualifier=start_column, - end_qualifier=end_column, - inclusive_start=inclusive_start, - inclusive_end=inclusive_end, - ) - - @property - def column_family_id(self): - return self.family_id - - @column_family_id.setter - def column_family_id(self, column_family_id): - self.family_id = column_family_id - - @property - def start_column(self): - return self.start_qualifier - - @start_column.setter - def start_column(self, start_column): - self.start_qualifier = start_column - - @property - def end_column(self): - return self.end_qualifier - - @end_column.setter - def end_column(self, end_column): - self.end_qualifier = end_column - - -class _FilterCombination(_BaseFilterCombination): - """Chain of row filters. - - Sends rows through several filters in sequence. The filters are "chained" - together to process a row. After the first filter is applied, the second - is applied to the filtered output and so on for subsequent filters. - - :type filters: list - :param filters: List of :class:`RowFilter` - """ + _attribute_map = { + "column_family_id": "family_id", + "start_column": "start_qualifier", + "end_column": "end_qualifier", + } - def _to_dict(self): - pass - -class ConditionalRowFilter(BaseConditionalRowFilter): +class ConditionalRowFilter(_MappableAttributesMixin, BaseConditionalRowFilter): """Conditional row filter which exhibits ternary behavior. Executes one of two filters based on another filter. If the ``base_filter`` @@ -248,36 +184,30 @@ class ConditionalRowFilter(BaseConditionalRowFilter): will be returned in the false case. """ - @property - def base_filter(self): - return self.predicate_filter - - @base_filter.setter - def base_filter(self, value: RowFilter): - self.predicate_filter = value + _attribute_map = {"base_filter": "predicate_filter"} __all__ = ( - RowFilter, - SinkFilter, - PassAllFilter, - BlockAllFilter, - RowKeyRegexFilter, - RowSampleFilter, - FamilyNameRegexFilter, - ColumnQualifierRegexFilter, - TimestampRange, - TimestampRangeFilter, - ColumnRangeFilter, - ValueRegexFilter, - ExactValueFilter, - ValueRangeFilter, - CellsRowOffsetFilter, - CellsRowLimitFilter, - CellsColumnLimitFilter, - StripValueTransformerFilter, - ApplyLabelFilter, - RowFilterChain, - RowFilterUnion, - ConditionalRowFilter, + "RowFilter", + "SinkFilter", + "PassAllFilter", + "BlockAllFilter", + "RowKeyRegexFilter", + "RowSampleFilter", + "FamilyNameRegexFilter", + "ColumnQualifierRegexFilter", + "TimestampRange", + "TimestampRangeFilter", + "ColumnRangeFilter", + "ValueRegexFilter", + "ExactValueFilter", + "ValueRangeFilter", + "CellsRowOffsetFilter", + "CellsRowLimitFilter", + "CellsColumnLimitFilter", + "StripValueTransformerFilter", + "ApplyLabelFilter", + "RowFilterChain", + "RowFilterUnion", + "ConditionalRowFilter", ) diff --git a/tests/unit/v2_client/test_row_filters.py b/tests/unit/v2_client/test_row_filters.py index 479447b5c..03055050f 100644 --- a/tests/unit/v2_client/test_row_filters.py +++ b/tests/unit/v2_client/test_row_filters.py @@ -15,18 +15,21 @@ import pytest +from google.cloud.bigtable.row_filters import ( + _BoolFilter as _BaseBoolFilter, + _RegexFilter as _BaseRegexFilter, + _CellCountFilter as _BaseCellCountFilter, + _FilterCombination as _BaseFilterCombination, +) -def test_bool_filter_constructor(): - from google.cloud.bigtable.row_filters import _BoolFilter +def test_bool_filter_constructor(): flag = object() row_filter = _BoolFilter(flag) assert row_filter.flag is flag def test_bool_filter___eq__type_differ(): - from google.cloud.bigtable.row_filters import _BoolFilter - flag = object() row_filter1 = _BoolFilter(flag) row_filter2 = object() @@ -34,8 +37,6 @@ def test_bool_filter___eq__type_differ(): def test_bool_filter___eq__same_value(): - from google.cloud.bigtable.row_filters import _BoolFilter - flag = object() row_filter1 = _BoolFilter(flag) row_filter2 = _BoolFilter(flag) @@ -43,8 +44,6 @@ def test_bool_filter___eq__same_value(): def test_bool_filter___ne__same_value(): - from google.cloud.bigtable.row_filters import _BoolFilter - flag = object() row_filter1 = _BoolFilter(flag) row_filter2 = _BoolFilter(flag) @@ -82,24 +81,18 @@ def test_block_all_filter_to_pb(): def test_regex_filterconstructor(): - from google.cloud.bigtable.row_filters import _RegexFilter - regex = b"abc" row_filter = _RegexFilter(regex) assert row_filter.regex is regex def test_regex_filterconstructor_non_bytes(): - from google.cloud.bigtable.row_filters import _RegexFilter - regex = "abc" row_filter = _RegexFilter(regex) assert row_filter.regex == b"abc" def test_regex_filter__eq__type_differ(): - from google.cloud.bigtable.row_filters import _RegexFilter - regex = b"def-rgx" row_filter1 = _RegexFilter(regex) row_filter2 = object() @@ -107,8 +100,6 @@ def test_regex_filter__eq__type_differ(): def test_regex_filter__eq__same_value(): - from google.cloud.bigtable.row_filters import _RegexFilter - regex = b"trex-regex" row_filter1 = _RegexFilter(regex) row_filter2 = _RegexFilter(regex) @@ -116,8 +107,6 @@ def test_regex_filter__eq__same_value(): def test_regex_filter__ne__same_value(): - from google.cloud.bigtable.row_filters import _RegexFilter - regex = b"abc" row_filter1 = _RegexFilter(regex) row_filter2 = _RegexFilter(regex) @@ -733,16 +722,12 @@ def test_value_range_filter_to_pb_exclusive_end(): def test_cell_count_constructor(): - from google.cloud.bigtable.row_filters import _CellCountFilter - num_cells = object() row_filter = _CellCountFilter(num_cells) assert row_filter.num_cells is num_cells def test_cell_count___eq__type_differ(): - from google.cloud.bigtable.row_filters import _CellCountFilter - num_cells = object() row_filter1 = _CellCountFilter(num_cells) row_filter2 = object() @@ -750,8 +735,6 @@ def test_cell_count___eq__type_differ(): def test_cell_count___eq__same_value(): - from google.cloud.bigtable.row_filters import _CellCountFilter - num_cells = object() row_filter1 = _CellCountFilter(num_cells) row_filter2 = _CellCountFilter(num_cells) @@ -759,8 +742,6 @@ def test_cell_count___eq__same_value(): def test_cell_count___ne__same_value(): - from google.cloud.bigtable.row_filters import _CellCountFilter - num_cells = object() row_filter1 = _CellCountFilter(num_cells) row_filter2 = _CellCountFilter(num_cells) @@ -854,23 +835,17 @@ def test_apply_label_filter_to_pb(): def test_filter_combination_constructor_defaults(): - from google.cloud.bigtable.row_filters import _FilterCombination - row_filter = _FilterCombination() assert row_filter.filters == [] def test_filter_combination_constructor_explicit(): - from google.cloud.bigtable.row_filters import _FilterCombination - filters = object() row_filter = _FilterCombination(filters=filters) assert row_filter.filters is filters def test_filter_combination___eq__(): - from google.cloud.bigtable.row_filters import _FilterCombination - filters = object() row_filter1 = _FilterCombination(filters=filters) row_filter2 = _FilterCombination(filters=filters) @@ -878,8 +853,6 @@ def test_filter_combination___eq__(): def test_filter_combination___eq__type_differ(): - from google.cloud.bigtable.row_filters import _FilterCombination - filters = object() row_filter1 = _FilterCombination(filters=filters) row_filter2 = object() @@ -887,8 +860,6 @@ def test_filter_combination___eq__type_differ(): def test_filter_combination___ne__(): - from google.cloud.bigtable.row_filters import _FilterCombination - filters = object() other_filters = object() row_filter1 = _FilterCombination(filters=filters) @@ -1116,6 +1087,26 @@ def test_conditional_row_filter_to_pb_false_only(): assert filter_pb == expected_pb +class _BoolFilter(_BaseBoolFilter): + def _to_dict(self): + pass + + +class _RegexFilter(_BaseRegexFilter): + def _to_dict(self): + pass + + +class _CellCountFilter(_BaseCellCountFilter): + def _to_dict(self): + pass + + +class _FilterCombination(_BaseFilterCombination): + def _to_dict(self): + pass + + def _ColumnRangePB(*args, **kw): from google.cloud.bigtable_v2.types import data as data_v2_pb2 From 8cb8af0e355324d70b48e25ad2ace738a1fb3e4b Mon Sep 17 00:00:00 2001 From: Kevin Zheng Date: Wed, 14 Jan 2026 18:35:15 +0000 Subject: [PATCH 3/5] feat: Rerouted CheckAndMutateRows and ReadModifyWriteRows --- google/cloud/bigtable/row.py | 159 ++++++---------- google/cloud/bigtable/table.py | 2 +- tests/system/v2_client/test_data_api.py | 39 +--- tests/unit/v2_client/test_row.py | 240 ++++++++++-------------- tests/unit/v2_client/test_table.py | 3 +- 5 files changed, 171 insertions(+), 272 deletions(-) diff --git a/google/cloud/bigtable/row.py b/google/cloud/bigtable/row.py index 6c54ac0e5..e5aa0503a 100644 --- a/google/cloud/bigtable/row.py +++ b/google/cloud/bigtable/row.py @@ -14,16 +14,13 @@ """User-friendly container for Google Cloud Bigtable Row.""" - -import struct - from google.cloud._helpers import _datetime_from_microseconds # type: ignore from google.cloud._helpers import _microseconds_from_datetime # type: ignore from google.cloud._helpers import _to_bytes # type: ignore -from google.cloud.bigtable_v2.types import data as data_v2_pb2 +from google.cloud.bigtable.data import mutations +from google.cloud.bigtable.data import read_modify_write_rules as rmw_rules -_PACK_I64 = struct.Struct(">q").pack MAX_MUTATIONS = 100000 """The maximum number of mutations that a row can accumulate.""" @@ -157,26 +154,21 @@ def _set_cell(self, column_family_id, column, value, timestamp=None, state=None) :param state: (Optional) The state that is passed along to :meth:`_get_mutations`. """ - column = _to_bytes(column) - if isinstance(value, int): - value = _PACK_I64(value) - value = _to_bytes(value) if timestamp is None: - # Use -1 for current Bigtable server time. - timestamp_micros = -1 + # Use current Bigtable server time. + timestamp_micros = mutations._SERVER_SIDE_TIMESTAMP else: timestamp_micros = _microseconds_from_datetime(timestamp) # Truncate to millisecond granularity. timestamp_micros -= timestamp_micros % 1000 - mutation_val = data_v2_pb2.Mutation.SetCell( - family_name=column_family_id, - column_qualifier=column, + mutation = mutations.SetCell( + family=column_family_id, + qualifier=column, + new_value=value, timestamp_micros=timestamp_micros, - value=value, ) - mutation_pb = data_v2_pb2.Mutation(set_cell=mutation_val) - self._get_mutations(state).append(mutation_pb) + self._get_mutations(state).append(mutation) def _delete(self, state=None): """Helper for :meth:`delete` @@ -191,9 +183,7 @@ def _delete(self, state=None): :param state: (Optional) The state that is passed along to :meth:`_get_mutations`. """ - mutation_val = data_v2_pb2.Mutation.DeleteFromRow() - mutation_pb = data_v2_pb2.Mutation(delete_from_row=mutation_val) - self._get_mutations(state).append(mutation_pb) + self._get_mutations(state).append(mutations.DeleteAllFromRow()) def _delete_cells(self, column_family_id, columns, time_range=None, state=None): """Helper for :meth:`delete_cell` and :meth:`delete_cells`. @@ -222,27 +212,28 @@ def _delete_cells(self, column_family_id, columns, time_range=None, state=None): """ mutations_list = self._get_mutations(state) if columns is self.ALL_COLUMNS: - mutation_val = data_v2_pb2.Mutation.DeleteFromFamily( - family_name=column_family_id + self._get_mutations(state).append( + mutations.DeleteAllFromFamily(family_to_delete=column_family_id) ) - mutation_pb = data_v2_pb2.Mutation(delete_from_family=mutation_val) - mutations_list.append(mutation_pb) else: - delete_kwargs = {} + start_timestamp_micros = None + end_timestamp_micros = None if time_range is not None: - delete_kwargs["time_range"] = time_range._to_pb() + timestamps = time_range._to_dict() + start_timestamp_micros = timestamps.get("start_timestamp_micros") + end_timestamp_micros = timestamps.get("end_timestamp_micros") to_append = [] for column in columns: column = _to_bytes(column) - # time_range will never change if present, but the rest of - # delete_kwargs will - delete_kwargs.update( - family_name=column_family_id, column_qualifier=column + to_append.append( + mutations.DeleteRangeFromColumn( + family=column_family_id, + qualifier=column, + start_timestamp_micros=start_timestamp_micros, + end_timestamp_micros=end_timestamp_micros, + ) ) - mutation_val = data_v2_pb2.Mutation.DeleteFromColumn(**delete_kwargs) - mutation_pb = data_v2_pb2.Mutation(delete_from_column=mutation_val) - to_append.append(mutation_pb) # We don't add the mutations until all columns have been # processed without error. @@ -284,7 +275,7 @@ class DirectRow(_SetDeleteRow): def __init__(self, row_key, table=None): super(DirectRow, self).__init__(row_key, table) - self._pb_mutations = [] + self._mutations = [] def _get_mutations(self, state=None): # pylint: disable=unused-argument """Gets the list of mutations for a given state. @@ -299,7 +290,12 @@ def _get_mutations(self, state=None): # pylint: disable=unused-argument :rtype: list :returns: The list to add new mutations to (for the current state). """ - return self._pb_mutations + return self._mutations + + def _get_mutation_pbs(self): + """Gets the list of mutation protos.""" + + return [mut._to_pb() for mut in self._get_mutations()] def get_mutations_size(self): """Gets the total mutations size for current row @@ -313,7 +309,7 @@ def get_mutations_size(self): """ mutation_size = 0 - for mutation in self._get_mutations(): + for mutation in self._get_mutation_pbs(): mutation_size += mutation._pb.ByteSize() return mutation_size @@ -486,7 +482,7 @@ def clear(self): :end-before: [END bigtable_api_row_clear] :dedent: 4 """ - del self._pb_mutations[:] + del self._mutations[:] class ConditionalRow(_SetDeleteRow): @@ -597,17 +593,15 @@ def commit(self): % (MAX_MUTATIONS, num_true_mutations, num_false_mutations) ) - data_client = self._table._instance._client.table_data_client - resp = data_client.check_and_mutate_row( - table_name=self._table.name, + table = self._table._table_impl + resp = table.check_and_mutate_row( row_key=self._row_key, - predicate_filter=self._filter._to_pb(), - app_profile_id=self._table._app_profile_id, - true_mutations=true_mutations, - false_mutations=false_mutations, + predicate=self._filter, + true_case_mutations=true_mutations, + false_case_mutations=false_mutations, ) self.clear() - return resp.predicate_matched + return resp # pylint: disable=arguments-differ def set_cell(self, column_family_id, column, value, timestamp=None, state=True): @@ -797,7 +791,7 @@ class AppendRow(Row): def __init__(self, row_key, table): super(AppendRow, self).__init__(row_key, table) - self._rule_pb_list = [] + self._rule_list = [] def clear(self): """Removes all currently accumulated modifications on current row. @@ -809,7 +803,7 @@ def clear(self): :end-before: [END bigtable_api_row_clear] :dedent: 4 """ - del self._rule_pb_list[:] + del self._rule_list[:] def append_cell_value(self, column_family_id, column, value): """Appends a value to an existing cell. @@ -842,12 +836,11 @@ def append_cell_value(self, column_family_id, column, value): the targeted cell is unset, it will be treated as containing the empty string. """ - column = _to_bytes(column) - value = _to_bytes(value) - rule_pb = data_v2_pb2.ReadModifyWriteRule( - family_name=column_family_id, column_qualifier=column, append_value=value + self._rule_list.append( + rmw_rules.AppendValueRule( + family=column_family_id, qualifier=column, append_value=value + ) ) - self._rule_pb_list.append(rule_pb) def increment_cell_value(self, column_family_id, column, int_value): """Increments a value in an existing cell. @@ -886,13 +879,11 @@ def increment_cell_value(self, column_family_id, column, int_value): big-endian signed integer), or the entire request will fail. """ - column = _to_bytes(column) - rule_pb = data_v2_pb2.ReadModifyWriteRule( - family_name=column_family_id, - column_qualifier=column, - increment_amount=int_value, + self._rule_list.append( + rmw_rules.IncrementRule( + family=column_family_id, qualifier=column, increment_amount=int_value + ) ) - self._rule_pb_list.append(rule_pb) def commit(self): """Makes a ``ReadModifyWriteRow`` API request. @@ -925,7 +916,7 @@ def commit(self): :raises: :class:`ValueError ` if the number of mutations exceeds the :data:`MAX_MUTATIONS`. """ - num_mutations = len(self._rule_pb_list) + num_mutations = len(self._rule_list) if num_mutations == 0: return {} if num_mutations > MAX_MUTATIONS: @@ -934,12 +925,10 @@ def commit(self): "allowable %d." % (num_mutations, MAX_MUTATIONS) ) - data_client = self._table._instance._client.table_data_client - row_response = data_client.read_modify_write_row( - table_name=self._table.name, + table = self._table._table_impl + row_response = table.read_modify_write_row( row_key=self._row_key, - rules=self._rule_pb_list, - app_profile_id=self._table._app_profile_id, + rules=self._rule_list, ) # Reset modifications after commit-ing request. @@ -983,47 +972,13 @@ def _parse_rmw_row_response(row_response): } """ result = {} - for column_family in row_response.row.families: - column_family_id, curr_family = _parse_family_pb(column_family) - result[column_family_id] = curr_family + for cell in row_response.cells: + result.setdefault(cell.family, {}).setdefault(cell.qualifier, []).append( + (cell.value, _datetime_from_microseconds(cell.timestamp_micros)) + ) return result -def _parse_family_pb(family_pb): - """Parses a Family protobuf into a dictionary. - - :type family_pb: :class:`._generated.data_pb2.Family` - :param family_pb: A protobuf - - :rtype: tuple - :returns: A string and dictionary. The string is the name of the - column family and the dictionary has column names (within the - family) as keys and cell lists as values. Each cell is - represented with a two-tuple with the value (in bytes) and the - timestamp for the cell. For example: - - .. code:: python - - { - b'col-name1': [ - (b'cell-val', datetime.datetime(...)), - (b'cell-val-newer', datetime.datetime(...)), - ], - b'col-name2': [ - (b'altcol-cell-val', datetime.datetime(...)), - ], - } - """ - result = {} - for column in family_pb.columns: - result[column.qualifier] = cells = [] - for cell in column.cells: - val_pair = (cell.value, _datetime_from_microseconds(cell.timestamp_micros)) - cells.append(val_pair) - - return family_pb.name, result - - class PartialRowData(object): """Representation of partial row in a Google Cloud Bigtable Table. diff --git a/google/cloud/bigtable/table.py b/google/cloud/bigtable/table.py index 9d2897daa..39e02e4c3 100644 --- a/google/cloud/bigtable/table.py +++ b/google/cloud/bigtable/table.py @@ -1368,7 +1368,7 @@ def _compile_mutation_entries(table_name, rows): for row in rows: _check_row_table_name(table_name, row) _check_row_type(row) - mutations = row._get_mutations() + mutations = row._get_mutation_pbs() entries.append(entry_klass(row_key=row.row_key, mutations=mutations)) mutations_count += len(mutations) diff --git a/tests/system/v2_client/test_data_api.py b/tests/system/v2_client/test_data_api.py index f41aa8377..0f3716472 100644 --- a/tests/system/v2_client/test_data_api.py +++ b/tests/system/v2_client/test_data_api.py @@ -14,7 +14,6 @@ import datetime import operator -import struct import pytest @@ -50,6 +49,8 @@ INITIAL_ROW_SPLITS = [b"row_split_1", b"row_split_2", b"row_split_3"] JOY_EMOJI = "\N{FACE WITH TEARS OF JOY}" +GAP_MARGIN_OF_ERROR = 0.05 + PASS_ALL_FILTER = row_filters.PassAllFilter(True) BLOCK_ALL_FILTER = row_filters.BlockAllFilter(True) @@ -266,7 +267,7 @@ def test_error_handler(exc): retry._initial * retry._multiplier**times_triggered, retry._maximum, ) - assert gap <= max_gap + assert gap <= max_gap + GAP_MARGIN_OF_ERROR times_triggered += 1 curr_time = next_time @@ -1041,22 +1042,11 @@ def test_table_direct_row_input_errors(data_table, rows_to_delete): with pytest.raises(TypeError): row.delete_cell(COLUMN_FAMILY_ID1, INT_COL_NAME) - # Unicode for column name and value does not get converted to bytes because - # internally we use to_bytes in ascii mode. - with pytest.raises(UnicodeEncodeError): - row.set_cell(COLUMN_FAMILY_ID1, JOY_EMOJI, CELL_VAL1) - - with pytest.raises(UnicodeEncodeError): - row.set_cell(COLUMN_FAMILY_ID1, COL_NAME1, JOY_EMOJI) - - with pytest.raises(UnicodeEncodeError): - row.delete_cell(COLUMN_FAMILY_ID1, JOY_EMOJI) - - # Various non int64s, we use struct to pack a Python int to bytes. - with pytest.raises(struct.error): + # Various non int64s + with pytest.raises(ValueError): row.set_cell(COLUMN_FAMILY_ID1, COL_NAME1, OVERFLOW_INT_CELL_VAL) - with pytest.raises(struct.error): + with pytest.raises(ValueError): row.set_cell(COLUMN_FAMILY_ID1, COL_NAME1, OVERFLOW_INT_CELL_VAL2) # Since floats aren't ints, they aren't converted to bytes via struct.pack, @@ -1101,22 +1091,11 @@ def test_table_conditional_row_input_errors(data_table, rows_to_delete): with pytest.raises(TypeError): true_row.delete_cell(COLUMN_FAMILY_ID1, INT_COL_NAME) - # Unicode for column name and value does not get converted to bytes because - # internally we use to_bytes in ascii mode. - with pytest.raises(UnicodeEncodeError): - true_row.set_cell(COLUMN_FAMILY_ID1, JOY_EMOJI, CELL_VAL1) - - with pytest.raises(UnicodeEncodeError): - true_row.set_cell(COLUMN_FAMILY_ID1, COL_NAME1, JOY_EMOJI) - - with pytest.raises(UnicodeEncodeError): - true_row.delete_cell(COLUMN_FAMILY_ID1, JOY_EMOJI) - - # Various non int64s, we use struct to pack a Python int to bytes. - with pytest.raises(struct.error): + # Various non int64s + with pytest.raises(ValueError): true_row.set_cell(COLUMN_FAMILY_ID1, COL_NAME1, OVERFLOW_INT_CELL_VAL) - with pytest.raises(struct.error): + with pytest.raises(ValueError): true_row.set_cell(COLUMN_FAMILY_ID1, COL_NAME1, OVERFLOW_INT_CELL_VAL2) # Since floats aren't ints, they aren't converted to bytes via struct.pack, diff --git a/tests/unit/v2_client/test_row.py b/tests/unit/v2_client/test_row.py index b8a1917a9..a8d27732b 100644 --- a/tests/unit/v2_client/test_row.py +++ b/tests/unit/v2_client/test_row.py @@ -19,6 +19,9 @@ from ._testing import _make_credentials +_INSTANCE_ID = "test-instance" + + def _make_client(*args, **kwargs): from google.cloud.bigtable.client import Client @@ -66,7 +69,7 @@ def test_direct_row_constructor(): row = _make_direct_row(row_key, table) assert row._row_key == row_key assert row._table is table - assert row._pb_mutations == [] + assert row._mutations == [] def test_direct_row_constructor_with_unicode(): @@ -89,10 +92,28 @@ def test_direct_row__get_mutations(): row_key = b"row_key" row = _make_direct_row(row_key, None) - row._pb_mutations = mutations = object() + row._mutations = mutations = object() assert mutations is row._get_mutations(None) +def test_direct_row__get_mutation_pbs(): + from google.cloud.bigtable.data.mutations import SetCell, _SERVER_SIDE_TIMESTAMP + + row_key = b"row_key" + row = _make_direct_row(row_key, None) + + mutation = SetCell( + family="column_family_id", + qualifier=b"column", + new_value=b"value", + timestamp_micros=_SERVER_SIDE_TIMESTAMP, + ) + + row._mutations = [mutation] + + assert row._get_mutation_pbs() == [mutation._to_pb()] + + def test_direct_row_get_mutations_size(): row_key = b"row_key" row = _make_direct_row(row_key, None) @@ -108,7 +129,7 @@ def test_direct_row_get_mutations_size(): row.set_cell(column_family_id2, column2, value) total_mutations_size = 0 - for mutation in row._get_mutations(): + for mutation in row._get_mutation_pbs(): total_mutations_size += mutation._pb.ByteSize() assert row.get_mutations_size() == total_mutations_size @@ -123,26 +144,27 @@ def _set_cell_helper( ): import struct + from google.cloud.bigtable.data.mutations import SetCell + row_key = b"row_key" column_family_id = "column_family_id" if column is None: column = b"column" table = object() row = _make_direct_row(row_key, table) - assert row._pb_mutations == [] + assert row._mutations == [] row.set_cell(column_family_id, column, value, timestamp=timestamp) if isinstance(value, int): value = struct.pack(">q", value) - expected_pb = _MutationPB( - set_cell=_MutationSetCellPB( - family_name=column_family_id, - column_qualifier=column_bytes or column, - timestamp_micros=timestamp_micros, - value=value, - ) + expected_mutation = SetCell( + family=column_family_id, + qualifier=column_bytes or column, + new_value=value, + timestamp_micros=timestamp_micros, ) - assert row._pb_mutations == [expected_pb] + + _assert_mutations_equal(row._mutations, [expected_mutation]) def test_direct_row_set_cell(): @@ -183,13 +205,15 @@ def test_direct_row_set_cell_with_non_null_timestamp(): def test_direct_row_delete(): + from google.cloud.bigtable.data.mutations import DeleteAllFromRow + row_key = b"row_key" row = _make_direct_row(row_key, object()) - assert row._pb_mutations == [] + assert row._mutations == [] row.delete() - expected_pb = _MutationPB(delete_from_row=_MutationDeleteFromRowPB()) - assert row._pb_mutations == [expected_pb] + expected_mutation = DeleteAllFromRow() + _assert_mutations_equal(row._mutations, [expected_mutation]) def test_direct_row_delete_cell(): @@ -213,14 +237,14 @@ def _delete_cells(self, *args, **kwargs): mock_row = MockRow(row_key, table) # Make sure no values are set before calling the method. - assert mock_row._pb_mutations == [] + assert mock_row._mutations == [] assert mock_row._args == [] assert mock_row._kwargs == [] # Actually make the request against the mock class. time_range = object() mock_row.delete_cell(column_family_id, column, time_range=time_range) - assert mock_row._pb_mutations == [] + assert mock_row._mutations == [] assert mock_row._args == [(column_family_id, [column])] assert mock_row._kwargs == [{"state": None, "time_range": time_range}] @@ -238,19 +262,18 @@ def test_direct_row_delete_cells_non_iterable(): def test_direct_row_delete_cells_all_columns(): from google.cloud.bigtable.row import DirectRow + from google.cloud.bigtable.data.mutations import DeleteAllFromFamily row_key = b"row_key" column_family_id = "column_family_id" table = object() row = _make_direct_row(row_key, table) - assert row._pb_mutations == [] + assert row._mutations == [] row.delete_cells(column_family_id, DirectRow.ALL_COLUMNS) - expected_pb = _MutationPB( - delete_from_family=_MutationDeleteFromFamilyPB(family_name=column_family_id) - ) - assert row._pb_mutations == [expected_pb] + expected_mutation = DeleteAllFromFamily(family_to_delete=column_family_id) + _assert_mutations_equal(row._mutations, [expected_mutation]) def test_direct_row_delete_cells_no_columns(): @@ -260,12 +283,14 @@ def test_direct_row_delete_cells_no_columns(): row = _make_direct_row(row_key, table) columns = [] - assert row._pb_mutations == [] + assert row._mutations == [] row.delete_cells(column_family_id, columns) - assert row._pb_mutations == [] + assert row._mutations == [] def _delete_cells_helper(time_range=None): + from google.cloud.bigtable.data.mutations import DeleteRangeFromColumn + row_key = b"row_key" column = b"column" column_family_id = "column_family_id" @@ -273,17 +298,19 @@ def _delete_cells_helper(time_range=None): row = _make_direct_row(row_key, table) columns = [column] - assert row._pb_mutations == [] + assert row._mutations == [] row.delete_cells(column_family_id, columns, time_range=time_range) - expected_pb = _MutationPB( - delete_from_column=_MutationDeleteFromColumnPB( - family_name=column_family_id, column_qualifier=column - ) - ) + expected_mutation = DeleteRangeFromColumn(family=column_family_id, qualifier=column) if time_range is not None: - expected_pb.delete_from_column.time_range._pb.CopyFrom(time_range._to_pb()._pb) - assert row._pb_mutations == [expected_pb] + time_range_pb = time_range._to_pb() + if time_range_pb.start_timestamp_micros: + expected_mutation.start_timestamp_micros = ( + time_range_pb.start_timestamp_micros + ) + if time_range_pb.end_timestamp_micros: + expected_mutation.end_timestamp_micros = time_range_pb.end_timestamp_micros + _assert_mutations_equal(row._mutations, [expected_mutation]) def test_direct_row_delete_cells_no_time_range(): @@ -311,13 +338,15 @@ def test_direct_row_delete_cells_with_bad_column(): row = _make_direct_row(row_key, table) columns = [column, object()] - assert row._pb_mutations == [] + assert row._mutations == [] with pytest.raises(TypeError): row.delete_cells(column_family_id, columns) - assert row._pb_mutations == [] + assert row._mutations == [] def test_direct_row_delete_cells_with_string_columns(): + from google.cloud.bigtable.data.mutations import DeleteRangeFromColumn + row_key = b"row_key" column_family_id = "column_family_id" column1 = "column1" @@ -328,20 +357,16 @@ def test_direct_row_delete_cells_with_string_columns(): row = _make_direct_row(row_key, table) columns = [column1, column2] - assert row._pb_mutations == [] + assert row._mutations == [] row.delete_cells(column_family_id, columns) - expected_pb1 = _MutationPB( - delete_from_column=_MutationDeleteFromColumnPB( - family_name=column_family_id, column_qualifier=column1_bytes - ) + expected_mutation1 = DeleteRangeFromColumn( + family=column_family_id, qualifier=column1_bytes ) - expected_pb2 = _MutationPB( - delete_from_column=_MutationDeleteFromColumnPB( - family_name=column_family_id, column_qualifier=column2_bytes - ) + expected_mutation2 = DeleteRangeFromColumn( + family=column_family_id, qualifier=column2_bytes ) - assert row._pb_mutations == [expected_pb1, expected_pb2] + _assert_mutations_equal(row._mutations, [expected_mutation1, expected_mutation2]) def test_direct_row_commit(): @@ -517,50 +542,54 @@ def test_append_row_constructor(): row = _make_append_row(row_key, table) assert row._row_key == row_key assert row._table is table - assert row._rule_pb_list == [] + assert row._rule_list == [] def test_append_row_clear(): row_key = b"row_key" table = object() row = _make_append_row(row_key, table) - row._rule_pb_list = [1, 2, 3] + row._rule_list = [1, 2, 3] row.clear() - assert row._rule_pb_list == [] + assert row._rule_list == [] def test_append_row_append_cell_value(): + from google.cloud.bigtable.data.read_modify_write_rules import AppendValueRule + table = object() row_key = b"row_key" row = _make_append_row(row_key, table) - assert row._rule_pb_list == [] + assert row._rule_list == [] column = b"column" column_family_id = "column_family_id" value = b"bytes-val" row.append_cell_value(column_family_id, column, value) - expected_pb = _ReadModifyWriteRulePB( - family_name=column_family_id, column_qualifier=column, append_value=value + expected_pb = AppendValueRule( + family=column_family_id, qualifier=column, append_value=value ) - assert row._rule_pb_list == [expected_pb] + _assert_mutations_equal(row._rule_list, [expected_pb]) def test_append_row_increment_cell_value(): + from google.cloud.bigtable.data.read_modify_write_rules import IncrementRule + table = object() row_key = b"row_key" row = _make_append_row(row_key, table) - assert row._rule_pb_list == [] + assert row._rule_list == [] column = b"column" column_family_id = "column_family_id" int_value = 281330 row.increment_cell_value(column_family_id, column, int_value) - expected_pb = _ReadModifyWriteRulePB( - family_name=column_family_id, - column_qualifier=column, + expected_pb = IncrementRule( + family=column_family_id, + qualifier=column, increment_amount=int_value, ) - assert row._rule_pb_list == [expected_pb] + _assert_mutations_equal(row._rule_list, [expected_pb]) def test_append_row_commit(): @@ -605,7 +634,7 @@ def mock_parse_rmw_row_response(row_response): call_args = api.read_modify_write_row.call_args_list[0] assert app_profile_id == call_args.app_profile_id[0] assert result == expected_result - assert row._rule_pb_list == [] + assert row._rule_list == [] def test_append_row_commit_no_rules(): @@ -618,7 +647,7 @@ def test_append_row_commit_no_rules(): client = _make_client(project=project_id, credentials=credentials, admin=True) table = _Table(None, client=client) row = _make_append_row(row_key, table) - assert row._rule_pb_list == [] + assert row._rule_list == [] # Patch the stub used by the API method. stub = _FakeStub() @@ -637,8 +666,8 @@ def test_append_row_commit_too_many_mutations(): row_key = b"row_key" table = object() row = _make_append_row(row_key, table) - row._rule_pb_list = [1, 2, 3] - num_mutations = len(row._rule_pb_list) + row._rule_list = [1, 2, 3] + num_mutations = len(row._rule_list) with _Monkey(MUT, MAX_MUTATIONS=num_mutations - 1): with pytest.raises(ValueError): row.commit() @@ -648,6 +677,8 @@ def test__parse_rmw_row_response(): from google.cloud._helpers import _datetime_from_microseconds from google.cloud.bigtable.row import _parse_rmw_row_response + from google.cloud.bigtable.data.row import Row + col_fam1 = "col-fam-id" col_fam2 = "col-fam-id2" col_name1 = b"col-name1" @@ -696,59 +727,16 @@ def test__parse_rmw_row_response(): ), ] ) - sample_input = _ReadModifyWriteRowResponsePB(row=response_row) + sample_input = Row._from_pb(response_row) assert expected_output == _parse_rmw_row_response(sample_input) -def test__parse_family_pb(): - from google.cloud._helpers import _datetime_from_microseconds - from google.cloud.bigtable.row import _parse_family_pb - - col_fam1 = "col-fam-id" - col_name1 = b"col-name1" - col_name2 = b"col-name2" - cell_val1 = b"cell-val" - cell_val2 = b"cell-val-newer" - cell_val3 = b"altcol-cell-val" - - microseconds = 5554441037 - timestamp = _datetime_from_microseconds(microseconds) - expected_dict = { - col_name1: [(cell_val1, timestamp), (cell_val2, timestamp)], - col_name2: [(cell_val3, timestamp)], - } - expected_output = (col_fam1, expected_dict) - sample_input = _FamilyPB( - name=col_fam1, - columns=[ - _ColumnPB( - qualifier=col_name1, - cells=[ - _CellPB(value=cell_val1, timestamp_micros=microseconds), - _CellPB(value=cell_val2, timestamp_micros=microseconds), - ], - ), - _ColumnPB( - qualifier=col_name2, - cells=[_CellPB(value=cell_val3, timestamp_micros=microseconds)], - ), - ], - ) - assert expected_output == _parse_family_pb(sample_input) - - def _CheckAndMutateRowResponsePB(*args, **kw): from google.cloud.bigtable_v2.types import bigtable as messages_v2_pb2 return messages_v2_pb2.CheckAndMutateRowResponse(*args, **kw) -def _ReadModifyWriteRowResponsePB(*args, **kw): - from google.cloud.bigtable_v2.types import bigtable as messages_v2_pb2 - - return messages_v2_pb2.ReadModifyWriteRowResponse(*args, **kw) - - def _CellPB(*args, **kw): from google.cloud.bigtable_v2.types import data as data_v2_pb2 @@ -767,46 +755,18 @@ def _FamilyPB(*args, **kw): return data_v2_pb2.Family(*args, **kw) -def _MutationPB(*args, **kw): - from google.cloud.bigtable_v2.types import data as data_v2_pb2 - - return data_v2_pb2.Mutation(*args, **kw) - - -def _MutationSetCellPB(*args, **kw): - from google.cloud.bigtable_v2.types import data as data_v2_pb2 - - return data_v2_pb2.Mutation.SetCell(*args, **kw) - - -def _MutationDeleteFromColumnPB(*args, **kw): - from google.cloud.bigtable_v2.types import data as data_v2_pb2 - - return data_v2_pb2.Mutation.DeleteFromColumn(*args, **kw) - - -def _MutationDeleteFromFamilyPB(*args, **kw): - from google.cloud.bigtable_v2.types import data as data_v2_pb2 - - return data_v2_pb2.Mutation.DeleteFromFamily(*args, **kw) - - -def _MutationDeleteFromRowPB(*args, **kw): - from google.cloud.bigtable_v2.types import data as data_v2_pb2 - - return data_v2_pb2.Mutation.DeleteFromRow(*args, **kw) - - def _RowPB(*args, **kw): from google.cloud.bigtable_v2.types import data as data_v2_pb2 return data_v2_pb2.Row(*args, **kw) -def _ReadModifyWriteRulePB(*args, **kw): - from google.cloud.bigtable_v2.types import data as data_v2_pb2 +def _assert_mutations_equal(mutations_1, mutations_2): + assert len(mutations_1) == len(mutations_2) - return data_v2_pb2.ReadModifyWriteRule(*args, **kw) + for i in range(0, len(mutations_1)): + assert type(mutations_1[i]) is type(mutations_2[i]) + assert mutations_1[i]._to_pb() == mutations_2[i]._to_pb() class _Instance(object): @@ -822,6 +782,12 @@ def __init__(self, name, client=None, app_profile_id=None): self.client = client self.mutated_rows = [] + self._table_impl = self._instance._client._veneer_data_client.get_table( + _INSTANCE_ID, + self.name, + app_profile_id=self._app_profile_id, + ) + def mutate_rows(self, rows): from google.rpc import status_pb2 diff --git a/tests/unit/v2_client/test_table.py b/tests/unit/v2_client/test_table.py index 5601f6b5e..d1cc518f7 100644 --- a/tests/unit/v2_client/test_table.py +++ b/tests/unit/v2_client/test_table.py @@ -1736,10 +1736,9 @@ def _do_mutate_retryable_rows_helper( expected_entries = [] for row, prior_status in zip(rows, worker.responses_statuses): if prior_status is None or prior_status.code in RETRYABLES: - mutations = row._get_mutations().copy() # row clears on success entry = data_messages_v2_pb2.MutateRowsRequest.Entry( row_key=row.row_key, - mutations=mutations, + mutations=row._get_mutation_pbs().copy(), # row clears on success ) expected_entries.append(entry) From 36cfaaf6df4a5f6b547f4ca71490f7a0acdb7666 Mon Sep 17 00:00:00 2001 From: Kevin Zheng Date: Tue, 20 Jan 2026 16:37:50 +0000 Subject: [PATCH 4/5] Addressed review feedback + fixed system tests. --- google/cloud/bigtable/row.py | 16 ++++++------- tests/system/v2_client/test_data_api.py | 30 ++++--------------------- tests/unit/v2_client/test_row.py | 10 +++------ 3 files changed, 14 insertions(+), 42 deletions(-) diff --git a/google/cloud/bigtable/row.py b/google/cloud/bigtable/row.py index e5aa0503a..b67d26742 100644 --- a/google/cloud/bigtable/row.py +++ b/google/cloud/bigtable/row.py @@ -210,7 +210,6 @@ def _delete_cells(self, column_family_id, columns, time_range=None, state=None): :param state: (Optional) The state that is passed along to :meth:`_get_mutations`. """ - mutations_list = self._get_mutations(state) if columns is self.ALL_COLUMNS: self._get_mutations(state).append( mutations.DeleteAllFromFamily(family_to_delete=column_family_id) @@ -218,10 +217,9 @@ def _delete_cells(self, column_family_id, columns, time_range=None, state=None): else: start_timestamp_micros = None end_timestamp_micros = None - if time_range is not None: - timestamps = time_range._to_dict() - start_timestamp_micros = timestamps.get("start_timestamp_micros") - end_timestamp_micros = timestamps.get("end_timestamp_micros") + timestamps = time_range._to_dict() if time_range else {} + start_timestamp_micros = timestamps.get("start_timestamp_micros") + end_timestamp_micros = timestamps.get("end_timestamp_micros") to_append = [] for column in columns: @@ -237,7 +235,7 @@ def _delete_cells(self, column_family_id, columns, time_range=None, state=None): # We don't add the mutations until all columns have been # processed without error. - mutations_list.extend(to_append) + self._get_mutations(state).extend(to_append) class DirectRow(_SetDeleteRow): @@ -973,9 +971,9 @@ def _parse_rmw_row_response(row_response): """ result = {} for cell in row_response.cells: - result.setdefault(cell.family, {}).setdefault(cell.qualifier, []).append( - (cell.value, _datetime_from_microseconds(cell.timestamp_micros)) - ) + column_family = result.setdefault(cell.family, {}) + column = column_family.setdefault(cell.qualifier, []) + column.append((cell.value, _datetime_from_microseconds(cell.timestamp_micros))) return result diff --git a/tests/system/v2_client/test_data_api.py b/tests/system/v2_client/test_data_api.py index 0f3716472..e6a15d95e 100644 --- a/tests/system/v2_client/test_data_api.py +++ b/tests/system/v2_client/test_data_api.py @@ -1144,39 +1144,17 @@ def test_table_append_row_input_errors(data_table, rows_to_delete): rows_to_delete.append(data_table.direct_row(ROW_KEY)) # Column names should be convertible to bytes (str or bytes) - with pytest.raises(TypeError): + with pytest.raises(AttributeError): row.append_cell_value(COLUMN_FAMILY_ID1, INT_COL_NAME, CELL_VAL1) - with pytest.raises(TypeError): + with pytest.raises(AttributeError): row.increment_cell_value(COLUMN_FAMILY_ID1, INT_COL_NAME, 1) - # Unicode for column name and value - with pytest.raises(UnicodeEncodeError): - row.append_cell_value(COLUMN_FAMILY_ID1, JOY_EMOJI, CELL_VAL1) - - with pytest.raises(UnicodeEncodeError): - row.append_cell_value(COLUMN_FAMILY_ID1, COL_NAME1, JOY_EMOJI) - - with pytest.raises(UnicodeEncodeError): - row.increment_cell_value(COLUMN_FAMILY_ID1, JOY_EMOJI, 1) - - # Non-integer cell values for increment_cell_value with pytest.raises(ValueError): row.increment_cell_value(COLUMN_FAMILY_ID1, COL_NAME1, OVERFLOW_INT_CELL_VAL) - # increment_cell_value does not do input validation on the int_value, instead using - # proto-plus to do validation. - row.increment_cell_value(COLUMN_FAMILY_ID1, COL_NAME1, FLOAT_CELL_VAL) - row.increment_cell_value(COLUMN_FAMILY_ID1, COL_NAME2, FLOAT_CELL_VAL2) - row.commit() - - row_data = data_table.read_row(ROW_KEY) - assert row_data.cells[COLUMN_FAMILY_ID1][COL_NAME1][0].value == int( - FLOAT_CELL_VAL - ).to_bytes(8, byteorder="big", signed=True) - assert row_data.cells[COLUMN_FAMILY_ID1][COL_NAME2][0].value == int( - FLOAT_CELL_VAL2 - ).to_bytes(8, byteorder="big", signed=True) + with pytest.raises(TypeError): + row.increment_cell_value(COLUMN_FAMILY_ID1, COL_NAME1, FLOAT_CELL_VAL) # Can't have more than MAX_MUTATIONS mutations, but only enforced after # a row.commit diff --git a/tests/unit/v2_client/test_row.py b/tests/unit/v2_client/test_row.py index a8d27732b..5af70c8b0 100644 --- a/tests/unit/v2_client/test_row.py +++ b/tests/unit/v2_client/test_row.py @@ -303,13 +303,9 @@ def _delete_cells_helper(time_range=None): expected_mutation = DeleteRangeFromColumn(family=column_family_id, qualifier=column) if time_range is not None: - time_range_pb = time_range._to_pb() - if time_range_pb.start_timestamp_micros: - expected_mutation.start_timestamp_micros = ( - time_range_pb.start_timestamp_micros - ) - if time_range_pb.end_timestamp_micros: - expected_mutation.end_timestamp_micros = time_range_pb.end_timestamp_micros + timestamps = time_range._to_dict() + expected_mutation.start_timestamp_micros = timestamps.get("start_timestamp_micros") + expected_mutation.end_timestamp_micros = timestamps.get("end_timestamp_micros") _assert_mutations_equal(row._mutations, [expected_mutation]) From 1f3811957c087e622270e78bf267a0cc5020d710 Mon Sep 17 00:00:00 2001 From: Kevin Zheng Date: Tue, 20 Jan 2026 18:04:55 +0000 Subject: [PATCH 5/5] linting --- tests/unit/v2_client/test_row.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/unit/v2_client/test_row.py b/tests/unit/v2_client/test_row.py index 5af70c8b0..d5b80d6f8 100644 --- a/tests/unit/v2_client/test_row.py +++ b/tests/unit/v2_client/test_row.py @@ -304,7 +304,9 @@ def _delete_cells_helper(time_range=None): expected_mutation = DeleteRangeFromColumn(family=column_family_id, qualifier=column) if time_range is not None: timestamps = time_range._to_dict() - expected_mutation.start_timestamp_micros = timestamps.get("start_timestamp_micros") + expected_mutation.start_timestamp_micros = timestamps.get( + "start_timestamp_micros" + ) expected_mutation.end_timestamp_micros = timestamps.get("end_timestamp_micros") _assert_mutations_equal(row._mutations, [expected_mutation])