From 9c617869bdd18678254d1cfb536a82d586a40345 Mon Sep 17 00:00:00 2001 From: TFLM-bot Date: Thu, 19 Mar 2026 14:20:57 +0000 Subject: [PATCH] Sync from upstream TF. --- .../lite/core/api/flatbuffer_conversions.cc | 16 ++++- .../kernels/internal/reference/batch_matmul.h | 30 +++++----- .../kernels/internal/reference/leaky_relu.h | 29 ++++++--- tensorflow/lite/kernels/kernel_util.cc | 59 +++++++++---------- tensorflow/lite/kernels/kernel_util.h | 3 + 5 files changed, 81 insertions(+), 56 deletions(-) diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc index a0c190641b5..4fc99a07e5b 100644 --- a/tensorflow/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc @@ -1882,9 +1882,7 @@ TfLiteStatus ParseMul(const Operator* op, ErrorReporter* error_reporter, params->activation = ConvertActivation(schema_params->fused_activation_function()); } else { - // TODO(b/157480169): We should either return kTfLiteError or fill in some - // reasonable defaults in the params struct. We are not doing so until we - // better understand the ramifications of changing the legacy behavior. + // Default activation is none. 
} *builtin_data = params.release(); @@ -2430,6 +2428,18 @@ TfLiteStatus ParseStablehloComposite(const Operator* op, const StableHLOCompositeOptions* schema_params = op->builtin_options_2_as_StableHLOCompositeOptions(); if (schema_params) { + if (schema_params->name() == nullptr) { + TF_LITE_REPORT_ERROR( + error_reporter, + "'stablehlo.composite' missing required option 'name'."); + return kTfLiteError; + } + if (schema_params->composite_attributes() == nullptr) { + TF_LITE_REPORT_ERROR(error_reporter, + "'stablehlo.composite' missing required option " + "'composite_attributes'."); + return kTfLiteError; + } params->name = schema_params->name()->c_str(); params->version = schema_params->version(); params->subgraph_index = schema_params->decomposition_subgraph_index(); diff --git a/tensorflow/lite/kernels/internal/reference/batch_matmul.h b/tensorflow/lite/kernels/internal/reference/batch_matmul.h index 54908bd24ee..71f456703a3 100644 --- a/tensorflow/lite/kernels/internal/reference/batch_matmul.h +++ b/tensorflow/lite/kernels/internal/reference/batch_matmul.h @@ -202,11 +202,13 @@ inline void BatchMatMul(const RuntimeShape& lhs_shape, const int8_t* lhs_data, } } -template <typename T, typename AccumT> +template <typename lhsT, typename rhsT, typename AccumT, typename outputT> inline void BatchMatMul(const FullyConnectedParams& params, - const RuntimeShape& lhs_shape, const T* lhs_data, - const RuntimeShape& rhs_shape, const T* rhs_data, - const RuntimeShape& output_shape, T* output_data) { + const RuntimeShape& lhs_shape, const lhsT* lhs_data, + const RuntimeShape& rhs_shape, const rhsT* rhs_data, + const RuntimeShape& output_shape, + outputT* output_data) { const RuntimeShape extended_lhs_shape = RuntimeShape::ExtendedShape(5, lhs_shape); const RuntimeShape extended_rhs_shape = @@ -241,17 +243,17 @@ inline void BatchMatMul(const FullyConnectedParams& params, TFLITE_DCHECK_LE(output_activation_min, output_activation_max); for (int b0 = 0; b0 < batch_dim0; ++b0) { - const T* lhs_ptr0 = lhs_data + (b0 * lhs_ext0); - const T* rhs_ptr0 = rhs_data + (b0 * 
rhs_ext0); + const lhsT* lhs_ptr0 = lhs_data + (b0 * lhs_ext0); + const rhsT* rhs_ptr0 = rhs_data + (b0 * rhs_ext0); for (int b1 = 0; b1 < batch_dim1; ++b1) { - const T* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1; - const T* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1; + const lhsT* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1; + const rhsT* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1; for (int b2 = 0; b2 < batch_dim2; ++b2) { - const T* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2; - const T* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2; - T* out_ptr = output_data + - ((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) * - lhs_rows * rhs_cols; + const lhsT* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2; + const rhsT* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2; + outputT* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) + + b1 * batch_dim2 + b2) * + lhs_rows * rhs_cols; for (int j = 0; j < rhs_cols; ++j) { for (int i = 0; i < lhs_rows; ++i) { @@ -267,7 +269,7 @@ inline void BatchMatMul(const FullyConnectedParams& params, total_scaled = std::max(total_scaled, output_activation_min); total_scaled = std::min(total_scaled, output_activation_max); const int idx = lhs_rows * j + i; - out_ptr[idx] = static_cast<T>(total_scaled); + out_ptr[idx] = static_cast<outputT>(total_scaled); } } } diff --git a/tensorflow/lite/kernels/internal/reference/leaky_relu.h b/tensorflow/lite/kernels/internal/reference/leaky_relu.h index 06f691abd6c..1e727d7184f 100644 --- a/tensorflow/lite/kernels/internal/reference/leaky_relu.h +++ b/tensorflow/lite/kernels/internal/reference/leaky_relu.h @@ -43,20 +43,31 @@ inline void QuantizeLeakyRelu(const LeakyReluParams& params, const int flat_size = MatchingFlatSize(input_shape, output_shape); static const int32_t quantized_min = std::numeric_limits<T>::min(); static const int32_t quantized_max = std::numeric_limits<T>::max(); + + // Extract the sign and create a safely positive multiplier outside the loop. 
+ // This supports negative alpha values (matching float execution behavior) + // while preventing assertion failures, as MultiplyByQuantizedMultiplier + // strictly requires a non-negative multiplier. + const bool is_alpha_negative = params.output_multiplier_alpha < 0; + const int32_t safe_alpha_multiplier = is_alpha_negative + ? -params.output_multiplier_alpha + : params.output_multiplier_alpha; + for (int i = 0; i < flat_size; ++i) { const int32_t input_value = input_data[i] - params.input_offset; - int32_t unclamped_output; + + int32_t unclamped_output = params.output_offset; if (input_value >= 0) { - unclamped_output = params.output_offset + - MultiplyByQuantizedMultiplier( - input_value, params.output_multiplier_identity, - params.output_shift_identity); + unclamped_output += MultiplyByQuantizedMultiplier( + input_value, params.output_multiplier_identity, + params.output_shift_identity); } else { - unclamped_output = params.output_offset + - MultiplyByQuantizedMultiplier( - input_value, params.output_multiplier_alpha, - params.output_shift_alpha); + int32_t scaled_alpha_value = MultiplyByQuantizedMultiplier( + input_value, safe_alpha_multiplier, params.output_shift_alpha); + unclamped_output += + is_alpha_negative ? -scaled_alpha_value : scaled_alpha_value; } + const T clamped_output = std::min(quantized_max, std::max(quantized_min, unclamped_output)); output_data[i] = static_cast(clamped_output); diff --git a/tensorflow/lite/kernels/kernel_util.cc b/tensorflow/lite/kernels/kernel_util.cc index 2333caebce5..62feffc1c0a 100644 --- a/tensorflow/lite/kernels/kernel_util.cc +++ b/tensorflow/lite/kernels/kernel_util.cc @@ -528,51 +528,50 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context, // Size of string is not constant, return 0 in such case. int TfLiteTypeGetSize(TfLiteType type) { + int size_bits = TfLiteTypeGetSizeBits(type); + if (size_bits % 8 == 0) { + return size_bits / 8; + } else { + // For non-byte sized types, return 0. 
+ return 0; + } +} + +int TfLiteTypeGetSizeBits(TfLiteType type) { switch (type) { + case kTfLiteInt2: + return 2; + case kTfLiteInt4: + case kTfLiteUInt4: + return 4; case kTfLiteUInt8: - static_assert(sizeof(uint8_t) == 1, ""); - return 1; case kTfLiteInt8: - static_assert(sizeof(int8_t) == 1, ""); - return 1; - case kTfLiteBool: - return sizeof(bool); + return 8; case kTfLiteUInt16: - static_assert(sizeof(uint16_t) == 2, ""); - return 2; case kTfLiteInt16: - static_assert(sizeof(int16_t) == 2, ""); - return 2; case kTfLiteFloat16: - static_assert(sizeof(int16_t) == 2, ""); - return 2; + case kTfLiteBFloat16: + return 16; case kTfLiteFloat32: - static_assert(sizeof(float) == 4, ""); - return 4; case kTfLiteInt32: - static_assert(sizeof(int32_t) == 4, ""); - return 4; case kTfLiteUInt32: - static_assert(sizeof(uint32_t) == 4, ""); - return 4; + return 32; case kTfLiteInt64: - static_assert(sizeof(int64_t) == 8, ""); - return 8; case kTfLiteUInt64: - static_assert(sizeof(uint64_t) == 8, ""); - return 8; case kTfLiteFloat64: - static_assert(sizeof(double) == 8, ""); - return 8; case kTfLiteComplex64: - static_assert(sizeof(std::complex<float>) == 8, ""); - return 8; + return 64; case kTfLiteComplex128: - static_assert(sizeof(std::complex<double>) == 16, ""); - return 16; - default: - return 0; + return 128; + case kTfLiteBool: + return sizeof(bool) * 8; + case kTfLiteString: + case kTfLiteNoType: + case kTfLiteResource: + case kTfLiteVariant: + break; } + return 0; } bool IsMobilePlatform() { diff --git a/tensorflow/lite/kernels/kernel_util.h b/tensorflow/lite/kernels/kernel_util.h index 070f363b5a6..25e5386ccb6 100644 --- a/tensorflow/lite/kernels/kernel_util.h +++ b/tensorflow/lite/kernels/kernel_util.h @@ -332,6 +332,9 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context, // Return the size of given type in bytes. Return 0 in case of string. int TfLiteTypeGetSize(TfLiteType type); +// Return the size of given type in bits. Returns 0 in case of string. 
+int TfLiteTypeGetSizeBits(TfLiteType type); + // Whether the current platform is mobile (Android or iOS). bool IsMobilePlatform();