diff --git a/python/tflite_micro/numpy_utils.cc b/python/tflite_micro/numpy_utils.cc index 0daabf00347..fb728a18a1e 100644 --- a/python/tflite_micro/numpy_utils.cc +++ b/python/tflite_micro/numpy_utils.cc @@ -58,6 +58,9 @@ int TfLiteTypeToPyArrayType(TfLiteType tf_lite_type) { case kTfLiteInt4: // TODO(b/246806634): NPY_INT4 currently doesn't exist return NPY_BYTE; + case kTfLiteInt2: + // TODO(b/246806634): NPY_INT2 currently doesn't exist + return NPY_BYTE; case kTfLiteInt8: return NPY_INT8; case kTfLiteInt64: diff --git a/tensorflow/compiler/mlir/lite/core/c/tflite_types.h b/tensorflow/compiler/mlir/lite/core/c/tflite_types.h index 068facb1076..f09923dda5f 100644 --- a/tensorflow/compiler/mlir/lite/core/c/tflite_types.h +++ b/tensorflow/compiler/mlir/lite/core/c/tflite_types.h @@ -64,6 +64,7 @@ typedef enum { kTfLiteUInt16 = 17, kTfLiteInt4 = 18, kTfLiteBFloat16 = 19, + kTfLiteInt2 = 20, } TfLiteType; // LINT.ThenChange(//tensorflow/lite/profiling/proto/model_runtime_info.proto:EdgeDataType) diff --git a/tensorflow/compiler/mlir/lite/schema/schema.fbs b/tensorflow/compiler/mlir/lite/schema/schema.fbs index dcf82e38a96..d9da8f0a331 100644 --- a/tensorflow/compiler/mlir/lite/schema/schema.fbs +++ b/tensorflow/compiler/mlir/lite/schema/schema.fbs @@ -59,6 +59,7 @@ enum TensorType : byte { UINT16 = 16, INT4 = 17, BFLOAT16 = 18, + INT2 = 19, } // Custom quantization parameters for experimenting with new quantization diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc index 882b839049c..cc31fb44714 100644 --- a/tensorflow/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc @@ -1088,6 +1088,9 @@ TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type, case TensorType_INT4: *type = kTfLiteInt4; return kTfLiteOk; + case TensorType_INT2: + *type = kTfLiteInt2; + return kTfLiteOk; default: *type = kTfLiteNoType; TF_LITE_REPORT_ERROR(error_reporter, 
diff --git a/tensorflow/lite/core/c/common.cc b/tensorflow/lite/core/c/common.cc index 4f404c93a18..5d483bdf977 100644 --- a/tensorflow/lite/core/c/common.cc +++ b/tensorflow/lite/core/c/common.cc @@ -509,6 +509,8 @@ const char* TfLiteTypeGetName(TfLiteType type) { return "VARIANT"; case kTfLiteInt4: return "INT4"; + case kTfLiteInt2: + return "INT2"; } return "Unknown type"; } diff --git a/tensorflow/lite/kernels/internal/portable_tensor_utils.cc b/tensorflow/lite/kernels/internal/portable_tensor_utils.cc index 0928d4b0d0d..efc6ba5a9c0 100644 --- a/tensorflow/lite/kernels/internal/portable_tensor_utils.cc +++ b/tensorflow/lite/kernels/internal/portable_tensor_utils.cc @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" #include <algorithm> +#include <cassert> #include <cstdint> #include <cstring> @@ -92,23 +93,90 @@ void UnpackDenseInt4IntoInt8(const int8_t* src_buffer, int num_elements, } } -void PackInt8IntoDenseInt4(const int8_t* src_buffer, int num_elements, - int8_t* dst_buffer) { - // num_elements means the number of elements regardless of packed or unpacked. - // For example, 3 elements means both - // 1) Packed: 3 int4's = 12 bit -> 16 bits (padded) = 2 bytes. - // stored in src_buffer[0] and src_buffer[1] (i = 0..1) - // 2) Unpacked: 3 int8's = 3 bytes. - // stored in dst_buffer[0], dst_buffer[1] and dst_buffer[2] (j = 0..2) - for (int i = 0; i < num_elements - 1; i += 2) { - dst_buffer[i / 2] = src_buffer[i] & 0x0F; - dst_buffer[i / 2] |= src_buffer[i + 1] << 4; +void UnpackPackedIntToInt8(const int8_t* src_buffer, int num_elements, + int bit_width, int8_t* dst_buffer) { + assert(bit_width == 2 || bit_width == 4); + if (bit_width == 4) { + // num_elements means the number of elements regardless of packed or + // unpacked. For example, 3 elements means both + // 1) Packed: 3 int4's = 12 bit -> 16 bits (padded) = 2 bytes. + // stored in src_buffer[0] and src_buffer[1] (i = 0..1) + // 2) Unpacked: 3 int8's = 3 bytes. + //. 
stored in dst_buffer[0], dst_buffer[1] and dst_buffer[2] (j = 0..2) + for (int i = 0; i < num_elements / 2; i++) { + int8_t byte = src_buffer[i]; + // Shift left first so that sign is properly extended when shifted right + int8_t lower = static_cast<int8_t>(byte << 4) >> 4; + int8_t higher = byte >> 4; + dst_buffer[2 * i] = lower; + dst_buffer[2 * i + 1] = higher; + } + + // If the buffer size is odd, extract the final lower nibble. + if (num_elements % 2 != 0) { + dst_buffer[num_elements - 1] = + static_cast<int8_t>(src_buffer[num_elements / 2] << 4) >> 4; + } + } else if (bit_width == 2) { + for (int i = 0; i < num_elements / 4; i++) { + int8_t byte = src_buffer[i]; + // Shift left first so that sign is properly extended when shifted right + int8_t val1 = static_cast<int8_t>(byte << 6) >> 6; + int8_t val2 = static_cast<int8_t>((byte << 4) & 0xFF) >> 6; + int8_t val3 = static_cast<int8_t>((byte << 2) & 0xFF) >> 6; + int8_t val4 = byte >> 6; + dst_buffer[4 * i] = val1; + dst_buffer[4 * i + 1] = val2; + dst_buffer[4 * i + 2] = val3; + dst_buffer[4 * i + 3] = val4; + } + + // Handle the remaining elements. + int remaining_elements = num_elements % 4; + if (remaining_elements > 0) { + int8_t byte = src_buffer[num_elements / 4]; + for (int i = 0; i < remaining_elements; i++) { + dst_buffer[num_elements - remaining_elements + i] = + static_cast<int8_t>((byte << (6 - 2 * i)) & 0xFF) >> 6; + } + } } - auto packed_size = (num_elements + 1) / 2; +} - // Copy the final nibble if the buffer is odd-lengthed - if (num_elements % 2 != 0) { - dst_buffer[packed_size - 1] = src_buffer[num_elements - 1] & 0x0F; +void PackInt8IntoDenseInt(const int8_t* src_buffer, int num_elements, + int bit_width, int8_t* dst_buffer) { + assert(bit_width == 2 || bit_width == 4); + if (bit_width == 4) { + // num_elements means the number of elements regardless of packed or + // unpacked. For example, 3 elements means both + // 1) Unpacked: 3 int8's = 3 bytes. 
+ // stored in src_buffer[0], src_buffer[1] and src_buffer[2] (j = 0..2) + // 2) Packed: 3 int4's = 12 bit -> 16 bits (padded) = 2 bytes. + // stored in dst_buffer[0] and dst_buffer[1] (i = 0..1) + for (int i = 0; i < num_elements / 2; ++i) { + dst_buffer[i] = (src_buffer[2 * i] & 0x0F) | (src_buffer[2 * i + 1] << 4); + } + // If the buffer size is odd, pack the final nibble. + if (num_elements % 2 != 0) { + dst_buffer[num_elements / 2] = src_buffer[num_elements - 1] & 0x0F; + } + } else if (bit_width == 2) { + for (int i = 0; i < num_elements / 4; ++i) { + dst_buffer[i] = (src_buffer[4 * i] & 0x03) | + ((src_buffer[4 * i + 1] & 0x03) << 2) | + ((src_buffer[4 * i + 2] & 0x03) << 4) | + ((src_buffer[4 * i + 3] & 0x03) << 6); + } + // Handle the remaining elements. + int remaining_elements = num_elements % 4; + if (remaining_elements > 0) { + int8_t packed_val = 0; + for (int i = 0; i < remaining_elements; ++i) { + packed_val |= (src_buffer[num_elements - remaining_elements + i] & 0x03) + << (i * 2); + } + dst_buffer[num_elements / 4] = packed_val; + } } } diff --git a/tensorflow/lite/kernels/internal/portable_tensor_utils.h b/tensorflow/lite/kernels/internal/portable_tensor_utils.h index a361a2d0e5d..c70ac94db5f 100644 --- a/tensorflow/lite/kernels/internal/portable_tensor_utils.h +++ b/tensorflow/lite/kernels/internal/portable_tensor_utils.h @@ -618,20 +618,41 @@ void ApplySignbitToVector(const float* __restrict__ vector, int v_size, void UnpackDenseInt4IntoInt8(const int8_t* src_buffer, int num_elements, int8_t* dst_buffer); -// Pack `src_buffer` into a densely packed buffer of int4 values. +// Unpack or inflate `src_buffer` by taking each byte and splitting it into +// multiple elements into `dst_buffer`. Supports 2-bit and 4-bit packed integers // Parameters: -// src_buffer : Buffer containing int4 values stored in int8 memory. +// src_buffer : Densely packed buffer containing int2 or int4 values. 
+// num_elements : Number of unpacked elements to be read from the buffer. +// This should be equal to the size of `dst_buffer`. +// bit_width : The bit width of the packed elements (either 2 or 4). +// dst_buffer : Buffer to unpack into. Should be allocated by the caller. +// Size should be at least `num_elements`. +// Notes: +// For 4-bit unpacking: e.g., `src_buffer = {0x12, 0x34};` (num_elements = 4) +// will return `dst_buffer = {0x02, 0x01, 0x04, 0x03}`. +// For 2-bit unpacking: e.g., `src_buffer = {0x12};` (num_elements = 4) +// will return `dst_buffer = {0x02, 0x00, 0x01, 0x00}` (sign extended). +void UnpackPackedIntToInt8(const int8_t* src_buffer, int num_elements, + int bit_width, int8_t* dst_buffer); + +// Pack `src_buffer` into a densely packed buffer of int2 or int4 values. +// Parameters: +// src_buffer : Buffer containing int2 or int4 values stored in int8 +// memory. // num_elements : Number of elements stored in the buffer. Note that this can // be smaller than the size of `src_buffer` by 1 if it's odd, // in which case the last nibble in `src_buffer` is ignored. // This should be equal to the size of `dst_buffer`. +// bit_width : The bit width of the packed elements (either 2 or 4). // dst_buffer : Buffer to pack into. Should be allocated by the caller. // Size should be at least `num_elements`. // Notes: -// For example, given `src_buffer = {0x02, 0x01, 0x04, 0x03}`, calling this -// function will return `dst_buffer = {0x12, 0x34}`. -void PackInt8IntoDenseInt4(const int8_t* src_buffer, int num_elements, - int8_t* dst_buffer); +// For 4-bit packing: e.g., given `src_buffer = {0x02, 0x01, 0x04, 0x03}`, +// calling this function will return `dst_buffer = {0x12, 0x34}`. +// For 2-bit packing: e.g., given `src_buffer = {0x00, 0x01, 0x00, 0x02}`, +// calling this function will return `dst_buffer = {0x84}`. 
+void PackInt8IntoDenseInt(const int8_t* src_buffer, int num_elements, + int bit_width, int8_t* dst_buffer); } // namespace tensor_utils } // namespace tflite diff --git a/tensorflow/lite/micro/tools/layer_by_layer.cc b/tensorflow/lite/micro/tools/layer_by_layer.cc index 91d325e51c7..b72517523fa 100644 --- a/tensorflow/lite/micro/tools/layer_by_layer.cc +++ b/tensorflow/lite/micro/tools/layer_by_layer.cc @@ -120,6 +120,9 @@ TfLiteStatus ConvertTensorType(TfLiteType type, TensorTypes& tensor_type) { case kTfLiteInt4: tensor_type = TensorTypes_INT4; return kTfLiteOk; + case kTfLiteInt2: + tensor_type = TensorTypes_INT2; + return kTfLiteOk; case kTfLiteNoType: MicroPrintf("Unsupported data type %d in tensor\n", tensor_type); return kTfLiteError; diff --git a/tensorflow/lite/micro/tools/layer_by_layer_schema.fbs b/tensorflow/lite/micro/tools/layer_by_layer_schema.fbs index 4183c9cf52c..b788399a839 100644 --- a/tensorflow/lite/micro/tools/layer_by_layer_schema.fbs +++ b/tensorflow/lite/micro/tools/layer_by_layer_schema.fbs @@ -35,6 +35,7 @@ enum TensorTypes : byte { UINT16 = 16, INT4 = 17, BFLOAT16 = 18, + INT2 = 19, } table TensorData { diff --git a/tensorflow/lite/micro/tools/layer_by_layer_schema_generated.h b/tensorflow/lite/micro/tools/layer_by_layer_schema_generated.h index 67a2caa7850..25f101bc183 100644 --- a/tensorflow/lite/micro/tools/layer_by_layer_schema_generated.h +++ b/tensorflow/lite/micro/tools/layer_by_layer_schema_generated.h @@ -59,11 +59,12 @@ enum TensorTypes : int8_t { TensorTypes_UINT16 = 16, TensorTypes_INT4 = 17, TensorTypes_BFLOAT16 = 18, + TensorTypes_INT2 = 19, TensorTypes_MIN = TensorTypes_FLOAT32, - TensorTypes_MAX = TensorTypes_BFLOAT16 + TensorTypes_MAX = TensorTypes_INT2 }; -inline const TensorTypes (&EnumValuesTensorTypes())[19] { +inline const TensorTypes (&EnumValuesTensorTypes())[20] { static const TensorTypes values[] = { TensorTypes_FLOAT32, TensorTypes_FLOAT16, @@ -83,13 +84,14 @@ inline const TensorTypes 
(&EnumValuesTensorTypes())[19] { TensorTypes_UINT32, TensorTypes_UINT16, TensorTypes_INT4, - TensorTypes_BFLOAT16 + TensorTypes_BFLOAT16, + TensorTypes_INT2 }; return values; } inline const char * const *EnumNamesTensorTypes() { - static const char * const names[20] = { + static const char * const names[21] = { "FLOAT32", "FLOAT16", "INT32", @@ -109,13 +111,14 @@ inline const char * const *EnumNamesTensorTypes() { "UINT16", "INT4", "BFLOAT16", + "INT2", nullptr }; return names; } inline const char *EnumNameTensorTypes(TensorTypes e) { - if (::flatbuffers::IsOutRange(e, TensorTypes_FLOAT32, TensorTypes_BFLOAT16)) return ""; + if (::flatbuffers::IsOutRange(e, TensorTypes_FLOAT32, TensorTypes_INT2)) return ""; const size_t index = static_cast<size_t>(e); return EnumNamesTensorTypes()[index]; } diff --git a/tensorflow/lite/python/schema_py_generated.py b/tensorflow/lite/python/schema_py_generated.py index 5fb12737d43..648cfd043af 100755 --- a/tensorflow/lite/python/schema_py_generated.py +++ b/tensorflow/lite/python/schema_py_generated.py @@ -27,6 +27,7 @@ class TensorType(object): UINT16 = 16 INT4 = 17 BFLOAT16 = 18 + INT2 = 19 class QuantizationDetails(object): diff --git a/tensorflow/lite/schema/schema_generated.h b/tensorflow/lite/schema/schema_generated.h index 1f055d2045f..35dd124b367 100755 --- a/tensorflow/lite/schema/schema_generated.h +++ b/tensorflow/lite/schema/schema_generated.h @@ -703,11 +703,12 @@ enum TensorType : int8_t { TensorType_UINT16 = 16, TensorType_INT4 = 17, TensorType_BFLOAT16 = 18, + TensorType_INT2 = 19, TensorType_MIN = TensorType_FLOAT32, - TensorType_MAX = TensorType_BFLOAT16 + TensorType_MAX = TensorType_INT2 }; -inline const TensorType (&EnumValuesTensorType())[19] { +inline const TensorType (&EnumValuesTensorType())[20] { static const TensorType values[] = { TensorType_FLOAT32, TensorType_FLOAT16, @@ -727,13 +728,14 @@ inline const TensorType (&EnumValuesTensorType())[19] { TensorType_UINT32, TensorType_UINT16, TensorType_INT4, - 
TensorType_BFLOAT16 + TensorType_BFLOAT16, + TensorType_INT2 }; return values; } inline const char * const *EnumNamesTensorType() { - static const char * const names[20] = { + static const char * const names[21] = { "FLOAT32", "FLOAT16", "INT32", @@ -753,13 +755,14 @@ inline const char * const *EnumNamesTensorType() { "UINT16", "INT4", "BFLOAT16", + "INT2", nullptr }; return names; } inline const char *EnumNameTensorType(TensorType e) { - if (::flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_BFLOAT16)) return ""; + if (::flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_INT2)) return ""; const size_t index = static_cast<size_t>(e); return EnumNamesTensorType()[index]; } diff --git a/tensorflow/lite/tools/visualize.py b/tensorflow/lite/tools/visualize.py index de7ef820079..cd4bcfa7aaf 100644 --- a/tensorflow/lite/tools/visualize.py +++ b/tensorflow/lite/tools/visualize.py @@ -33,7 +33,7 @@ from tflite_micro.tensorflow.lite.python import schema_py_generated as schema_fb else: # This file is part of tflite_runtime package. - from tflite_runtime import schema_py_generated as schema_fb + from tflite_micro.tensorflow.lite_runtime import schema_py_generated as schema_fb # A CSS description for making the visualizer _CSS = """