Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion deps/histogram/include/hdr/hdr_histogram_version.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@
#ifndef HDR_HISTOGRAM_VERSION_H
#define HDR_HISTOGRAM_VERSION_H

#define HDR_HISTOGRAM_VERSION "0.11.9"
#define HDR_HISTOGRAM_VERSION "0.11.10"

#endif // HDR_HISTOGRAM_VERSION_H
118 changes: 102 additions & 16 deletions deps/histogram/src/hdr_histogram.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,25 @@

#include HDR_MALLOC_INCLUDE

/* Prefetch hint for upcoming write access */
#if defined(__GNUC__) || defined(__clang__)
# define HDR_PREFETCH_WRITE(addr) __builtin_prefetch((addr), 1, 3)
# define HDR_LIKELY(x) __builtin_expect(!!(x), 1)
# define HDR_UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
# define HDR_PREFETCH_WRITE(addr) ((void)(addr))
# define HDR_LIKELY(x) (x)
# define HDR_UNLIKELY(x) (x)
#endif

/* Runtime-dispatched AVX2 path: keep the rest of this TU at the project's
baseline ISA so the shipped binary does not silently require AVX2. */
#if (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)) \
&& (defined(__GNUC__) || defined(__clang__)) && !defined(__INTEL_COMPILER)
# define HDR_HAS_AVX2_DISPATCH 1
# include <immintrin.h>
#endif

/* ###### ####### ## ## ## ## ######## ###### */
/* ## ## ## ## ## ## ### ## ## ## ## */
/* ## ## ## ## ## #### ## ## ## */
Expand Down Expand Up @@ -67,24 +86,48 @@ static int64_t counts_get_normalised(const struct hdr_histogram* h, int32_t inde
static void counts_inc_normalised(
struct hdr_histogram* h, int32_t index, int64_t value)
{
int32_t normalised_index = normalize_index(h, index);
h->counts[normalised_index] += value;
if (HDR_LIKELY(h->normalizing_index_offset == 0))
{
HDR_PREFETCH_WRITE(&h->counts[index]);
h->counts[index] += value;
}
else
{
int32_t normalised_index = normalize_index(h, index);
HDR_PREFETCH_WRITE(&h->counts[normalised_index]);
h->counts[normalised_index] += value;
}
h->total_count += value;
}

static void counts_inc_normalised_atomic(
struct hdr_histogram* h, int32_t index, int64_t value)
{
int32_t normalised_index = normalize_index(h, index);

hdr_atomic_add_fetch_64(&h->counts[normalised_index], value);
if (HDR_LIKELY(h->normalizing_index_offset == 0))
{
HDR_PREFETCH_WRITE(&h->counts[index]);
hdr_atomic_add_fetch_64(&h->counts[index], value);
}
else
{
int32_t normalised_index = normalize_index(h, index);
HDR_PREFETCH_WRITE(&h->counts[normalised_index]);
hdr_atomic_add_fetch_64(&h->counts[normalised_index], value);
}
hdr_atomic_add_fetch_64(&h->total_count, value);
}

static void update_min_max(struct hdr_histogram* h, int64_t value)
{
h->min_value = (value < h->min_value && value != 0) ? value : h->min_value;
h->max_value = (value > h->max_value) ? value : h->max_value;
if (HDR_UNLIKELY(value > h->max_value))
{
h->max_value = value;
}

if (HDR_UNLIKELY(value != 0 && value < h->min_value))
{
h->min_value = value;
}
}

static void update_min_max_atomic(struct hdr_histogram* h, int64_t value)
Expand Down Expand Up @@ -498,7 +541,7 @@ bool hdr_record_values(struct hdr_histogram* h, int64_t value, int64_t count)
}

counts_index = counts_index_for(h, value);
if (counts_index < 0 || h->counts_len <= counts_index)
if ((uint32_t)counts_index >= (uint32_t)h->counts_len)
{
return false;
}
Expand All @@ -520,7 +563,7 @@ bool hdr_record_values_atomic(struct hdr_histogram* h, int64_t value, int64_t co

counts_index = counts_index_for(h, value);

if (counts_index < 0 || h->counts_len <= counts_index)
if ((uint32_t)counts_index >= (uint32_t)h->counts_len)
{
return false;
}
Expand Down Expand Up @@ -665,22 +708,65 @@ int64_t hdr_min(const struct hdr_histogram* h)
return non_zero_min(h);
}

static int64_t get_value_from_idx_up_to_count(const struct hdr_histogram* h, int64_t count_at_percentile)
static int64_t get_value_from_idx_up_to_count_scalar(
const struct hdr_histogram* h, int64_t count_at_percentile)
{
int64_t count_to_idx = 0;

count_at_percentile = 0 < count_at_percentile ? count_at_percentile : 1;
for (int32_t idx = 0; idx < h->counts_len; idx++)
{
for (int32_t idx = 0; idx < h->counts_len; idx++) {
count_to_idx += h->counts[idx];
if (count_to_idx >= count_at_percentile)
{
return hdr_value_at_index(h, idx);
}
}
return 0;
}

#ifdef HDR_HAS_AVX2_DISPATCH
__attribute__((target("avx2")))
static int64_t get_value_from_idx_up_to_count_avx2(
const struct hdr_histogram* h, int64_t count_at_percentile)
{
int64_t running = 0;
int32_t idx = 0;
const int32_t limit = h->counts_len & ~3;

for (; idx < limit; idx += 4) {
__m256i v = _mm256_loadu_si256((const __m256i*)&h->counts[idx]);
__m128i lo = _mm256_castsi256_si128(v);
__m128i hi = _mm256_extracti128_si256(v, 1);
__m128i s = _mm_add_epi64(lo, hi);
/* Lanes are non-negative counts whose total fits in int64_t (total_count
invariant), so the chunk sum cannot overflow under valid state. Use
unsigned add to avoid signed-overflow UB if invariants are violated. */
int64_t chunk = (int64_t)((uint64_t)_mm_extract_epi64(s, 0)
+ (uint64_t)_mm_extract_epi64(s, 1));

if (__builtin_expect(running + chunk >= count_at_percentile, 0)) {
for (int32_t j = idx; j < idx + 4; j++) {
running += h->counts[j];
if (running >= count_at_percentile)
return hdr_value_at_index(h, j);
}
}
running += chunk;
}
for (; idx < h->counts_len; idx++) {
running += h->counts[idx];
if (running >= count_at_percentile)
return hdr_value_at_index(h, idx);
}
return 0;
}
#endif

static int64_t get_value_from_idx_up_to_count(const struct hdr_histogram* h, int64_t count_at_percentile)
{
count_at_percentile = count_at_percentile > 0 ? count_at_percentile : 1;
#ifdef HDR_HAS_AVX2_DISPATCH
if (__builtin_cpu_supports("avx2"))
return get_value_from_idx_up_to_count_avx2(h, count_at_percentile);
#endif
return get_value_from_idx_up_to_count_scalar(h, count_at_percentile);
}


int64_t hdr_value_at_percentile(const struct hdr_histogram* h, double percentile)
Expand Down
Loading