Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion benchmark/Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,31 @@
###############################################################################
# Copyright (c) 2025, The OpenBLAS Project
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# 3. Neither the name of the OpenBLAS project nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###############################################################################

TOPDIR = ..
include $(TOPDIR)/Makefile.system

Expand Down Expand Up @@ -56,7 +84,7 @@ GOTO_LAPACK_TARGETS=
endif

ifeq ($(BUILD_BFLOAT16),1)
GOTO_BFLOAT_TARGETS=sbgemm.goto
GOTO_BFLOAT_TARGETS=bgemm.goto sbgemm.goto
else
GOTO_BFLOAT_TARGETS=
endif
Expand Down Expand Up @@ -635,6 +663,8 @@ zcholesky.essl : zcholesky.$(SUFFIX)

##################################### Sgemm ####################################################
ifeq ($(BUILD_BFLOAT16),1)
bgemm.goto : bgemm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
sbgemm.goto : sbgemm.$(SUFFIX) ../$(LIBNAME)
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
endif
Expand Down Expand Up @@ -2970,6 +3000,8 @@ zcholesky.$(SUFFIX) : cholesky.c
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^

ifeq ($(BUILD_BFLOAT16),1)
bgemm.$(SUFFIX) : gemm.c
$(CC) $(CFLAGS) -c -DBFLOAT16 -DBGEMM -UCOMPLEX -UDOUBLE -o $(@F) $^
sbgemm.$(SUFFIX) : gemm.c
$(CC) $(CFLAGS) -c -DBFLOAT16 -UCOMPLEX -UDOUBLE -o $(@F) $^
endif
Expand Down
12 changes: 12 additions & 0 deletions benchmark/gemm.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifdef DOUBLE
#define GEMM BLASFUNC(dgemm)
#elif defined(BFLOAT16) && defined(BGEMM)
#define GEMM BLASFUNC(bgemm)
#elif defined(BFLOAT16)
#define GEMM BLASFUNC(sbgemm)
#undef IFLOAT
Expand Down Expand Up @@ -60,8 +62,18 @@ int main(int argc, char *argv[]){

IFLOAT *a, *b;
FLOAT *c;
#ifdef BGEMM
blasint one=1;
blasint two=2;
float alpha_in[] = {1.0, 0.0};
float beta_in[] = {0.0, 0.0};
FLOAT alpha[2], beta[2];
sbstobf16_(&two, alpha_in, &one, alpha, &one);
sbstobf16_(&two, beta_in, &one, beta, &one);
#else
FLOAT alpha[] = {1.0, 0.0};
FLOAT beta [] = {0.0, 0.0};
#endif
char transa = 'N';
char transb = 'N';
blasint m, n, k, i, j, lda, ldb, ldc;
Expand Down
14 changes: 10 additions & 4 deletions common_b.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,16 @@
#define COMMON_B_H

#ifndef DYNAMIC_ARCH
#define BGEMM_ONCOPY bgemm_oncopy
#define BGEMM_OTCOPY bgemm_otcopy
#define BGEMM_INCOPY bgemm_incopy
#define BGEMM_ITCOPY bgemm_itcopy
#define BGEMM_ONCOPY bgemm_oncopy
#define BGEMM_OTCOPY bgemm_otcopy

#if BGEMM_DEFAULT_UNROLL_M == BGEMM_DEFAULT_UNROLL_N
#define BGEMM_INCOPY bgemm_oncopy
#define BGEMM_ITCOPY bgemm_otcopy
#else
#define BGEMM_INCOPY bgemm_incopy
#define BGEMM_ITCOPY bgemm_itcopy
#endif

#define BGEMM_BETA bgemm_beta
#define BGEMM_KERNEL bgemm_kernel
Expand Down
30 changes: 29 additions & 1 deletion interface/Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,31 @@
###############################################################################
# Copyright (c) 2025, The OpenBLAS Project
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# 3. Neither the name of the OpenBLAS project nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###############################################################################

TOPDIR = ..
include $(TOPDIR)/Makefile.system

Expand Down Expand Up @@ -526,7 +554,7 @@ ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif

FUNCOBJS = $(SBEXTOBJS) $(CXERBLAOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(SHBLASOBJS)
FUNCOBJS = $(SBEXTOBJS) $(CXERBLAOBJS) $(BBLASOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(SHBLASOBJS)

ifeq ($(EXPRECISION), 1)
FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS)
Expand Down
21 changes: 20 additions & 1 deletion kernel/Makefile.L3
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,10 @@ ZBLASOBJS += \
endif

ifeq ($(BUILD_BFLOAT16), 1)
BGEMMINCOPYOBJ_P = $(BGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
BGEMMITCOPYOBJ_P = $(BGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
BGEMMONCOPYOBJ_P = $(BGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
BGEMMOTCOPYOBJ_P = $(BGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
SBGEMMINCOPYOBJ_P = $(SBGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
SBGEMMITCOPYOBJ_P = $(SBGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
SBGEMMONCOPYOBJ_P = $(SBGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
Expand Down Expand Up @@ -2998,6 +3002,20 @@ $(KDIR)xgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMM_BETA)


ifeq ($(BUILD_BFLOAT16), 1)
$(BGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(BGEMMONCOPY)
$(CC) $(PFLAGS) -c -DBFLOAT16 -DBGEMM -UDOUBLE -UCOMPLEX $< -o $@

$(BGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(BGEMMOTCOPY)
$(CC) $(PFLAGS) -c -DBFLOAT16 -DBGEMM -UDOUBLE -UCOMPLEX $< -o $@

ifneq ($(BGEMM_UNROLL_M), $(BGEMM_UNROLL_N))
$(BGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(BGEMMINCOPY)
$(CC) $(PFLAGS) -c -DBFLOAT16 -DBGEMM -UDOUBLE -UCOMPLEX $< -o $@

$(BGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(BGEMMITCOPY)
$(CC) $(PFLAGS) -c -DBFLOAT16 -DBGEMM -UDOUBLE -UCOMPLEX $< -o $@
endif

$(SBGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMONCOPY)
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@

Expand All @@ -3010,7 +3028,6 @@ $(SBGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMINCOPY)

$(SBGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMITCOPY)
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@

endif
endif

Expand Down Expand Up @@ -3137,6 +3154,8 @@ endif


ifeq ($(BUILD_BFLOAT16), 1)
$(KDIR)bgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(BGEMMKERNEL) $(BGEMMDEPEND)
$(CC) $(PFLAGS) -c -DBFLOAT16 -DBGEMM -UDOUBLE -UCOMPLEX $< -o $@
$(KDIR)sbgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND)
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
endif
Expand Down
43 changes: 42 additions & 1 deletion kernel/arm64/KERNEL.NEOVERSEV1
Original file line number Diff line number Diff line change
@@ -1,10 +1,51 @@
###############################################################################
# Copyright (c) 2025, The OpenBLAS Project
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# 3. Neither the name of the OpenBLAS project nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###############################################################################

include $(KERNELDIR)/KERNEL.ARMV8SVE

SGEMVNKERNEL = gemv_n_sve_v1x3.c
DGEMVNKERNEL = gemv_n_sve_v1x3.c
SGEMVTKERNEL = gemv_t_sve_v1x3.c
DGEMVTKERNEL = gemv_t_sve_v1x3.c
ifeq ($(BUILD_BFLOAT16), 1)
BGEMM_BETA = bgemm_beta_neon.c
BGEMMKERNEL = bgemm_kernel_$(BGEMM_UNROLL_M)x$(BGEMM_UNROLL_N)_neoversev1.c
ifneq ($(BGEMM_UNROLL_M), $(BGEMM_UNROLL_N))
BGEMMINCOPY = sbgemm_ncopy_$(SBGEMM_UNROLL_M)_neoversev1.c
BGEMMITCOPY = sbgemm_tcopy_$(SBGEMM_UNROLL_M)_neoversev1.c
BGEMMINCOPYOBJ = bgemm_incopy$(TSUFFIX).$(SUFFIX)
BGEMMITCOPYOBJ = bgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif
BGEMMONCOPY = sbgemm_ncopy_$(BGEMM_UNROLL_N)_neoversev1.c
BGEMMOTCOPY = sbgemm_tcopy_$(BGEMM_UNROLL_N)_neoversev1.c
BGEMMONCOPYOBJ = bgemm_oncopy$(TSUFFIX).$(SUFFIX)
BGEMMOTCOPYOBJ = bgemm_otcopy$(TSUFFIX).$(SUFFIX)

SBGEMM_BETA = sbgemm_beta_neoversev1.c
SBGEMMKERNEL = sbgemm_kernel_$(SBGEMM_UNROLL_M)x$(SBGEMM_UNROLL_N)_neoversev1.c
ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))
Expand All @@ -21,4 +62,4 @@ SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
SBGEMVNKERNEL = sbgemv_n_neon.c
SBGEMVTKERNEL = sbgemv_t_bfdot.c

endif
endif
107 changes: 107 additions & 0 deletions kernel/arm64/bgemm_beta_neon.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/***************************************************************************
* Copyright (c) 2025, The OpenBLAS Project
* All rights reserved.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* 3. Neither the name of the OpenBLAS project nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* *****************************************************************************/

#include "common.h"

#include <arm_neon.h>

int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta_in, IFLOAT *dummy2,
BLASLONG dummy3, IFLOAT *dummy4, BLASLONG dummy5, FLOAT *c,
BLASLONG ldc) {
BLASLONG i, j;
BLASLONG chunk, remain;

bfloat16_t *ptr_c, *ptr_c0;

bfloat16x8_t x0, z0;
float32x4_t y0, y1;

float x;
bfloat16_t z;

bfloat16_t zero_bf16 = vcvth_bf16_f32(0.0f);
bfloat16x8_t zeros = vdupq_n_bf16(zero_bf16);

bfloat16_t beta_bf16;
memcpy(&beta_bf16, &beta_in, sizeof(bfloat16_t));
float beta = vcvtah_f32_bf16(beta_bf16);
float32x4_t beta_neon = vdupq_n_f32(beta);

ptr_c = (bfloat16_t *)c;

chunk = m >> 3;
remain = m & 7;

if (beta == 0.0f){
for (j = 0; j < n; j ++){
ptr_c0 = ptr_c;
ptr_c += ldc;

for (i = 0; i < chunk; i ++){
vst1q_bf16(ptr_c0, zeros);
ptr_c0 += 8;
}

for (i = 0; i < remain; i ++){
ptr_c0[0] = zero_bf16;
ptr_c0 ++;
}
}
} else {
for (j = 0; j < n; j ++){
ptr_c0 = ptr_c;
ptr_c += ldc;

for (i = 0; i < chunk; i ++){
x0 = vld1q_bf16(ptr_c0);

y0 = vcvtq_low_f32_bf16(x0);
y1 = vcvtq_high_f32_bf16(x0);

y0 = vmulq_f32(y0, beta_neon);
y1 = vmulq_f32(y1, beta_neon);

z0 = vcvtq_low_bf16_f32(y0);
z0 = vcvtq_high_bf16_f32(z0, y1);

vst1q_bf16(ptr_c0, z0);

ptr_c0 += 8;
}

for (i = 0; i < remain; i ++){
x = vcvtah_f32_bf16(ptr_c0[0]);
z = vcvth_bf16_f32(x * beta);

ptr_c0[0] = z;
ptr_c0 ++;
}
}
}
return 0;
};
Loading
Loading