Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions libcob/ChangeLog
Original file line number Diff line number Diff line change
@@ -1,4 +1,25 @@

2026-04-10 Denis Hugonnard-Roche <dhugonnard@users.sourceforge.net>

* intrinsic.c (setup_cob_pi, cob_exit_intrinsic): add cob_half_pi
constant
* intrinsic.c (cob_normalize_angle): simplify & optimize, use
cob_halp_pi
* intrinsic.c (cob_mpf_xxxx: sin, cos, acos, asin, atan): use cob_half_pi

2026-04-04 Denis Hugonnard-Roche <dhugonnard@users.sourceforge.net>

* intrinsic.c (cob_normalize_angle): new_function
* intrinsic.c (cob_mpf_cos): better performabce with use of
cob_normalize_angle
* intrinsic.c (cob_mpf_sin): better performabce with use of
cob_mpf_sin

2026-03-02 Fabrice Le Fessant <fabrice.le_fessant@ocamlpro.com>

* coblocal.h, common.c, profiling.c: rename is_test to cob_is_test
as it is an external value.

2025-12-04 Simon Sobisch <simonsobisch@gnu.org>

* fileio.c (indexed_open) [WITH_DB]: if open was successful but checking
Expand Down
219 changes: 138 additions & 81 deletions libcob/intrinsic.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/*
Copyright (C) 2005-2012, 2014-2024 Free Software Foundation, Inc.
Written by Roger While, Simon Sobisch, Edward Hart, Brian Tiffin
Copyright (C) 2005-2012, 2014-2026 Free Software Foundation, Inc.
Written by Roger While, Simon Sobisch, Edward Hart, Brian Tiffin,
Denis Hugonnard-Roche

This file is part of GnuCOBOL.

Expand Down Expand Up @@ -100,6 +101,7 @@ static mpf_t cob_mpft2;
static mpf_t cob_mpft_get;

static mpf_t cob_pi;
static mpf_t cob_half_pi;
static mpf_t cob_sqrt_two;
static mpf_t cob_log_half;
static mpf_t cob_log_ten;
Expand Down Expand Up @@ -370,6 +372,8 @@ static const struct winlocale wintable[] =

#endif

#define COB_COS_DIVIDE_FACTOR 16

static COB_NOINLINE void
setup_cob_pi (void)
{
Expand Down Expand Up @@ -397,6 +401,11 @@ setup_cob_pi (void)

mpf_init2 (cob_pi, COB_PI_LEN);
mpf_set_str (cob_pi, cob_pi_str, 10);

mpf_init2 (cob_half_pi, COB_PI_LEN);
mpf_set (cob_half_pi, cob_pi);
mpf_div_ui (cob_half_pi, cob_pi, 2UL);

set_cob_pi = 1;
}

Expand Down Expand Up @@ -994,102 +1003,150 @@ cob_mpf_log10 (mpf_t dst_val, const mpf_t src_val)
mpf_clear (dst_temp);
}

/* Sin function */
/* sin (x) = (reduce to pi/2) */
/* {n = 0, ...} ( (-1 ^ n) * ( x ^ (2n + 1)) / (2n + 1) ) */
/* Takes an angle X as input and calculates the angle Y reduced to [0;P2/2] */
/* such that abs( cos(X) ) = abs( cos(Y) ) and abs( sin(X) ) = abs( sin(Y) ) */
/* and returns the quadrant to which the initial angle belongs (from 0 to 3) */

static void
cob_mpf_sin (mpf_t dst_val, const mpf_t src_val)
static cob_u16_t
cob_normalize_angle (mpf_t dst, const mpf_t src_val)
{
mpf_t vf1, vf2, vf3, vf4, vf5;
mpf_t dst_temp;
cob_uli_t arcquad;
cob_uli_t n;
int sign;
mpf_t vf1;
mpf_t k ;
mpz_t q ;
unsigned long n ;

mpf_init2 (dst_temp, COB_MPF_PREC);
if (!set_cob_pi) setup_cob_pi ();
const int sign = mpf_sgn (src_val);
if (sign == 0) {
mpf_set_ui (dst, 0UL);
return 0 ;
}

mpf_init2 (vf1, COB_MPF_PREC);
mpf_init2 (vf2, COB_MPF_PREC);
mpf_init2 (vf3, COB_MPF_PREC);
mpf_init2 (vf4, COB_MPF_PREC);
mpf_init2 (vf5, COB_MPF_PREC);
sign = mpf_sgn (src_val);

mpf_abs (vf4, src_val);
mpf_set (vf3, cob_pi);
mpf_div_2exp (vf3, vf3, 1UL);
mpf_div (vf1, vf4, vf3);
mpf_floor (vf4, vf1);

if (mpf_cmp_ui (vf4, 4UL) >= 0) {
mpf_div_2exp (vf2, vf4, 2UL);
mpf_floor (vf2, vf2);
mpf_mul_2exp (vf2, vf2, 2UL);
mpf_sub (vf2, vf4, vf2);
} else {
mpf_set (vf2, vf4);
mpf_set (dst, src_val);
if (sign == -1) {
mpf_neg (dst, dst);
}
/* Now dst contains abs(src_val) */

arcquad = mpf_get_ui (vf2);
mpf_sub (vf2, vf1, vf4);
mpf_mul (vf4, vf3, vf2);
mpf_init2 (vf1, COB_MPF_PREC);
mpf_init2 (k, COB_MPF_PREC);
mpz_init2 (q, COB_MPZ_DEF);

/* Get Quadrant */
mpf_div (vf1, dst, cob_half_pi);
mpf_trunc (k, vf1);
mpz_set_f(q, k);
mpz_mod_ui (q, q, 4UL);
n = mpz_get_ui (q);

/* Compute the resulting angle reduce on [0;PI/2] */
/* dst - k*Pi/2 */
mpf_mul (vf1, k, cob_half_pi);
mpf_sub (dst, dst, vf1);

if (arcquad > 1) {
sign = -sign;
/* Process quadrant */
if (n & 1) {
/* if n odd we have to get the complementary angle */
mpf_sub (dst, cob_half_pi, dst);
}
if (arcquad & 1) {
mpf_sub (vf4, vf3, vf4);

if (sign == -1) {
n = 3 - n ;
}

mpf_mul (vf3, vf4, vf4);
mpf_neg (vf3, vf3);
mpf_clear (vf1);
mpf_clear (k);
mpz_clear (q);

n = 1;
mpf_set_ui (vf2, 1UL);
mpf_set_ui (dst_temp, 1UL);
return n;
}

/* Cos function */
/* cos(x) = ( 1 - x^2/2 + x^4/4! - x^6/6! ...*/
/* (x) = (reduced to pi/2) */
/* The angle is divided by 2^COB_COS_DIVIDE to make it closer to 0 */
/* We use the equality cos(2X) = 2*cos^2(X)-1 to have the final value*/

static void
cob_mpf_cos (mpf_t dst_val, const mpf_t src_val)
{
mpf_t term, val_serie, angle;
cob_u16_t arcquad;
cob_uli_t n;

if (!set_cob_pi) setup_cob_pi ();

mpf_init2 (term, COB_MPF_PREC);
mpf_init2 (val_serie, COB_MPF_PREC);
mpf_init2 (angle, COB_MPF_PREC);

arcquad = cob_normalize_angle (angle, src_val);

/* Divide nn time by 2 */
mpf_div_2exp (angle, angle, COB_COS_DIVIDE_FACTOR );

mpf_mul (angle, angle, angle);

mpf_set_si (term, -1L);

/* compute first term */
mpf_set (dst_val, angle);
mpf_div_ui (dst_val, dst_val, 2UL);
mpf_neg (dst_val, dst_val);
mpf_set (term, dst_val); /* init first term to -x^2/2 */
mpf_add_ui (dst_val, dst_val, 1UL);

n = 4;
do {
++n;
mpf_div_ui (vf2, vf2, n);
++n;
mpf_div_ui (vf2, vf2, n);
mpf_mul (vf2, vf2, vf3);
mpf_set (vf5, dst_temp);
mpf_add (dst_temp, dst_temp, vf2);
} while (!mpf_eq (vf5, dst_temp, COB_MPF_PREC));
const cob_uli_t j = (n-1UL) * n;

mpf_mul (dst_temp, dst_temp, vf4);
if (sign < 0) {
mpf_neg (dst_temp, dst_temp);
mpf_set (val_serie, dst_val);
mpf_mul (term, term, angle);
mpf_div_ui (term, term, j);
Comment thread
GitMensch marked this conversation as resolved.
mpf_neg (term, term);

mpf_add (dst_val, dst_val, term);

n += 2;
} while (!mpf_eq (dst_val, val_serie, COB_MPF_PREC));

/* compute 2*n^2 -1 n times */
{
int i ;
for (i = 0 ; i < COB_COS_DIVIDE_FACTOR; i++) {
mpf_mul (dst_val, dst_val, dst_val);

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is the use of a temp variable (term is already allocated and can be re-used here) as a destination with a final mpf_set (dst_val, term) at the end faster? This possibly reduces pagefaults, if this is the case then the same (with a different pre-initialized var) may be useful in the do..while loop above as well.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No mesurable differences :


====================
TEMP VAR
====================

 Performance counter stats for './test_cos_perf':

     4 529 506 456      task-clock                       #    1,000 CPUs utilized             
                59      context-switches                 #   13,026 /sec                      
                19      cpu-migrations                   #    4,195 /sec                      
               312      page-faults                      #   68,882 /sec                      
    49 595 367 964      instructions                     #    2,71  insn per cycle            
                                                  #    0,01  stalled cycles per insn     (71,44%)
    18 285 994 582      cycles                           #    4,037 GHz                         (71,44%)
       723 136 269      stalled-cycles-frontend          #    3,95% frontend cycles idle        (71,44%)
     3 666 946 142      branches                         #  809,569 M/sec                       (71,44%)
        15 033 589      branch-misses                    #    0,41% of all branches             (71,44%)
    15 626 081 389      L1-dcache-loads                  #    3,450 G/sec                       (71,41%)
         5 524 211      L1-dcache-load-misses            #    0,04% of all L1-dcache accesses   (71,41%)

       4,550282821 seconds time elapsed

       4,548419000 seconds user
       0,002000000 seconds sys

====================
NOT TEMP VAR
====================

 Performance counter stats for './test_cos_perf':

     4 548 744 078      task-clock                       #    1,000 CPUs utilized             
                47      context-switches                 #   10,333 /sec                      
                 5      cpu-migrations                   #    1,099 /sec                      
               312      page-faults                      #   68,590 /sec                      
    49 572 959 453      instructions                     #    2,70  insn per cycle            
                                                  #    0,01  stalled cycles per insn     (71,42%)
    18 332 746 462      cycles                           #    4,030 GHz                         (71,43%)
       740 182 750      stalled-cycles-frontend          #    4,04% frontend cycles idle        (71,43%)
     3 672 331 368      branches                         #  807,329 M/sec                       (71,43%)
        15 160 288      branch-misses                    #    0,41% of all branches             (71,45%)
    15 559 261 023      L1-dcache-loads                  #    3,421 G/sec                       (71,44%)
         4 821 360      L1-dcache-load-misses            #    0,03% of all L1-dcache accesses   (71,42%)

       4,550326491 seconds time elapsed

       4,546552000 seconds user
       0,003000000 seconds sys

mpf_mul_ui (dst_val, dst_val, 2UL);

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a difference in cpu instructions when replacing this one by mpf_mul_2exp (dst_val, dst_val, 1)?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tests done , using mul_ui is a little bit faster

====================
MUL UI
====================
 

 Performance counter stats for './test_cos_perf':

     4 593 703 213      task-clock                       #    1,000 CPUs utilized             
                47      context-switches                 #   10,231 /sec                      
                 7      cpu-migrations                   #    1,524 /sec                      
               312      page-faults                      #   67,919 /sec                      
    49 576 433 528      instructions                     #    2,70  insn per cycle            
                                                  #    0,01  stalled cycles per insn     (71,40%)
    18 331 911 765      cycles                           #    3,991 GHz                         (71,44%)
       731 980 015      stalled-cycles-frontend          #    3,99% frontend cycles idle        (71,44%)
     3 672 416 906      branches                         #  799,446 M/sec                       (71,44%)
        15 151 690      branch-misses                    #    0,41% of all branches             (71,44%)
    15 558 760 741      L1-dcache-loads                  #    3,387 G/sec                       (71,44%)
         4 835 861      L1-dcache-load-misses            #    0,03% of all L1-dcache accesses   (71,39%)

       4,595219614 seconds time elapsed

       4,593533000 seconds user
       0,001000000 seconds sys


 
====================
MUL_2EXP
====================

 Performance counter stats for './test_cos_perf':

     4 625 403 700      task-clock                       #    1,000 CPUs utilized             
                45      context-switches                 #    9,729 /sec                      
                 6      cpu-migrations                   #    1,297 /sec                      
               312      page-faults                      #   67,454 /sec                      
    49 589 222 149      instructions                     #    2,71  insn per cycle            
                                                  #    0,01  stalled cycles per insn     (71,42%)
    18 292 760 949      cycles                           #    3,955 GHz                         (71,42%)
       739 615 391      stalled-cycles-frontend          #    4,04% frontend cycles idle        (71,42%)
     3 666 989 588      branches                         #  792,793 M/sec                       (71,43%)
        15 404 884      branch-misses                    #    0,42% of all branches             (71,45%)
    15 619 091 449      L1-dcache-loads                  #    3,377 G/sec                       (71,44%)
         5 375 435      L1-dcache-load-misses            #    0,03% of all L1-dcache accesses   (71,42%)

       4,626889177 seconds time elapsed

       4,624220000 seconds user
       0,002000000 seconds sys

mpf_sub_ui (dst_val, dst_val, 1UL);
}
}

mpf_set (dst_val, dst_temp);
mpf_clear (dst_temp);
if (arcquad == 1 || arcquad == 2) {
mpf_neg (dst_val, dst_val);
}

mpf_clear (vf5);
mpf_clear (vf4);
mpf_clear (vf3);
mpf_clear (vf2);
mpf_clear (vf1);
mpf_clear (term);
mpf_clear (angle);
mpf_clear (val_serie);
}

/* Cos function */
/* cos (x) = sin ((pi / 2) - x) */
/* sin function */
/* sin(X) = cos( PI/2 - X) */

static void
cob_mpf_cos (mpf_t dst_val, const mpf_t src_val)
cob_mpf_sin (mpf_t dst_val, const mpf_t src_val)
{
mpf_t vf1;
mpf_t vf1;

mpf_init2 (vf1, COB_MPF_PREC);
if (!set_cob_pi) setup_cob_pi ();

mpf_set (vf1, cob_pi);
mpf_div_2exp (vf1, vf1, 1UL);
mpf_sub (vf1, vf1, src_val);
cob_mpf_sin (dst_val, vf1);
mpf_init2 (vf1, COB_MPF_PREC);

/* compute complementary angle = PI/2 - Angle */
mpf_set (vf1, cob_half_pi);
if (mpf_cmp_ui (src_val, 0UL) != 0) {
mpf_sub (vf1, vf1, src_val);
}

cob_mpf_cos (dst_val, vf1);

mpf_clear (vf1);
}
Expand Down Expand Up @@ -1136,15 +1193,14 @@ cob_mpf_atan (mpf_t dst_val, const mpf_t src_val)
mpf_add_ui (vf3, cob_sqrt_two, 1UL);

if (mpf_cmp (vf1, vf3) > 0) {
mpf_set (dst_temp, cob_pi);
mpf_div_2exp (dst_temp, dst_temp, 1UL);
mpf_set (dst_temp, cob_half_pi);
mpf_ui_div (vf1, 1UL, vf1);
mpf_neg (vf1, vf1);
} else {
mpf_sub_ui (vf4, cob_sqrt_two, 1UL);
if (mpf_cmp (vf1, vf4) > 0) {
mpf_set (dst_temp, cob_pi);
mpf_div_2exp (dst_temp, dst_temp, 2UL);
mpf_set (dst_temp, cob_half_pi);
mpf_div_2exp (dst_temp, dst_temp, 1UL);
mpf_sub_ui (vf3, vf1, 1UL);
mpf_add_ui (vf4, vf1, 1UL);
mpf_div (vf1, vf3, vf4);
Expand Down Expand Up @@ -1192,8 +1248,8 @@ cob_mpf_asin (mpf_t dst_val, const mpf_t src_val)
if (!set_cob_pi) setup_cob_pi ();

if (!mpf_cmp_ui (src_val, 1UL) || !mpf_cmp_si (src_val, -1L)) {
mpf_set (dst_temp, cob_pi);
mpf_div_ui (dst_temp, dst_temp, 2UL);
mpf_set (dst_temp, cob_half_pi);
mpf_div_ui (dst_temp, dst_temp, 1UL);
if (mpf_sgn (src_val) < 0) {
mpf_neg (dst_temp, dst_temp);
}
Expand Down Expand Up @@ -1240,8 +1296,8 @@ cob_mpf_acos (mpf_t dst_val, const mpf_t src_val)
if (!set_cob_pi) setup_cob_pi ();

if (!mpf_sgn (src_val)) {
mpf_set (dst_temp, cob_pi);
mpf_div_ui (dst_temp, dst_temp, 2UL);
mpf_set (dst_temp, cob_half_pi);
mpf_div_ui (dst_temp, dst_temp, 1UL);
mpf_set (dst_val, dst_temp);
mpf_clear (dst_temp);
return;
Expand Down Expand Up @@ -7189,6 +7245,7 @@ cob_exit_intrinsic (void)
}
if (set_cob_pi) {
mpf_clear (cob_pi);
mpf_clear (cob_half_pi);
}
if (set_cob_log_half) {
mpf_clear (cob_log_half);
Expand Down
Loading
Loading