From b3faeb346ef2d90ab0f03713bd161d2ccbd47309 Mon Sep 17 00:00:00 2001 From: Rob Davies Date: Wed, 17 Dec 2025 14:54:58 +0000 Subject: [PATCH] Ensure indirect function calls have the correct type. Some HTSlib interfaces (notably kgetline(), kgetline2(), hts_itr_querys() and hts_parse_region()) have function callbacks that are intended to be generic, so the function signatures include a `void *` for data to be passed in. E.g. for kgetline() this would be the FILE * or hFILE * to read. Historically they have often been called with a function that has an explicit type (e.g. FILE * for fgets() used with kgetline) on the assumption that casting it to void * is harmless. Unfortunately doing this is strictly undefined behaviour and will fail in some conditions - for example if compiling with control flow integrity turned on. Recent versions of clang will also call this out when using undefined behaviour sanitizer. The solution is to create wrapper functions that do take a void *, and call the intended function with suitable casts. Some new interfaces are also added so that the messy implementation details can be hidden away, and the inputs can be better type checked. Hopefully this should also allow the wrapper functions to be inlined away. * khgetline() to read lines from an hFILE * to a kstring. * kfgetline() to read lines from a FILE * to a kstring. * tbx_itr_querys1() to create a tabix iterator on a region. Used to reimplement the existing tbx_itr_querys() macro. * bcf_itr_querys1() to create a region iterator an a bcf file Used to reimplement the existing bcf_itr_querys() macro. --- faidx.c | 2 +- header.c | 2 +- hfile.c | 13 +++++++++++++ hfile_libcurl.c | 2 +- hfile_s3.c | 5 ++--- hts.c | 7 ++++++- htslib/hfile.h | 13 +++++++++++++ htslib/kstring.h | 4 ++++ htslib/tbx.h | 5 ++++- htslib/vcf.h | 6 +++++- kstring.c | 13 +++++++++++++ sam.c | 15 ++++++++++----- tbx.c | 11 +++++++++++ vcf.c | 14 +++++++++++++- 14 files changed, 97 insertions(+), 15 deletions(-) diff --git a/faidx.c b/faidx.c index ed39c0ca0..c7fc022a8 100644 --- a/faidx.c +++ b/faidx.c @@ -1022,7 +1022,7 @@ const char *fai_parse_region(const faidx_t *fai, const char *s, int *tid, hts_pos_t *beg, hts_pos_t *end, int flags) { - return hts_parse_region(s, tid, beg, end, (hts_name2id_f)fai_name2id, (void *)fai, flags); + return hts_parse_region(s, tid, beg, end, fai_name2id, (void *)fai, flags); } void fai_set_cache_size(faidx_t *fai, int cache_size) { diff --git a/header.c b/header.c index ec5f266b9..1b4fbc2a0 100644 --- a/header.c +++ b/header.c @@ -1439,7 +1439,7 @@ int sam_hdr_build_from_sam_file(sam_hdr_t *hdr, htsFile* fp) { if (f == NULL) goto error; - while (line.l = 0, kgetline(&line, (kgets_func*) hgets, f) >= 0) { + while (line.l = 0, khgetline(&line, f) >= 0) { char* tab = strchr(line.s, '\t'); size_t l_start = str.l; sam_hrec_type_t *h_type; diff --git a/hfile.c b/hfile.c index 044f0d4bc..097085353 100644 --- a/hfile.c +++ b/hfile.c @@ -297,6 +297,19 @@ char *hgets(char *buffer, int size, hFILE *fp) return hgetln(buffer, size, fp) > 0 ? buffer : NULL; } +// Wrap around hgets() to get the right signature for kgets_func +static char *hgets_wrapper(char *buffer, int size, void *fp) +{ + return hgets(buffer, size, (hFILE *) fp); +} + +int khgetline(struct kstring_t *kstr, hFILE *fp) +{ + if (!kstr || !fp) + return EOF; + return kgetline(kstr, hgets_wrapper, fp); +} + ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes) { size_t n = fp->end - fp->begin; diff --git a/hfile_libcurl.c b/hfile_libcurl.c index 0ca15d0e9..d3872c42a 100644 --- a/hfile_libcurl.c +++ b/hfile_libcurl.c @@ -517,7 +517,7 @@ static int read_auth_plain(auth_token *tok, hFILE *auth_fp) { kstring_t token = {0, 0, NULL}; const char *start, *end; - if (kgetline(&line, (char * (*)(char *, int, void *)) hgets, auth_fp) < 0) goto error; + if (khgetline(&line, auth_fp) < 0) goto error; if (kputc('\0', &line) < 0) goto error; for (start = line.s; *start && isspace_c(*start); start++) {} diff --git a/hfile_s3.c b/hfile_s3.c index c7797a2a2..30de86e27 100644 --- a/hfile_s3.c +++ b/hfile_s3.c @@ -249,7 +249,6 @@ static FILE *expand_tilde_open(const char *fname, const char *mode) return fp; } - static void parse_ini(const char *fname, const char *section, ...) { kstring_t line = { 0, 0, NULL }; @@ -259,7 +258,7 @@ static void parse_ini(const char *fname, const char *section, ...) FILE *fp = expand_tilde_open(fname, "r"); if (fp == NULL) return; - while (line.l = 0, kgetline(&line, (kgets_func *) fgets, fp) >= 0) + while (line.l = 0, kfgetline(&line, fp) >= 0) if (line.s[0] == '[' && (s = strchr(line.s, ']')) != NULL) { *s = '\0'; active = (strcmp(&line.s[1], section) == 0); @@ -301,7 +300,7 @@ static void parse_simple(const char *fname, kstring_t *id, kstring_t *secret) FILE *fp = expand_tilde_open(fname, "r"); if (fp == NULL) return; - while (kgetline(&text, (kgets_func *) fgets, fp) >= 0) + while (kfgetline(&text, fp) >= 0) kputc(' ', &text); fclose(fp); diff --git a/hts.c b/hts.c index 14134a01f..eb1c78566 100644 --- a/hts.c +++ b/hts.c @@ -2027,6 +2027,11 @@ off_t hts_utell(htsFile *fp) return htell(fp->fp.hfile); } +// Wrap hgetln() with a kgets_func2 signature for kgetline2() +static ssize_t hgetln_wrapper(char *buf, size_t len, void *vfp) { + return hgetln(buf, len, (hFILE *) vfp); +} + int hts_getline(htsFile *fp, int delimiter, kstring_t *str) { int ret; @@ -2038,7 +2043,7 @@ int hts_getline(htsFile *fp, int delimiter, kstring_t *str) switch (fp->format.compression) { case no_compression: str->l = 0; - ret = kgetline2(str, (kgets_func2 *) hgetln, fp->fp.hfile); + ret = kgetline2(str, hgetln_wrapper, fp->fp.hfile); if (ret >= 0) ret = (str->l <= INT_MAX)? (int) str->l : INT_MAX; else if (herrno(fp->fp.hfile)) ret = -2, errno = herrno(fp->fp.hfile); else ret = -1; diff --git a/htslib/hfile.h b/htslib/hfile.h index e851faf43..cbf6bf68e 100644 --- a/htslib/hfile.h +++ b/htslib/hfile.h @@ -211,6 +211,19 @@ kstring's `kgetline()` to read arbitrarily-long lines into a _kstring_t_. HTSLIB_EXPORT char *hgets(char *buffer, int size, hFILE *fp) HTS_RESULT_USED; +/// Read a line from a stream and append it to a kstring +/** @param kstr The destination kstring + @param fp The file stream + @return 0 on success; EOF on end of file or if an error occurred. + @since 1.24 + +Reads a "\n"- or "\r\n"- terminated line from fp into kstr. +The line read is appended without its terminator and 0 is returned; +EOF is returned at end of file or on error. +*/ +HTSLIB_EXPORT +int khgetline(struct kstring_t *kstr, hFILE *fp) HTS_RESULT_USED; + /// Peek at characters to be read without removing them from buffers /** @param fp The file stream @param buffer The buffer to which the peeked bytes will be written diff --git a/htslib/kstring.h b/htslib/kstring.h index 51c6e4ba3..d8b7afea8 100644 --- a/htslib/kstring.h +++ b/htslib/kstring.h @@ -129,6 +129,10 @@ extern "C" { HTSLIB_EXPORT int kgetline(kstring_t *s, kgets_func *fgets_fn, void *fp); + /* Convenience function to call kgetline with a FILE * */ + HTSLIB_EXPORT + int kfgetline(kstring_t *s, FILE *fp); + /* kgetline2() uses the supplied hgetln()-like function to read a "\n"- * or "\r\n"-terminated line from fp. The line read is appended to the * ksring without its terminator and 0 is returned; EOF is returned at diff --git a/htslib/tbx.h b/htslib/tbx.h index f4b5bd856..8b5ac1e4b 100644 --- a/htslib/tbx.h +++ b/htslib/tbx.h @@ -58,10 +58,13 @@ extern const tbx_conf_t tbx_conf_gff, tbx_conf_bed, tbx_conf_psltbl, tbx_conf_sa #define tbx_itr_destroy(iter) hts_itr_destroy(iter) #define tbx_itr_queryi(tbx, tid, beg, end) hts_itr_query((tbx)->idx, (tid), (beg), (end), tbx_readrec) - #define tbx_itr_querys(tbx, s) hts_itr_querys((tbx)->idx, (s), (hts_name2id_f)(tbx_name2id), (tbx), hts_itr_query, tbx_readrec) + #define tbx_itr_querys(tbx, s) tbx_itr_querys1((tbx), (s)) #define tbx_itr_next(htsfp, tbx, itr, r) hts_itr_next(hts_get_bgzfp(htsfp), (itr), (r), (tbx)) #define tbx_bgzf_itr_next(bgzfp, tbx, itr, r) hts_itr_next((bgzfp), (itr), (r), (tbx)) + HTSLIB_EXPORT + hts_itr_t *tbx_itr_querys1(tbx_t *tbx, const char *region); + HTSLIB_EXPORT int tbx_name2id(tbx_t *tbx, const char *ss); diff --git a/htslib/vcf.h b/htslib/vcf.h index 36d45ad82..256201790 100644 --- a/htslib/vcf.h +++ b/htslib/vcf.h @@ -1318,7 +1318,11 @@ set to one of BCF_ERR* codes and must be checked before calling bcf_write(). #define bcf_itr_destroy(iter) hts_itr_destroy(iter) #define bcf_itr_queryi(idx, tid, beg, end) hts_itr_query((idx), (tid), (beg), (end), bcf_readrec) - #define bcf_itr_querys(idx, hdr, s) hts_itr_querys((idx), (s), (hts_name2id_f)(bcf_hdr_name2id), (hdr), hts_itr_query, bcf_readrec) + #define bcf_itr_querys(idx, hdr, s) bcf_itr_querys1((idx), (hdr), (s)) + + HTSLIB_EXPORT + hts_itr_t *bcf_itr_querys1(const hts_idx_t *idx, bcf_hdr_t *hdr, + const char *region); static inline int bcf_itr_next(htsFile *htsfp, hts_itr_t *itr, void *r) { if (htsfp->is_bgzf) diff --git a/kstring.c b/kstring.c index c6432573f..60b7763aa 100644 --- a/kstring.c +++ b/kstring.c @@ -290,6 +290,19 @@ int kgetline(kstring_t *s, kgets_func *fgets_fn, void *fp) return 0; } +// Wrap around fgets to get the right signature for kgets_func +static char * fgets_wrapper(char *buffer, int size, void *stream) +{ + return fgets(buffer, size, (FILE *) stream); +} + +int kfgetline(kstring_t *s, FILE *fp) +{ + if (!s || !fp) + return EOF; + return kgetline(s, fgets_wrapper, fp); +} + int kgetline2(kstring_t *s, kgets_func2 *fgets_fn, void *fp) { size_t l0 = s->l; diff --git a/sam.c b/sam.c index 42327006b..56deec42e 100644 --- a/sam.c +++ b/sam.c @@ -414,9 +414,14 @@ int bam_hdr_write(BGZF *fp, const sam_hdr_t *h) return 0; } +// Wrap around bam_name2id() to get the right signature for hts_name2id_f +static int bam_name2id_wrapper(void *vhdr, const char *ref) { + return bam_name2id((sam_hdr_t *) vhdr, ref); +} + const char *sam_parse_region(sam_hdr_t *h, const char *s, int *tid, hts_pos_t *beg, hts_pos_t *end, int flags) { - return hts_parse_region(s, tid, beg, end, (hts_name2id_f)bam_name2id, h, flags); + return hts_parse_region(s, tid, beg, end, bam_name2id_wrapper, h, flags); } /************************* @@ -1755,7 +1760,7 @@ static int cram_name2id(void *fdv, const char *ref) hts_itr_t *sam_itr_querys(const hts_idx_t *idx, sam_hdr_t *hdr, const char *region) { const hts_cram_idx_t *cidx = (const hts_cram_idx_t *) idx; - return hts_itr_querys(idx, region, (hts_name2id_f)(bam_name2id), hdr, + return hts_itr_querys(idx, region, bam_name2id_wrapper, hdr, cidx->fmt == HTS_FMT_CRAI ? cram_itr_query : hts_itr_query, sam_readrec); } @@ -1777,10 +1782,10 @@ hts_itr_t *sam_itr_regarray(const hts_idx_t *idx, sam_hdr_t *hdr, char **regarra itr = hts_itr_regions(idx, r_list, r_count, cram_name2id, cidx->cram, hts_itr_multi_cram, cram_readrec, cram_pseek, cram_ptell); } else { - r_list = hts_reglist_create(regarray, regcount, &r_count, hdr, (hts_name2id_f)(bam_name2id)); + r_list = hts_reglist_create(regarray, regcount, &r_count, hdr, bam_name2id_wrapper); if (!r_list) return NULL; - itr = hts_itr_regions(idx, r_list, r_count, (hts_name2id_f)(bam_name2id), hdr, + itr = hts_itr_regions(idx, r_list, r_count, bam_name2id_wrapper, hdr, hts_itr_multi_bam, sam_readrec, bam_pseek, bam_ptell); } @@ -1801,7 +1806,7 @@ hts_itr_t *sam_itr_regions(const hts_idx_t *idx, sam_hdr_t *hdr, hts_reglist_t * return hts_itr_regions(idx, reglist, regcount, cram_name2id, cidx->cram, hts_itr_multi_cram, cram_readrec, cram_pseek, cram_ptell); else - return hts_itr_regions(idx, reglist, regcount, (hts_name2id_f)(bam_name2id), hdr, + return hts_itr_regions(idx, reglist, regcount, bam_name2id_wrapper, hdr, hts_itr_multi_bam, sam_readrec, bam_pseek, bam_ptell); } diff --git a/tbx.c b/tbx.c index dfb9c6313..61721dc3a 100644 --- a/tbx.c +++ b/tbx.c @@ -640,3 +640,14 @@ const char **tbx_seqnames(tbx_t *tbx, int *n) return names; } +// Wrap around tbx_name2id() to get the right signature for hts_name2id_f +static int tbx_name2id_wrapper(void *vhdr, const char *ref) +{ + return tbx_name2id((tbx_t *) vhdr, ref); +} + +hts_itr_t *tbx_itr_querys1(tbx_t *tbx, const char *region) +{ + return hts_itr_querys(tbx->idx, region, tbx_name2id_wrapper, tbx, + hts_itr_query, tbx_readrec); +} diff --git a/vcf.c b/vcf.c index 3566fa62c..544fe8c01 100644 --- a/vcf.c +++ b/vcf.c @@ -2618,7 +2618,7 @@ bcf_hdr_t *vcf_hdr_read(htsFile *fp) hts_log_error("Couldn't open \"%s\"", fp->fn_aux); goto error; } - while (tmp.l = 0, kgetline(&tmp, (kgets_func *) hgets, f) >= 0) { + while (tmp.l = 0, khgetline(&tmp, f) >= 0) { char *tab = strchr(tmp.s, '\t'); if (tab == NULL) continue; e |= (kputs("##contig=