Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@ fastp

# Test Output
*.json
*.html
*.html
11 changes: 4 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ DIR_OBJ := ./obj

PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin
INCLUDE_DIRS ?=
LIBRARY_DIRS ?=
INCLUDE_DIRS ?=
LIBRARY_DIRS ?=

SRC := $(wildcard ${DIR_SRC}/*.cpp)
OBJ := $(patsubst %.cpp,${DIR_OBJ}/%.o,$(notdir ${SRC}))
Expand All @@ -16,7 +16,7 @@ BIN_TARGET := ${TARGET}

CXX ?= g++
CXXFLAGS := -std=c++11 -pthread -g -O3 -MD -MP -I. -I${DIR_INC} $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir)) ${CXXFLAGS}
LIBS := -lisal -ldeflate -lhwy -lpthread
LIBS := -lisal -ldeflate -lzstd -lhwy -lpthread

UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
Expand All @@ -27,10 +27,7 @@ else
FIND_STATIC = $(firstword $(foreach d,$(LIBRARY_DIRS),$(wildcard $(d)/lib$(1).a)) $(wildcard /usr/local/lib/lib$(1).a /opt/homebrew/lib/lib$(1).a))
STATIC_LIBS :=
DYNAMIC_LIBS :=
$(foreach lib,isal deflate hwy,\
$(if $(call FIND_STATIC,$(lib)),\
$(eval STATIC_LIBS += $(call FIND_STATIC,$(lib))),\
$(eval DYNAMIC_LIBS += -l$(lib))))
$(foreach lib,isal deflate zstd hwy, $(if $(call FIND_STATIC,$(lib)), $(eval STATIC_LIBS += $(call FIND_STATIC,$(lib))), $(eval DYNAMIC_LIBS += -l$(lib))))
LD_FLAGS := $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir)) $(STATIC_LIBS) $(DYNAMIC_LIBS) -lpthread $(LD_FLAGS)
endif

Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ If you use fastp in your work, you can cite fastp as: *Shifu Chen. fastp 1.0: A
11. support long reads (data from PacBio / Nanopore devices).
12. support reading from STDIN and writing to STDOUT
13. support interleaved input
14. support reading Zstandard-compressed FASTQ/FASTA files (`.zst` / `.zstd`)
14. support ultra-fast FASTQ-level deduplication
15. ...

Expand Down Expand Up @@ -182,6 +183,7 @@ make -j INCLUDE_DIRS=/opt/homebrew/include LIBRARY_DIRS=/opt/homebrew/lib
* for PE data, you should also specify read2 input by `-I` or `--in2`, and specify read2 output by `-O` or `--out2`.
* if you don't specify the output file names, no output files will be written, but the QC will still be done for both data before and after filtering.
* the output will be gzip-compressed if its file name ends with `.gz`
* the input can be gzip-compressed (`.gz`) or Zstandard-compressed (`.zst`, `.zstd`); compression is auto-detected from the extension
## output to STDOUT
`fastp` supports streaming the passing-filter reads to STDOUT, so that it can be passed to other compressors like `bzip2`, or be passed to aligners like `bwa` and `bowtie2`.
* specify `--stdout` to enable this mode to stream output to STDOUT
Expand Down
121 changes: 115 additions & 6 deletions src/fastqreader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,24 @@ FastqReader::FastqReader(string filename, bool hasQuality, bool phred64){
mHasNoLineBreakAtEnd = false;
mGzipInputUsedBytes = 0;
mReadPool = NULL;
mUseZstd = false;
mZstdFinished = false;
mZstdStream = NULL;
mZstdInput.src = NULL;
mZstdInput.size = 0;
mZstdInput.pos = 0;
mZstdInputUsedBytes = 0;
init();
}

FastqReader::~FastqReader(){
close();
delete[] mFastqBuf;
delete[] mGzipInputBuffer;
if(mZstdStream){
ZSTD_freeDStream(mZstdStream);
mZstdStream = NULL;
}
}

bool FastqReader::hasNoLineBreakAtEnd() {
Expand All @@ -69,11 +80,13 @@ void FastqReader::setReadPool(ReadPool* rp) {


bool FastqReader::bufferFinished() {
if(mZipped) {
return eof() && mGzipState.avail_in == 0;
} else {
if(!mZipped)
return eof();
}

if(mUseZstd)
return mZstdFinished && mZstdInput.pos == mZstdInput.size;

return eof() && mGzipState.avail_in == 0;
}

void FastqReader::readToBufIgzip(){
Expand Down Expand Up @@ -139,10 +152,67 @@ void FastqReader::readToBufIgzip(){
}
}

void FastqReader::readToBufZstd(){
mBufDataLen = 0;
if(mZstdFinished)
return;

ZSTD_outBuffer outBuf;
outBuf.dst = mFastqBuf;
outBuf.size = mGzipOutputBufferSize;
outBuf.pos = 0;

while(outBuf.pos == 0){
if(mZstdInput.pos == mZstdInput.size){
size_t readBytes = fread(mGzipInputBuffer, 1, mGzipInputBufferSize, mFile);
if(readBytes == 0){
if(eof()){
mZstdFinished = true;
break;
} else {
error_exit("zstd: read error on file: " + mFilename);
}
}
mZstdInput.src = mGzipInputBuffer;
mZstdInput.size = readBytes;
mZstdInput.pos = 0;
mZstdInputUsedBytes += readBytes;
}

size_t ret = ZSTD_decompressStream(mZstdStream, &outBuf, &mZstdInput);
if(ZSTD_isError(ret)){
error_exit("zstd: decompression error for file: " + mFilename + ", error: " + string(ZSTD_getErrorName(ret)));
}

if(ret == 0){
if(mZstdInput.pos < mZstdInput.size || !eof()){
size_t resetRet = ZSTD_initDStream(mZstdStream);
if(ZSTD_isError(resetRet)){
error_exit("zstd: failed to reset stream for file: " + mFilename + ", error: " + string(ZSTD_getErrorName(resetRet)));
}
} else {
mZstdFinished = true;
}
}

if(eof() && mZstdInput.pos == mZstdInput.size && ret != 0){
error_exit("zstd: unexpected eof found in file: " + mFilename);
}

if(mZstdFinished || outBuf.pos > 0)
break;
}

mBufDataLen = outBuf.pos;
}

void FastqReader::readToBuf() {
mBufDataLen = 0;
if(mZipped) {
readToBufIgzip();
if(mUseZstd)
readToBufZstd();
else
readToBufIgzip();
} else {
if(!eof())
mBufDataLen = fread(mFastqBuf, 1, FQ_BUF_SIZE, mFile);
Expand Down Expand Up @@ -173,6 +243,26 @@ void FastqReader::init(){
}
mZipped = true;
}
else if (ends_with(mFilename, ".zst") || ends_with(mFilename, ".zstd")){
mFile = fopen(mFilename.c_str(), "rb");
if(mFile == NULL) {
error_exit("Failed to open file: " + mFilename);
}
mZstdStream = ZSTD_createDStream();
if(mZstdStream == NULL) {
error_exit("zstd: failed to allocate decompressor for file: " + mFilename);
}
size_t ret = ZSTD_initDStream(mZstdStream);
if(ZSTD_isError(ret)){
error_exit("zstd: failed to init decompressor for file: " + mFilename + ", error: " + string(ZSTD_getErrorName(ret)));
}
mZipped = true;
mUseZstd = true;
mZstdFinished = false;
mZstdInput.src = mGzipInputBuffer;
mZstdInput.size = 0;
mZstdInput.pos = 0;
}
else {
if(mFilename == "/dev/stdin") {
mFile = stdin;
Expand All @@ -189,7 +279,10 @@ void FastqReader::init(){

void FastqReader::getBytes(size_t& bytesRead, size_t& bytesTotal) {
if(mZipped) {
bytesRead = mGzipInputUsedBytes - mGzipState.avail_in;
if(mUseZstd)
bytesRead = mZstdInputUsedBytes - (mZstdInput.size - mZstdInput.pos);
else
bytesRead = mGzipInputUsedBytes - mGzipState.avail_in;
} else {
bytesRead = ftell(mFile);//mFile.tellg();
}
Expand Down Expand Up @@ -362,6 +455,22 @@ bool FastqReader::isZipFastq(string filename) {
return true;
else if (ends_with(filename, ".fa.gz"))
return true;
else if (ends_with(filename, ".fastq.zst"))
return true;
else if (ends_with(filename, ".fq.zst"))
return true;
else if (ends_with(filename, ".fastq.zstd"))
return true;
else if (ends_with(filename, ".fq.zstd"))
return true;
else if (ends_with(filename, ".fasta.zst"))
return true;
else if (ends_with(filename, ".fa.zst"))
return true;
else if (ends_with(filename, ".fasta.zstd"))
return true;
else if (ends_with(filename, ".fa.zstd"))
return true;
else
return false;
}
Expand Down
7 changes: 7 additions & 0 deletions src/fastqreader.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ SOFTWARE.
#else
#include "igzip_lib.h"
#endif
#include <zstd.h>
#include "readpool.h"

class FastqReader{
Expand Down Expand Up @@ -65,6 +66,7 @@ class FastqReader{
void clearLineBreaks(char* line);
void readToBuf();
void readToBufIgzip();
void readToBufZstd();
bool bufferFinished();

private:
Expand All @@ -87,6 +89,11 @@ class FastqReader{
bool mHasQuality;
bool mPhred64;
ReadPool* mReadPool;
bool mUseZstd;
bool mZstdFinished;
ZSTD_DStream* mZstdStream;
ZSTD_inBuffer mZstdInput;
size_t mZstdInputUsedBytes;

};

Expand Down