From bb4ed4551b46c45697bbc557c8fbd2165ee6ffbb Mon Sep 17 00:00:00 2001 From: Nick Semenkovich Date: Fri, 14 Jul 2023 06:13:12 -0500 Subject: [PATCH 1/2] Expand two-color SBS definitions Expanded parsing of Illumina 2-color SBS definitions for poly-g trimming. These values are via: https://knowledge.illumina.com/instrumentation/general/instrumentation-general-reference_material-list/000003880 This expands the previous 2-color list by adding: NextSeq 1000/2000 (@VL @VH) NovaSeq X Plus (@LH) This changes the NovaSeq 6000 header from @A0 to @A per Illumina's doc. (I do not see @NDX documented by Illumina, but this might be their NextSeq 550Dx FDA-regulated sequencer.) --- src/evaluator.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/evaluator.cpp b/src/evaluator.cpp index 48bca94f..ea1620b5 100644 --- a/src/evaluator.cpp +++ b/src/evaluator.cpp @@ -21,8 +21,13 @@ bool Evaluator::isTwoColorSystem() { if(!r) return false; - // NEXTSEQ500, NEXTSEQ 550/550DX, NOVASEQ - if(starts_with(r->mName, "@NS") || starts_with(r->mName, "@NB") || starts_with(r->mName, "@NDX") || starts_with(r->mName, "@A0")) { + // Via https://knowledge.illumina.com/instrumentation/general/instrumentation-general-reference_material-list/000003880 + // NEXTSEQ 500/550: @NS @NB + // ? NEXTSEQ 550DX: @NDX + // NEXTSEQ 1000/2000: @VL @VH + // NOVASEQ 6000: @A + // NOVASEQ X PLUS: @LH + if(starts_with(r->mName, "@NS") || starts_with(r->mName, "@NB") || starts_with(r->mName, "@NDX") || starts_with(r->mName, "@VL") || starts_with(r->mName, "@VH") || starts_with(r->mName, "@A") || starts_with(r->mName, "@LH")) { delete r; return true; } From b3c14e0de01739dde208dec8c5fca95864998e75 Mon Sep 17 00:00:00 2001 From: Nick Semenkovich Date: Mon, 19 Jan 2026 11:42:14 -0600 Subject: [PATCH 2/2] Improve and expand sequencer list for poly-g trimming Simplify the poly-g trimming test and add modern sequencers that use two-channel chemistry and would benefit from polyG tail trimming.
Signed-off-by: Nick Semenkovich --- src/evaluator.cpp | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/evaluator.cpp b/src/evaluator.cpp index b821e8d4..99b0eb92 100644 --- a/src/evaluator.cpp +++ b/src/evaluator.cpp @@ -21,15 +21,23 @@ bool Evaluator::isTwoColorSystem() { if(!r) return false; - // Via https://knowledge.illumina.com/instrumentation/general/instrumentation-general-reference_material-list/000003880 - // NEXTSEQ 500/550: @NS @NB - // ? NEXTSEQ 550DX: @NDX - // NEXTSEQ 1000/2000: @VL @VH - // NOVASEQ 6000: @A - // NOVASEQ X PLUS: @LH - if(starts_with(r->mName, "@NS") || starts_with(r->mName, "@NB") || starts_with(r->mName, "@NDX") || starts_with(r->mName, "@VL") || starts_with(r->mName, "@VH") || starts_with(r->mName, "@A") || starts_with(r->mName, "@LH")) { - delete r; - return true; + // Two-color system instrument prefixes + // See https://github.com/OpenGene/fastp/pull/508 for a detailed discussion + const vector<string> twoColorPrefixes = { + "@FS", // iSeq 100 + "@MN", "@SH", // MiniSeq + "@NS", "@NB", // NextSeq 500/550 + "@NDX", // NextSeq 550DX + "@VL", "@VH", // NextSeq 1000/2000 + "@A", "@NA", // NovaSeq 6000 + "@LH" // NovaSeq X + }; + + for (const string& prefix : twoColorPrefixes) { + if (starts_with(r->mName, prefix)) { + delete r; + return true; + } } delete r;